`L iYddlZddlZddlZddlZddlZddlZddlZddlmZddl m Z ddl m Z ddl mZmZddlmZddlmZddlmZddlZddlZdd lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%dd l&m'Z'm(Z(m)Z)m*Z*m+Z+dd l,m-Z-dd l.m/Z/dd l0m1Z1GddZ2dZ3ejhddZ5ejhddZ6ejhdZ7ejhdZ8ejrjudde e2gdZ;dZdZ?ejrjuddgddgfd Z@ejrjud!d"d#d$gd%fd&d'd(gd)fd*d+d,d-d.gfgd/ZAd0ZBejrjud1d2id3d4gfd5id3gfd6d7d8id9d:gfgd;ZCd<ZDd=ZEd>ZFd?ZGejrjud@edAdBdCdDdEgfe%dFdGdHdDgfe!dIdJdHdDdEgfe"dKdKdHdDdLdMgfedNdOddDgfedPdQd4dDgfeedRSdTdUd4dDgfgdVZHejrjudWeejeJfeejejfeejeJfe!ejeJfe"ejejfe%ejeJfgdXZKdYZLdZZMd[ZNd\ZOd]ZPd^ZQd_ZRd`ZSdaZTdbZUy)cN)partial) resources)Path)dumpsloadsMock) HTTPError)urlparse) clear_data_home fetch_file get_data_homeload_breast_cancer load_diabetes load_digits load_files load_iris load_linnerudload_sample_imageload_sample_images load_wine)RemoteFileMetadata$_derive_folder_and_filename_from_url _fetch_remote load_csv_dataload_gzip_compressed_csv_datacheck_as_frame)scale)BunchceZdZdZdZdZy) _DummyPathz8Minimal class that implements the os.PathLike interface.c||_yNpath)selfr&s f/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/sklearn/datasets/tests/test_base.py__init__z_DummyPath.__init__0s  c|jSr$r%)r's r( __fspath__z_DummyPath.__fspath__3s yyr*N)__name__ __module__ __qualname____doc__r)r,r*r(r"r"-sBr*r"cntjj|rtj|yyr$)osr&isdirshutilrmtreer%s r( _remove_dirr77s# ww}}T dr*module)scopec#^Kt|jd}|t|yw)Nscikit_learn_data_home_teststrmktempr7tmpdir_factorytmp_files r( data_homerB<s)>(()FGHH N+-c#^Kt|jd}|t|yw)Nscikit_learn_load_files_testr<r?s r(load_files_rootrFCs)>(()GHIH NrCc#Ktj|}tj|d}|jd|j t |t |yw)NdirF)rIdelete Hello World! )tempfilemkdtempNamedTemporaryFilewritecloser=r7)rFtest_category_dir1 sample_files r(test_category_dir_1rSJsZ!))o>--2DUSK'(  !!"#sA(A*c#hKtj|}t|t|yw)NrH)rLrMr=r7)rFtest_category_dir2s r(test_category_dir_2rVTs,!))o>  !!"#s02path_containercH|||}t|}||k(sJtjj|sJ|||}t |tjj|rJt|}tjj|sJy)N)rB)rr3r&existsr )rWrBs r(test_data_homerZ[s!"9-  2I  !! ! 77>>) $$ $!"9- i(ww~~i(( ( 2I 77>>) $$ $r*ct|}t|jdk(sJt|jdk(sJ|jJy)Nr)rlen filenames target_namesDESCR)rFress r(test_default_empty_load_filesraosL _ %C s}}  "" " s A %% % 99  r*ct|}t|jdk(sJt|jdk(sJ|jJ|j dgk(sJy)NrK)rr\r]r^r_datarSrVrFr`s r(test_default_load_filesrgvsb _ %C s}}  "" " s A %% % 99   88)* ** *r*cftjj|jtjj }t |d|gd}t|jdk(sJt|jdk(sJ|jdk(sJ|jdgk(sJy)Ntestutf-8) description categoriesencodingrcz Hello World! ) r3r&abspathsplitseppoprr\r]r^r_re)rSrVrFcategoryr`s r(.test_load_files_w_categories_desc_and_encodingrs~sww2399"&&AEEGH V W C s}}  "" " s A %% % 99   88() )) )r*ct|d}t|jdk(sJt|jdk(sJ|jJ|j dJy)NF) load_contentrcrdre)rr\r]r^r_getrfs r(test_load_files_wo_load_contentrwsd _5 9C s}}  "" " s A %% % 99   776? "" "r*allowed_extensionsz.txtz.jsoncF|dz }|jd}|Dcgc]}||z  }}|D]}|jdt||}t|Dcgc]}|j|vst |c}t|j k(sJycc}wcc}w)z;Check the behaviour of `allowed_extension` in `load_files`.sub)z file1.txtz file2.jsonz file3.jsonzfile4.mdshello)rxN)mkdir write_bytesrsetsuffixr=r])tmp_pathrxdfilesfpathspr`s r("test_load_files_allowed_extensionsrs 5AGGI AE! "qQU "E "   h X2D EC H15G)GAH IS N   #Is BB, BzHfilename, expected_n_samples, expected_n_features, expected_target_namesz wine_data.csv )class_0class_1class_2iris.csv)setosa versicolor virginicazbreast_cancer.csv9 malignantbenignct|\}}}|jd|k(sJ|jd|k(sJ|jd|k(sJtjj ||y)Nrrc)rshapenptestingassert_array_equal)filenameexpected_n_samplesexpected_n_featuresexpected_target_names actual_data actual_targetactual_target_namess r(test_load_csv_datars{7DH6M3K 3   Q #5 55 5   Q #6 66 6   q !%7 77 7JJ!!"57LMr*cd}d}t|}t||}t|dk(sJt|dk(sJtjj |d|dtjj |d|dtjj |d |d |d j d sJy) Nrziris.rstdata_file_namerdescr_file_namerrrcrdz.. _iris_dataset:)rr\rrr startswith)rrres_without_descrres_with_descrs r(test_load_csv_data_with_descrrsN O%^D"%N ~ ! ## #  !Q && &JJ!!."35Fq5IJJJ!!."35Fq5IJJJ!!."35Fq5IJ "  ( ()< == =r*z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz diabetes_target.csv.gzz digits.csv.gz delimiter,AcPt|fi|}|jt|k(sJyr$)rrtuple)rkwargsexpected_shapers r("test_load_gzip_compressed_csv_datars,0CFCK   n 5 55 5r*cd}d}t|}t||\}}tjj|||j dsJy)Nrz diabetes.rstrrz.. _diabetes_dataset:)rrrrr)rr expected_datardescrs r(-test_load_gzip_compressed_csv_data_with_descrrsV-N$O1PM6%'K JJ!!+}=   3 44 4r*c& t}t|jdk(sJt|jdk(sJ|j}t j |dddddft j gdtjk(sJt j |dddddft j gdtjk(sJ|jsJy#t$rtjdYywxYw)Nrdr))dtyperc)rdr3Could not load sample images, PIL is not available.) rr\imagesr]rallarrayuint8r_ ImportErrorwarningswarn)r`rs r(test_load_sample_imagesrs M "3::!###3==!Q&&&vvfQi1a(BHH_BHH,UUVVVvvfQi1a(BHH[,QQRRRyyy M KLMsC,C//DDc td}|jdk(sJ|jdk(sJy#t$rt j dYywxYw)Nz china.jpgr)iirr)rrrrrr)chinas r(test_load_sample_imagersTM!+.{{g%%%{{m+++ M KLMs-0AAcptd}|jjdk(sJ|jjdk(sJt |j dk(sJ|jsJt}tjjt|jdz |jdy ) zTest to check that we load a scaled version by default but that we can get an unscaled version when setting `scaled=False`.F)scaledrrrrgT5@g-C6?)atolN) rrertargetsizer\ feature_namesr_rrassert_allcloser) diabetes_rawdiabetes_defaults r(test_load_diabetes_rawrs!.L    " "i // /    # #s ** * |)) *b 00 0    $JJ l H-/?/D/D5r*zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)rr)rrdTr)rr)rr)rr)r)r data_filenametarget_filenamer)r)r@)r )n_class)Qr)rc |}t|tsJ|jj|k(sJ|jj|k(sJt |drt |j|dk(sJ|t |j|k(sJ|r|jsJ|rRd|vsJt|Dcgc]4}||vxr,tj|d||z j6c}sJyycc}w)Nrrc data_module) isinstancer rerrhasattrr\rr^r_rrris_file) loader_func data_shape target_shapen_target has_descrr]bunchrs r( test_loaderr s( ME eU ## # ::  z )) ) <<   -- -uo&5&&':a=8885%%&(222{{{%%%# U Q__U=%9:U1XENNPQ     s69C:z%loader_func, data_dtype, target_dtypec0|}t||||y)N)expected_data_dtypeexpected_target_dtyper)r data_dtype target_dtypedefault_results r(test_toy_dataset_frame_dtyper7s!]N&* r*cztd}tt|}d|_|d|jk(sJy)Nx)ry)r rrrrbunch_from_pkls r(test_loads_dumps_bunchrLs: CLE5<(NN # ."2"2 22 2r*ctd}d|jd<tt|}|jdk(sJ|ddk(sJd|_|jdk(sJ|ddk(sJy)Noriginal)keyzset from __dict__rchanged)r __dict__rrrrs r(8test_bunch_pickle_generated_with_0_16_and_read_with_0_17rSs j !E0ENN55<(N    ++ + % J .. .#N    ** * % I -- -r*c6t}dt|vsJy)Nre)rrI)res r(test_bunch_dirrhs ;D SY  r*cd}tjt|5ddlm}dddd}tjt|5ddlm}dddy#1swY6xYw#1swYyxYw)zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problemmatchr) load_bostonNzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_function)pytestraisesrsklearn.datasetsrr)msgrrs r(test_load_boston_errorr ns` DC {# .101 OC {# .;:;; 11 ;;sA A&A#&A/c d}td|d}tt|dddtj}|j d|t jtd 5}t jtd 5t|d d ddd|jdk(sJ|D]}t|jd|k(rJt|d k(sJ dddy#1swYXxYw#1swYyxYw)z'Check retry mechanism in _fetch_remote.z8https://scikit-learn.org/this_file_does_not_exist.tar.gz invalid_fileN Not Found)urlcoder hdrsfp side_effect"sklearn.datasets._base.urlretrievezRetry downloadingrzHTTP Error 404rr) n_retriesdelayrzRetry downloading from url: )rr r ioBytesIOsetattrrwarns UserWarningrr call_countr=messager\) monkeypatchrinvalid_remote_fileurlretrieve_mockrecordrs r(1test_fetch_remote_raise_warnings_with_invalid_urlr%zs EC,^S$G#;Tbjjl  <>NO k)< =  ]]9,< = E -! D E **a/// JAqyy>'CC5%II II J6{a   E E  s*)C7C+9C7C7+C4 0C77Dctd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|d k(sJtd \}}|dk(sJ|d k(sJtd \}}|dk(sJ|d k(sJtd \}}|dk(sJ|d k(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|dk(sJtd\}}|dk(sJ|d k(sJtjtd5tddddy#1swYyxYw)Nzhttps://example.com/file.tar.gz example.comz file.tar.gzu2https://example.com/نمونه نماینده.datauنمونه-نماینده.dataz)https://example.com/path/to-/.file.tar.gzzexample.com/path_tozhttps://example.com/downloaded_filezhttps://example.comz2https://example.com/path/@to/data.json?param=valuez data.jsonz4https://example.com/path/@@to._/-_.data.json.#anchorz"https://example.com//some_file.txtz some_file.txtzhttp://example/../some_file.txtexamplez'https://example.com/!.'.,/some_file.txtz+https://example.com/a/!.'.,/b/some_file.txtzexample.com/a_bzhttps://example.com/!.'.,z Invalid URLrz https:/../)rrr ValueError)folderrs r((test_derive_folder_and_filename_from_urlr,si;)FH ] "" " } $$ $;<FH ] "" " 7 77 7;3FH * ** * } $$ $;FH * ** * { "" ";,FH ] "" "  && &;)FH Y    && &;1FH ] "" "  && &;5FH & && &  && &;._urlretrieve_mocks^;' SM&&,,S1 i'//1Ck4> > K)+Z8r*rr)r5r6s` r(_mock_urlretriever7s9 - ..r*ct|}|dz }|j|dz }d}|j|d|dz }|j|dz }|jdd|d z }|jt|}|j d ||j d t | t d } | |dz dz k(sJ| jd|k(sJt d} | |dz dz dz k(sJ| jd|jdk(sJtjd} tjt5tj| 5t dddddddd|dz dz } t| j| dz gk(sJy#1swYNO.)0L#(  M 9L H HH H  & & & 8K GG G"6 Y6DGWWWW  & & & 8Or+rcrd) r{rr?r7rr r@runlink)r rEr5rFrGrOr"rJs r(test_fetch_file_without_sha256rRsv,,}-K[</0I&K w7,,}-K(5<>NO#(  l : :: :  & & & 8K GG G  & &! ++ +#(  l : :: :  & & & 8K GG G  & &! ++ +"(  l : :: :  & & & 8K GG G  & &! ++ +r*c|jd}t|dz }d}|j|dtj|j j }|jd}t|}|jd|td|| }||dz k(sJ|jd|k(sJ|jd k(sJtd|| }||dz k(sJ|jd|k(sJ|jd k(sJ|jd dd |d } tj| 5td|| }||dz k(sJ|jd|k(sJ|jdk(sJ dddtd|| }||dz k(sJ|jd|k(sJ|jdk(sJ|jtd|| }||dz k(sJ|jd|k(sJ|jdk(sJtd|}||dz k(sJ|jd|k(sJ|jdk(sJd} d} tj d|d| d} tj"t$| 5tj| 5td|| ddddddy#1swYKxYw#1swYxYw#1swYyxYw)Nr5r9r:rjr;rOrr>)r+sha256rczcorrupted contentszQSHA256 checksum of existing local file data.jsonl \(.*\) differs from expected \(z9\): re-downloading from https://example.com/data.jsonl \.rrdrrPdeadbabecafebeefzdiffers from expectedz#The SHA256 checksum of data.jsonl (z) differs from expected (z).)r{rr?hashlibrT read_bytes hexdigestr7rr r@rrrrQrArBrOSError) r rEr5rFrGexpected_sha256rOr"rJ expected_msgnon_matching_sha256rKexpected_error_msgs r(test_fetch_file_with_sha256r^?s,,}-K[</0I&K w7nnY%9%9%;<FFHO,,}-K(5<>NO#(_  l : :: :  & & & 8K GG G  & &! ++ +#(_  l : :: :  & & & 8K GG G  & &! ++ +  !5 H ++:*;>>> **G*< KKK**a/// 0#(_  l : :: :  & & & 8K GG G  & &! ++ +"(_  l : :: :  & & & 8K GG G  & &! ++ +#(  l : :: :  & & & 8K GG G  & &! ++ +-2 -o->?() - w&8 9 \\ 4 5  0"*  S00T  s1AKK'.K=K'KK$ K''K0)VrVrr3rAr5rLr functoolsr importlibrpathlibrpicklerr unittest.mockr urllib.errorr urllib.parser numpyrrr r r rrrrrrrrrrsklearn.datasets._baserrrrr"sklearn.datasets.tests.test_commonrsklearn.preprocessingr sklearn.utilsr r"r7fixturerBrFrSrVmark parametrizerZrargrsrwrrrrrrrrrfloat64intrrrrr r%r,r7rMrRr^r1r*r(rps "!     >' h  h  $$$$ )D$ +CD%E%&+ *#-67:K/LM N N #r#DE S!BC c2 X'>?NN>$& #R#r3 !2u- ;,tRj966 5 M M  K Y4*F Ivq$3 Hfa |<     / 0    64r: j'2tR8 a (*gr4L& '& .+ RZZ-  BJJ/ bjj#& BJJ$  BJJ/ BJJ$   3.* ; .D;N/1Uh%,PSr*