K iEr*ddlZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z ddl mZ ddlZddlZddlZddlmZddlZddlZddlmZddlZddlmZddlm Z m!Z!m"Z"m#Z#m$Z$ ddl%Z& ddl'm(Z) ddl*m+Z,ejZjPZ.GddZ/dZ0d Z1d Z2ejfd Z4ejfd Z5ejfd dZ6ejfdZ(ejfddgddgdZ7ejZjVdZ8dZ9ejZjVdZ:ejZjVdZ;ejZjVdZejZjVdZ?ejZjVdZ@d ZAd!ZBd"ZCejZjd#gd$d%eEd&eFd'eFfd(ZGd)ZHd*ZIejZjVd+ZJejZjVd,ZKejZjVd-ZLd.ZMd/ZNejZjd0ejd1d2d3d4ggejZjd5ddgejZjVd6ZPejZjVd7ZQejZjVejZjd8ZSd9ZTd:ZUejZjVd;ZVejZjVd d<ZWejZjVd=ZXejZjJejZjVd>ZYejZjVd?ZZejZjVd@Z[ejZjVdAZ\ejZjJejZjVdBZ]ejZjVdCZ^ejZjVdDZ_d dEZ`ejZjJejZjVdFZaejZjVdGZbejZjJejZjVdHZcejZjVdIZdejZjVdJZeejZjVdKZfejZjVdLZgejZjVdMZhejZjVdNZiejZjJejZjVdOZjejZjVejZjdPdQdRgdSZkejZjVejZjdTddgejZjdPdUdVgdWZlejZjdPdXdYgdZZmejZjdPd[d\gd]Znd^Zod_ZpejZjVejZjJd`ZqdaZrdbZsdcZtddZudeZvdfZwdgZxejZjVdhZydiZzd djZ{dkZ|dlZ}dmZ~ejZjVdnZejZjVdoZejZjVdpZejZjVdqZejZjVdrZejZjVdsZejZjVdtZejZjVduZejZjVdvZejZjVdwZdxZdyZdzZd{ZejZjd|ddgd}Zd~ZejZjVdZejZjVdZejZjVdZejZjVdZdZdZejZjVejZjdddgejZjdddgejZjdTddgejZjdgdgdfgdgdfgdgdfgdgdfgdgdfgdgdfgdgdfgdZejZjJdZejfdZejZjVejZjdZejZjVejZjdZejZjVejZjdZejZjVejZjdZejZjVdZejZjVdZejZjJdZejZjVdZejZjVdZejZjVdZdZdZdZdZejZjVdZejZjVdZdZejZjRdZejZjRdZdZejZjRdZejZjJdZejZjJejZjdgddZdZdZdZejZjJdZejZjJdZejZjJdZdZdZdZejZjJejZjdgddZdZejZjVejZjJdZejZjVejZjJejZjyejzdk(ddZejZjVejZjJdZejZjVdZejZjVejZjJdZd„ZdÄZejZjVejZjJdĄZejZjVejZjJdńZejZjVejZjJdƄZejZjVejZjJdDŽZejZjVdȄZejZjVejZjJdɄZejZjVejZjJdʄZd˄ZejZjJejZjVd̄ZejZjVejZjJd̈́Zd΄Z d dτZejZjVdЄZejZjVejZjJdфZd҄ZdӄZdԄZejZjVejZjdՄZdքZejZjJdׄZאd d؄ZdلZdڄZejZjVdۄZejZjVd܄ZejZjVd݄ZejZjVdބZejZjVejZjJd߄ZejZjVejZjJdZejZjVejZjJdZdZdZdZdZdZejZjejZjVdZdZejZjVdZejZjVdZejZjVejZjJdZdZejZjVdZejZjVejZjdZdZejZjVejZjdZejZjVdZejZjPdZejZjPdZejZjPdZejZjPdZejZjPdZejZjPdZejZjPdZejZjdddgdZejZjdddgdZdZdZejZjdddZdZejZjVdZdZdZejZjdddgdZdZd Zd Zy#e$rdZYwxYw#e$rdZ&YwxYw#e$rdZ)YwxYw#e$rdZ,YwxYw(N)copytree)quote)is_threading_enabled)FSProtocolClass ProxyHandler_configure_s3_limited_user_filesystem_uri change_cwdceZdZdZddZy)TableStreamWrapperc||_yNtable)selfrs `/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pyarrow/tests/test_dataset.py__init__zTableStreamWrapper.__init__Fs  Nc8|jj|Sr)r__arrow_c_stream__)rrequested_schemas rrz%TableStreamWrapper.__arrow_c_stream__Iszz,,-=>>rr)__name__ __module__ __qualname__rrrrr r Es ?rr c 4ddl}ddl}|jddd}|jd}|jgd}g}t |D].}|j ||t |t|f||z }0tj|gdS) Nri)days)greenblueyellowredorange)dateindexvaluecolorcolumns) datetime itertools timedeltacyclerangeappendfloatnextpd DataFrame)nr+r,dayintervalcolorsdatais r_generate_datar;Ms (  D!Q 'C!x!!q)H __I JF D 1X S!U1XtF|45 x <<&I JJrc tjtjdtjtjdtjtjdtj tjdtj g}tjj||d}|jS)Nr%r&r'r(F)schemapreserve_index) par=fielddate32int64float64stringTable from_pandasreplace_schema_metadata)dfr=rs r_table_from_pandasrI]s YY % "((*% "**,' "))+& F HH F5 IE  ( ( **rc:|jD]|}|j5}t|tjsJ|j rJ|j sJ|jsJ|jrJ ddd~y#1swYxYwr) get_fragmentsopen isinstancer? NativeFileclosedseekablereadablewritable)datasetfragmentnfs r+assert_dataset_fragment_convenience_methodsrVhs))+% ]]_ %b"--0 00yy =;;= =;;= ={{} $$}  % %% % %s A!BB ctj}ddg}t|D]\}}|d|d}|j||j |5}t t dt ttt dt ttt d|gdzt dDcgc]}|dzt|dzdc}g}tjdtjfd tjfd tjfd tjfd tjtjtjdfg}tj || } tj"j%| g} t'j(| |ddd|Scc}w#1swYxYw)Nz subdir/1/xxxz subdir/2/yyyz/file.parquetrabi64f64strconststructr=)fs_MockFileSystem enumerate create_diropen_output_streamlistr/mapr1r_r?r=rBrCrDra record_batchrE from_batchespq write_table) mockfs directoriesr: directorypathoutjr9r=batchrs rrnrnss    !F K "+.' 9E!H-)$  & &t , 'U1XSa)*SeAh'(a8=aA1q1u3q1u:.A DYY # % $"((*%299288:BIIK%HIJ  FOOD8EHH))5'2E NN5# &% ' ''. MB  ' 's A(G!=GC7G!G!!G+ c ddlm}m}ddlm}|fdt  fd}|j |d|||}tj fd}||fS) Nr)LocalFileSystem PyFileSystemr)rc^|Dchc]}jt|c}Scc}wr)normalize_pathr_)pathsplocalfss r normalizedz#open_logging_fs..normalizeds&8=>1&&s1v.>>>s!*cjt|}j||jj |Sr)ryr_add_fsopen_input_file)rrqr|openeds rrz(open_logging_fs..open_input_files8%%c$i0 4xx''--rrc3Kj d|k(sJy#|k(sJwxYwwr)clear)expected_openedr}rs r assert_opensz%open_logging_fs..assert_openssI  E f%O)DD DD:f%O)DD DDsA-AAA) pyarrow.fsrvrwtest_fsrsetsetattr contextlibcontextmanager) monkeypatchrvrwrrrcrr|r}rs @@@ropen_logging_fsrso8%G?UF.  &7I l7+ ,BEE | rmodule)scopec |jjjd|jjjdtd}t j }t |}td||dzDcgc]}|j|||dzzc}\}}}}|jdt |}ttd||dzDcgc]}|j|||dzzc}D]D\}} d|d } |j| 5} tjt| | dddF|jd |j|j j"j$|j&gD]b\} } d | dd | d } | d} |j| |j| 5} tjt| | dddd|jd|j|j j"j(|j j"j*gD]b\} } d| dd| d } | d} |j| |j| 5} tjt| | dddd|jd|jdD]Y\} } d| } | d} |j| |j| 5} tjt| | ddd[|Scc}wcc}w#1swYQxYw#1swYxYw#1swYxYw#1swYxYw)Npandasparquetrplain z plain/chunk-rXr=zschema//rz/chunk.parquethivez hive/year=z/month= hive_colorr(zhive_color/color=)configpyarrowrequiresr;rcrdlenr/ilocrfrergrlrmrIgroupbyr%dt dayofweekr(yearmonth)requestrHrnr5r:df_adf_bdf_cdf_dchunkrqrrpartfolders r multisourcefsrsN NN##H- NN##I.  B    !F BA9>q!QT9JKAbgga!Q$/KD$d g D AU1aB=OPtyy1QU73PQ;5aS)  & &t , ; NN-e4c : ; ;;  h||TYY\\%;%;TZZ$HI; e47)1T!WI.(&!  & &t , ; NN-e4c : ; ; ; f||TYY\\%6%6 8J8J$KL; ed1gYgd1gY7(&!  & &t , ; NN-e4c : ; ; ; l#||G,; e$TF+(&!  & &t , ; NN-e4c : ; ; ; MML Q ; ; ; ; ; ; ; ;s<L>M M M M" M/M M "M, /M8 c tj}tjdd}tjd}tj t jt jdt jt jdt jg|_ tj||||}|jS)NsubdirT recursivegroupkey)dsParquetFileFormatrc FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr?r=r@int32rD partitioningFileSystemDatasetFactoryfinish)rnformatselectoroptionsfactorys rrSrSs  ! ! #Fx48H))(3G33 HHWbhhj ) HHUBIIK (  G ))&(FGLG >> rTFthreadedserial)paramsidscB|jGfdd}|S)z] Fixture which allows dataset scanning operations to be run with/without threads cFeZdZfdZfdZdZdZdZdZdZ dZ y ) dataset_reader..readerc|_yr use_threads)rrs rrz'dataset_reader..reader.__init__s *D rc.d|vr td|d<y)Nrz9Invalid use of dataset_reader, do not specify use_threads) Exception)rkwargsrs r _patch_kwargsz,dataset_reader..reader._patch_kwargs s)&$&&%0F= !rcH|j||jdi|SNr)rto_tablerrSrs rrz'dataset_reader..reader.to_tables&   v &#7##-f- -rcH|j||jdi|Sr)r to_batchesrs rrz)dataset_reader..reader.to_batches&   v &%7%%// /rcH|j||jdi|Sr)rscannerrs rrz&dataset_reader..reader.scanners$   v &"7??,V, ,rcJ|j||j|fi|Sr)rhead)rrSnum_rowsrs rrz#dataset_reader..reader.heads&   v &7<<3F3 3rcJ|j||j|fi|Sr)rtake)rrSindicesrs rrz#dataset_reader..reader.take!s&   v &7<<262 2rcH|j||jdi|Sr)r count_rowsrs rrz)dataset_reader..reader.count_rows%rrN) rrrrrrrrrrrrsrreaderrs+ + 0 . 0 - 4 3 0rr)param)rrrs @rdataset_readerrs"--K"0"0H 8Orc * tjtjdtjg}t j }ddg}t ddDcgc]}t jd|k(}}t||Dcgc]\}}|j|||}}}t jdt jdk(} t j||||| } tjj|||||| } | | fD]"} t| tjsJt| jtj sJ| jj| sJt!| j"t!|k(sJt%| j'}t|||D]2\} }}| jj|sJ| j(|k(sJt| jtj sJt| tj*sJ| j,d gk(sJ| j.dk(sJt%| j1}| j.t3|cxk(rdk(sJJt|d tj*sJ|d j(|k(sJ|d j,d gk(sJ|d j.dk(r3Jt%| j't jdd k( }t3|d k(r#Jt j||||} | jjt jdsJtjj||||} | jjt jdsJ| j'D]2} | jjt jdr2Jt5j6t8d5t j|||dddt5j6t8d5t j|||ddddt5j6t8d5tjj||dddycc}wcc}}w#1swYxYw#1swYfxYw#1swYyxYw)Nr`subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetrrYrleveli9)r=r filesystemroot_partition)r=rr partitionsrrfilterr=rrTzincorrect typematch)r=rrr)r?r=r@rBrrr/zip make_fragmentscalarFileSystemDataset from_pathsrMrpartition_expressionequalsrfilesrhrKrqParquetFileFragment row_groupsnum_row_groupssplit_by_row_grouprpytestraises TypeError)rnr= file_formatrzxrrqr fragmentsrdataset_from_fragmentsdataset_from_pathsrSrT partitionrow_group_fragmentss rtest_filesystem_datasetr,s YY "((*%F&&(K )+G HE16q!=A"((6"a'=J=#&uj#9;T4**4>;I;XXg&"))D/9N11&.--88 f[Vn9 +,>?#'2#7#7888'.."*>*>???++22>BBB7==!SZ///..01 ),Y E)J > %Hi0077 B BB==D( ((hoor/C/CD DDh(>(>? ??&&1#- --**a/ //"&x'B'B'D"E **c2E.FK!K KK KK1!4b6L6LM MM&q)..$6 66&q)44; ;;&q)88A= == >..bhhw6G16L.MN 9~"""/#4""&G  ' ' . .ryy ?? ?""-- f[V.G  ' ' . .ryy ?? ?))+E,,33BIIdODDDE y(8 9= Y V<= y(8 9C Yv$/ CC y(8 9G '' +'FGG{>;j==CCGGs0#S&S+S1>S=;"T 1S:=T Tctjtjdtjg}t j }dg}tj j|||tj}|jtjt5|j|dddy#1swYyxYw)Nf1znonexistingfile.arrowr)r?r=r@rBr IpcFileFormatrrrcrvrKrrFileNotFoundErrorr)rr=rrzrSs r1test_filesystem_dataset_no_filesystem_interactionr ts YY rxxz"F""$K $ %E""-- f[%%'.G   ( ))()))s 2C  Cct|tjsJt|jtj sJt j gdt j}t j gdt j}|j|D]b}t|tjsJ|jdj|sJ|jdj|rbJ|j|jD]D}t|tjsJt|j tj"rDJ|j%|}t|tj&sJt)|dk(sJtj*ddk(}|j%d|}|j-d j/}|dddgk(sJ|d d d gk(sJt1|d dd gk(sJt1|d ddgk(sJtj*ddk(}|j%d|}|j-d j/}|dgdk(sJ|d gdk(sJ|d gdk(sJ|d gdk(sJtj*dtj*d tj*ddk(d}|j%d|}|j-dj/}t3|gdk(sJ|dgdk(sJ|d gdk(sJ|dgdk(sJt5|y)NrrrrYrtyperrrr]T)rrrr^?rrxxxyyy)rar\1)rrrr)r@rr)rrrr)rrrr)r]r^new)rr*) rrrrrrrYrYrr) rrr@r@rrrr) FFTTFFFFTT)rMrDatasetr=r?SchemaarrayrBrCr RecordBatchcolumnrr scan_batchesTaggedRecordBatchrTFragmentrrErr@sort_by to_pydictsortedrhrV) rSr expected_i64 expected_f64rtr conditionresult projections r test_datasetr*s: grzz ** * gnnbii 00 088O"((*=L88O"**,?L**734%000||A%%l333||A%%l3334  ''0==?7%!5!5666%.."++6667  # #G ,E eRXX && & u:  1$I   $y  AF ^^G $ . . 0F %=QF "" " %=RH $$ $ &/ "q!f ,, , &- UEN 22 2)S0I   $y  AF ^^G $ . . 0F %=L (( ( %=0 00 0 '?l ** * %=8 88 8xxxxxx(C/J   $  CF ^^E " , , .F <0 00 0 %=: :: : %=; ;; ; %=7 77 7/8rcb|jdd}t|}|jdk(sJy)N)fragment_readaheadbatch_readahead)rr2 num_columns)rSrrts rtest_scanner_optionsr2s4  B JG ME    !! !rc|j|tj}t|tj sJt jtj5|j|dgddd|j|dgtj}|j|jk(sJ|jtjdtjfgk(sJt|tj sJ|j}|jD].}|j|jk(sJ|jdk(r.J||j!j#k(sJ|j|jk(sJt%|j&D]=}tj(|g}|j+||j+|k(r=Jt jtj,5|j+tj(|j&gddd|j&|j/k(sJ|j|gdtj}|j}gd}|j0|k(sJ|j3d}|d j5d gd zd gd zzk(sJ|dj5d gd zdgd zzk(sJ|dj5d gdzk(sJ|dj5dgdzk(sJy#1swYxYw#1swYxYw)N) memory_poolunknownr)r])r*r4r) __filename__fragment_index __batch_index__last_in_fragmentr7r6rrrrr8rr9T)rr?default_memory_poolrMrScannerrr ArrowInvaliddataset_schemar=projected_schemarBrrr1 to_readerread_allr/rrrArrowIndexErrorr column_namesr" to_pylist) rSrrrrtr:rexpected_names sorted_tables r test_scannerrFs@$$R335%7G grzz ** * r '=w <=$$Wug131G1G1I%KG  ! !W^^ 33 3  # #ryy5"((*2E1F'G GG G grzz ** *    E##%&||w77777  A%%%& G%%'002 22 2 <<733 33 3 5>> "<((A3-zz'"gll7&;;;;< r)) *1 RXXu~~./01 >>W//1 11 1$$W7M241G1G1I %KG    E=N    // /==!34L  % / / 1 %&* %&* + ,, , * + 5 5 7QC!GqcAg||d'|(t j@|d'|(} | jC} | j!|sJ ddd~t;j<5}t j |}t j>||d'|(d} t#j$tDd)*5t j@|d'tGd(} ddd| J dddy#1swYxYw#1swYxYw#1swYxYw#1swYHxYw#1swYKxYw#1swYyxYw)+Nr]r^z other objectrrrc3$K|]}|du ywrr.0rs r z$test_partitioning..e.{rfrgz/alpha=0/beta=3/rz/alpha=xyz/beta=3/)z/alpha=one/beta=2/z /alpha=one/z /beta=two/otherc3$K|]}|du ywrrrcs rrez$test_partitioning..rfrgz3_3.14_ prefix_3_aaa_)firstsecondthird dictionariesrc3DK|]}tjywrrandomrdrOs rrez$test_partitioning..%I!fmmo%I r[rr\r f2rnamesripcrrz,Expected Partitioning or PartitioningFactoryr)$r?r=r@rBrCrrHivePartitioningFilenamePartitioningrMr]rrwallparse Expressionrrrr<ris_null dictionaryint8rDrrCrr/tempfileTemporaryDirectory write_datasetrSr ValueErrorint) r=r_rexprrW shouldfailrpartitioning_schematempdir load_backload_back_tables rtest_partitioningrSs+ YY  #  %F**B,?,?))+.V} ,888uV},,,~--- .YY "((*%  %F++F3L |(( )Q .. . <,";";< << <   j )D dBMM ** *!Q&288E?d+BCH ;;x  r ',?+,   e $Dxx A%H ;;x  233FVT TT T YY "((*% $F&&vUCL |(( )Q .. . <,";";< << <   0 1D ' biil * & RYYq\ ) +  ;;x    2 3D '  " " $(8BIIaL(H I  ;;x  I+ ]]2?? + +   z * + ++ 2..vWM MM M YY "((*%  %F**62L |(( )Q .. . <,";";< << <   i (D dBMM ** *!Q&288E?d+BCH ;;x  r ',?+, 2226FS SS S YY "((*%  bggi=>F++eRXX.J%KLL  $ $Q ' // /  $ $Q ' 1 1 38$ $$ $ 233FN NN N** HHWbhhj ) HHUBMM"'')RYY[A B  28889  L  $ $Q ' // /  $ $Q ' 1 1 38$ $$ $ HH rRXX%IuRy%II #sebj()+# E ))fbiik%:$;<**B,?,?))+ 1  ( ( * 1g !45L   UG$)  F 750<>I'002O"))%0 00 1 1 1  $ $ &!'//0CD   %L B ]]:!OQ O 75s1vNI O   !!A,,< + +$,,L 1 1 O O !!sVgg( g5AhA h#"h hg%(g2 5g?h h hh$c $tjtjdtjtjdtjg}t j |t j|t j|t j |dt j|dt j|ddg}|D]'}|j|j||k(r'Jy)Nr]r^rhrirm)rjrn) r?r=r@rBrCrrrrloadsdumps) pickle_moduler=partsrs rtest_partitioning_picklingrs YY  #  %F   ( F# '   &A @ FV5Q  EF""=#6#6t#<=EEEFrz@flavor, expected_defined_partition, expected_undefined_partition))r)zfoo=A/bar=ant%20beerr)r)z A/ant beerr)r)rz A_ant bee_)rrOflavorexpected_defined_partitionexpected_undefined_partitionctjdtjfdtjfg}tt||}|j t jddk(t jddk(z|k(sJ|jdj|jt jddk(t jddk(zsJ|j t jddk(t jddk(zt jddk(t jddk(zz|k(sJ|j t jddk(t jddk(zt jddk(t jddk(zz|k(sJ|dk7rVtjtjd 5|j t jddk(dddy|j t jddk(d k(sJy#1swYyxYw) Nfoobarrbzant beeArrzDNo partition key for foo but a key was provided subsequently for barr)z bar=ant%20beer)r?r=rDgetattrrrpcr@rjoinrrrr<)rrrrrs r test_dataset_partitioning_formatrsD))eRYY[%9E299;;O$PQ&72v&.ABL RXXe_ 9bhhuoQT>TUV % & &   chh'AB C J J %C BHHUOy$@A   hhuo*rxx#/E F9,%C1GH J  &  & & hhuo*rxx#/E F9,%C1GH J  (  ( (##]] OO(  B   "((5/Y"> A B B""RXXe_ %ADI     B Bs (I22I;ctjtjgdgdd}tjd}tjd}|j |dz||z |dz|j d|z d  }tjgd gd gd gdd }|j|sJy)NrrrY)rrrrZr[r\rrrC)za+1zb-aza*2za/br)rrYr)rr)rr)?rg?)rrSr?rr@rcastr)rSr[r\r(rWs r$test_expression_arithmetic_operatorsr,sjj "BCDG  A  A   1u1u1uvvi 1$ ' F xxH == "" "rc|dDcgc]}tj||k(c}\}}}tj|ddik(sJtj|tj|k(sJtj||z|zdDcic]}||c}k(sJtjddk\}tj|ik(sJtj||zddik(sJtjdj }tj|ddik(sJycc}wcc}w)Nabcr[drY)rr@get_partition_keys_get_partition_keysr)fr[r\cnopenulls rtest_partition_keysr=s )./Arxx{a/GAq!  #Sz 11 1  #r'='=a'@ @@ @ Q +e/D1/D DD D 88C=A D  &" ,, , T *sCj 88 8 88C= "D  &3+ 55 500Es D4 D9ctj}tjddg}tjd}tjtj}tjtj}|j t k(sJ|j ddhk(sJ|jdk(sJ|jdk(sJ|jtjk(sJ|jtjk(sJ|jtjusJ|jtjusJ||k(sJ||k7sJ||k7sJ||k7sJ||k7sJd|_|jtjk(sJ||k(sJtj|_|jtjk(sJ||k7sJtj|_ |jtjusJ||k(sJtj|_ |jtjusJ||k7sJy) Nr[r\dictionary_columnsmscoerce_int96_timestamp_unit binary_type list_typens) rParquetReadOptionsr? binary_view LargeListTyperrrrbinaryrListType large_binary)opts1opts2opts3opts4opts5s rtest_parquet_read_optionsrKs  ! ! #E  ! !c3Z @E  ! !d CE  ! !bnn.> ?E  ! !B,<,< =E  # #su ,, ,  # #Sz 11 1  , , 44 4  , , 44 4    ++ +    0 00 0 ??bkk )) ) ??b.. .. . E>> E>> E>> E>> E>>E    ++ + E>>)E    1 11 1 E>>kkEO ??bkk )) ) E>>&&EO ??b.. .. . E>>rctj}tjdh}tjd}tjtj}tjtj}|j tj k(sJ|j tj dgk(sJ|j tj dk(sJ|j tj tjk(sJ|j tj tjk(sJy)Nr[rsrrr)rrr?rr read_optionsr)pff1pff2pff3pff4pff5s r%test_parquet_file_format_read_optionsrus"   !D  C5 9D  C @D  BNN,< =D  "*:*: ;D    5 5 7 77 7    5 5# O OO O    5 5$'!) )) )    5 5NN$!& && &    5 5""!$ $$ $rctj}tjd}tjdd}tjdd}tjdd }tjd }tjd dd }tjd|}|jdusJ|j dk(sJt r|jdusJ|jdk(sJ|jdk(sJ|jdusJ|jdusJ|j dk(sJt r|jdusJ|jdusJ|j dk(sJt r|jdusJ|jdusJ|j dk(sJt r|jdusJ|jdk(sJ|jd k(sJ|jdusJt r|jdusJ|j|k(sJ|j|jk7sJ||k(sJ||k7sJ||k7sJ||k7sJ||k7sJ||k7sJ||k7sJy)N buffer_sizei T)ruse_buffered_streamF)r pre_bufferi@i)thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrS)hole_size_limitrange_size_limitlazy)r cache_optionsi@B) rParquetFragmentScanOptionsr? CacheOptionsrrrrrrrr)rrrrropts6 cache_optsopts7s rtest_parquet_scan_optionsrs  ) ) +E  ) )d ;E  ) )t 5E  ) )e NE  ) )!'$* -E  ) )#' )EdDJ  ) )T TE  $ $ -- -    %% %4'''  ) )[ 88 8  , , 99 9  + +u 44 4  $ $ -- -    %% %4'''  $ $ ,, ,    %% %4'''  $ $ -- -    %% %5(((  ) )V 33 3  , , 66 6  + +t 33 34'''   * ,, ,   %"5"5 55 5 E>> E>> E>> E>> E>> E>> E>>rc tjtjtjtjj ddtjtjj ddgtjtjj dd tjtjtjj dd  tjtjj d dg} |jtjtg|jtjtjdhtjdtjddddg|D]'}|j|j!||k(r'Jy#t$rYwxYw)N T) delimiterignore_empty_linesrYr) skip_rowsrBri)r block_sizeignorenewlines_in_valuesunexpected_field_behavior) parse_optionsFrrr[r)rr{i)rrrr)rr  CsvFileFormatr?csv ParseOptions ReadOptionsJsonFileFormatjsonr0 OrcFileFormat ImportErrorrlextendrrr)rformatsrs rtest_file_format_picklingr s   ,,t@D-F G bff&8&8ug'9'/ 0 bff&8&8E'9'+ ,  ''..$IQ/S T rww':':"(;(. /G  r'')* ~  "  SE :  T :  $( ),,/    T ""=#6#6{#CD SSST!    s#G>> H  H c tjtjtjj dtjtjj dtj tj tjjdd tj tjj dd g}t;|jtjd tjdg|D]'}|j|j||k(r'Jy)NT)strings_can_be_nullconvert_optionsrrFerrorrirrr)r)rCsvFragmentScanOptionsr?rConvertOptionsrJsonFragmentScanOptionsrrrlr rrr)rroptions r#test_fragment_scan_options_picklingrs' !!# !!FF11d1K M !!++u+= ? ""$ "" GG E;B ! D E "",,#,N P G ~  ) )d ;  ) )T :  J""=#6#6v#>?6IIIJrpaths_or_selectorrrrrrcx tjtjdh|}tjd}tjt j t jdt jt jdt jg|_ |jdk(sJ|jddgk(sJ|jd usJtj||||}|j}|jj!t j t jd t j"t jd t j$t jdt j&t jt jt jd t j"t jd t j(t j"t jdt jdt jt jdt jgd sJt+|j-t.sJt+|j1|tj2sJ|j4j!tj6dsJ|j1}t+|tj2sJ|j9}t j:gdt j"} t j:gdt j$} t j<j?t j:gdt jt j:djAt j} t j:tCdD cgc]} | dztE| dzdc} } |jG}tI|ddgddgD]M\\}}}}t j:|gdzt j}t j:|gdzt j}t j:|dz gdzt j"}|jJJ|jLdk(sJ|dj!| sJ|dj!| sJ|dj!| sJ|dj!|sJ|dj!| sJ|dj!|sJ|dj!|rNJ|jO}t+|t jPsJtS|dk(sJ|jLdk(sJycc} w)Nr_r)rrrrr.rOFr]r^r`rarZcheck_metadataTrrz 0 1 2 3 4rrYrrrrr0rrrr)*rrrrrr?r=r@rrDrpartition_base_dirselector_ignore_prefixesexclude_invalid_filesrinspectrrBrCrrarMinspect_schemasrhrrrrrrDictionaryArray from_arrayssplitr/r_rrrr1rrEr)rnrrrrrinspected_schemarSrr%r& expected_strr:expected_structiteratorrtrTrrexpected_group expected_keyexpected_constrs rtest_filesystem_factoryr-s ! !**ugFF ))(3G33 HHWbhhj ) HHUBIIK (  G  % % 11 1  + +Sz 99 9  ( (E 11 1))!67G( ??  # #BII  #  %  bhhj"))+>? "((*% 299288:+-99;&89 : "((*%  $ / % $    g--/ 66 6 gnn%56** ,, ,  ! ! ( (4 99 9nnG gr33 44 4ooG88O"((*=L88O"**,?L%%11 rxxz2 ""$299;7Lhh).q 3$%'(!e#a!e* = 34O##%H),X1vu~)N -%5#5'A+BHHJ?xx  < 519+/ C,,888  A%%%Qx|,,,Qx|,,,Qx|,,,Qx~...Qx///Qx~...Qx|,,, -    E eRXX && & u:      !! !+ 3sX7ctj}tjd||}|jD]}|j ||}|j dgk(sJ|j ||dg}||fD]P}t |tjsJ|j|k(sJt |jt|rPJ|j dgk(rJy)N/plainrrrr) rrrSrrrrMrrqrr)rparquet_formatrSrqrTrow_group_fragmentrs rtest_make_fragmentr4Rs))+Njjm .0G 4!//mD""qc)))+99$ FGS:J./ AAa!7!78 8866T> !>allD,?@ @@ A",,333 4rc r|\}}}}}}}}tj} |g} | Dcgc]}| j||} }tj| | |j|} | j } | j |sJ| jDcgc]'}| jj|j)}}t| |Dcgc]\}}| j|||}}}tj|| |j|}| j } | j |sJ| Dcgc]}d}}t| |Dcgc]\}}| j|||}}}tj|| |j|}tjtjj d5|j }ddd| Dcgc]}d}}t| |Dcgc]\}}| j|||}}}tj|| |j|}tjt"d5|j }dddycc}wcc}wcc}}wcc}wcc}}w#1swYxYwcc}wcc}}w#1swYyxYw) z Test passing file_size to make_fragment. Not all FS implementations make use of the file size (by implementing an OpenInputFile that takes a FileInfo), but s3 does, which is why it's used here. )rr=r) file_sizerzParquet file size is 1 bytesrNrzHTTP status 416)rrrrr=rrrr get_file_infosizerrrrlibr<OSError)s3_example_simplerrqrcurihostport access_key secret_keyrrzrrStblr sizes_truer8fragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarges rtest_make_fragment_with_sizerGes@Q J J J( J"J'!J-J-J6c<tjd}tj|j d}t j }|j|}t|jtjsJtjgdgdgdggd}|j|j|sJ|j|j|}|j|j|jsJy)NzT alpha,num,animal a,12,dog b,11,cat c,10,rabbit utf-8r[r\r rdogcatrabbitrknumanimalr)textwrapdedentr? py_bufferencoderrrrMrL BufferReaderrrrrr)rrcontentbuffer csv_formatrTrWpickleds r"test_make_csv_fragment_from_bufferr^soo G \\'..1 2F!!#J''/H hmmor 77 7xx%139:H  " "8 , 3 3H == =!!-"5"5h"?@G  " "7 + 2 283D3D3F GG Grcd}tj|jd}tj}|j |}t |jtjsJtjgdgdgdggd}|j|j|sJ|j|j|}|j|j|jsJy)Nz{"alpha" : "a", "num": 12, "animal" : "dog"} {"alpha" : "b", "num": 11, "animal" : "cat"} {"alpha" : "c", "num": 10, "animal" : "rabbit"} rIrJrKrNrRr)r?rWrXrrrrMrLrYrrrrr)rrrZr[ json_formatrTrWr]s r#test_make_json_fragment_from_bufferras'AB##% uc"((0&&x077>>>%%m&9&9(&CD&&w/66u=== >rctjtddgdzdgdzdgdzzggd}t|dz }t j ||d g| t j|d d | }||fS)Nr-rr[rr\rrtest_parquet_datasetr)partition_cols chunk_sizerr)rrr)r?rr/r_rlwrite_to_datasetrrS)rrorrrqrSs r_create_dataset_for_fragmentsrqs HH qA37SEAI 12" E w// 0Dt(.xJHjj YV G '>rct|\}}t|j}t|dk(sJ|d}ddg}|jj |k(sJ|j j|j|j|jk(sJ|jjtjddk(sJ|j|}|j|k(sJ|j|j!dj#ddsJ|j||j$}|jgd k(sJ|j|j#ddsJ|j|j$j'dk(sJ|j||j$tjddk }|jgd k(sJy) Nrrr rrr[rrbr)r=r)rqrhrKrphysical_schemarrr!rqrrrrr@rrB remove_columnslicer=remove)rrrrSrrphysical_namesr(s rtest_fragmentsrxs27;NE7W**,-I y>Q  ! AD\N   " "n 44 4 88  AFFALL 1Q5F5F FF F ! ! ( (&)9S)@ AA A $ $Q 'F   . 00 0 ==,,Q/55a; << < $ $Qw~~ $ >F   "6 66 6 ==Q* ++ +   4 4Q 7 77 7 $ $ '..$!);%=F   "6 66 6rctjtddgdzdgdzzgddg}t|dz }t j ||dg t jtjd gd }t j|d |}|jt jddk\}tt|dk(sJy)Nr-rrrcolrrrmrn)rrrrrrr)r?rr/r_rlrprrr=rSrKr@rrh)rrrqrrSrs rtest_fragments_implicit_castr}'s HHeAha1#' 125&/ JE w// 0DtVH= ??299&6%78 HDjjidCG%%RXXf-=-B%CI tI 1 $$ $rc. t|\ } d fd }t|jd}|j}|j |j |}|j ||j |k(sJ|j|j|j|j}|j |j|j |sJ||d|j|j|j|j}||dtjddk|j|j|j|j}||ddgtjdd k |j|j|j|j}||dtjd d k(d |jjdddz} t!j"t$| 5|j|j|j|j}|j |tjd d k(dddy#1swYyxYw)Nc|jj||}|r|n j}|j|k(sJj|j |}|j |sJy)Nr=r*r)rr=rBruselectr)rT row_slicer*ractualrBrWrs rassert_yields_projectedz;test_fragments_reconstruct..assert_yields_projected9st""<<#A")wu/A/A ""l2225;; *11,?}}X&&&rr)r)rr)rrr rrrrRrr[z&No match for FieldRef.Name\(part\) in FrNN)rqrhrKrrrrrrqrrrrr@rs to_stringrrr) rrrrSrrTr2pickled_fragment new_fragmentpatternrs @rtest_fragments_reconstructr5s}27;NE76:'G))+,Q/H__N%**=+>+>x+HI  " " +44X> ?? ?"// x**%::0C3GI"// x**%::0!>4@   RXXf5E5LM NNNs &AJ  Jct|d\}}t|jd}t|j}t ||j cxk(rdk(sJJ|j |d|j}|jgdk(sJt |dk(sJ|j|jddsJ|djJ|dj dk(sJ|djdjdddddddk(sJt|jtjd dk d}t|jtjd dk}t |dk(sJ|j |dtjd dk }t |dk(sJy) Nrrorrbrrminmaxr rr r)rqrhrKrrrrr=rBrrur statisticsrr@)rrrrSrTrr(s r!test_fragments_parquet_row_groupsrrs27qINE7G))+,Q/Hx::<= " #x'>'> C! CC CC C  $ $Aw~~%7F   "6 66 6 v;!   ==Q* ++ + q ! , , 88 8 q ! 0 0A 55 5 q ! , ,Q / : :""?   G))$!1C)DEaHHx::288D>A;MNO " #q (( (  $ $Arxx~'9%;F v;!  rctjdtdi}tj||dz dt j |dz d}t|jd}|jj|j|jd d g }|jdk(sJ|j|jdk(sJt|j dk(sJy) Nr[r- test.parquetrrow_group_sizerrrrrYr1)r?rr/rlrmrrSrhrKrrrqrrensure_complete_metadatarr)rrrSoriginal_fragmentrTs r%test_fragments_parquet_num_row_groupsrs HHc58_ %ENN5'N21Ejj>1)DGW2245a8!''55 1 < <q66H  " "a '' ' %%'  " "a '' ' x"" #q (( (rctjtddgddg}|djd|d<t j t j||dz d dlm }|j|dz }|j||jddk( }|jd |jk(jjsJy) Nr[r\rr)col1col2rcategoryztest_filter_dictionary.parquetrr)r3r4dictastyperlrmr?rpyarrow.datasetrSrr@r to_pandasr)rrrHrrSr(s r,test_fragments_parquet_row_groups_dictionaryrs dc !Q8 9BF"":.BvJNN288B<+K!KL bjj#CCDG  $ $WXRXXf5E5L $ MF GGAJ&**, , 1 1 3 7 7 99 9rc|\}}t|d|\}}t|jd}||jg5|j ddd|j ddgk(sJ|g5|j dddt |jtjsJ|jj|j|jddg}|j |j k(sJ|j |j d} | jdk(sJ| jdk(sJ| jJ|j!|j#|} ||jg5| j ddgk(sJ| j d} | jdk(sJ| jJ dddy#1swYxYw#1swYZxYw#1swYyxYw)Nrrorrrr1)rqrhrKrqrrrMmetadatarl FileMetaDatarrridrrrr) rrrrcrrOrSrTr row_grouprs r&test_fragments_parquet_ensure_metadatars&B .A"JAwG))+,Q/H x}}o &,))+,   1a& (( ( b ,))+, h'' 99 9??00 x**1v1L  " "h&9&9 99 9))+''*I <<1      "" "    ++ +%**=+>+>|+LM x}}o &0**q!f444$//2 ||q   ##/// 003,, ,,(00s%G:G# AG0G #G-0G9cX|\}}t||\}}t|jd}|g5|j|j |}ddd|j g5|j } ddd dgk(sJy#1swY:xYw#1swYxYw)Nrrr)rqrhrKrrrqr) rrrrcrrOrSrTrrs r)test_fragments_parquet_pickle_no_metadatars'B .w2FJAwG))+,Q/H b N(..}/B/B8/LMN ',,- .1%00 1 !   NN11s!B6 B B B)c&tjtjgdtjtjgdtjtjgdtj tjgdtj tjgdtjtjgdtjtjgdtjtjgdtjtjgdtjtjgdtjtjgdtjtjgdtjtjgdtjtjgdtj dtjgdtj dtjgdtj dtjgdtj"tjgdtj$tjgdtj&dtjgdtj(dggd  }t+|d z }t-j.||| |t1j2|d dfS)N)TNF)rr*)rg$@E@)r[Nzrrus)rrljt)booleanruint8int16uint16ruint32rBuint64r1doubleutf8rts[s]ts[ms]ts[us]rAdate64time32time64rtest_parquet_dataset_all_typesrrrr)r?rrbool_rrrrrrrBrfloat32rCrr timestamprArrrr_rlrprrS)rrorrqs r_create_dataset_all_typesrs HH HH("((* 5 HH["'') , HH["((* - HH["((* - HH["))+ . HH["((* - HH["))+ . HH["((* - HH["))+ . HH& 5 HH& 5 HH%rwwy 1 HH%ryy{ 3 HH[",,s"3 4 HH[",,t"4 5 HH[",,t"4 5 HH["))+ . HH("))+ 6 HH["))C. 1 HH["))D/ 2) , /- E^ w99 :Dt ; "**T)&I IIrc D t|\}}t|jd}ddl fd} fd} fd} j} j }t|j } | djJ| djd} | jdk(sJ| jdkDsJ| jiddd d d d d d dd d d dd d d dd d d dd d d dd d d dd d d dd d d dddd dddd dddd dddd d|d |d d d |d |d d d!|d |d d d"|d#d d$|d#d$d%d |d#d d |d#d$d&d |ddd |ddd d |dddd |dddd d d'k(sJy)(Nrc2jddddd|SNrrr+rr+s rdt_sz.test_parquet_fragment_statistics..dt_s,s )))$1aA>>rc :jdddddd|dzS)Nrrrrrrs rdt_msz/test_parquet_fragment_statistics..dt_ms-s&***4Aq!Q$GGrc 4jdddddd|Srrrs rdt_usz/test_parquet_fragment_statistics..dt_us.s"***4Aq!QBBrrYrrFTrrrrrrrrrrBrr1rrrrr[rrazrrrrArrrL)rrr) rrhrKr+r%timerrrtotal_byte_sizer) rrrSrTrrrr%rrrr+s @r test_parquet_fragment_statisticsr$sn/w7NE7G))+,Q/H>GB ==D ==Dx::<= q ! , , 88 8#A&11!4I    "" "  $ $t ++ +   $5.$"%$ 2&$ 2& $ !B' $ 2& $ !B'$ 2&$ !B'$ T*$ #d+$ C($ $t,$ ab2$ %(595$ %(595!$" $tQ*4a3DE#$$tQ*4a3DEq!Q-Q2?q!Q*41a3DE)$  rc<tjgdgdd}tj||dz dt j |dz d}t |jd j}|d jd jik(sJy) N)rrNN)r[r\NNrZrrrrrrr) r?rrlrmrrSrhrKrrr)rrrSrs r&test_parquet_fragment_statistics_nullsrPs HH-4JK LENN5'N21Ejj>1)DGW**,-a0CCEI Q< " "1 % 0 0B 66 6rc8tjgdgdddd}|j|dz dtj|dz d }t |j dj}|djdjik(sJy) N)r[r\r\rrrrZrrrenginerr) r3r4 to_parquetrrSrhrKrrr)rrHrSrs r'test_parquet_empty_row_group_statisticsr[s O)< =bq ABMM'N*9M=jj>1)DGW**,-a0CCEI Q< " "1 % 0 0B 66 6rct|d\}}t|jd}|jj t j ddk(sJt|jt j ddk(|j}t|dk(sJt|jt j ddk(|j}t|dk(sJy)Nrrrrr[rr=r\) rqrhrKrrrr@rr=r)rrrSrTrs r+test_fragments_parquet_row_groups_predicatergs27qINE7G))+,Q/H  ( ( / /0@C0G HH H ##288F+;s+B+2>> $ ;< " #q (( (##288F+;s+B+2>> $ ;< " #q (( (rc t|d\}}t|jd}|j}t|j }|j |j |}|j||j|k(sJ|j|j|j|jdg} |j| } | j|j|dsJ|j|j|j|jdh} |j| |jddgtjddk } | j ddgk(sJt#| dk(sJ|j|j|j|jdh} t%j&t(d 5|j| dddy#1swYyxYw) Nrrr)rrrr rrYrzreferences row group 2r)rqrhrKrrrrrrrqrrrr=rr@rBrrrrV) rrrrrSrTr2rrrr(s r-test_fragments_parquet_row_groups_reconstructr~s37qINE7G))+,Q/H__Nx::<=%**=+>+>x+HI  " " +44X> ?? ?"// x**%::30L $ $\ 2F ==001DQ1GH II I"// x**%::30L $ $U\\D&>xx~!%%F   4. 00 0 v;!  "// x**%::30L z)A B. -...s G99Hc|\}}t|d|\}}t|jd}|jddg}|g5|jdk(sJ|j ddgk(sJ|j dj J ddd|j|} | jddgddgdk(sJ|jg}|jdk(sJ|j gk(sJ|j||j} | jdk(sJ| j|ddsJy#1swYxYw) NrrrrY row_group_idsrrrb) rqrhrKsubsetrrrrr#r=rr rrrrcrrrSrTsubfragr(s r!test_fragments_parquet_subset_idsrsk'B 27q>@BNE7G))+,Q/HooQFo3G b <%%***!!aV+++!!!$//;;;<  $ $W -F    Aq!f!= == =ooBo/G  ! !Q && &    ## #  $ $WW^^ $ DF ??a   ==r ## #<>Ec||\}}t|d|\}}t|jd}|jt j ddk\}|g5|j dk(sJt|jdk(sJ|jdjJ ddd|j|} | jgdgddk(sJ|jt j dd kD}|j dk(sJ|jgk(sJ|j||j } | jdk(sJ| j|ddsJ|jt j d d k(|j }|j d k(sJy#1swY xYw)Nrrrr rYr)rrrrrrbrr[r)rqrhrKrrr@rrrrrr#r=rrrs r$test_fragments_parquet_subset_filterrs'B 27q>@BNE7G))+,Q/Hoobhhtn12G b <%%***7%%&!+++!!!$//;;;<  $ $W -F     !C CC Coobhhtnq01G  ! !Q && &    ## #  $ $WW^^ $ DF ??a   ==r ## #oobhhv.#5gnnoMG  ! !Q && &'<?G  ! !Q && &oobhhudE:a?@G  ! !Q && & r.L M3-123 #N 3 -1233 3333s)J )JJJ'ct|jd}t|dk(st|dk(sJt|\}}t j |d}t|jd}t|d|j jt|dk(sJ|dz }tjj||t j |d }t|jd}t|d |j jt|dk(sJy) Nrzbzbrrz* data.featherfeatherz,r<QrcB|j|j|Srrrrs rrr<QWWQWWQZ-@rc tjdd}tj}tjd}tj j ddg}|||}t|tjsJ||_ tj||||}|j}tjdtjfdtjfdtj fd tjfd tj"tjtj d fdtj$fdtj fg} |j'| sJtj(j } t| tjsJy) NrTrrrr]r^r_r`rarZ)rcrrrrrdiscoverrMPartitioningFactorypartitioning_factoryrr!r?r=rBrCrDrarrr) rnr]rrrrrrr&expected_schemahive_partitioning_factorys rtest_partitioning_factoryr:spDA  ! ! #F))(3G33< /1G1G HH Hrinfer_dictionaryc|Srrrs rrr^rrcB|j|j|Srr rs rrr^r rcftjdd}tj}tjd}tj j ddg|}||||_tj||||}|j} |rctjtjtj} | jdj| k(sJ|j!j#j%} | j'dj)d} tj*dgd zd gd zzj-} | j/| sJ|j!j#tjddk( } | j'dj)d} | j1dd } | j/| sJy| jdjtjk(sJy) NrTrrrrrrrrr)rcrrrrrr rrr!r?rrrDr@rrrcombine_chunksrrrrcrru)rnrr]rrrrrrinferred_schema expected_typerrrWs r$test_partitioning_factory_dictionaryr[s DA  ! ! #F))(3G33<< %+;==#*+?#OG ))!674Goo'O bhhj"))+> $$U+00MAAA ))+::<e$**1-88UGaK5'A+56HHJ}}X&&& ))%E1I)Je$**1->>!Q'}}X&&&$$U+00BIIK???rc|Srrrs rrrrrcB|j|j|Srr rs rrrr rcrtj}tj}t j dt j fg}t jt jtdg|}t j dt jdfdt jfg}t j dt jfdt jfg}t j t|t|z}dD]z} |j| |j| dz5} tjj!| |5} | j#|| j%dddddd|tj&d d } tj(d } tj*j-|}|||| _tj0|| || }|j3}||k(sJ|j5j7d tj8dj;t j i }|ddj=dk(sJtj*j-ddgd}|||| _tj0|| || }t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*|d}|||| _"tj0|| || }t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*j-|d}|||| _tj0|| || }tGjHtjJd5|j3}dddtj&dd } tj(d} tjLj-|}|||| _tj0|| || }|j3}||k(sJ|j5j7d tj8dj;t j i }|ddj=dk(sJtjLj-d}|||| _tj0|| || }t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtjL|d| _"tj0|| || }t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtjLj-|d}|||| _tj0|| || }tGjHtjJd5|j3}dddy#1swYxYw#1swYkxYw#1swY xYw#1swYyxYw)Nr]rrbr%rrD)z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24 /0.featherrpTrdate_intr)r逎`rhri2021-05-04 00%3A00%3A00%24r=rj+Could not cast segments for partition fieldrr)'rcrdrr r?r=rBrrr/rrDrhrfrgrnew_filermcloserrrr rrr!rrr@ras_pyrKrrrrrr<r)r]rrnrr=rpartition_schemastring_partition_schema full_schemarpsinkwriterrrrrrrrrs r*test_partitioning_factory_segment_encodingr-s   !F    F YY +, -F HHbhhuRy)*6 :Eyy ",,s# $x&=>@ ii "))+ 299; 78:))DL40@+AABK  )$  & &y<'? @ Dv. &""5)     {d;H))+6G33<<=!#*+?#OG ))&(FGLGoo'O k )) ) ^^  & &BHHV$))"((*50 &F !9Q<   : -- -33<< V=5#*+?#OG ))&(FGLGW^^%3356I Q< , , 3 3 & 6 6 ( u $ & '' '++&:L"<?G))&(FGLGW^^%3356I Q< , , 3 3 & 6 6 ( u $ & '' '33<<&=:#*+?#OG ))&(FGLG rJ L,!//+, v6H))&1G..778!#*+?#OG ))&(FGLGoo'O k )) ) ^^  & &BHHV$))"((*50 &F !9Q<   : -- -..778!#*+?#OG ))&(FGLGW^^%3356I Q< , , 3 3 & 6 6 ( u $ & '' '..&:G))&(FGLGW^^%3356I Q< , , 3 3 & 6 6 ( u $ & '' '..77&8:#*+?#OG ))&(FGLG rJ L,!//+,,_    R,,N,,s<!^;"^^^ ,^-^ ^^ ^*-^6c|Srrrs rrrrrcB|j|j|Srr rs rrrr rcZ tj}tj}t j dt j fg}t jt jtdg|}t j dt jdfdt jfg}t j dt jfdt jfg}t j t|t|z}t j dt jdfdt jfg} t j dt jfdt jfg} d } |j| |j| d z5} tjj!| |5} | j#|| j%ddddddtj&d d }tj(d }tj*j-|}||||_tj0||||}|j3}||k(sJ|j5j7dtj8dj;t j i}|ddj=dk(sJtj*j-d}||||_tj0||||}t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*|d}||||_"tj0||||}t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*j-d}||||_tj0||||}t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*| d}||||_"tj0||||}t|j5j?}|dj@jCtj8ddk(tj8ddk(zsJtj*j-| d}||||_tj0||||}tGjHtjJd5|j3}dddy#1swYpxYw#1swYuxYw#1swYyxYw)Nr]rrbz test'; daterztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24rrTrrr)rr r<riz2021-05-04 00:00:00$rhr!r"r#r$r)&rcrdrr r?r=rBrrr/rrDrhrfrgrr%rmr&rrrr rrr!rrr@rr'rKrrrrrr<)r]rrnrr=rr(r)r*partition_schema_enstring_partition_schema_enrpr+r,rrrrrrrrs r;test_partitioning_factory_hive_segment_encoding_key_encodedr4s   !F    F YY +, -F HHbhhuRy)*6 :Eyy c* +.?-MNP ii  %(9299;'GHJ))DL40@+AABK)) r||C0 1 %ryy{ 3 56"$ ryy{ + %ryy{ 3 5"61I i  " "9|#; < VV__T6 * f   u % LLN  v6H))&1G..778!#*+?#OG ))&(FGLGoo'O k )) ) ^^  & &BHH]+00<0 &F !9Q<   : -- -..778 #*+?#OG ))&(FGLGW^^%3356I Q< , , 3 3 - $9 9 # $ + - .. .&&%9L"<?G))&(FGLGW^^%3356I Q< , , 3 3 - $9 9 # $ + - .. ...778!#*+?#OG ))&(FGLGW^^%3356I Q< , , 3 3 % &*C C - .% 7 9 :: :&&"V=L"<?G))&(FGLGW^^%3356I Q< , , 3 3 % &*C C - .% 7 9 :: :..77"V8=#*+?#OG ))&(FGLG rJ L,!//+,,u  v,,s0!Z7"ZZ-Z!Z ZZ!Z*c tjgdgdd}tjtjtj dtj tj dtj g}tjtj5tj||d|dddy#1swYyxYw)NryNrr7rrZr[r\rr) r?rrrr=r@rDrrr<rrrrs r/test_dictionary_partitioning_outer_nulls_raisesr:<s HH+/B CE ?? 288C-rxxRYY[/IJK MD r 'J DIJJJs 6CC"ctjgdgdd}tjt5t j ||ddddy#1swYyxYw)Nr6r8rZzbasename-{i}.arrow)r?rrrrrr)rrs rtest_positional_keywords_raisesr<DsJ HH+/B CE y !? )=>???s AA c0d}tjtjd|dztj|dzd}t j |d||dz dgt j |d|dz|dz dgt j|dz dg }|djdk(sJt j|dz dd g }|djd k(sJt j|dz dg }|djd k(sJy) Nirr)rr'onerr{twor)r'r) r?rrepeatnparangerlrp read_table num_chunks)r BATCH_SIZErs rtest_read_partition_keys_onlyrFJsJ HHyyJN+:>*, -E kz%1 ozA~%1 MM'E/E7 ;E < " "a '' ' MM'E/E73C DE < " "a '' ' MM'E/E7 ;E < " "a '' 'rc tj|}t|Dcgc]?}tjj tjj ||Ac}Scc}wr)oslistdiranyrqisdirr)basedirelementsels r _has_subdirsrOesEzz'"H 8LR bggll7B78L MMLsAA*c8tj|D]}tjj||}tjj |sCt j||}t |rt|||r|j|yr) rHrIrqrrK posixpathrO_do_list_all_dirsr0)rL path_so_farr(r true_nested norm_nesteds rrRrRjsn ZZ +ggll7A. 77== %#..a8KK(!+{FC k*+rc$g}t|d||S)Nr)rR)rLr(s r_list_all_dirsrWus Fgr6* MrcLtt|}|t|k(sJyr)rrW)rexpected_directoriesactual_directoriess r_check_dataset_directoriesr[{s&^G45 %9!: :: :rc vtjgdgdd}tjtjtj dtj tj dtj g}tj||d|t|gdy) Nr8r6rZr[r\rr)zx/xzy/yr) r?rrrr=r@rDrr[r9s r(test_dictionary_partitioning_inner_nullsr]sw HH?1AB CE ?? 288C-rxxRYY[/IJK MDUGEEw(; ?4 F34 ??6& 1D dB// 00 0 ??6 GD dB22 33 3 ??& )D dB22 33 3 z ". f-. z)G H: E&9: z "6 }566+,,44..::66sH3K +K%K&7K32K?+L  KK#&K03K<?L Lc tjtjdtjtjtj tjdtjtjtj g}tjj|}tjdd||}|jj|k(sJ|j}|jdjj|j dsJ|jdj#dgd zd gd zzk(sJ|jdjj|j dsJ|jdj#d gd zd gd zzk(sJy) Nrrrbrrrrrrrrrrr)r?r=r@rrrrDrrr rSrrrrrtypesrC)rnr=rrSrs r*test_directory_partitioning_dictionary_keyrhsuYY "-- 288:>?  bggi=>F  # # , ,F , ;DjjvDG    & && 00 0    E << % % , ,V\\!_ == = << * * ,a1#'0A AA A <<  # # * *6<<? ;; ; <<  ( ( *ugkUGaK.G GG Grc tjtjdtjtjtj tjdtjtjtj g}t jj|}t jdd||}|jj|k(sJ|j}ttdd}ttd d }|jdjj!|j"d sJ|jdj$D]3}|jj'}|j)||k(r3J|jdjj!|j"d sJ|jdj$D]3}|jj'}|j)||k(r3Jy) Nrrrbrrrfiir r)r?r=r@rrrrrr rSrrrhr/rrrrgchunksrCsort) rr=rrSryear_dictionarymonth_dictionaryrrs r%test_hive_partitioning_dictionary_keyros YY rwwy"((*=> "-- 288:>?F    ' 'v ' 6Djjy]G    & && 00 0    E5t,-OE!RL) <<  $ $ + +FLLO << <f%,,)!!++- ((() << % % , ,V\\!_ == =g&--*!!++- ))))*rc|,tjtddgdzdgdzzd}|dz }tj|||||fS) N rrrrrZrrr?rr/rlrm)base_dirrrrqs rrrsS }uQxrdQh".ABC n $DNN5$~> $;rc0tjtddgdzdgdzzd}|dz }tj||tjtdddgdzdgdzzd}|d z }tj||||f||ffS) NrqrrrrrZz test1.parquetrz test2.parquetrr)rstable1path1table2path2s r_create_directory_of_filesrys XXE!HB4!8rdQh+>? @F  &ENN65! XXE!RLtax2$(/BC DF  &ENN65! F eU^ ++rc||j|j|fD]K}|jj|jsJ|j |j|rKJyr)rrr=rr)rSrrpicklerrs r_check_datasetr|scw}}W]]7%;< =>~~$$U\\222&&w/66u===>rc t|tjsJ|t||gt|gfD]B}t j |fi|}t|tj sJt||||Dt|j5t j |jfi|}t|tj sJt||||dddy#1swYyxYwr) rMpathlibPathr_rrSrr|r parentname)rqrrr{rr{rSs r_check_dataset_from_pathrs dGLL )) )CIvD { 3@**T,V,'2#7#7888w~w?@ DKK @**TYY1&1'2#7#7888w~w?@@@s A C%%C.c<t|\}}t||||yrrrrrrrrqs rtest_open_dataset_single_filer s%g.KE4T5.-Hrc@t|d\}}t||||y)Nrrrrs rtest_deterministic_row_orderr s" &ga@KE4T5.-Hrcft|\}}tj|}t||||yr)ryr? concat_tablesr)rrrtablesrOrs rtest_open_dataset_directoryr s.*73IFA   V $EWe^]Krc t|\}\}}tj|}tj||gtjt |t |gg}||Dcgc]"}|j |j|$c}z }|D]M} | jj|jsJ|j| } | j|rMJycc}wr) ryr?rrrSr_rrr=rr) rrrrrvrxrdatasetsrrSr(s rtest_open_dataset_list_of_filesr& s7@FNUE   V $E E5>" CJE +,H =E89 M//23H$~~$$U\\222((1}}U###$ s,'C+ct|\}}t|}tj|}|jj |jsJtj|t j}|jj |jsJtjt5tj|t jdddy#1swYyxYw)Nr) rrrrSr=rrcrvrrrrd)rrrqfspathdataset1dataset2s r#test_open_dataset_filesystem_fspathr9 s&g.KE4 T "Fzz&!H ?? ! !%,, // /zz&R-?-?-ABH ?? ! !%,, // / y !< 6b&8&8&:;<< ?B INNw/G HB   $B   $B   $B >r>>>"b\7 &&}':':1'=>&&x0B6667rc|dz }|jt|\}}|Dcgc]}|j|}}t|5t j |}|j |}t|ttt|k(sJ dddt j |t|} |j | } t j |} |j | } t j |tj} |j | }| cxk(r | cxk(r|k(sJJycc}w#1swYxYw)Nz list-of-filesr) rryrr rrSrrsumrir rcrv)rrrprrzr{relative_pathsrrrrrrrt4s r!test_construct_from_list_of_filesru s,/)I OO.y9MFE6;<ammG,D FA 8/Ajj!Q G gr // / tG))+, - 22 2    E u:      !! ! w A %% %!!={{;;== ===rctjgd}|j}|jdk(sJ|jdk(sJy)Nrrr)rrSrrr1)emptyrs rtest_construct_empty_datasetr sD JJr% (E NN E >>Q      !! !rc .tjgdtjdtjfdtj fg}t jtd5|jdddy#1swYyxYw)Nrr[rr=zMultiple matches for .*a.* in r) rrSr?r=rBrDrrrr)rs r*test_construct_dataset_with_invalid_schemar sp JJr% bhhj biik;1 E z)I J s 1B  Bctj|tjdtj}tj|tjdtj}t j jt jtdgdgt j jt jtdgdg}tjtd 5tj||gdddd }tjt| 5tjgd dddd }tjt| 5tjddddd }tjt| 5tjfdtdDdddd}tjt| 5tjgdddd}tjt| 5tj|gdddd}tjt| 5tjdgdddd}tjt| 5tjdgdddy#1swYxYw#1swYxYw#1swYdxYw#1swY$xYw#1swYxYw#1swYxYw#1swYxYw#1swYyxYw)Nr/r/schemarr[rr\z"Expected.*FileSystemDatasetFactoryrzExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intrzbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc3"K|]}ywrr)rdrObatch1s rrez. s-qF- rYzEMust provide schema to construct in-memory dataset from an empty listzFItem has schema b: int64 which does not match expected schema a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:rzCExpected a list of tables or batches. The given list contains a int)rrrcrrr?rr$rr/rrrrSrInMemoryDataset)rchild1child2batch2rWrs @r)test_construct_from_invalid_sources_raiser s  ( ( !##%F  ( (  "##%F ^^ ' '%))<(=cU ' KF ^^ ' '%))<(=cU ' KF y(L M% FF#$%  y 1 9 .  y 1 4 /  y 1/ -E!H-./ P  z 2 2   y 1% FF#$% J  y 1  FA;  N  y 1( FA;'((a%%// %%  ((s`K-K:L (LL!L-L9 M-K7:LLL!L*-L69MMcLtjjtjt dgdg}tj j |j|g}tjj |g}tjgdtjgj}|tjgk(sJ|||g|g|fD]}tj|}|j||k(sJtt|jdk(sJt!|jj|k(sJtjj t|j#|k(rJy)Nrr[rrrr)r?rr$rr/RecordBatchReaderrkr=rErrSrrrrhrKr2r)rrtrr dataset_tablesourcerSs rtest_construct_in_memoryr sU NN & &r(;'%?@EIII Jrrc"tjjtjt dgdgtj j gd}fddffddffdjffD]\}}tjj |||}|jk(sJtjtj| 5|jdddy#1swYxYw) Nrr[rz#OneShotFragment was already scannedcZtjjjgSr)r?rrkr=rtsrrz$test_scan_iterator.. s#R))66 ug'rctSr)r rsrrz$test_scan_iterator.. s '.rc,fdtdDS)Nc3"K|]}ywrr)rdrOrts rrez7test_scan_iterator.... s.e.rr)r/rsrrz$test_scan_iterator.. s.U1X.rr=rr)r?rr$rr/rErkr=rr;rrrr<)rrrr=rrtrs @@rtest_scan_iteratorr s NN & &r(;' ?E * *DJJL 1XCaSk!  u{{1Q3*D>,ABC $$!qA37*aS1W4288:FHJ t rc t|\}}|jddg}t||||tjt |tj d}|jj|jsJt|5tjdtj d}|jj|jsJ dddtjt |d}|jj|jsJtjt |tj tjdtjfgd}|jjtjdtj}|jj|sJ|j}|jdtj dgd zd gd zzd gd zztj } |j| sJy#1swY{xYw) Nr[r\rr|rzdataset-partitioned/rrrYrrr)rrrrrSr_rr=rr r?rr0r@rrr) rrrrrqrrSrr(rWs r'test_open_dataset_partitioned_directoryr5 s27;J   sCj )ET5.-Hjj D v >@G >> !2!2 33 3 G 8**3*,//*HJ~~$$Z%6%67778 jjT8G >> !2!2 33 3jj D __ II *+ ,V=>Gll))"((62779*EFO >>  11 1    F""!qA37*aS1W42779EGH == "" ")88s AI##I-ct|\}}tjt|}|jj |jsJtjt|t j}|jj |jsJt|5tjdt j}dddjj |jsJtjt5tjt|t jdddy#1swYxYw#1swYyxYw)Nrr) rrrSr_r=rrcrvr rrr rd)rrrqrrdataset3s rtest_open_dataset_filesystemrZ s&g.KE4zz#d)$H ?? ! !%,, // /zz#d)0B0B0DEH ?? ! !%,, // / G O::n9K9K9MNO ?? ! !%,, // / ( )? 3t9););)=>?? OO ??s8*E&*3E2&E/2E;ct|\}}tjtd5t j |gddddy#1swYyxYw)Nz format 'blabla' is not supportedrblablar)rrrrrrS)rrOrqs r$test_open_dataset_unsupported_formatrq sD!'*GAt z)K L, D6(+,,,s A  Ac:t|\}}tj|}tj||g}t|tjsJ|j |j |}|j||j|k(sJyr)rrrSrMrrrr)rrrrOrqrSunionr]s rtest_open_union_datasetrx s!'*GAtjjG JJ) *E eR__ -- -!!-"5"5e"<=G  " "7 +~/F/Fu/M MM Mrctjd|d}tjtd5tj|gddddy#1swYyxYw)Nr/rr0zcannot pass any additionalrr)rrSrrr)rrs r.test_open_union_dataset_with_additional_kwargsr sH JJxM) LE z)E F. E79-...s AAc0tjt5tjdddddtjt j d5tjdddddy#1swYOxYw#1swYyxYw)Nzi-am-not-existing.arrowrrzcannot be relativerzfile:i-am-not-existing.arrow)rrr rrSr?r<rrr#test_open_dataset_non_existing_filer sx ( )< ,U;< r.B CA 1%@AA<<AAsBB B  Brrprrnrmpartition_keysrBCr)DEFr)rNrY)rNr)NrrYc tjtddgdzdgdzzd}d|dvxsd|dv}|d k(r|ry|d k(r(tjj d d g }d }d}nM|r"tj j |}n tj j  }d}|r|}nd}|dz } | j|\} } | D]Q} | D]J} | |j| xs|| xs|z }|jdtj||dz LStjt| |}fd}|jjtjd || djtjd || d}|jj!|sJy)NrqrrrrrZrrrppart1part2rz{0}/{1})rrnzpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rST)parentsrrc8r`t|trtjntj}tj tj|St|trtjStjSr)rMr_r?rDrr)r value_typers rrz/test_partition_discovery..expected_type sY (23(<"((*J==Z8 8",S#"6299; FBHHJ Fr)r?rr/rrr rrrrlrmrSr_r=r0r@r)rrrnrrrhas_nullfmt null_valuebasepath part_keys1 part_keys2rrrqrSrrs ` rtest_partition_discoveryr s" HH583%!)seai*?@ AE~a((EDN14E,EH{"x{"//88 g 1A9C  ..77!18L..77!183L# &J5J"H NN+J 9 9E 5.J0CDED JJtJ $ NN5$"7 8  99jjX\BGG ll)) - 1 67 f - 1 67 >>  11 1rctjtjddgdt dd}t j |jdgjd}t j|||d t j|d t jjd  }tj|d|djd}|jj|sJt!|j#d}|j|jj|ddsJ|j$}|j'|j)|}|jj|sJ|j'|j)|}|j|jj|ddsJ|j|jj+j|ddj+sJ|j$j|sJy)Nrrrrrrzrrr|rrrTrrrz)rzrrrb)r?rrAr@r/rrrr=rrSrr rcrrrhrKrrrr) rrrrrSrWrT part_exprrs r4test_dataset_partitioned_dictionary_type_reconstructr s HHbiic A6uRyI JE ??5<<188 HDUG$yIjj ((1141HGxxe eFm&E&E&GHH     $ $X .. .G))+,Q/H   GNN  3 : :8BQ< HH H--I""=#6#6w#?@H     % %h // /""=#6#6x#@AH   GNN  3 : :8BQ< HH H   GNN  3 = = ? F F!      ( ( / / :: :rc Lddlm}|d\}}}}d|d|d|d|d }|j|\}}|jdt j d gd i} |j d 5} tj| | ddd| |||||||fS#1swYxYw) Nr FileSystem connections3://:z5@mybucket/data.parquet?scheme=http&endpoint_override=z&allow_bucket_creation=Truemybucketr[rzmybucket/data.parquet) rrfrom_urirfr?rrgrlrm) s3_serverrr=r>r?r@r<rcrqrrrs rr;r; s%)2<)@&D$ J  |1ZL)"V1TF*E G ""3'HBBMM* HHc9% &E   6 7#3 uc"# $CtZ CC##s 1BB#c|\}}}}}}}}tj|d}|j|j|sJtj|d|}|j|j|sJy)Nrrrr)rrSrr)r;rrrqrcr<rOrSs rtest_open_dataset_from_uri_s3r s~(9$E4S!Q1jjY/G  " "7 + 2 25 99 9jjiB?G  " "7 + 2 25 99 9rc|\}}}}}}}}tjd}|j|}tj|d|} |j | j |sJy)Nrrr)rcrr7rrSrr) r;rrrqrr<rOrfinfosrSs r test_open_dataset_from_fileinfosr  sj0A,E4S!Q1z*H  % %h /Fjj jIG  " "7 + 2 25 99 9rc |\}}}}}}}}tjd}ddlm} m} |j ||dd|d|i} t j|d| } | jj|sJ| | | } t j|d| } | jj|sJy) Ns3fsr) FSSpecHandlerrw endpoint_urlzhttp://r)rsecret client_kwargsrr) r importorskiprrrw S3FileSystemrrSrr) r;rrqrOr=r>r?r@r rrwrcrSs r$test_open_dataset_from_uri_s3_fsspecr& s=N9E4AtT:z   v &D6    gdV1TF3   BjjiB?G     $ $U ++ + mB' (BjjiB?G     $ $U ++ +rc |ddlm}|d\}}}}d}d}d|d|d|d |d |d|d }|j|\} }|d k(sJ| j|t j d gdi} | j |5} tj| | dddtj|d} | jj| sJdj||||} gd}|D]O\}}| j|}tj||d} | jj| rOJtjtj d5| jd }tjd|dddd}d}| j|}tjt"5}tjd|dddt%j&|jd||k(sJd }| j|}tjt"5}tjd|dddt%|j&|jd||k(sJy#1swYxYw#1swYxYw#1swYxYw#1swY[xYw)Nrrr theirbucketnested/folder/data.parquetrr@rz?scheme=http&endpoint_override=z&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetr[rrr3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z /data.parquet)ztheirbucket/nested/folder data.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr)rz/nested/folder/data.parquet)rrr0zMissing bucket namerz'/theirbucket/nested/folder/data.parquetrzThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existrNotFoundFile)rrrrfr?rrgrlrmrrSrrrrrr<rr_r')rrr=r>r?r@bucketrqr<rcrrrrStemplateriprefixrexcs r-test_open_dataset_from_s3_with_filesystem_urir"@ s&)2<)@&D$ J F 'D  |1ZL&4&9"V1TF*E G ""3'HB ; ;; ;BMM& HHc9% &E   t $# uc"#jjY/G     $ $U ++ + >DD  D$   E0 oof%**Tc)D!((///0 r.C DNooc" <MN  ! &D //$ C z "3c >c23 syy>U\\*dC@ @@ @ 3D //$ C z "3c >c23 syy>U\\&$< << <]##4NN33 33s0J *)J J&J2 JJ#&J/2J;ct|\}}tjd}|jd}t j ||}|j j|j sJy)Nfsspecfiler)rrrrrrSr=r)rrrqr$r|rSs rtest_open_dataset_from_fsspecr& s\%g.KE4   *F'Gjj'2G >>  .. .rcJtjd}tjdgdi}|dz }t j |||j d}|j|djdsJtj}tjtj|}|j||}|j|j sJ|j#||}|j$j|j sJy)Nr$r[rrr%r)rrr?rrlrmrlsendswithrrrcrwrr!rr=rrs) rr$rrq fsspec_fsrrr=rTs rtest_file_format_inspect_fsspecr+ s  *F HHc9% &E ^ #DNN5$!!&)I <<  # , ,^ << < ! ! #F!1!1)!<=J ^^D* -F == && &##D*5H  # # * *5<< 88 8rc4|dz }tjddgdztdd}tj|j dgj d }tj|||d tjtj dtjd fgd }tj|d | }tjdtjdkD}|j||}|jdjgdk(sJddl}tjd|j dddkD}|j||}|jdjgdk(sJy)Ntest_partition_timestamps 2012-01-01z 2012-01-02rr)datesrr/rr|rrrrrr)rrYrr0rqrir)r?rr/rrrr=rrrSr@r3 TimestamprrrCr+)rrrqrrrSr'r+s rtest_filter_timestampr1 s\ 0 0D HH -1Bi E ??5<< 299& IDUDtIF ??299w S0A&B%CD"( *DjjidCG!BLL$>>I  # #GI # >E <<  ' ' )_ << <!$5H$5$5dAq$AAI  # #GI # >E <<  ' ' )_ << >Q  rc tjgdtdDcgc]}tjddd|c}tddDcgc]}tjdd|c}d}t ||\}}t j t|}tjt jdtjdd g}|j|| jd k(sJtjt jd d k\}|j|| jd k(sJtjt jd t jd}|j|d|i} | dj!gdk(sJycc}wcc}w)N)r[r\Nr[rrirrrrr[r\rrYrrrrr)r)r?rr/r+rrrSr_ris_inr@rrrhour days_betweenrC) rrr:rrOrqrSr3rr(s rtest_filter_compute_expressionr; sg HH '8=a A1haA. A5:1a[ Aha+ A E "'51GAtjjT#Ghhrxx}bhhSz&:;G  " "77 " ; D D II Iggbhhsm$)G  " "77 " ; D D II I ??288C="((3- 8D  $ $Wvtn $ EF &> # # % 88 8B As F< G c@tj|tjdtj}tj |g}t |jdk(sJtd|jDsJ|jdj|jsJ|jj|jsJt|jtjsJy)Nr/rrc3PK|]}t|tj ywr)rMr?r)rdrs rrez%test_dataset_union.. sKAz!RYY'Ks$&r)rrrcrrUnionDatasetFactoryrr"rrr!rMrr)rrrs rtest_dataset_unionr? s  ' 'rx0##% E$$eW-G w&&( )Q .. . K1H1H1JK KK K  " " $Q ' . .u}} ?? ? ??  # #EMMO 44 4 gnn& 33 3rc h tjd|d}tjd|dddg}tjd|dd }|j|jcxk7r|jk7sJJtj|||g}t|tjsJd }t j t| 5tj||g| dddtjd tjfdtjfdtjfdtjfdtjfdtjfdtjfg}|jj|sJ|jjj|sJtj||g}tjd tjfdtjfdtjfdtjfdtjfdtjfg}|jj|sJ|jjj|sJtjdtjfdtjfd tjfg}tj||g|}|jjj|sJtjdtjfdtjfdtjfg}tj||g|}|jjj|sJtj t#ddgdzdgdzzdggd}t%||\} } tj| } t j tj&d 5tj|| gdddy#1swYxYw#1swYyxYw)Nr/rr0rweekr(rrr/hiverz$cannot pass any additional argumentsrrr%r&r'rrrbr5rqrrrr abcdefghj)r%r'r&rrzUnable to merge)rrSr=rMrrrrr?rArBrCrDrrrrr/rArrowTypeError) rrrrchild3 assembledmsgrrrOrqchild4s r&test_union_dataset_from_other_datasetsrJ s ZZ]9 MF ZZ mI&,g%68F ZZM)%+-F ==FMM :V]] :: :: : FFF34I i 11 1 0C z -? FF# >?ii  "((* "**, "))+   "((*!O    " "? 33 3     & & - -o >> > FF+,Iii  "((* "**, "))+  "((* !O    " "? 33 3     & & - -o >> >ii "((* "))+ !O  FF+ODI     & & - -o >> >ii "((* "))+ BIIK !O  FF+ODI     & & - -o >> > HHeAhqB4!8 3[A5 7E!'7GAt ZZ F r((0A B% FF#$%%c??b%%s R:R(R%(R1cd}tjt|5tjgd|dddy#1swYyxYw)Nz8points to a directory, but only file paths are supportedr)r/rrCr)rrIsADirectoryErrorrrS)rrHs r4test_dataset_from_a_list_of_local_directories_raisesrMC s> DC ( 4M 1mLMMMs AA c tjtjd|tjd|tjd|g}tjdtjfdtj fdtj fdtjfg}|jj|sJtjtjd|tjd|tjd|d g}tjdtjfdtj fdtj fdtjfd tjfd tjfg}|jj|sJy) Nr/rrrCr%r&r'r(r)rrrr) rrSr?r=rArBrCrDrr)rrSrs r&test_union_dataset_filesystem_datasetsrOI sjjj 8 6 97 7}5G ii  "((* "**, "))+ !O >>  11 1jj 8 6 97 7}6JG ii  "((* "**, "))+  "((* !O >>  11 1rctjgdgdd}tj|dz dfd }d}|}||||j|j}|}|||tjddg}tjgdgdgd d g }|||tjdg}tjgdgd g }|||tjdd g}tjgdtj gd dgd dg }|||tjddg}t jtdz |}tj|d jd|d gd d g }|||tjd tjtjfdg}t jtdz |}|jj|sJtjtd5j!|dddy#1swYyxYw)NrrrrrZrc tjtdz |}||jj |sJ|jj |sJj |}|j |sJy)Nrrb)rrSr_r=rr)r=rWrrSr(rrs rr|z-test_specified_schema.._check_datasetn su**S>!9:6J  &>>((9 99>>((0 00((1}}X&&&r)r)r\rC)r[rBr\r[r)rrNNNrrr)r[rrbz#Unsupported cast from int64 to listrr)r?rrlrmr=rrrSr_rlist_rrrrrr)rrrr|r=rWrSs`` rtest_specified_schemarUi s HH9<8 9ENN5'N23'FH68U\\B\\FH68$YY(.9 :Fxxy1#sDH68$YY' (Fxx C51H68$YY7 8Fxx"47CE"Cj*H68$YY(89 :FjjW~56vFGxxs1s%"Cj*H68$YYbhhrxxz235EF GFjjW~56vFG >>  (( ( *B D)()))s I##I,c|dz }tjdgdi}tj||tjdtj fg}t jt|gdz|}|jj|sJ|j|}tjtd5|j}|jdddy#1swYyxYw)Nrr[rdrbz#Unsupported cast from int64 to nullr)r?rrlrmr=rrrSr_rrrrrr?r@)rrfnrr=rSrrs rtest_incompatible_schema_hangrY s > !B HHc9% &ENN5" YYbggi() *Fjj#b'S8G >>  (( ($$W-G *B D""$s !C66C?ctjtjgddtjgddd}t|dz }tj|5}tj ||j }|j|jd|jdddtj|tj }|j|}|j|sJt|d D]=}tj|| }|j|}|j|r=Jy#1swYxYw) NrrrrQrCrZz test.arrowrr)rarrow)r?rrr_ output_streamRecordBatchFileWriterr= write_batchrr&rrSr rrrV) rrrrqr+r,rSr( format_strs rtest_ipc_formatr` s) HH288IF;88LyAC DE w% &D  $ 4))$ =5++-a01  jjb&6&6&89G  $ $W -F ==  /8&$ **T*5((1}}U###$s *AE  E)c ddlm}tjtjgddtjgddd}t |d z }|j ||tj|tj }t|j}t|dtjsJ|j|}|jd |j!|sJt#|tj|d }|j|}|jd |j!|sJ|j|dg}|jd |j!|j%dgsJ|j|dtj&ddzi}|jd |j!tjdtjgddisJ|j)|dk(sJ|j)|tj&ddkDdk(sJy)NrorcrrrrQrCrZtest.orcrT)fullrcr\r)b2r)rrg333333?rYr[rr)rrcr?rrr_rmrrSrrhrKrM FileFragmentrvalidaterrVrr@r)rrrcrrqrSrr(s rtest_orc_formatri s HH288IF;88LyAC DE w# $DOOE4 jjb&6&6&89GW**,-I ilBOO 44 4  $ $W -F OOO ==  /8jje,G  $ $W -F OOO ==    $ $Wse $ ?@    $ $W - 22 2  $ $WRXXc]Q5F $ G1 LL Lrcddlm}tjtjgddtjgddd}t |d z }|j ||tj|d }t|j|}t|d k(sJ|djd k(sJ|dj|jdsJy)NrrbrrrrQrCrZrdrcrrrY)rrcr?rrr_rmrrSrhrrrr)rrrcrrqrSr(s rtest_orc_scan_optionsrk s HH288IF;88LyAC DE w# $DOOE4 jje,G .++G4 5F v;!   !9   "" " !9  E,,.q1 22 2rc ddlm}y#t$rKtjt d5t jdddddYy#1swYYyxYwwxYw)Nrrz'not built with support for the ORC filerrrcr)rrr rrrrrSrms rtest_orc_format_not_supportedrn sP*1 * ]] G  * JJs5 ) * * **s& $AAAA AAc xtjtd5tjt j dtdidddddtj}tjtd5|jdddy#1swYRxYw#1swYyxYw)Nz9Writing datasets not yet implemented for this file formatrr[rrcz/tmp)rrs) rrrrrr?rr/rmake_write_options)ofs r+test_orc_writer_not_implemented_for_datasetrr s I    HHc59% &uv     B I          s7B$ B0$B-0B9ctjtjgddtjgddd}t|dz }|j j |d t j|t j }|j|}|j|sJt|t j|d }|j|}|j|sJy) NrrBrrQrCrZtest.csvFr&rr) r?rrr_rto_csvrrSrrrrV)rrrrqrSr(s rtest_csv_formatrw& s HH288IG<88LyAC DE w# $D OOT/jjb&6&6&89G  $ $W -F ==  /8jje,G  $ $W -F ==  r compression)bz2gziplz4zstdctjj|stj|dt j t jgddt jgddd}tj}|dk7r|nd }t|d |z }|j|| 5}|jjd }|j|jddddt!j"|t!j$} |j'| } | j)|sJy#1swYWxYw)Nz support is not builtrrBrrQrCrZrzgzz test.csv.rxFrurIr)rCodec is_availablerskipr?rrrcrvr_rgrrvwriterXrrSrrr) rrxrrrsuffixrqr+csv_strrSr(s rtest_csv_format_compressedr9 s$ == % %k 2 {m#89: HH288IG<88LyAC DE##%J'61[tF w9VH-- .D  & &t & E,//#***7 7>>'*+ ,jjb&6&6&89G  $ $W -F ==  ,,s ?AEEc t|dz }t|d5}|jddddtj|d}|j |}|j tjdtjgdisJtj|tjtjjd  }|j |}|j tjd tjd d gisJtj|tjtjjd g }|j |}|j tjd tjgdisJy#1swYxYw)Nrtwzskipped col0 foo bar rrskipped)col0rrr)rrrrrrB)rrrr) r_rLrrrSrrr?rrrrr)rrrqr+rSr(s rtest_csv_format_optionsrT s| w# $D dC0D ./0jje,G  $ $W -F == )RXX&<=>? AA Ajjb&6&6VV''!'4'67G  $ $W -F ==6288UEN+C"DE FF Fjjb&6&6VV''eW'='?@G  $ $W -F == %"CDEF HH H00s G  Gc Xt|dz }t|d5}|jddddtj|tj t jjd}|j|}gd}|j|k(sJ|jt jt jd gt jd gt jdgt jd gdsJy#1swYxYw) Nrtrz 1,a,true,1 T)autogenerate_column_namesrr)f0r rrrr[)r_rLrrrSrr?rrrrBrrr)rrrqr+rSr(expected_column_namess r(test_csv_format_options_generate_columnsri s w# $D dC#D >"#jjb&6&6VV''$'G'IJG  $ $W -F4   "7 77 7 ==1#)+3%)+4&)9)+1##89 :: :##s D  D)c t|dz }t|d5}|jddddtj|d}t j jdgd}tj|tj jd  }|j|| }|jtjd tjgdisJtj|}tj||}|j|}|jtjd tjgdisJtj}|j|| }|jtjd tjgdisJy#1swYxYw)Nrtrzcol0 foo spam MYNULL rrMYNULLT) null_valuesrrr)rr)fragment_scan_optionsr)rspamNr)rrr)r_rLrrrSrrrrr?rrrrrr) rrrqr+rSrrr(r\s rtest_csv_fragment_optionsry s w# $D dC0D ./0jje,Gkk00hZEI1KO'''VV''5'9;G $ $WG $ LF ==62884I+J"KL MM M!!/BJjjj1G  $ $W -F ==62884I+J"KL MM M'')G  $ $WG $ LF == &"((#<=>? AA A%00s GGctjtjgddtjgddd}t|dz }|j j d d d j d d }t|d5}|j|dddtj|tj}|j|}|j|sJt|tj|d}|j|}|j|sJy#1swYxYw)NrrBrrQrCrZ test.jsonrecordsorientrr},{} {rrr)r?rrr_rto_jsonreplacerLrrrSrrrrVrrrrqrrrrSr(s rtest_json_formatr s HH288IG<88LyAC DE w$ %D //  # #9 # 5a ; C CE6 RC dCA  jjb&7&7&9:G  $ $W -F ==  /8jjf-G  $ $W -F ==  s EE c Btjtjgddtjgddd}t|dz }|j j d d d j d d }t|d5}|j|dddtjtd5tj|tjtjj!d}dddtj|tjtjj!d}|j#|}|j%|sJy#1swYxYw#1swYxYwNrrBrrQrCrZrrrrrrrrztry to increase block sizerrrrr@)r?rrr_rrrrLrrrrrrSrrrrrrs rtest_json_format_optionsr sL HH288IG<88LyAC DE w$ %D //  # #9 # 5a ; C CE6 RC dCA   z9 ;=**T"*;*;,,,:+<== jjb&7&7WW((B(7'9:G  $ $W -F ==  ==sF  A F FFctjtjgddtjgddd}t|dz }|j j d d d j d d }t|d5}|j|dddtjtd5tjtjjd}tj |tj"|}dddtjtjjd}tj |tj"|}|j%|}|j'|sJy#1swYxYw#1swYxYwr)r?rrr_rrrrLrrrrrrrrrSrrr) rrrrqrrrrrSr(s rtest_json_fragment_optionsr sl HH288IG<88LyAC DE w$ %D //  # #9 # 5a ; C CE6 RC dCA   z9 ;F,,,,,:<**T"*;*;G*DE F ((WW((B(79Gjjb&7&7&@AG  $ $W -F ==  FFsF3 AG3F=G cht|dz }dD]\}}t|d5}|j|dddtjdtj fdtj fg}tj dgdgd| }tjj| }tj| } tj|| } | jj|sJ| jj|rJy#1swYxYw) Nrt))latin-1sa,b un,lphant)utf16s a,b un,lphantwbr[r\un éléphantrZrbencodingrr)r_rLrr?r=rDrrrrrrSrr) rrrqr input_rowsr+rexpected_tablerrdataset_transcodeds r test_encodingr s w# $D!D* $  # JJz " #))c299;%7#ryy{9K$LM)5#8@OQvv))8)< &&LA ZZ[A!((//@@@!**,33NCCC%D  # #s D((D1 c,t|dz }t|d5}|jddddtjdtj fdtj fg}tj dgdgd| }tj|d | }tjtjjd 5|j|dddtjj!d}tj"|}tj||} | jj%|sJ| jj%|sJy#1swYRxYw#1swYxYw)Nrtrs,b un,lphantér\rr)rr\rbrrz invalid UTF8rrrrr)r_rLrr?r=rDrrrSrrrr9r<rrrrr) rrrqr+rrrSrrrs rtest_column_names_encodingr sJ w# $D dD 1T /01ii$ !4sBIIK6H IJOXXdV%1N46H   K/0M m- % 1UsB- c|dz }t|\}}tj|}|jj |jsJt |j dk(sJ|j}|jdk(sJy)Nrmr() rrparquet_datasetr=rrrrr)rrrrrSr(s rtest_parquet_dataset_factoryrns00I9)DM5  /G >>  .. . w}}  "" "    F ??b  rwin32z'Results in FileNotFoundError on Windows)reasonctjd}|dz }t|\}}|jd}t j t j |}tj||}|jj|jsJt|jdk(sJ|j}|jdk(sJy)Nr$rmr%rrr)rrrrrcrwrrrr=rrrrr) rr$rrrr*rrSr(s r#test_parquet_dataset_factory_fsspecrzs   *F00I9)DM5!!&)I!1!1)!<=J  :FG >>  .. . w}}  "" "    F ??b  rc|dz }tjdgdztjj dd}g}t j |t||t|dz }t j|j||tj|}|jj|jsJ|j}|jdk(sJy)Nrmrrrrr)r?rrAr{randnrlrpr_rr=rrrrr)rrrrrrSr(s r&test_parquet_dataset_factory_roundtriprs00I HHQC"HBIIOOB,?@ AE s9~2D K/0M m-  /G >>  .. .    F ??b  rc g}tdD]l}tjdtt|dz|dzdzi}||dz }t j ||||dj |dnt|dz }t jj||tj|}|j}|jdj}|ttdd k(sJy) Nrr rrXrrrrrW)r/r?rrhrlrm set_file_pathr_rr=rrrrrC) r metadatasr:r table_pathrrS scanned_table scanned_cols r"test_parquet_dataset_factory_orderrsI2Y4 4adQqS"H-. /1!H~-  ujYG" ##qcN3 4 +-.MellM9=  /G$$&M&&t,668K $uQ}- -- -rc|dz }t|\}}t|jddjt j |}|j j|j sJt|jdk(sJtjt5|jdddy#1swYyxYw)Ntest_parquet_dataset_invalid *.parquetrr)rrhglobunlinkrrr=rrrrrr r)rrrrrSs r$test_parquet_dataset_factory_invalidrs88I9)DM5 $%a(//1  /G >>  .. . w}}  "" " ( )s .CCctt|jd}tj|dj j }g}|D][}tj|j}|jt|j||j|]|dz }tj||||S)Nrrrr) rhr$rglobrl ParquetFiler=to_arrow_schemarrr_rr0r)r parquet_pathsr=rrqrrs r_create_metadata_filers  <=>M ^^M!, - 4 4 D D FF,>>$'00s4#3#3I#>?@!!(+,  +M 2D rc tjtjtdtjtj j dtjt jddgdggd}|jddi}tj|t|d g t||fS) Nrxr[r\rrrrr'rr{) r?rrr/rAr{rr@rGrlrpr_r)rrs r#_create_parquet_dataset_partitionedrs HH rRXXbiioob&9: C:r*+-# E  ) )5'*: ;Es9~vhG  +U 22rc|dz }t|\}}tjd}tj||}|jj |jsJt |jdk(sJ|j}|jdk(sJ|jjdjd }|j}tjj||y) N(test_parquet_dataset_factory_partitionedrr|rrrxr Tdrop)rrrrr=rrrrrr sort_values reset_indexr3testingassert_frame_equal)rrrrrrSr(rWs rrrsDDI>yIM5??&1L  \JG >>  .. . w}}  "" "    F ??b      + +D 1 = =4 = HF HJJ!!&(3rcF|dz }t|\}}tj|d}|jj |jsJd|jj vsJt |j}d|djj vsJy)N%test_parquet_dataset_factory_metadatarrkeyr) rrrr=rrrhrKrs)rrrrrSrs rrrsAAI>yIM5  VDG >>  .. . W^^,, ,, ,W**,-I Yq\11:: :: :rc|\}}|dz }t|\}}||g5tj|tjd|}ddd|g5t j }ddd|g5t j tj ddkDddd|g5djtj ddkDddd|g5dj} | djdddy#1swYxYw#1swYxYw#1swYxYw#1swYexYw#1swYyxYw)N#test_parquet_dataset_lazy_filteringrr|)rrr r) rrrrrhrKr@rr) rrrcrrrrOrSr rg_fragmentss rrrsb'B ??I5i@M1 }o &$$ 7 b 2..01 2 b 9 W " "288D>B#6 789 b =! ''(;<= b 3 |668 Q00233'2299==33s;-D5E 1E  +E'E%5D>E  EE"%E.cHtjdgdi}|dz }|j|tj|}|j |j }|j |dgj }d|jvsJ|j|dsJy)Nr[rrr)spandasTr) r3r4rrrSrr=rr)rrrHrqrSr=r>s rtest_dataset_schema_metadatar:s sI& 'B ^ #DMM$jjG  $ $W - 4 4F%..w.FMM  '' ' ==)$= ?? ?rctjdtjgddi}tj|t |dz tj dtjfg}tj|dz d|}|j|tjddkD }|dj|djd jdsJt|j!d }|j|tjddkD| }|dj|djd jdsJy) NrzrrrrrrrrrBrr)r?rrrlrmr_r=rBrrSrr@rrrurhrK)rrrr=rSfilteredrTs rtest_filter_mismatching_schemar Ls< HHeRXXlAB CENN5#g678YY +, -Fjj. 6CG &&wrxx7J&KH E? ! !%,"3"3G"<"B"B1"E FF FG))+,Q/H&&%1,V'=H E? ! !%,"3"3G"<"B"B1"E FF Frctjdjtt dd}t |dz }t j||dgtj|d}|j|}|j|dg }|jdj|jdsJy) Nza a b brrr*rr{rrr)) r?rr%rhr/r_rlrprrSrrr)rrrrqrSall_cols part_onlys r+test_dataset_project_only_partition_columnsrbs HHioo/U1XG HE w' (DtVH=jjF3G&&w/H''&'BI ??6 " ) ))*:*:6*B CC Crc tjdtjgddi}|dz }|j |dt j |dtjdtjfg }tjdtjgdtji}|j|j|sJy) NrzrSobjectdtypez(test_dataset_project_null_column.parquetrrrr) r3r4rArrrrSr?r=rBrrr)rrrHrrSrWs r test_dataset_project_null_columnrrs ubhh'9JK LB<*? @BGxx);RXXZ HIJH  " "7 + 2 28 << !9:jj>1)DG  $ $WXXc]HHSM&&wU&;((3-3&7 $F xxHHYW5&H == "" " y(@ A=#s<===s D::Ect|\}}tj|}t|jtj sJt |\}}tj|}t|jtj sJtj|d}|j}|Jt|tjsJ|jtjdtjfgk(sJt|jdk(sJ|jdtjgdtjk(sJtjtjdtjfgd}t|tjsJt|jdk(sJtd|jDsJtj||}|j}t|tjsJ|jtjdtjfgk(sJt|jdk(sJtd |jDsJtj|d}tjt!|j#|j|j$|j& }|jJ|d z }t)|\}}tj*|d}|j}|Jt|tjsJ|jtjdtj,fgk(sJt|jdk(sJt/|jdj1d d hk(sJy)Nrrrrr)rrrr|c3$K|]}|du ywrrrcs rrez6test_dataset_preserved_partitioning..4QqDy4rgc3$K|]}|du ywrrrcs rrez6test_dataset_preserved_partitioning..rrgrzdata-partitioned-metadatar[r\)rrrSrMrrrrr=r?rrrwrrrrhrKrrrrrDrrC) rrOrqrSrrrrrs r#test_dataset_preserved_partitioningr!sG "'*GAtjjG g**B,D,D EE E37;JjjG g**B,D,D EE EjjF3G   D    dB// 00 0 ;;"))fbhhj%9$:; ;; ; t  !Q && &   Q 288Irxxz#B BB B ??299vrxxz&:%;> >rc tjtjdtjtjdtjtj tj g}tjgdttdd|}t|dz }tj||dgtj|dz }|jdj|jdjk(sJ|jdj!|jdsJy) Nrzr)NNr[r[rrrbr*r{)r?r=r@rBrrrDrrhr/r_rlrprCrrCr)rr=rrq actual_tables r+test_write_to_dataset_given_null_just_worksr$sYY  # rxxz299;?@F HH4!%(^-5; =E w' (DtVH===>!9:L   v & 0 0  f  ' ' ) ** *   u % , ,U\\%-@ AA Arcddlm}|j||j|dfg}|j||S)Nr ascending)r)pyarrow.computecompute sort_indices SortOptionsr)tabsort_colrsorted_indicess r _sort_tabler.sA $R__ ^R^^h %<$=>@N 2773 ''rc`|xs|}tj||d|dt|jd}t |t |k(sJtj |d|}t |j|jt |j|sJy)Nr[Frrr*r) rrrhrrrSr.rr)rSrsexpected_filesr, base_dir_pathr file_pathsrs r_check_dataset_roundtripr5s!-XMWhw".ECm))#./J z?c.1 11 1zzgLBH x((*H 5 < <G$$&1 33 3rc|dz }|jt|}tj|}|dz }|dz g}t |t ||d||dz }|dz g}t |||d||dz }|jt |}tj|}|dz }|dz g}t |t ||d|y)Nrzsingle-file-targetrr[zsingle-file-target2rzsingle-directory-target)rrrrSr5r_ry)rrprOrStargetr2s rtest_write_datasetr8s-'I OOI&Ajj#G+ +F~-.NWc&k>3O, ,F~-.NWfnc6J,,I OO"9-Ajj#G 0 0F~-.NWc&k>3OrcR|dz }t|}tjd}tj||}|dz }|dz |dz dz |dz |dz dz g}tjt j d t j fgd}t|t||d |||d z }|d z |d z dz |d z |d z dz g}tjt j d t j fg}t|t||d ||y)N partitionedrr|rzpartitioned-hive-targetpart=arpart=brr partitioned-dir-targetr[r\) rrrrSr?r=rDr5r_)rrprOrrSr7expected_pathsrs rtest_write_dataset_partitionedr?*s6-'I+I6A??&1Ljj>G0 0F6H,~=6H,~=N// FBIIK()*6;VndF(* / /F fsl^3 fsl^3N// FBIIK()*,VndF(*rctjgdgdd}tj||ddgtj|ddg}|j }|Dchc]9}t tj|j|j;}}|hdk(sJ|j}|j|sJycc}w)Nr6r8rZrr\r>rr7r r?rrrrSrr_r~rrrrrrrrrrpartitioning_dirsrs r#test_write_dataset_with_field_namesrDKs HH+/B CEUGE#&%) 75uEI OOEBG=>GLLO ' ' 0 7 78  // /((*O  ! !% (( (  >C ctjgdgdd}tj||ddgdtj|dd}|j }|Dchc]9}t tj|j|j;}}|hd k(sJ|j}|j|sJycc}w) Nr6r8rZrr\r)rrpartitioning_flavorr>b=xb=yb=zrArBs r(test_write_dataset_with_field_names_hiverK\s HH+/B CEUGE#&%VE 75vFI OOEBG=>GLLO ' ' 0 7 78  5 55 5((*O  ! !% (( ( rEctjgdgdgdd}tj||ddgtj|ddg}t j 5}tj|jddg |ddgtj|ddg}|j}t|j|jd jk(sJ dddy#1swYyxYw) Nr6r8rrJrr\rrr)r[) r?rrrrSrrrrrr# drop_columnsrrrStempdir2rrs rtest_write_dataset_with_scannerrPms HH+/$& 'EUGE#&%)jjcUCG  $ $ &=( #s<!%se EJJxSEJ #,,.O--/++C0::<= == ===s 'BDD c  tj G fddt}tj|tj  t jt jdt jg}t jt jttdg| d d}d d  fd }tjj!||d tj" fd  }|j% t'j&fd}d}d}|dkr/ |kDr |k(rd }n" }t'j(d|dkr/|sJ d j+|j-y#d j+|j-wxYw)NceZdZfdZy)6test_write_dataset_with_backpressure..GatingFsc^j|jj||S)Nr)waitrrg)rrqr consumer_gates rrgzItest_write_dataset_with_backpressure..GatingFs.open_output_streams)    88..th.G GrN)rrrrg)rWsrGatingFsrSs  HrrXr9rrbr Tc3hKkr(sytjddz kr'yyw)Ng{Gz?r)rsleep)rt batches_readend keep_goingsrcounting_generatorz@test_write_dataset_with_backpressure..counting_generators:S  JJt  A LK S s,22rcHtjtdS)Nrr)rrr_) gating_fsrrsrrz6test_write_dataset_with_backpressure..s r'' S\) Kr)r7c2tjz Sr)r)startsrdurationz6test_write_dataset_with_backpressure..durations99;& &rFrr) threadingEventrrcrwrvr?r=r@rrjrrhr/rr;rkThreadrdrr\rr)rrXr=min_backpressurer` write_threadre last_valuebackpressure_probably_hitrtr]rWr^rbr_rrds` @@@@@@@@r$test_write_dataset_with_backpressurermsOO%M H<H ););)= >?I YY45 6F OORXXd5+;&<=>v NEL CJjj%%V&?G##KLL!  ' $)!j2o..:-04-) JJsO j2o)((   s8AF6F66$Gctjgdgdd}tj||ddgtj|ddg}t j 5}tj||ddgtj|ddg}|j}t|j|jk(sJ dddy#1swYyxYw)Nr8rr\rrr\r) r?rrrrSrrrrr#rNs rtest_write_dataset_with_datasetrps HH?; .compare_tables_ignoring_ordersZlln((-99t9Dlln((-99t9Dzz#rrrrJrr\ezc=2z foo.arrowoverwrite_or_ignore)rrexisting_data_behavior)rvrr[r\r)rrrrYrrdelete_matching)rr[r\rr)r?rrrr=r@rBrrrr<rrrrSrexists) rrprrru extra_table extra_file overwrittenreadbacks r test_write_dataset_existing_datars$I HH?; @Kzz'%'355=XZ !(K8     UIL!:KM((!5LIJKzz'%'355=XZ !(K8  "" ""9BBs HH)cft|Dcgc]}tj||c}Scc}wr)r/r{randint)r8rrrOs r_generate_random_int_arrayrs%.3Dk :FNN3 $ :: :s.cg}g}t|D]<}|jt|d||jdt|z>t j ||}|S)Nr)r8rrrr9r)r/r0rr_r?rj)num_of_columnsnum_of_recordsr9rBr:rjs r_generate_data_and_columnsrsn DL > "* .N343AC D C#a&L) * ??LAL rcttttj|j d|S)Nz**/*.)rrhr~rrbase_directoryrs r_get_num_of_files_generatedrs- tGLL055k]6KLM NNrc |dz }d d}d}d}t||}tj||d |tj|}| zdz}t ||k(sJg}t |D]V\} } |t| z } tj| d} |j| jjd X|t |k(sJ|t|k(sJt fd |DsJy) Nrrr#r)rmax_rows_per_filemax_rows_per_grouprrrc3(K|] }|k ywrr)rd file_rowcountrs rrez7test_write_dataset_max_rows_per_file..Ds<  11L\9Y'8(:<::i(L),==A |  3 33 3 |,C 6S[(**VI6%%g&6&6&8&>&>q&ABC #&<"= == = S!78 88 8 <$:< << L..K,(:%'::k*LL0s6{***VI6  """$ 0E    / 0 0 B8 ## #rc|dz }d}d}ddg}tjgdgdg|}tjgd gd g|}tjgd gd g|}tjgd gdg|}tjj||||g} t j tj ||tjfgd} |dz } t j| | | |d} | | |||\} }| |k(sJ|dz }d}t j| || ||d| ||||\} }| |kDsJy)Nrrrc1c2)rrrYrrr)r[r\rrrvr[r)rrr0r-rr)r[r\rrrvr)rqrrMrLrr)r[r\rrrvr)rjrrr,rr)r[r\rrrvr\rr|default)r9rsrrczt||}ttjj ||}||fS)Nr)rrr?r(unique)rrjrcol_idnum_of_files_generatednumber_of_partitionss r_get_compare_pairz._get_compare_pairs>!<&K"A"2::#4#4\&5I#JK%';;;rmax_1rYF)r9rsrrmax_open_filesr) r?rjrErkrrr=rDr)rrprpartition_column_idrBrecord_batch_1record_batch_2record_batch_3record_batch_4rr data_source_1rrr data_source_2rs r!test_write_dataset_max_open_filesrs$IK$+I+K+79N__+=+I+K+79N__+@+I+K+79N__+A+I+K+79N HH ! !>>#1>#C DE?? L!45ryy{CDEL )M%-".{D < M>;/ 110 "%9 99 9 'MN%-".{$2G M>;/ 110 "$8 88 8rc|dz }t|}tj|tjj d}|dz }|dz |dz dz |dz |dz dz g}tj t j|jjd gd t jddgi }t|t||d ||y) Nr:Trrr=r[rr\rrvr ) rrrSrr rr?r=r@rr5r_)rrprOrSr7r>rs r#test_write_dataset_partitioned_dictrs-'I+I6Ajj((1141HJG/ /F fsl^3 fsl^3N??299V$.&$'bhhSz235L VndF!#rc |dz }t|}tj|d}tjt j dt j fgd}|dz }g fd}tj||d|d | |d z d z |d z d z h}tttj }||k(sJ|dz } tj|| d|dtj|d|} tj| d|} | jj| jsJy)Nr:rrrr| partitioned1c<j|jyr)r0rq) written_file paths_writtens r file_visitorz4test_write_dataset_use_threads..file_visitors\../rrTrrrrr;part-0.featherr< partitioned2Fr0r)rrrSrr?r=rDrrrir~rrr) rrprOrSrtarget1rr>paths_written_settarget2result1result2rs @rtest_write_dataset_use_threadsrsB-'I+I6Ajj8G?? FBIIK()*6;L&GM0| (--(--NC m<=  .. .&G jjNGjjNG     $ $W%5%5%7 88 8rcRtjdtdi}|jd}t j ||dddt j |jd dj}d }|D]}t|}||kDs Jd ||} y) Nr[rSr) max_chunksizerT)rrpreserve_orderFrrz!Sequence expected to be ordered: ) r?rr/rrrrSrto_numpyr)rrrseqprevitemcurrs r-test_write_dataset_use_threads_preserve_orderrs HHc5;' (EQ/GWgi!%d< **W  & &5 & 9# > G G IC D4yd{E?uEE{rc  tjtjtdtjdtdDtjdgdzdgdzzggd}|dz }t j ||d d t |jd }|d z g}t|t|k(sJt j|dj}|j|sJ|dz }|dz |dz d z |dz |dz d z g}g g fd}t jtjdtjfgd}t j ||d d ||t |jd }t|t|k(sJ Dcgc]!}tj j#|#} } | k(sJt j|d|}|jj|sJt% dk(sJ D]} t'j(| |vrJycc}w)Nrxc3DK|]}tjywrrzr|s rrez#test_write_table.."r}r~r[rr\rrsingle dat_{i}.arrowrbasename_templaterr1z dat_0.arrowrrr:r;r<crj|jj|jyr)r0rqr8)r visited_paths visited_sizess rrz&test_write_table..file_visitor;s+\../\../rrrr|)rrrrrr)r?rrr/rrrhrrrSrrrr=rDrHrqgetsizerr~r) rrrsr4r>r(rrrq actual_sizes visited_pathrrs @@rtest_write_tabler s@ HH rRXX%IuRy%II #sebj()" #E !HUH'6yJhnnS)*J./N z?c.1 11 1 ZZ / 8 8 :F ==  &H8X0=@8X0=@N MM0?? FBIIK()*6;LUHY'6".\KhnnS)*J z?c.1 11 16CDdBGGOOD)DLD L (( ( ZZ\ JF ??  # #E ** * }  "" "%< ||L)^;;;< Es&Ic ptjtjtdtjdtdDtjdgdzdgdzzggd}tj|gdz}|d z }t j ||d t|jd t|d z gk(sJt j|d jj|sJ|dz }t j |g|d t|jd t|d z gk(sJt j|d jj|sJ|dz }t j |j|d t|jd t|d z gk(sJt j|d jj|sJ|dz }t j ||g|d t|jd t|d z gk(sJt j|d jjtj|gdzsJy)Nrc3DK|]}tjywrrzr|s rrez6test_write_table_multiple_fragments..Qr}r~r[rr\rrrrrrr1rrz single-listmultiplezmultiple-table) r?rrr/rrrrrrSrrr)rrrss r#test_write_table_multiple_fragmentsrOsn HH rRXX%IuRy%II #cUQY&'" #E   eWQY 'E!HUHY7 x~~c" #sH7G,G+H'I II I ::hu - 6 6 8 ? ? FF F&HeWhy9 x~~c" #sH7G,G+H'I II I ::hu - 6 6 8 ? ? FF F#HU%%')D x~~c" #s $ $%(' '' ' ::hu - 6 6 8 ? ? FF F))HeU^Xi@ x~~c" #s $ $%(' '' ' ::hu - 6 6 8 ? ? %#  rc tjtjtdtjdtdDtjdgdzdgdzzggd}|dz }t j d |j D||jd d t j|d j}|j|sJ|dz }tjj|j|j }t j ||d d t j|d j}|j|sJ|dz }t|}t j ||d d t j|d j}|j|sJy)Nrxc3DK|]}tjywrrzr|s rrez&test_write_iterable..ur}r~r[rr\rrinmemory_iterablec3 K|]}|ywrr)rdrts rrez&test_write_iterable..zs<e.r}r~r[rr\rrdataset_from_scannerrrrdataset_from_scanner2r r)zCannot specify a schemar)r=r)r?rrr/rrSrrrrrrrrr=)rrrrSrsr(s rtest_write_scannerrs HH rRXX%IuRy%II #sebj()" #EjjG//H^++9.  $ $RZZ%G HF ==  00H^++GdV+Di1  $ $RZZ%G HF ==tf- .. . z)B C@ //8( % Y @@@@s 3GG ctjtjtdtjdgdzdgdzzj gddg}t j |jdgj}|dz }t j||d | t jjdgd }t j|d | j}|j|sJy)Nrxr[rr\rzrrrSrrTrr)r?rrr/rcrrrr=rrr rSrr)rrrrspartitioning_readr(s r!test_write_table_partitioned_dictrs HH r #sebj();;=V_ E ??5<<#9#@#@AL"H x  0099 4:) ZZ->hj  ==  rc >tjtjtddtjt j dddj dtjt jdd gd ggd  }|d z }tj||dt|jd}|dz g}t|t|k(sJtj|dj}|j|sJdD]n}tj }|j#|}dt%|vsJ|d|z }tj||||t'j(|dz } |dk(rdnd} | j*| k(sJtj|dj}|j,} |dk(rB| jd| j/dj1tj2} |dvrC| jd| j/dj1tj4d} |j7| } |j| roJy)Nrxrrr.z datetime64[D]rzdatetime64[ns]r[r\rrrrrrr1part-0.parquet)1.02.42.6rz(r(rroptsmetaexpected_versionr=rWs rtest_write_dataset_parquetrsA HH r* <?CJJ   C:r*+  "  #E**HUHY7hnnS)*J!112N z?c.1 11 1 ZZ 3 < < >F ==  )'%%'(((99T$ZGGG6wi@@ dK+; ;<$+u$45%""&6666HY7@@B e ZZ6<<?#<#.r}r~r[rr\)r rchr1r csv_datasetrrr1z part-0.csvrrF)include_headercsv_dataset_noheaderr)r?rrr/rrrhrrrSrrrrrrr=rrp)rrrsr4r>r(rrs rtest_write_dataset_csvr so HH rRXX%IuRy%II #sebj()" #E &HUHU3hnnS)*J-.N z?c.1 11 1 ZZ / 8 8 :F ==    7;;+B+B\\'',C,)*F  $ $E $ :D//HUHV$G ZZ 0 9 9 ;F ==  rc JtjtjtdtjdtdDtjdgdzdgdzzggd}dfd }|d z }t j ||d | sJy) Nrxc3DK|]}tjywrrzr|s rrez:test_write_dataset_parquet_file_visitor.. r}r~r[rr\rrFcX|j|jjdk(rdyyy)NrYT)rr1)rvisitor_calleds rrz=test_write_dataset_parquet_file_visitor..file_visitors2  ! ! -%%11Q6!N7 .rrr)rr)r?rrr/rr)rrrrsr s @r'test_write_dataset_parquet_file_visitorrs HH rRXX%IuRy%II #sebj()" #E N" **HUHY".0 >rcr tdDcgc]}|gdzD]}|}}}tdDcgc]}|gdzD]}|dz }}}tj||dgdzdgdzzd}|dz }tjtj dtj fgd }g d fd }tj||d |d ||dz dz |dz dz h} tttj } | | k(sJ J jdk(sJycc}}wcc}}w)Nrrr[rxr\rr:rrr|cl|jr |jj|jyr)rr0rq)rrsample_metadatas rrzAtest_partition_dataset_parquet_file_visitor..file_visitor,s+  *33O\../rrTrr;rr<r) r/r?rrrr=rDrrrir~rr1) rrrf1_valsf2_valsrrrrr>rrrs @@r+test_partition_dataset_parquet_file_visitorrs]!&qCuglCdtCtCGC$)!HF5%2FtBwFwFGF HHG7"ebjC52:57 8E-'I?? FBIIK()*6;LMO0  y| H//H//NC m<=  .. .  && &  & &! ++ +?DFs D-D3c6tjdtjddgi}|djj dk(sJt j||dtj|dz }|djj dk(sJy)Nr[r.zEurope/Brussels)tzrrr) r?rr3r0rrrrrlrC)rrr(s r(test_write_dataset_arrow_schema_metadatarAs HHcBLL:KLMN OE :??  !2 22 2UGI6 ]]7%55 6F #;    "3 33 3rcddlm}tjdgdi}|j ddi}t j ||d|j|d z j}|jddik(sJy) Nrrr[rrvaluerrr) rrr?rrGrrrCr=r)rrrr=s r"test_write_dataset_schema_metadatarNsr HHc9% &E  ) )68*< =EUGI6   *: : ; B BF ??vx0 00 0rctjdgdi}|jddi}tj||dt j |dz j}|jddik(sJy)Nr[rrrrrr) r?rrGrrrlrCr=r)rrr=s r*test_write_dataset_schema_metadata_parquetrZso HHc9% &E  ) )68*< =EUGI6 ]]7%55 6 = =F ??vx0 00 0rc |\}}}}}}}}dj||||}tjtjt dtjdt dDtjdgdzdgdzzggd}t j tjd tjfgd } t j|d |d | t jd |dd j} | j|sJ|jd} t j|| d | t jd|dd j} | j|sJ|jd} t j|d| d | t jd|dd j} | j|sJy)Nrrxc3DK|]}tjywrrzr|s rrez(test_write_dataset_s3..pr}r~r[rr\rrrrr|zmybucket/datasetrrBrzmybucket/dataset2rrrzmybucket/dataset3) rr?rrr/rrr=rDrrSrr) r;rOrcr=r>r?r@ uri_templaterrr(r<s rtest_write_dataset_s3r es7H3Aq"atZ=DD  D$ 0 HH rRXX%IuRy%II #sebj()+# E ??299vryy{&;%<=f MD !b ZZr%fhj  ==     1 2CUC E ZZ5vhj  ==     j )C zc)$ZZ5vhj  ==  raC{ "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Action": [ "s3:PutObject", "s3:ListBucket", "s3:GetObjectVersion" ], "Resource": [ "arn:aws:s3:::*" ] } ] }c ddlm}|d\}}}}t|tdd|dd|d|d}t j t j td t j d td Dt j d gd zd gd zzggd}tjt jdt jfgd}tj|d|dd|dtjd|ddj}|j|sJtj|d|dd|dtjd|ddj}|j|sJt!j"t$d5tj|d|dddddd|dd|d|dd }t!j"t$d!5tj|d|ddddddy#1swY[xYw#1swYyxYw)"Nr)rrtest_dataset_limited_user limited123rhttp)r?r@endpoint_overrideschemerxc3DK|]}tjywrrzr|s rrez1test_write_dataset_s3_put_only..r}r~r[rr\rrrrr|zexisting-bucketrFrw)rrrfrrxrrBTz&Bucket 'non-existing-bucket' not foundrznon-existing-bucket)rrrfrxlimited)r?r@r%r&allow_bucket_creationz(Access Denied|ACCESS_DENIED))rrr_minio_put_only_policyr?rrr/rrr=rDrrSrrrrr:) rrr=r>rOrcrrr(s rtest_write_dataset_s3_put_onlyr+s=(!.D$1y*@:LJ .!F!D6*  B HH rRXX%IuRy%II #rSEBJ&')# E ??299vryy{&;%<=f MD  RU4 ZZbVhj  ==    RT4 ZZbVhj  ==   wE G   (R#8   !F!D6*"  B w&E F   (R#8   !    s3H)H5)H25H>c tjdddgi}tj||dz tjtj dtj tjtjg}tjj|dz g|tjtj}|j|}|j|k(sJy)Nr[r)rzr=rr)r?rrlrmr=r@rrrDrrrrrcrvr)rrrr=fsdss r$test_dataset_null_to_dictionary_castr.s HHcD$<( )ENN5'N23 YY bmmBHHJ <=F    * *'(##%%%' + D  # #D )E <<6 !! !rc~tjgdgdd}tj||dz dtj|dz d}tjgdgdd }tj||d z dtj|d z d}|j |d d }|j tjgdgdgd dk(sJ|j |d d d}|j jd tjgdgdgddk(sJy)Nrrrr[r\rcolArrrrcrrZrr)colBcol3rr3r8rrNr3rr9 full outer) join_typerrrr5r[r\rNrrNr7r?rrrrSrrr"rrds1rds2r(s rtest_dataset_joinrE s%  BR46 **Wt^E 2C  BR46 **Wt^E 2C XXc66 *F ??  *!   XXc66\X BF ??  $ $V ,%%:1  rc|tjgdgdd}tj||dz dtj|dz d}tjgdgdd }tj||d z dtj|d z d}|j |d }|j tjgdgdgd d k(sJ|j |d dd}|j jd tjgdgdgdd k(sJy)Nr0r1r2rrrr4r6)r3r9rr3r:r;r<_rr= right_suffixr>r?r@rArBs rtest_dataset_join_unique_keyrJ(s#  BR46 **Wt^E 2C  BR46 **Wt^E 2C XXc6 "F ??  *!   XXc6\X MF ??  $ $V ,%%:1  rc tjgdgdgdd}tj||dz dtj|dz d}tjgdgd gd d}tj||d z dtj|d z d}|j |d d d}|j jd tjgdgdgdgdgdggdk(sJy)Nr0rrx<r1)r3r8colValsrrrr4r5rxrr6rr3r<rGrHr>)rrxrMNr?)rrxNr5r@)r3r8rNcolB_r colVals_rrrArBs rtest_dataset_join_collisionsrRGs " B R46 **Wt^E 2C " B R46 **Wt^E 2C XXc6\X MF ??  $ $V , : @ 1A AA Arc&tjjgdgdd}tj||dz dtj |dz d}tjjgdgdgd d }tj||d z dtj |d z d}|j |d d ddd}|jjd tjgdgdgddk(sJy)N)rrrrr0)r[r\r[r\rr2rrr)rrqr)r[r\g)rrg@)r8r9colCrr3rrr8r9onby toleranceright_onright_by)rNNNN)r3rrU) r?rE from_pydictrrrS join_asofrr"rrBs rtest_dataset_join_asofr^cs   ) BR46 **Wt^E 2C    B R46 **Wt^E 2C ]] 6Q&F ??  $ $V ,),:1  rctjgdgdgdd}tj||dz dtj|dz d}tjgdgd gd gd d }tj||d z dtj|d z d}|j |dddgd}|j jdtjgdgdgdgddk(sJy)Nr0rLr)r3r8rWrrrrOr6r4r)r8rNr3rWrrWr3r8rrWrXrY)NrNr3r8rWrN)r?rrrrSr]rr"rBs r"test_dataset_join_asof_multiple_byrbs  B R46 **Wt^E 2C "  B R46 **Wt^E 2C ]] &&)QF ??  $ $V ,$ :1  rctjdgdi}tj||dz dtj|dz d}tjgdgdd}tj||d z dtj|d z d}|j |dgd }|j tjgdgd d k(sJy)NrWrrrrr6r)rNrWrrr`)r7r7r)rWrN)r?rrrrSr]rrBs rtest_dataset_join_asof_empty_byrds  i BR46 **Wt^E 2C " BR46 **Wt^E 2C ]] qF ?? "*!  rc tjgdgdgdgdd}tj||dz dtj|dz d}tjgd gd gd gd gd d}tj||dz dtj|dz d}d}t j t|5|j|dddgddddgdddy#1swYyxYw)Nr0rLrr1rarrrrOr6)rWrZrr4r)r8rNcolUniqr3rWrzXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.rrWr3r8rrV) r?rrrrSrrrr])rrrCrrDrHs r!test_dataset_join_asof_collisionsrgs "  B R46 **Wt^E 2C ""  BR46 **Wt^E 2C 7 z -  Dff-VV$4     s C44C=dstypercmemc Rtjgdgdd}|dk(r6tj||dz dtj|dz d}n!|dk(rtj|}nt |j tjd d kj tjd d k(}|dk(rtjntj}t||sJ|jtjd gd gdk(sJ|jdtjd gd gdk(sJ|j tjd dkj tjd d kDjtjd dk7}|jtjdgdgdk(sJtj||dz dtj|dz d}|jtjd gd gdk(sJ|jtjtjddgd dgdd d}|jj!dtjd dgddgd dgdk(sJt#j$t&5|j ddddt#j$t(5|j+ddd|j,j/d } |j tjd d kj1| } | jtjd d dgik(sJt#j$tj25|j1| jdddy#1swYxYw#1swYxYw#1swYyxYw)Nrrrr-r[r\rrTr2rcrrrrir3rYrr[rrr-rrrr\r rrxr8rz right outerkeysr=r8)r3r8r)r?rrrrSrrrr@rrrMrrrrr"rrrrrKr=rvreplace_schemar<) rrhrrCr(rWr2r joinedschema_without_col2 newschemas rtest_dataset_filterrus $ B~ Wt^E:jj46 5jjn!!ZZ(1, - 4 4RXXf5E5L MF'-~r##2;M;MH fh '' ' ?? *!   ;;q>RXX'   BHHV$q( ) 0 0 1 %gRXXf-=-BgC ;;=BHH&   VWz1%@zz'J.u=H    "((,#   [[BHHRc .% /F ??  $ $V ,D Rc :1   y ! 4 z "**++A.  1n()    288A-$   r '> 12;;= >>>>s$/P"P PPPP&c tjgdgdd}tjgdgdd}|dk(rktj||dz d tj|dz d }tj||d z d tj|d z d }n6|d k(r+tj|}tj|}nt tj||fj tjd d ktjd dk(z}|jtjgdgddk(sJ|jtjtjddgddgddd}|jjd tjgdgdgddk(sJ|j tjd d k}|j tjd dk} tjtd5tj|| fdddy#1swYyxYw)Nrkrlr2)rqrrM)hr:lrcrrrrrir3rYrq)rrrq)r[r\rwrrxr[r\rmrz left outerrn)rrxN)r3rr8zcurrently not supportedr)r?rrrrSrrrr@rrr"rrr) rrhrrrCrDfiltered_union_dsrr filtered_ds1 filtered_ds2s rtest_union_dataset_filterr|&s& $ B  B~ Wt^E:jj46 Wt^E:jj46 5jjnjjn!! C:.55 & A "((6"2a"78  % % '2885,    # #BJJrxxRc 90% $.F ??  $ $V ,:1   ::bhhv.23L::bhhv.34L z)B C1 L,/0111s I//I8c|dz }t|\}}tj|}|j}|jdk(sJ|j t jddk}|jjdk(sJtjt5|jdddy#1swYyxYw)Ntest_parquet_dataset_filterrr rrx) rrrrrrrr@rrrrK)rrrrOrSr( filtered_dss rr~r~Vs77I5i@M1  /G    F ??b  ..$!!34K    ! * *b 00 0 z "$!!#$$$s )CC ctjtjtdgdg}t j |}dt j di}|j|}t j||dgdtjtd 5t j||dgdd d d y #1swYy xYw) z Ensure the projected schema is used to validate partitions for scanner https://issues.apache.org/jira/browse/ARROW-17228 rxoriginal_columnrrenamed_columnr)rrz0'Column original_column does not exist in schemarN) r?rrr/rrSr@rrrrKeyError)rr table_datasetr*rs r4test_write_dataset_with_scanner_use_projected_schemares HHbhhuRy)*3D2E FEJJu%M"((#45G##G#4G(8'9%I  O   W,=+>u    s )C  Cr)rrc |dk(rtjdtjddgddgddddgdd idgd d d gd ddigd gd}t j ||dz |t j |dz |}|jgd}|jddddgdd ddgdddd gd dddgddgk(sJy)Nrzpyarrow.parquetabc123qrs456rrbuttonrr)relementvaluesstructsscrollwindow)NrYrfizzbuzz)user_ida.dotted.field interactionrr)rzinteraction.typezinteraction.valueszinteraction.structsrr))rr)rrrrr) rrr?rrrrSrrC)rrrrCs rtest_read_table_nested_columnsr~s -. HH(H!5)*A(q6u~t&< >hff-=,> @& EUGg-f= **Ww&v 6C LL:  ;E ?? dq!f"5148A Oh,$T23q J!  rc xddlm}|dz }tjj tj gdtj tj gdtjgddg}|j||ddgd |j|dd tjtjdtj tjdtjg jj}||jdk(sJ|jd j}t!t#|}|Dcgc]}d t%|dz}}t!t'j(}||k(sJycc}w)Nr)rSzslash-writer-xrrrYrr)experiment/A/f.csvzexperiment/B/f.csvrzexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metarr)r9rsrrrG)rrrr=rz exp_meta=rr)rrSr?rEr$rrrrr=r@rrr"rrCr$rrrHrI)tmpdirrrqdt_tablerCr encoded_pathsr4s r!test_dataset_partition_with_slashrsr% $ $Dxx## "((*- ()+ 4%58@6L NH B  \" yy"((8RXXZ8((:rwwy9;<  hj!  z))(3 33 3q!++-Hc(m$HDLMD[5B#77MMM 4()J J && &Ns8F7cdtjtjdtjdtjdtjdg}gdgdg}tjj ||}t j||d z tj|d z d }|jjj|sJtj||d z d tj|d z d }|jjj|sJtj||g|d z d tj|d z d }|jjj|sJy)NrF)nullabler7TrNrNrb nulltest1rr nulltest2 nulltest3) r?r=r@rBrEr$rlrprrSrrr)rschema_nullablerfrrSs r'test_write_dataset_preserve_nullabilityrsWii bhhj51 bhhj40!23O )F HH  @Ew45jj;.yAG     $ $ + +O << <UGk1)Djj;.yAG     $ $ + +O << <eU^W{%:9Mjj;.yAG     $ $ + +O << yQjj7*9=G     $ $ + +OD + QQ QmU+Ww->yQjj7*9=G     $ $ + +N4 + PP PmU+Ww->y+-jj7*9=G     $ $ + +OD + QQ Qrc dD]_}dD]V}tjtjdtjtjdtjg}gdgdg}tjj ||}t j}|d|z }t j||d|j|| d t j|d }|jD]Z} tj| } | jd jd } | j |usJ| j"||zurZJYby)N)TFrr7rrrbwrite_page_index_r)write_statisticswrite_page_indexrw)rrrxrr)r?r=r@rBrEr$rrrrprSrrlrrrhas_offset_indexhas_column_index) rrrr=rfrrrsrCr%rccs r#test_write_dataset_write_page_indexrs_)R - R YYbhhj)bhhj) +,F 1FHH(((?E..0K#45E4F!GGH    (;;%5%5<(= **Xi8C  R++D1''*11!4**.>>>>**.>AQ.QQQQ  R- RRrctjtjgdtjgdgddg}|dk(r6tj||dz d tj |dz d }n!|d k(rtj |}nt |jdjjgd gd d k(sJ|jdgjjgdgdd k(sJ|jtjddkjdjjgdgdd k(sJtjjtjgdtjtjgdgddg}tj |}|jdg}|jj}|dgdk(sJ|dgdk(sJ|jdg}|jj}|dgdk(sJ|dgdk(sJy)N)rYrrrr)r\r[r\r[rrrorrcrrrri)r[r[r\r\rr)ror)r descending)rr\r\r[r[)rrrYrrr)r[r[r\r)rr0r0rr)rcarrfoobarr[r\)r[r)rr0r0r)rrrr)r[r&)r?rrrrrSrr"rr#rrr@rEr$rB)rrhrr sorted_tabsorted_tab_dicts rtest_dataset_sort_byrsJ HH ! *+  !E ~ $u= ZZ$u 5 5 ZZ !! ::h  ( ( * 4 4 6)!;   ::/0 1 : : < F F H)!M   99bhhx(1, . 6 6 hj!   HH  RXXZ0 01"3Z ! E E B012J ))+557O 3 = 00 0 3 #B BB B/01J ))+557O 3 = 00 0 3 #B BB BrcNtjdgdi}tjj}|j d}|dz }t j ||||t jd}tjj|}t j|| j}||k(sJ|d z }t||t|j} t| d k(sJ| d } t| j} | d | dk7sJ| d| d c| d <| d<| j| t jd} tjj| } t j|| j}||k7sJ|tjdgdik(sJt!j"t$d5t j|| j}dddy#1swYyxYw)zwCheck that checksum verification works for datasets created with ds.write_dataset and read with ds.dataset.to_tabler[rT)write_page_checksum correct_dir)r9rsrrr)default_fragment_scan_optionsr corrupted_dirrr$F)rrYrrzCRC checksum verificationrN)r?rrSrrprrrrrrhiterdirr bytearray read_bytes write_bytesrrr:)r table_origpq_write_formatroriginal_dir_pathpq_scan_opts_crcpq_read_format_crc table_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crc table_corruptrOs r1test_checksum_write_dataset_read_dataset_to_tablerGs73 -.Jjj224O#66 7"M -/ "" 44#')55&668**!hj  $$ $!?2  23 $$6$>$>$@A ' (A -- -215,779:H B<8B< '' '!)"x|HRL(2,##H-77#(*JJ88&99;JJ#8::B(* J && & BHHc<%89 99 9 w&A B JJ %  (* s ,&HH$cd}d}tjt5}tjj j dddd|tjvs|t|jvsJtjj }d}tjt|5|j ddddy#1swYxYw#1swYyxYw)NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+z;make_write_options\(\) takes exactly 0 positional argumentsr) rrrr?rSrrpr_r')msg_1msg_2excinfopformatrHs rtest_make_write_options_errorrs-E(E y !""11)<hj     %#s!4 44 49 - +,-sA99BBrr)rrr(rr+rHr~rQr{sysrrUrfrshutilr urllib.parsernumpyrAr rrr?r'r(r pyarrow.csvrrrc pyarrow.json pyarrow.librpyarrow.tests.utilrrrr r rr3rrSrpyarrow.parquetrrlmark pytestmarkr r;rIrVfixturernrrrrr r*r2rFrPrTrXrZr`rr parametrizer_tuplerrrrrrr rrr-r4s3rGr^rarkrqrxr}rrrrrrrrrrrrrrrrrrrr-r4r:r<rFrOrRrWr[r]r_rdrhrorryr|rrrrrrrrrrrrrrrrrrrrrrrrrr;rr rr"r&r+r1r4r6r;r?rJrMrOrUrYr`rcrirkrnrrrwrrrrrrrrrrrrrskipifplatformrrrrrrrrrrr rrrr!r$r.r5r8r?rDrKrPrmrprrrrrrrrrrrrrrrrrr rrrrrr r*r+r.rErJrRr^rbrdrgrur|r~rrrrrrrrrrrrrrs$   ,,,  [[ ?? K +%D@h0 0f     H+ +\DGDGN),8989v"" -I-I`%%$--, 5 5 O OA!HF$F8 8 %8 #(8 8 v#" 6&&R$$"66r$TNJ2,BOOH-$$/t}5E"6E"P44$5-5-pH2H,">">J"77@ % %9N9Nx:))" : :%0%0P"5Jp''T7777)),&.&.R$$6''>   3 3F  F  @ACICI<+eT];  @AC@C<@@  @ACa,Ca,H  @ACR,CR,jJ? ((2N + ; =O"6JHH(*8,>@"II IILL $$$<<&JJ&77&  ,--==*"?(DJ" 67&"!#!#H??,,, NN. A+v)>?5$-8+eT];)i  o& ?# !< ,62<9@62r;;<DD& : :::,,0?=?=D//992==6FF  99( 4?%DM 2@6)6)r"$,#M#ML 3 3,*  "  $)   (H* : A0  (  (  ,D2@0G@) ,/  ,/^ ,!!CLLG+DF!F!(!!2..*  $344$ ; ;33N @ @ GG* D D = ==0:?:?zBB&(?C3& P PF**>)")"=&HHVF"-#-#`; O!<!%$K>K>\%$*1*1Z $ 2#5676"'J==.R:R@%$-C-C`FR'&5Ct B" B  B  Bs[AT3AT8AT?AT+TAT T AT TATTATTAT(T'AT(T+AT6T5AT6