K iTlddlZddlZddlmZddlZ ddlZddlZddl Z ddl m Z m Z ddlmZddlmZ ddlmZddl mZ ddlZddlmZddl mZej8j&Zej8j,dZd Zej8j,ej8jAd gd e jBd d dd ddfgd e jDd d dd ddfgd e jFd d dd ddfgd e jHdd dd ddfgde jJd ddd ddfgde jLd ddd ddfgde jNd ddd ddfgde jPdddd ddfgde jRdddd ddfgde jTdddd ddfdde+dddge jXdde+dj[dd ddfgde j\dd d!dd"dfgd#e jXdd$d%d ddfg d&Z/d'Z0ej8j,d(Z1d)Z2d*Z3d+Z4d,Z5d-Z6d.Z7d/Z8ej8j,d0Z9d1Z:ej8jwd2d3Zej8j,d6Z?d7Z@ej8jej8jd8ZCd9ZDd:ZEej8jAd;d<eFd=id<eFd=idfd<eFd=id>eFd=id?feFd=eFd=d@dAeFd=idBffdCZGej8jdDZIdEZJdFZKy#e$rdZYwxYw#e$rdZY~wxYw#e$rdxZZY}wxYw)GN) OrderedDict)_check_roundtripmake_sample_file)LocalFileSystem)util) _write_table)alltypes_samplec td}|jt|j}tj j ddt||_t|}t|j}|j}t||jt|k(sJ|j|dzk(sJ|jdk(sJ|jdk(sJd|j vsJt#|j$t&sJt#|jt(sJ|j*}|j*|usJt||dzk(sJt||d}t||j,|jdk(sJ|j.dk(sJ|j0dk(sJ|j0dk(sJ|j2d k(sJ|j4d k(sJ|d }|j6j8d k(sJt;j<t>5||dzdddt;j<t>5|d dddtA|jD]}|jC|}t#|tDjFsJt|tA|jD]:}|jI|} t#| tDjJsJt| <t;j<t>5|jCd dddt;j<t>5|jC|jdzddd|jCd}|jt|k(sJ|j|dzk(sJ|jLdkDsJt;j<t>5|jId } dddt;j<t>5|jI|dz} ddd|jId} | jNdk(sJ| jPdk(sJ| j2d k(sJ| jRdk(sJ| jTdk(sJ| jVdusJt#| jXtDjZsJ| j\dk(sJt_| j`ddhk(sJ| jbdusJ| jdJ| jfdkDsJ| jhdkDsJ| jjdkDsJt;j<tl5| jndddt;j<tl5| jpdddy#1swYxxYw#1swY^xYw#1swYxYw#1swYaxYw#1swYxYw#1swYxYw#1swYxYw#1swYyxYw)N'sizecolumnsri@B2.6z parquet-cppBOOLEANNONEFLOAT16boolTSNAPPYPLAINRLEF)9r reindexsortedrnprandomrandintlenindexrmetadatareprnum_rows num_columnsnum_row_groupsformat_version created_by isinstanceserialized_sizeintdictschemanamemax_definition_levelmax_repetition_level physical_typeconverted_type logical_typetypepytestraises IndexErrorrange row_grouppqRowGroupMetaDatacolumnColumnChunkMetaDatatotal_byte_size file_offset file_path num_valuespath_in_schema is_stats_set statistics Statistics compressionset encodingshas_dictionary_pagedictionary_page_offsetdata_page_offsettotal_compressed_sizetotal_uncompressed_sizeNotImplementedErrorhas_index_pageindex_page_offset) dffilehncolsmetar/col col_float16rgrg_metacol_metas i/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_metadata.pytest_parquet_metadata_apir]7s e $B F2::. /Byy  G#b' :BH R E  OE >>DJ ==CG ## #   uqy (( (   ! ## #   % '' ' DOO ++ + d**C 00 0 dmmT ** *\\F ;;&  v;%!) ## #L )CI 88rzz!} $$ $  # #q (( (  # #q (( (  # #q (( (    )) )    '' ')K  # # ( (I 55 5 z "uqy z "r D''(..$'2#6#6777 W ,,- C~~c*Hh(>(>? ?? N   z " r z "0 t**Q./0nnQG   s2w && &   %!) ++ +  " "Q && & z "&>>"%& z "->>%!),-~~a H   1 $$ $    ## #  ! !Y .. .   % '' '  " "f ,, ,  D (( ( h))2== 99 9   8 ++ + x!! "w&6 66 6  ' '5 00 0  * * 22 2  $ $q (( (  ) )A -- -  + +a // / * +  * +#""##g00&&--$  ##s`' V&V3WW WW'" W4 X&V03V=W  WW$'W14W=X ctjdgdi}tj||dz tj|dz }|j j djdjy)Narrztest_metadata_segfault.parquetr) patabler< write_table ParquetFiler$r;r>rF)tempdirrc parquet_files r\test_parquet_metadata_lifetimerhsb HHc9% &ENN5'$DDE>>',L"LML##A&--a0;;)datar6r3 min_value max_value null_countrCdistinct_count)rrrNINT32rroINT64)rrrNror)皙g@gffffff@N皙@FLOATrrrsDOUBLErbaaa BYTE_ARRAYrizutf-8)TFFTTrFTr)bs12Nsaaarzr{c\tjd|i}tjtjd|g} tj j || d} t| } | j} | jd} | jd}|j}|jsJt||j|sJt||j|sJ|j |k(sJ|j"|k(sJ|j$|k(sJ|j&|k(sJy)NrjF)r/safer)pd DataFramerbr/fieldTable from_pandasrr$r;r>rF has_min_max_closeminmaxrmrCrnr3)rjr6r3rkrlrmrCrnrSr/rcrTrVrZr[stats r\"test_parquet_column_statistics_apirs\ vtn %B YY./ 0F HH F ?E U #E >>DnnQG~~a H   D     $) ,, , $) ,, , ??j (( ( ??j (( (   . 00 0    .. .ric|tjk(rt||z dkS|tjk(rt||z dkS||k(S)NgHz>gvIh%<=)rbfloat32absfloat64)r6leftrights r\rrsP rzz|4%< 4''  4%< 5((u}rictjdtjtjgdi}t t j j|j}|jdjdjjrJ|jdjdjjJy)Ntzdatetime64[ns])dtyper)r~rSeriesNaTrrbrrr$r;r>rFrr)rSrVs r\&test_parquet_raise_on_unset_statisticsrs sBIIrvvh6FGH IB BHH004 5 > >D~~a ''*55AAA A >>!  # #A & 1 1 5 5 == =ricddtjfddtjfddtjft j ddddt j d dddtj d ft j ddddt j d dddtjd ft jd d dddddt jd d dddddtjd ft jd d dddddt jd d dddddtjd ft jd d dt jd d dtjftjdtjdtjddfg }t|D]\}\}}}tjj!tj"||g|gdg}t%|d|dz }t'j(||dt'j*|}|j,j/dj1dj2} | j4|k(sJ| j6|k(rJy)N l5f|~W luähnlichu öffentlichrrwmsusiz20.123z20.124 rr6rWexamplez.parquetrversion)rbuint64uint32utf8datetimetimetime32time64 timestampdatedate32decimalDecimal decimal128 enumerater from_arraysarraystrr<rdrer$r;r>rFrr) rfcasesimin_valmax_valtyprpathpfstatss r\%test_statistics_convert_logical_typesrsZ& 4*biik *- 3mmBAt,hmmBAt.LiiommBAt,hmmBAt.LiioaQ1d;aQ1d;ll4 "aQ1d;aQ1d;ll4 "mmD!R(mmD!R(iikooh'ooh'mmB"$! %E('0&6$" "GWc HH "((GW+=C"H!I"' *7wqc223 q$. ^^D ! %%a(//2==yyG###yyG###$ric tjjtdtjgdfdtjgdfg}t ||dz t j|dz }dD]@}|jdj|}|jdusJ|j@Jt ||d z d t j|d z }dD]@}|jdj|}|jd usJ|j@Jt ||d z dg t j|d z }|jdjd}|jdjd }|jdusJ|jd usJ|jJ|jJy)Nr_r`rvr_rvc data.parquet)rrrTz data2.parquetF)write_statisticsz data3.parquetr) rbr from_pydictrrrr< read_metadatar;r>rErF)rfrcrVrWcccc_acc_bs r\%test_parquet_write_disable_statisticsrs HH  "((9% & "((?+ ,   E .01  Gn4 5D) ^^A  % %c *$&&&}}((() /1EJ  Go5 6D% ^^A  % %c *%'''}}$$$% /1SEJ  Go5 6D >>!  # #A &D >>!  # #A &D    $$ $    %% % ?? && & ?? "" "ric0tjd}|jddddk(sJtjddd}|jddddk(sJtjdtj fdtj fg}tjd d tjdd f}tjj ||\}}|d k(sJ|d k(sJtjj|||}||k(sJtjj|d d}tjdddtjd ddf}||k(sJtjj|d}|dk(sJtjj |ddk(sJtjt5tjj|ddddtjtd5tjd dtjddf}tjj ||dddy#1swY|xYw#1swYyxYw)NrF column_index descending nulls_firstrT)rrr_rvrr)rvr)r_ ascendingat_end)r_rat_start)null_placement)rr)r_znot a valid sort orderzinconsistent null placementmatch)r) r< SortingColumnto_dictrbr/int64 to_ordering from_orderingr7r8 ValueError) sorting_colr/ sorting_cols sort_orderrsorting_cols_roundtrippedexpectedempty_sorting_colss r\test_parquet_sorting_columnr5s""2&K    %   ""14HK    %   YYbhhj)C+<= >F t, u-L"$!1!1!=!=fl!SJ B BB B X %% % " 0 0 > > N!, $ 44 4##11*:2GL u$? t>H 8 ## #))77C  ## #   ' ' 3~ EE E z "R &&v0OQR z)F G;   QD 1   QE 2  $$V\: ;;RR;;s$!J(AJ J  Jctjtjdtjfdtjfgtjd}t j ddt j ddg}tj j ||\}}|d k(sJt|dk(sJ|dd k(sJ|d d k(sJy) Nxyr_rvrTrrFr)za.xrr)rvr)rbr/structrr<rrr")r/sorting_columnsrrs r\"test_parquet_sorting_column_nestedrjs YY YYbhhj)C+<= > XXZF t, u-O "$!1!1!=!=fo!VJ X %% % z?a   a=1 11 1 a=. .. .ric2tjgdgdd}tjdddtjdd f}tj}t ||| tj |j}tj|}||jdjk(sJ|j}|jd d k(sJ|jd dk(sJ|jddk(sJy)Nr`rrrTrrF)rr)rr'rr&rar() rbrcr<rBufferOutputStreamr BufferReadergetvaluerr;rrget)rcrwriterreaderr$ metadata_dicts r\!test_parquet_file_sorting_columnsr|s HH9?; has_offset_indexhas_column_index)r rcrrr$rs r\test_parquet_file_page_indexrs) 7#y)*&&(UF5EF!23##F+    " ) )! ,""&6666""&6666 7ricddg}t|dz }tjgdgdddgddgdd ggd }tjj |}d}|D]X}g}t j|t||z | |d j|||d }E|j|d Zt|d 5}|j|dddt j|}|j} |j} | D]} | dk7s | | | | k(rJ| ddk(sJ| ddk(sJ| ddk(sJ| dd k(sJ| dd kDsJy#1swYxYw)NzARROW-1983-dataset.0zARROW-1983-dataset.1 _metadatar`)rrrraro)onetwothreemetadata_collectorrwbr,r'r&rr()rr~rrbrrr<rd set_file_pathappend_row_groupsopenwrite_metadata_filerr) rf filenamesmetapathrSrc_metafilenamerVfmd_mdkeys r\test_multi_dataset_metadatar&s')?@I7[()H a&1a&1a&) B HH  $E E- uc'H"45*. 0 Qh' =GE  # #DG ,- h % !!!$%   H %D B --/C' # #s8r#w& &&' }  "" " z?a    A %% %  !Q && &  1 $$ $%%s EE(czt|dz }tjddg}tj||tj |}t|dz }tjddg}tj||tj |}t|dz }tjddg}tj||tj |} t |t |k(sJt |t |k(sJt |t | k7sJy)N metadata1r_rrvr metadata2 metadata3)rvr)rrbr/r<write_metadatarhash) rfpath1schema1 parquet_meta1path2schema2 parquet_meta2path3schema3 parquet_meta3s r\test_metadata_hashingr8s +% &Eii)9:;Ggu%$$U+M +% &Eii)9:;Ggu%$$U+M +% &Eii)9:;Ggu%$$U+M  $}"5 55 5  $}"5 55 5  $}"5 55 5riz#ignore:Parquet format:FutureWarningct|dz }tjddg}tj||tj |}|jj }|j|sJ|jrd|jvsJdD]I}tj|||tj |}|dk(rdnd}|j|k(rIJtjd d gd d gd |}tj||dz tj t|dz }tj||||gtj |}|jd k(sJd} tjt| 5tjtjddg|||gdddy#1swYyxYw)Nr$r)r*s ARROW:schema)1.0z2.4rrr:rrrg?g?rrrrzLAppendRowGroups requires equal schemas. The two columns with index 0 differ.r)r_r)rvnull)rrbr/r<r-rto_arrow_schemaequalsr$r)rcrdr(r7r8 RuntimeError) rfrr/ parquet_metaschema_as_arrowrexpected_versionrcparquet_meta_multmsgs r\test_write_metadatarDs w# $D YY(89 :Ffd###D)L"))99;O  ! !& )) )o&>&>>>>)? &$8''- %,u$45%**.>>>> ? HHAq6R1& AENN5'N23##C.(@$ABL, )E((.  + +q 00 0 2C |3 /   II~}5 6 lL%A    s 0GGctjtjddgdddzi}tjt dg|}t |y) Nf0doublelargerirrr)rbr/rrcr:r) my_schemarcs r\test_table_large_metadatarJIsL 288D(34$+S8^#<>I HHeBi[ 3EUric0td}t|}t|}t||jddd}t|jt j sJ|jj|jsJ|j|jk(sJ|jj|jsJ|j|jk(sJ|jdk7sJ|jj|jrJ|j|jk7sJt|jdt jsJ|jdj|jdsJ|jd|jdk(sJ|jdj|jdrJ|jd|jdk7sJ|jddk7sJy)Nr r rzarbitrary objectrr) r rrr+r/r< ParquetSchemar= ColumnSchema)rSrTfileh2fileh3s r\test_compare_schemasrPRs e $B R E b !F bCaC1 2F ellB$4$4 55 5 <<  u|| ,, , <<5<< '' ' <<  v}} -- - <<6== (( ( <<- -- -||""6==11 1 <<6== (( ( ell1or 77 7 <<? ! !%,,q/ 22 2 <<?ell1o -- -||A%%ell1o66 6 <<?ell1o -- - <<?0 00 0ric"d}tjtj|tjj |dddg}|dz }t jj|}t||tj|}tj|d}|jj|sJ|jj|sJ|jjd |jd k(sJy) Nd)r#valuesr#rSrz test.parquetT) memory_mapspandas)r~rraranger randnrbrrrr< read_schemar/r=r$)rfNrS data_pathrcread1read2s r\test_read_schemar\ms A 1))//!$" $B .(I HH  $E " NN9 %E NN9 6E <<  u %% % <<  u %% % <<  +u~~i/H HH Hric\tjdtjgdi}tj||dz tj |dz }|j }t|ddk(sJt|ddddk(sJ|ddddd Jy) Nr_rrr row_groupsrrrrF)rbrcrr<rdrrr")rfrcr$rs r\#test_parquet_metadata_empty_to_dictr_s HHc288BW56 7ENN5'N23. 89H$$&M }\* +q 00 0 }\*1-i8 9Q >> >  &q )) 4Q 7 E MM Mric  d}d}tjt|Dcic]+}t|tj j d-c}}tj5}tj|||j}dddtjtj}tjtj|}t|D]}|j|tj5}|j||j}dddtjtj|}ycc}w#1swYxYw#1swYCxYw)Nrwir)rbrcr:rrr rVrr<rdrrrrr)NCOLSNREPEATSrrcoutbuforiginal_metadatar$s r\"test_metadata_exceeds_message_sizerfs3 EH HH5<Hac!fbiioob11H IE   C uc"lln(()=> 45H 8_6""#456   C$$S)lln 45HIs0E3,'E8 "F8FF ctjdgdi}d}t||z }d|z}tj||tj ||z }|j }tj |j|sJtj |tj|sJtj |d|j|sJtj|j|sJtj|tj|sJtj|d|j|sJtj|5tj |tj|sJtj|tj|sJ dddy#1swYyxYw)Nr_r`rzfile:/// filesystem) rbrcrr<rdrr/r=rrWr change_cwd)rfrcfnamerBfile_urir$r/s r\test_metadata_schema_filesystemrms HHc9% &E EGeO$II%HNN5)$%0H \\F  H % , ,X 66 6  o/ 1171A BB B   HWI. 006x0@ AA A >>( # * *6 22 2 >>o/ 117 @@ @ >> HWI. 006v ?? ?  !@ o/1171AB BB~~ o/117@ @@ @@@s A!G++G4ctjdgdi}tj5}tj|||j }dddtj tj}d}tjt|5|jddddy#1swYixYw#1swYyxYw)Nr_r`z#Argument 'other' has incorrect typer) rbrcrr<rdrrrr7r8 TypeErrorr=)rcrcrdrers r\test_metadata_equalsrps HHc9% &E   C uc"lln(()=> 1E y .'  &''  ''s'B="C =C Czt1,t2,expected_errorcol1rcol2z$The two columns with index 0 differ.)rqrrcol3z&This schema has 2 columns, other has 1cttj|}tj|}tj}tj}t j ||t j |||j d|j dt j|j}t j|j}|r;d} tjt| |z5|j|dddy|j|y#1swYyxYw)Nrz(AppendRowGroups requires equal schemas. r) rbrcioBytesIOr<rdseekrer$r7r8r>r) t1t2expected_errortable1table2buf1buf2meta1meta2prefixs r\$test_metadata_append_row_groups_diffrsXXb\F XXb\F ::r$)parquet_test_datadirr$key_value_metadata1key_value_metadata2s r\$test_column_chunk_key_value_metadatars 4 I!JKH",,Q/66q9BB 6S"Q QQ Q",,Q/66q9BB  && &ricd}tjt|d5tjdddtjt|d5tj dddtjt|d5tj dddtjt|d5tjdddtjt|d5tjdddy#1swYxYw#1swYxYw#1swYxYw#1swYmxYw#1swYyxYw)Ncd|dS)Nz Do not call z's constructor directlyr)rs r\rCz.test_internal_class_instantiation..msg saS 788rirGrParquetLogicalTyper?r= FileMetaData) r7r8ror<rGrr?r=r)rCs r\!test_internal_class_instantiationrs9 yL(9 :  y,@(A B    y,A(B C!  ! y,>(? @  yN(; <   !!s;D;#E!EE E,;EEE E),E5)Lrr collectionsrrunumpyr ImportErrorr7pyarrowrbpyarrow.tests.parquet.commonrr pyarrow.fsr pyarrow.testsrpyarrow.parquetparquetr<rpandasr~pandas.testingtestingtmr mark pytestmarkr]rh parametrizeuint8uint16rrint8int16rrrrchrbinaryencodebool_rrrrrrrrr rr&r8filterwarningsrDrJrPr\r_slow large_memoryrfrmrpr:rs3rrrrrir\rsn$# K& 9 <[[ Y#Y#x<  XRXXZ!Q1dC YRYY['1aAtD YRYY['1aAtD YRYY['1aAtD gbggi"aAtD hbhhj'2q!QE hbhhj'2q!QE hbhhj'2q!QE ' T31d ( dCAt c$iu -yryy{ #s4y//8!Q -hbhhj udAq$ 1)"))+ '4At 3)T/U)V/2>> $F#>2;j/$4*54p 7'%'%T64AB( C( V114II( N660 @F '/ eBi659-t4 eBi659-+-Bir+feBi-@-/ 2''.$$@'M B BNBs4N' N5O'N21N25O?O OO