K iXddlZddlZ ddlZddlZddlZddlm Z m Z ddl m Z ddl mZ ddlmZddlmZmZmZ ddlZddlmZddlmZmZej:j Zej:j,dZej:j,dZ ej:j,d Z!ej:j,d Z"ej:j,d Z#ej:j,d Z$ej:j,d Z%ej:j,dZ&ej:j,dZ'ej:j,dZ(ej:j,dZ)ej:j,dZ*ej:j,dZ+ej:j,dZ,ej:j,dZ-ej:j,dZ.ej:j,dZ/ej:j,dZ0ej:j,dZ1ej:j,dZ2ej:j,dZ3ej:j,dZ4ej:j,dZ5ej:j,dZ6ej:j,ej:jodgd ej:jod!d"d#gd$Z8ej:j,d%Z9ej:j,d&Z:y#e$rdZYwxYw#e$rdZYxwxYw#e$rdxZZYuwxYw)'N)LocalFileSystemSubTreeFileSystem)guid)Version) _read_table_test_dataframe _write_table)_roundtrip_pandas_dataframealltypes_samplecjtd}|dz }tjj|}d|jj vsJt ||tj|j }d|vsJtj|djd}|dddddd d gk(sJy) N'sizepandas_roundtrip.parquetspandasutf8 index_columnsranger)kindnamestartstopstep) r paTable from_pandasschemametadatar pq read_metadatajsonloadsdecode)tempdirdffilename arrow_tablerjss g/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadatar*7s e $B33H((&&r*K  **33 33 3h')22H   HY'..v6 7B o G,0-.,-$/#0 00 0c tjtjdtjtjdtjtjdtj g}t jtjdtjtjdtjgdd}t jdd gd dgddgd}tjj||d }tjj||d }|jj|jd rJ|jj|jsJtj|dz |}|j!||j!|y)Nintfloatstringdtype)ABBAEDDAACDC)r-r.r/g?F)rpreserve_indexT)check_metadatazmerged.parquet)r)rrfieldint16float32r/pd DataFramenparangeuint8rrequalsr ParquetWriter write_table)r$rdf1df2table1table2writers r):test_merging_parquet_tables_with_different_pandas_metadatarJKsbYY  # "**,' 299;'F ,,yy"((+1BJJ/* C ,,1vt, C XX ! !#fU ! KF XX ! !#fU ! KF}}##FMM$#GG G ==   .. .   g(88 HF v vr+c td}tjjt t |j |j dddddg|_|dz }tjj|}|jjJt||tj|}|j}t!j"||y)N rlevel_1level_2namesr)r r= MultiIndex from_tupleslistzipcolumnsrrrrpandas_metadatar r read_pandas to_pandastmassert_frame_equal)r$r%r&r' table_readdf_reads r)%test_pandas_parquet_column_multiindexr^hs b !B** SRZZ"- ./)$+BJ 33H((&&r*K    - - 99 9h')J""$G"g&r+ctd}|dz }tjj|d}|jj }|drJ|dsJt ||tj|}|jj }|drJ|jj}|jj|k(sJ|j}tj||y)Nr rrFr8rrV) r rrrrrWr rrXrrYrZr[)r$r%r&r'r(r\r r]s r)rrrrgr )r|r%r'rks r)!test_pandas_can_write_nested_datar}si!  A  #Q '   D 4 B((&&r*K "Dd#r+c|dz }d}tjtj|tjtj|tj tj|tj tjj|dkDgdd}tjj|}|jd5}t||d dddtj|j!}t#|}|j%}t'j(||y#1swY^xYw) Nzpandas_pyfile_roundtrip.parquetr7r1r)foobarNbazqux)int64r<float64boolrpwbrcrd)r=r>r?r@rr<rrandomrandnrrropenr ioBytesIO read_bytesrrYrZr[) r$r&rr%r'fr|r\r]s r)$test_pandas_parquet_pyfile_roundtriprs::H D 4rxx099T499T4 %)5  B((&&r*K t 4[!U34 ::h))+ ,DT"J""$G"g&44s E  Ecd}tjjdtjtj |tj tj |tjtj |tjtj |tjtj |tjtj |tjtj |tjtj |tjtj |tjtj |tjtjj|dkDd }|dz }t j"j%|}dD]B}t'||d|t)|}|j+}t-j.||DdD]B}t'||d| t)|}|j+}t-j.||Dd D]q} | d k7r*t j0j2j5| s2t'||d| t)|}|j+}t-j.||sy) Nr rr1) rAuint16uint32uint64int8r;int32rr<rrr)TFrc)reuse_dictionary)rewrite_statistics)NONESNAPPYGZIPLZ4ZSTDr)re compression)r?rseedr=r>r@rArrrr;rrr<rrrrrr rrYrZr[libCodec is_available) r$rr%r&r'rr\r]rrs r))test_pandas_parquet_configuration_optionsrs DIINN1 4rxx0))D 2))D 2))D 2 $bhh/4rxx04rxx04rxx099T499T4 %)  B33H((&&r*K'+[(E$2 4 * &&( b'* +*+[(E&6 8 * &&( b'* +A+ 6 !FFLL--k: [(E!, . * &&( b'*+r+ctd}tjddt|zd|_d|j_t |ddi}tj||y)NdrrrLrflavorspark) rr?r@lenindexrr rZr[)r%results r)+test_spark_flavor_preserves_pandas_metadatarsT c "ByyBRL"-BHBHHM (h-@ AF&"%r+c&tjddtjdditjdtjdtjdtjdid}t|dz }tj|dj dd }t j j|}t||t|}|j}tj||y) Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetime data.parquetzdatetime64[us]r1rFdrop) r= Timestampstrr> set_indexrrrr rrYrZr[)r$r|pathdfxtdfxr' result_dfs r) test_index_column_name_duplicaters LL. /1C LL. /1C LL. /%2 LL. /%2  D w' (D ,,t#3 4 > >vE > RC 88   $Dtd#K%%'I)S)r+cd}tt|}tjj gd|gddg}tj d|i|}t jj|}|dz }t||t|}|j|sJ|j}tj||y) Nr0)rrrfoobar some_numbersrPnumbers)rzdup_multi_index_levels.parquet)rTrr=rR from_arraysr>rrrr rrBrYrZr[) r$num_rowsrrr%tabler& result_tablers r) test_multiindex_duplicate_valuesr8sH5?#G MM % % (( & E y'*% 8B HH  $E99H!x(L << %% %&&(I)R(r+cd}tjtj|dddd}t |dz }|j }t j||y)Ncarat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39\s{2,}rpythonsep index_colheaderenginezv0.7.1.parquet)r=read_csvrrrrYrZr[datadirexpected_stringexpectedrrs r)&test_backwards_compatible_index_namingrNs[ KO{{2::o6I%)!HFH "22 3E __ F&(+r+cd}tjtj|dgdddj }t |dz }|j }tj||y)Nrrcutcolorclarityrrrzv0.7.1.all-named-index.parquet) r=rrr sort_indexrrYrZr[rs r)1test_backwards_compatible_index_multi_level_namedrcsj KO{{ ?#-jl  "BB CE __ F&(+r+c.d}tjtj|dgdddj }|j j gd|_t|dz }|j}tj||y) Nrrrrrr)rNrzv0.7.1.some-named-index.parquet) r=rrrrr set_namesrrYrZr[rs r)6test_backwards_compatible_index_multi_level_some_namedr|s KO{{ ?# !<jl  ^^--.FGHN "CC DE __ F&(+r+c ^tdttjkrtjdtj gdgdtj dddd }tjjgd tj dddgd dg |_ |d z }t|}|j}tj||t|d g}|j}tj||d gjdy)Nz2.2.0zRegression in pandas 2.2.0rr0)g?g?g333333?z 2017-01-01r0zEurope/Brussels)periodstzabcrrPz'v0.7.1.column-metadata-handling.parquetrrqTr)rr= __version__pytestskipr> date_rangerRrrrrYrZr[ reset_index)rrrrrs r)2test_backwards_compatible_column_metadata_handlingrsw72>>22  01||lmmL!8IJ LMH]]..  |Q3D E Go/HN > >D  E __ F&(+  se E __ F&(C5/"="=4"="HIr+c"tjddgddggddg}|djd|d<|jdg}tj j |}t j}tj||tj|jj}t|jtjsJ|jj!|jsJy) Nrrrdc1c2rqcategory)r=r>astyperrrrrgrrDrXrhrY isinstancerCategoricalIndexrB)r%rbosref_dfs r))test_categorical_index_survives_roundtriprs SzC:.t EB$xz*BtH tf B HH  $E   !CNN5# ^^CLLN + 5 5 7F fllB$7$7 88 8 <<  rxx (( (r+ctjdtjgdgddi}tjj |}tj }tj|||j}tj|j}tj||y)Nr)rrrr)rrrT) categoriesordered)r=r> CategoricalrrrrgrrDrhrXrYrZr[)r%rrcontentsrs r))test_categorical_order_survives_roundtriprs sBNN$HI JB HH  $E   !CNN5#||~H ^^H % / / 1F&"%r+ctjdgdzdgdzd}|jddd}tjj |}tjj |}tj }tj||ddtj|j}|dj|dsJ|d j|d sJy) Nrg?)colr-rrcrL)re chunk_sizerr) r=r>rrrrrgrrD read_tablerhrB)r% df_categoryr table_catrlrs r)*test_pandas_categorical_na_type_row_groupsrs tfslC53;? @B))JzBCK HH  $E$$[1I   !CNN9c5R@ ]]3<<> *F !9  E!H %% % !9  E!H %% %r+cNtjgdd}gd}tjdtjj ||i}t j}tjt j||tj|jj}|jjdk(sJ|jj j"|k(j%sJt'j(||y)N)rrrrrrMrrr1)rrrx)rr)r?arrayr=r>r from_codesrrgrrDrrrhrYrr2catrallrZr[)codesrr%rlrs r)!test_pandas_categorical_roundtriprs HH+7 ;E&J sBNN55 *6&' (B   !CNN288B<% ]]3<<> * 4 4 6F 88>>Z '' ' HHLL # #z 1 6 6 88 8&"%r+cttjtdkrtjdtj dgdid}|j d}tj dgdi}|j d}tj|djtj|djk(sJtj|djjjjtj|djjjjk(sJt|dz }tjtj ||tj"|j%}t'j(||y) Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r)rrrzstring[pyarrow]r1rz cat.parquet)rr=rrrr>rrr to_pylistrrvaluesrrrDrrrYrZr[)r$rErFrrs r))test_categories_with_string_pyarrow_dtyper sZr~~!11 PQ ,,23;L MC **Z C ,,23 4C **Z C 88CH  ' ' )RXXc#h-?-I-I-K KK K 88CHLL++22 3 = = ?288 C &&D((1  44 4 w& 'DNN288C=$' ]]4 * * ,F&#&r+c0tjdgdd}|djd|d<tj|}t j |t|dz dgt jt|dz j}tj|dg|dgt j |t|d z t jt|d z j}tj|dg|dgt j|t|d z t jt|d z j}tj|dg|dgy) NrrpartrrInt64case1rpartition_colscase2r) r=r>rrrrwrite_to_datasetrrrYrZr[rD)r$r%rrs r)5test_write_to_dataset_pandas_preserve_extensiondtypesr sF s95 6B5   )BuI HHRLE s7W$%vh]]3w01 2 < < >F&%/2ug;7s7W#456 ]]3w01 2 < < >F&%/2ug;7NN5#g678 ]]3w78 9 C C EF&%/2ug;7r+cjtjgdgdd}tjgdd|_t j |}|ddgj }|djd |d<tj|t|d z dg tjt|d z j}tj||tj|t|d z tjt|d z j}tj||tj|t|d z tjt|d z j}tj||y)N)rrrrrridxrrrrrrrr)r=r>IndexrrrcopyrrrrrrYrZr[rD)r$r%rdf_catrs r)+test_write_to_dataset_pandas_preserve_indexr sM yA BBxxe4BH HHRLE  % % 'FF^**:6F6N s7W$%vh]]3w01 2 < < >F&&)s7W#456 ]]3w01 2 < < >F&"%NN5#g678 ]]3w78 9 C C EF&"%r+r8)TFNmetadata_fname _metadata_common_metadatacd}d}|tz }|jg}g}g}t|D]} t|| } t j t j| |z| dz|zdd| _|| dz } tjj| | } | jd} | jjJt| | |j!| |j!| |j!| tjj | } t#j$| j||z t#j&|}d d g}|j)| j+}t j,|Dcgc]}|| c}}|d ur| jj.nd|j_t1j2||ycc}w)Nr7)rrrr1rrz.parquetr`rArprqF)rmkdirrrr=rr?r@rrrrreplace_schema_metadatarrr appendrwrite_metadataParquetDatasetrXrYconcatrrZr[)r$r8rnfilesrdirpath test_dataframespathsir%rrtable_for_metadatadatasetrVrrrs r)(test_dataset_read_pandas_common_metadatar09sF DG MMOI F E 6] T *88 IIa$hQ$g >W A3h'$$R$G--d3||$$,,,UD! b T#(-- >.(//>1IJ(G "G   1 ; ; =Fyyf5!G*56H'u4 $ NN&(+6s! G6ctjdgdi}|dz }t||tjdt t |t}|jtj|sJy)Nrrr) filesystem) r=r>r rrXrrrrBrr)r$r%r&rs r)%test_read_pandas_passthrough_keywordsr3ksi sI& 'B'HX ^^$S\?3DEF ==" && &r+cztjtjddgddggtjddgd}|dz }tjtj tj }tj tjd |tjd tj g}tjj||}t||tj|j}tj||y) N)id something)value2else)r5 something2)valueelse2rr)col1col2rr<r=)r=r>Seriesrmap_r/rr:rrr rrXrYrZr[)r$r%r&udtrr'rs r)test_read_pandas_map_fieldsrA{s  "4 5 !#5 6   5%.)  B'H ''"))+ryy{ +C YY-rxx /LM NF((&&r62Kh' ^^H % / / 1F&"%r+);rr!numpyr? ImportErrorrpyarrowr pyarrow.fsrr pyarrow.utilrpyarrow.vendored.versionrpyarrow.parquetparquetrpyarrow.tests.parquet.commonrrr pandasr=pandas.testingtestingrZr r mark pytestmarkr*rJr^rarnrrrtr}rrrrrrrrrrrrrr rr parametrizer0r3rAr+r)rRs $ 9, << ?[[ 00&8''$''0'' = ='' $ $ ''.)+)+X&&**:))*,,(,,0,,2JJ0 ) ) & &&&"&&$''.88(&&0)+>?)K9K+LM,,N@,,^ ' '&&K B BNBs3NN)N7N&%N&)N43N47 OO