gL i XdZddlZddlmZddlmZddlZddlZddlZ ddl Z ddl m Z m Z ddlmZddlmZddlmZmZmZmZmZddlZddlmZdd lmZdd lm Z m!Z!m"Z"m#Z#m$Z$ ddl%Z%d Z& ddl(Z(d Z)e jTjWd e jTjWdgZ,e jZe j\de jTj_e) xs edd dk(de jTjae dd ge j\de jTj_e& dgdZ1e jZdZ2e jZdZ3e jZdZ4e jZdZ5e jZd Z6e jZejjoejpjrejjoejpjtejjoejpjvejjyd!d"ejjyd#d"ejjyd$d"ejjyd%d"gd&Z= d:d'Z>d(Z?d)Z@d*ZAd+ZBd,ZCd-ZDd.ZEd/ZFd0ZGd1ZHGd2d3ZIGd4d5eIZJGd6d7eIZKGd8d9eIZLy#e'$rd Z&YwxYw#e'$rd Z)YwxYw);z test parquet compat N)Decimal)BytesIO)using_copy_on_writeusing_string_dtype) _get_option)is_platform_windows)pa_version_under11p0pa_version_under13p0pa_version_under15p0pa_version_under19p0pa_version_under20p0)Version)FastParquetImpl PyArrowImpl get_engine read_parquet to_parquetTFz2ignore:DataFrame._data is deprecated:FutureWarningz=ignore:Passing a BlockManager to DataFrame:DeprecationWarning fastparquetmode.data_managersilentarrayz4fastparquet is not installed or ArrayManager is usedreasonTODO(infer_string) fastparquetrstrictmarkspyarrowpyarrow is not installed)paramsc|jSNparamrequests b/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pandas/tests/io/test_parquet.pyenginer*@s4 ==c:tstjdy)Nr!r ) _HAVE_PYARROWpytestskipr+r)par1]s  ./ r+ctstjdn%tdddk(rtjdt r0|j tj jdd y ) Nzfastparquet is not installedrTrrz.ArrayManager is not supported with fastparquetrFrr)_HAVE_FASTPARQUETr.r/rr applymarkermarkxfailr's r)fpr7ds`  23 ( 6' A DE KK  %Ee  T  r+cdtjgdddtjddgS)NfooABr?r@columns)pd DataFrameIndexr0r+r) df_compatrFqs% <R SSr+c tjtdttddt j dddgdtj d d d }|S) Nabcr:@@float64dtypeTFT20130101r<periods)abdef)rCrDlistrangenparange date_range)dfs r)df_cross_compatr^vsR eeAqk"395$z15  B Ir+ctjtddtjdggdgdgdtt ddtj d d jd tj d d ddtjdggdtjdd tjdtjtjdgd S)NrHrScrSNr`)foobarsbazr=barbazr:rIr<u1rJrKrLrM@@rOrPrQ20130103) stringstring_with_nanstring_with_nonebytesunicodeintuintfloatfloat_with_nanbooldatetimedatetime_with_nat) rCrDrXrZnanrYr[astyper\ TimestampNaTr0r+r)df_fullr|s <<5k #RVVS1 0-,a $IIaO**40YYsCy9"BFFC0' j!< Z( Z("  r+z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c|jSr$r%r's r)timezone_aware_date_listr~s ==r+c xsddixsi|r |d<|d<fd} &tj5| | dddy| | y#1swYyxYw)aVerify parquet serializer and deserializer produce the same results. Performs a pandas to disk and disk to pandas round trip, then compares the 2 resulting DataFrames to verify equality. Parameters ---------- df: Dataframe engine: str, optional 'pyarrow' or 'fastparquet' path: str, optional write_kwargs: dict of str:str, optional read_kwargs: dict of str:str, optional expected: DataFrame, optional Expected deserialization result, otherwise will be equal to `df` check_names: list of str, optional Closed set of column names to be compared check_like: bool, optional If True, ignore the order of index & columns. repeat: int, optional How many times to repeat the test compressionNr*ct|D]N}jfi tfi }dvrdjd<t j |Py)Nrmr:rm) check_names check_like check_dtype)rYrrloctmassert_frame_equal) repeat_actualrrrr]expectedpath read_kwargs write_kwargss r)comparez!check_round_trip..compareslv A BMM$ /, /!$6+6F H,59 12  ! !'%'   r+)r ensure_clean) r]r*rrrrrrrrrs ` ``````` r)check_round_triprsD 8M4#8L#K !' X & H   | __  $ FO     s A$$A-cddlm}|j|d}|jjj|k(sJy)zCheck partitions of a parquet file are as expected. Parameters ---------- path: str Path of the dataset. expected: iterable of str Expected partition names. rNhive) partitioning)pyarrow.datasetdatasetrschemanames)rrdsrs r)check_partition_namesrs9!jjFj3G    & & , , 88 8r+cd}tjt|5t|dddddy#1swYyxYw)Nz.engine must be one of 'pyarrow', 'fastparquet'matchr=re)r.raises ValueErrorr)rFmsgs r)test_invalid_enginers6 :C z -2E51222s5>c|r&ts |jjd|_tjdd5t |dddy#1swYyxYw)Nstrio.parquet.enginer )r rBryrCoption_contextr)rFr1using_infer_strings r)test_options_pyrsO"6%--44U;   . :$#$$$s AAcptjdd5t|dddy#1swYyxYw)NrrrCrr)rFr7s r)test_options_fprs2  . >$#$$$ ,5cptjdd5t|dddy#1swYyxYw)Nrautor)rFr7r1s r)test_options_autors2  . 7$#$$$rc^ttdtsJttdtsJt j dd5ttdtsJttdtsJttdtsJ dddt j dd5ttdtsJttdtsJttdtsJ dddt j dd5ttdtsJttdtsJttdtsJ dddy#1swYxYw#1swYxYw#1swYyxYw)Nr rrr) isinstancerrrrCr)r7r1s r)test_options_get_enginersq j+[ 99 9 j/ AA A  . :F*V,k:::*Y/===*]3_EEEF  . >F*V,o>>>*Y/===*]3_EEEF  . 7F*V,k:::*Y/===*]3_EEEFFFF FF FFs' AF >AF/AF# FF #F,cDddlm}|jd}|jd}tsdn$t t j t |k}tsdn$t tj t |k}txr| }txr| }|s|s|r6d|d}tjt|5td dddn1d }tjt|5td ddd|r6d|d }tjt|5td dddyd }tjt|5td dddyyy#1swYuxYw#1swYxYw#1swYyxYw#1swYyxYw) Nr)VERSIONSr rFzPandas requires version .z. or newer of .pyarrow.rrz%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.) pandas.compat._optionalrgetr-rr __version__r3rr.r ImportErrorr)r pa_min_ver fp_min_verhave_pa_bad_versionhave_fp_bad_versionhave_usable_pahave_usable_fprs r)"test_get_engine_auto_error_messager2s1i(Jm,J  W(( )GJ,? ?!  [,, - 0C C #>+>'>N&B/B+BN . / |;RSE{%8 #6" # # # # # #  # # # #s0 E22 E>) F  F2E;>F FFc|}tj5}|j||dt||}tj||t||ddg}tj||ddgdddy#1swYyxYwN)r*rr*rSrU)r*rBrrrrrr^r1r7r]rresults r)test_cross_engine_pa_fpr]s B  6d d24 8d2. fb)d2SzB fb#sn5666 A#BB c|}tj5}|j||dt||}tj||t||ddg}tj||ddgdddy#1swYyxYwrrrs r)test_cross_engine_fp_parks B  6d d24 8d2. fb)d2SzB fb#sn5666rctjdgdi}d}tj5}tjt |dd5|j ||ddddddy#1swYxYw#1swYyxYw)NrSr9zqStarting with pandas version 3.0 all arguments of to_parquet except for the argument 'path' will be keyword-only.F)rcheck_stacklevelraise_on_extra_warnings)rCrDrrassert_produces_warning FutureWarningr)r*r]rrs r)!test_parquet_pos_args_deprecationrxs sI& 'B 1  (d  ' ' "$)   ( MM$ '  ((( ( (((s#A>A2!A>2A; 7A>>BceZdZdZdZy)Basectj5}tj||5t |||dddddddy#1swYxYw#1swYyxYw)Nrr)rrr.rr)selfr]r*excerr_msgrs r)check_error_on_writezBase.check_error_on_writesY __  ?$s'2 ?2tV> ? ? ? ? ? ? ?s!AAAA AA#ctj5}tj|5t|||dddddddy#1swYxYw#1swYyxYw)Nr)rrexternal_error_raisedr)rr]r*rrs r)check_external_error_on_writez"Base.check_external_error_on_writesY __  ?$))#. ?2tV> ? ? ? ? ? ? ?s!AA A A AA!N)__name__ __module__ __qualname__rrr0r+r)rrs ? ?r+rcZeZdZdZdZej jdgddZdZ dZ dZ d Z d Z d Zd Zd ZdZdZdZdZej jdgddZej j,ej j.dZy) TestBasicctjgdddtjdtjgdfD]}d}|j ||t |y)Nr9r:r=rPz+to_parquet only supports IO with DataFrames)rCSeriesrzrZrrr)rr*objrs r) test_errorzTestBasic.test_errors[ IIi  LL $ HHY    DC@C  % %c6:s C Dr+c tjtdttddd}ddg|_t ||y)NrHr:rIrlrqr=rerCrDrXrYrBr)rr*r]s r)test_columns_dtypeszTestBasic.test_columns_dtypess; \\T%[eAqk9JK LU^ V$r+r)NgzipsnappybrotlicVtjdgdi}t||d|iy)Nr?r9rrrCrDr)rr*rr]s r)test_compressionzTestBasic.test_compressions& \\3 * +V=+2NOr+c tjtdttddd}tjdtdi}t |||ddgiy)NrHr:rIrrlrBrrrCrDrXrYr)rr*r]rs r)test_read_columnszTestBasic.test_read_columnssS \\T%[eAqk9JK L<<4; 78  H:7N r+c tjttdtdd}tjdddgi}t ||||ddgid gdgd d y) NrIaabb)rqpartrqrr:partition_colsr)r==rS)filtersrB)rrrrrr)rr*tmp_pathr]rs r)test_read_filterszTestBasic.test_read_filterssm \\E!H~V   <<A0  *VH5%8$9ugN r+c|dk7}tjdgdi}t||gdtjddt dgd g}|D]Q}||_t |tjr |j jd|_t||| Sgd |_d |j _ t||y) Nrr?r9)r;r<rIrPr<rQrH)r:r<rI)r)rr:r;r=) rCrDrr\rXindexr DatetimeIndex _with_freqname)rr*rr]indexesrs r)test_write_indexzTestBasic.test_write_indexs - \\3 * +V$  MM*a 0 K    BEBH%!1!1288..t4 R[ A  B V$r+c|}tjdgdi}tjjgd}||_t ||y)Nr?r9)rSr:)rSr;)rTr:)rCrD MultiIndex from_tuplesrr)rr1r*r]rs r)test_write_multiindexzTestBasic.test_write_multiindexs@ \\3 * + ))*HIV$r+c |}tjddd}tjtjj dj dt|zdftd}tjjd d g|gd d g }|jd }||fD]-}||_ t||t||dddgi|ddg/y)Nz 01-Jan-2018z 01-Dec-2018MS)freqr;r<ABCrALevel1Level2leveldate)rrBr?r@rr)rCr\rDrZrandom default_rngstandard_normallenrXr from_productcopyrr)rr1r*datesr]index1index2rs r)test_multiindex_with_columnsz&TestBasic.test_multiindex_with_columnss m]F \\ II ! !! $ 4 4a#e*na5H IK ++ !5 )'61B, 4(f% EBH R ( FS#J(?"cSVZ.   r+ctjgdgdd}ddd}|jd}t||||tjgdgddgd  }t||||gd gd g}tjt t d t d Dcgc]}| c}d| }|jd}t||||ycc}w)Nr9)qrs)rSrTF)rrT)droprr)zyxwvutsrrrererfrfr=r=quxr%onetwor'r(r'r(r'r()r'r()rCrD reset_indexrrXrY)rr*r]rrarraysis r)test_write_ignoring_indexz#TestBasic.test_write_ignoring_indexs\\ @ A'+e< >>t>,V,R\\/ 2:O  V,R E D \\qNa+A1QB+A B& >>t>,V,R ,Bs) C c,tjjgd}tjtj j djd|}|dk(r|j||tdy|dk(r t||yy)Nrr;)rIr<rArColumn name must be a stringr ) rCrrrDrZrrrr TypeErrorr)rr* mi_columnsr]s r)test_write_column_multiindexz&TestBasic.test_write_column_multiindexs]]../MN \\ II ! !! $ 4 4V se E D \\ II ! !! $ 4 4V -1 )* c8,r+ctjtjdj ddt dj }|j||tdy)N rIr<aaarAzDuplicate column names found rCrDrZr[reshaperXrrrrr1r]s r)test_duplicate_columnsz)TestParquetPyArrow.test_duplicate_columnssI \\"))B-//15tE{ K P P R !!"b*6TUr+cttjdtjddi}t||y)NrS1 dayr<rQ)rCrDtimedelta_rangerrs r)test_timedeltaz!TestParquetPyArrow.test_timedeltas, \\3 2 27A FG HR r+cxtjdgdi}|j||tjy)NrSrSr:ri)rCrDrr ArrowExceptionrs r)test_unsupportedz#TestParquetPyArrow.test_unsupporteds/ \\3 . / **2r73I3IJr+ctjddtj}tj|dg}t r"|j ||tjyt||y)Nr; rMfp16rarB) rZr[float16rCrDr rr rr)rr1rar]s r)test_unsupported_float16z+TestParquetPyArrow.test_unsupported_float16sQyyBbjj1 \\tfX 6   . .r2w7M7M N R $r+zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsrzfloat16 works on 15 path_typectjddtj}tj|dg}t j 5}||}t jtj5|j||dddtjj|rJ dddy#1swY4xYw#1swYyxYw)Nr;rrMrr)rr*)rZr[rrCrDrrrr rrosrisfile)rr1rrar]path_strrs r) test_unsupported_float16_cleanupz3TestParquetPyArrow.test_unsupported_float16_cleanup syyBbjj1 \\tfX 6 __  ,(X&D))'*@*@A 4 4 3 4ww~~d+ +++  , , 4 4 , ,s$,C?C)CC CCc tjtjtdtjgdtjgdtjgdgddd }t ||y) Nabcdef)rer=r=reNrerdrM)rSrTr`rSr`rT)rTr`rUT) categoriesorderedrSrTr`)rCrD CategoricalrXCategoricalDtyperrs r)test_categoricalz#TestParquetPyArrow.test_categorical sf \\^^DN3^^=--.CD ^^2.   " R r+ctjd}|jdi|}d|i}t|||jd||y)Ns3fs filesystem/pyarrow.parquetrrrr0)r.rT S3FileSystemrr)rrFs3_public_bucketr1s3sors3kws r)test_s3_roundtrip_explicit_fsz0TestParquetPyArrow.test_s3_roundtrip_explicit_fs5sW""6* T   & &B   $))**:;  r+cHd|i}t||d|jd||y)Nstorage_optionss3://rrrr)rrFrr1rs r)test_s3_roundtripz$TestParquetPyArrow.test_s3_roundtripBs7"4(  )..//?@  r+ partition_colr?c 8tjd|j}|rL|jtj |t j}d}||j|||<t|||d|jdd|i|d|ddd y) Nrcategoryrz /parquet_dirr)rrrTr:)rrrrrr) r.rTrrydictfromkeysrZint32rr)rrFrr1rr expected_dfpartition_col_types r)test_s3_roundtrip_for_dirz,TestParquetPyArrow.test_s3_roundtrip_for_dirNs F#nn&  %,,T]]="((-STK!+ )4])C)J)J"*K &    )../|<*D1"/##'  r+ctjdt}|j|t |}|r&t s |j jd|_tj||y)Nr r) r.rTrrrr rBryrr)rrFrbuffer df_from_bufs r)test_read_file_like_obj_supportz2TestParquetPyArrow.test_read_file_like_obj_supportus^I&V$"6* &: ) 1 1 8 8 ?I  i5r+cntjd|jdd|jddtjtd5t ddddtjtd5|j ddddy#1swY?xYw#1swYyxYw)Nr HOME TestingUser USERPROFILEz.*TestingUser.*rz~/file.parquet)r.rTsetenvrOSErrorrr)rrF monkeypatchs r)test_expand_userz#TestParquetPyArrow.test_expand_user~sI&6=1=-8 ]]7*< = + ) * + ]]7*< = 3  !1 2 3 3 + + 3 3s BB+B(+B4cddg}|}|j||dt||t|j|jk(sJy)Nrurqrrrrrshape)rrr1r|rr]s r)test_partition_cols_supportedz0TestParquetPyArrow.test_partition_cols_supportedsI %  h~4 Ph7H%++rxx777r+cd}|g}|}|j||dt||t|j|jk(sJy)Nrurr)rrr1r|rpartition_cols_listr]s r)test_partition_cols_stringz-TestParquetPyArrow.test_partition_cols_stringsO-.  h~4 Ph(;<H%++rxx777r+c|Sr$r0)xs r)zTestParquetPyArrow.sQr+rlz pathlib.Path)idscd}|g}|}||}|j||t|j|jk(sJy)Nr@)r)rrr) rrr1rFrrrr]rs r)test_partition_cols_pathlibz.TestParquetPyArrow.test_partition_cols_pathlibsO -. " d+> ?D!''288333r+cJtjgg}t||y)N)rrBrrs r)test_empty_dataframez'TestParquetPyArrow.test_empty_dataframes \\B /R r+cddl}tjdddgi}|j|jd|j g}|j t}t||d|i|y)Nrrr:)typerr) r rCrDrfieldbool_ryrur)rr1r r]rout_dfs r)test_write_with_schemaz)TestParquetPyArrow.test_write_with_schemasc \\3A- (s!I JK4Rx.@6Rr+ctjdtjtjgddtjgddtjgddd}|r&t r t |||jd d i n t ||tjd tjgd di}t ||y)Nr r9rNrMUInt32rarlrr`rrrSrE)r.rTrCrDrr rry)rr1rr]s r) test_additional_extension_arraysz3TestParquetPyArrow.test_additional_extension_arrayss I& \\YYy8YYy9YY/x@   "6 Rbiie .E F R $ \\3 / IJ KR r+ctjdtjdtjgddi}tj d|5|rNt r|jd}n|jd|d }|jjd|_n|jd|d }t||| dddy#1swYyxYw) Nr rSrastring[pyarrow]rMstring_storagerzstring[]r) r.rTrCrDrrr ryrBr)rr1rrr]rs r) test_pyarrow_backed_string_arrayz3TestParquetPyArrow.test_pyarrow_backed_string_arraysI& \\3 *:BS TU V   / @ 8!'!yy/H!yy7>2B!)DEH#+#3#3#:#:5#A 99w~.>a%@A Rh 7 8 8 8s A4CC c Htjdtjtjj gdtj dddtjjtjdddd}t||y) Nr ))rr:)r:r;)r<rIz 2012-01-01r<D)rRr rI)r`rUrV) r.rTrCrD IntervalIndexr period_range from_breaksr\rrs r)test_additional_extension_typesz2TestParquetPyArrow.test_additional_extension_typessz I& \\%%112JK__\13G%%11MM,D     R r+cd}tjdtjdddi}t||d|iy) Nz2.6rSz 2017-01-011nsrr rRversionr)rCrDr\r)rr1verr]s r)test_timestamp_nanosecondsz-TestParquetPyArrow.test_timestamp_nanosecondss; \\3 lPR ST URy#.>?r+chtjddd|gz}tj|d|i}|dd}tr |j j d|_|jtjjk7r ddl }|j jj|}|j|jdz }|j j!||_|dj"j!||d<t'||d | y#t$$rYwxYw) Nr 11.0.0 index_as_colrransr<F)rr)r.rTrCrDr ras_unittzinforvtimezoneutcpytzru utcoffset FixedOffset total_seconds tz_convertdtrr) rr1r~idxr]rroffsetrus r)test_timezone_aware_indexz,TestParquetPyArrow.test_timezone_aware_indexsIx0+,, \\>3*? @a5 %^^33D9HN # * *h.?.?.C.C C V../GH%%f&:&:&+3N+C+F+F+Q+QRT+U(RUXF  s D%% D10D1c6tjdtjdt t di}t j5}|j||t||dg}dddtdk(sJy#1swYxYw)Nr rSr<rrSrrrr:) r.rTrCrDrXrYrrrrr)rr1r]rrs r)test_filter_row_groupsz)TestParquetPyArrow.test_filter_row_groupssI& \\3U1X/ 0 __  F$ MM$rM *!$^4DEF F6{a F Fs #BBc tjtjj dj dgd}t j5}|j||t||}ddd|r;tjtjjjsJytjtjjjsJy#1swYxYw)Nr;)rr<)r?r@CrAr)rCrDrZrrrrrrrr_mgrcore internals ArrayManager BlockManager)rr1using_array_managerr]rrs r)test_read_parquet_managerz,TestParquetPyArrow.test_read_parquet_managers \\ II ! !! $ 4 4W = __  ,$ MM$rM *!$+F , fkk277+<+<+I+IJ JJfkk277+<+<+I+IJ JJ  , ,s  C::Dcddl}|}tjddd}|jd}||d<gd|d<|jj |}|j tj }trj|d jd |d <|d jd |d <|djtj|jd d|d<t||ddi|y)NrrPr<rsrtrvrwrx) types_mapperrvztimestamp[us][pyarrow]rwus)unitrurMr r*rr) r rCr\rTable from_pandas to_pandas ArrowDtyper ry timestampr)rr1r|r r]rypa_tablers r)&test_read_dtype_backend_pyarrow_configz9TestParquetPyArrow.test_read_dtype_backend_pyarrow_config%s mmJ6GHnnT"=1 ==,,R0%%2==%A #+J#7#>#>?W#XHZ ,45H,I,P,P(-H( )'/}&=&D&D /g//T>OPQ'H] #  ()4  r+c6tjdddgitjddgdd }|j}d dl}t |j t d kDr |jjd|_t||d d i|y)NrSr:r;r<rItestrzint64[pyarrow])rrNrrrMr r) rCrDrErr rrrryr)rr1r]rr s r),test_read_dtype_backend_pyarrow_config_indexz?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexCs \\ 1a&M1a&v!>FV 779 7&& ''(*; ;%^^223CDHN ()4  r+rBrr:rbrcz*https://github.com/apache/arrow/pull/44171)rrrc tjtdttddd}||_t ||y)NrHr:rIrr)rr1rBr]s r)test_columns_dtypes_not_invalidz2TestParquetPyArrow.test_columns_dtypes_not_invalidSs7&\\T%[eAqk9JK L R r+cvtjtjgdd}t||y)Nr custom namer$r#rCrDrErrs r)test_empty_columnsz%TestParquetPyArrow.test_empty_columnsks% \\} M NR r+c|dz }tjddgi}ddi|_|j||t ||}|j|jk(sJy)Nztest_df_metadata.pr:ratest_attributer)rCrDattrsrr)rrr1rr]new_dfs r)test_df_attrs_persistencez,TestParquetPyArrow.test_df_attrs_persistenceps_.. \\A3x ($a( d2 &d2.||rxx'''r+c |dz }tjdddgiddg}|j||tjdd 5t ||}dddtj t j }tjdddgi|tjddg| tjdgtr|stn|  }tj|y#1swYxYw) Nztest_string_inference.prSryrT)rarrfuture.infer_stringTna_valuerM)rarNrrB) rCrDrrr StringDtyperZrxrEr r[rr) rrr1rrr]rrNrs r)test_string_inferencez(TestParquetPyArrow.test_string_inferencexs33 \\c3Z0c C d2 &   4d ; 3!$r2F 3/<<Sz"((C:U3HH'0B    fh/ 3 3s C55C>znot supported before 11.0c ddl}|dz }tjdtdgid}|j ||j d|j dfgt|}trtjdd gid }n#tjdtdgid }tj||y) Nrz decimal.prSz123.00rrMr)r123zstring[python]r[) r rCrDrrr decimal128rr rr)rrr1rr]rrs r)test_roundtrip_decimalz)TestParquetPyArrow.test_roundtrip_decimals +% \\3!2 34hOH fh/r+c 0ddl}ddlm}|dz }|jd|j gd|j i}|j ||tjdd5t|}dddtjdgditjtjtjdgtjtj }tj |y#1swYxYw) Nrzlarge_string.prS)NrTr`r5Tr6rM)rarNrB)r pyarrow.parquetrbrUr large_stringrVrCrrrDr8rZrxrErr)rrr1rWrrUrrs r)#test_infer_string_large_string_typez6TestParquetPyArrow.test_infer_string_large_string_types$**#rxx(8"//:KLMN ud#   4d ; (!$'F (<<'(.."&&1HHcU".."&&*IJ  fh/ ( (s + D  DN)5rrrrzr|rrrrrr.r5r6rskipifr rmrpathlibPathrrrorrrrrrrrrrrrrrrr rr!r%r&r NotImplementedErrorrvr(r,r2r9r r=rAr0r+r)rqrqs !   -V !K% [[ ? [[009NO [[[3 *=> ,?P ,!* [[     [[     [[ [[ E    >6388 [[c;'h-G 4 4! S!& 8! @GB  K <  [[ F FLL kk''(.G( "!!$1a3!!!$1a3  $!%$! ! (0* [[,5PQ 0R 00r+rqcVeZdZdZdZdZdZdZdZdZ e jjdZ d Zd Zd Zd Ze jj%ed dZdZdZdZdZdZdZdZdZe jj%ed dZy)TestParquetFastParquetc|}tjddd}|jd}||d<tjdd|d<t ||y) NrPr<z US/EasternrtrvrrQ timedelta)rCr\rrr)rr7r|r]rys r)rzz!TestParquetFastParquet.test_basicsQ mmJlCnnT"=,,Wa@;R r+c tjtdttddd}t}d}ddg|_|j ||||ddg|_|j ||||tjd ddddtjd ddddg|_|j ||||y) NrHr:rIrr/rrbrcr&)rCrDrXrYr0rBrrv)rr7r]errrs r)test_columns_dtypes_invalidz2TestParquetFastParquet.test_columns_dtypes_invalids \\T%[eAqk9JK L,V  !!"b#s3f%  !!"b#s3   dAq!Q /   dAq!Q /   !!"b#s3r+ctjtjdj ddt dj }d}|j||t|y)NrrIr<rrAz9Cannot create parquet dataset with duplicate column namesrrr7r]rs r)rz-TestParquetFastParquet.test_duplicate_columnssM \\"))B-//15tE{ K P P RI !!"b*c:r+cddl}t|jtdkrTttjtdk\r/|j t j jdtjdgdi}tjddtjd gid }t|||d y)Nr 2024.11.0z2.0.0z$fastparquet uses np.float_ in numpy2rrS)TNFrHgrrMF)rr) rrrrZr4r.r5r6rCrDrxr)rr7r(rr]rs r)test_bool_with_nonez*TestParquetFastParquet.test_bool_with_nones ;** +gk.B Bw NNH W H    !!B"  \\3 34 5<<sBFFC&8 9K R(Fr+ctjdtjdddi}|j||tdtjdgdi}d}|j||t|y)NrS2013Mr<rrz"Can't infer object conversion type)rCrDrrrrNs r)rz'TestParquetFastParquet.test_unsupportedsd \\3S! LM N !!"b*d;\\3 . /2 !!"b*c:r+ctjdtjtdi}t ||y)NrSrH)rCrDrrXr)rr7r]s r)rz'TestParquetFastParquet.test_categoricals, \\3tE{ ;< =R r+cdttdi}tj|}t j 5}|j ||ddt||dg}dddtdk(sJy#1swYxYw)NrSr<r:)r*rrow_group_offsetsr r ) rXrYrCrDrrrrr)rr7rUr]rrs r)r z-TestParquetFastParquet.test_filter_row_groupss $uQx. ! \\!_ __  F$ MM$rtqM Q!$^4DEF F6{a F Fs %A>>Bc Jt||d|jdd|id|dy)Nrz/fastparquet.parquetr)rrrr)rrFrr7rs r)rz(TestParquetFastParquet.test_s3_roundtrip s7   )..//CD*D1)-$G  r+cddg}|}|j|d|dtjj|sJddl}|j t |dj}t|dk(sJy)Nrurqrr*rrrFr; rrrexistsr ParquetFilercatsrrrr7r|rr]ractual_partition_colss r)rz4TestParquetFastParquet.test_partition_cols_supporteds{ %   )  ww~~h''' 7 7 7H u M R R()Q...r+cd}|}|j|d|dtjj|sJddl}|j t |dj}t|dk(sJy)NrurrZrFr:r[r_s r)rz1TestParquetFastParquet.test_partition_cols_string$sw   )  ww~~h''' 7 7 7H u M R R()Q...r+cddg}|}|j|dd|tjj|sJddl}|j t |dj}t|dk(sJy)Nrurqr)r*r partition_onrFr;r[r_s r)test_partition_on_supportedz2TestParquetFastParquet.test_partition_on_supported4s{ %   '  ww~~h''' 7 7 7H u M R R()Q...r+cddg}|}d}tjt|5|j|dd||dddy#1swYyxYw)NrurqzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datarr)r*rrcr)r.rrr)rrr7r|rr]rs r)3test_error_on_using_partition_cols_and_partition_onzJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onDsa!%   ]]:S 1  MM$ +-     s AA zfastparquet writes into Indexrchtj}|j}t|||y)Nr)rCrDrrrr7r]rs r)rz+TestParquetFastParquet.test_empty_dataframeWs&\\^779R(3r+cddl}t|jtdkDrPt|jtdkr/|jtj j dd|gz}tj|d|i}|j}d |j_ t||| y) Nrz2022.12rPzCfastparquet bug, see https://github.com/dask/fastparquet/issues/929rrrrrr) rrrr4r.r5r6rCrDrrrr)rr7r~r(rrr]rs r)rz0TestParquetFastParquet.test_timezone_aware_index^s ;** +gi.@ @W  # #F K F!    !!I" +,, \\>3*? @779%R(3r+ctjdddgi}tj5}|j |t j td5tjt5t|ddddddddt j td5t|dd ddddddy#1swYLxYw#1swYPxYw#1swY*xYw#1swYyxYw) NrSr:r;z!not supported for the fastparquetrrT)r*use_nullable_dtypesr rL) rCrDrrrr.rrrrr)rr7r]rs r)&test_use_nullable_dtypes_not_supportedz=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedus \\3A- ( __  R$ MM$ z1TU W// >W mQUVW Wz1TU RT-yQ R  R RWW W W R R  R RsS-C3C5CC #C3/C'>C3C CC$ C3'C0 ,C33C<cdtjd5}tj|j dt j td5t|ddddtj|jddddy#1swY7xYw#1swYyxYw) N test.parquetsbreakitrrrF) missing_ok) rrrCrD write_bytesr.r Exceptionrunlink)rrs r)$test_close_file_handle_on_read_errorz;TestParquetFastParquet.test_close_file_handle_on_read_errors __^ , 8 LL  * *: 6y3 9T-8 9 LL  % % % 7  8 8 9 9 8 8s$AB&B$-B&B# B&&B/cXtjddgddgd}tjd5}t |j d5}|j |dddt||}dddtj|y#1swY5xYw#1swY,xYw)Nrr:r>r.rnwbr) rCrDrrrfencoderrr)rr*r]rrWrs r)test_bytes_file_namez+TestParquetFastParquet.test_bytes_file_names \\aV1a&9 : __^ , 7dkkmT* !a a  !"$v6F  7 fb)  ! ! 7 7s#B B B B B  B)cXtjdtjddgddgd}t j 5}tj td5|j|dd ddddddt j 5}tj|jd tj td5t|dd ddddddy#1swYxYw#1swYxYw#1swY*xYw#1swYyxYw) Nrrr:r>r.zfilesystem is not implementedrr=r*rrb) r.rTrCrDrrrrErrCrDrqrrr]rs r)test_filesystem_notimplementedz5TestParquetFastParquet.test_filesystem_notimplementeds M* \\aV1a&9 : __  L$#+J L d=U K L L __  K$ LL  * *6 2#+J KT-EJ K K K  L L L L K K K KID#C<8DAD D+D <D DDD D  D)cXtjdtjddgddgd}t j 5}tj td5|j|dd ddddddt j 5}tj|jd tj td5t|dd ddddddy#1swYxYw#1swYxYw#1swY*xYw#1swYyxYw) Nr rr:r>r.z1filesystem must be a pyarrow or fsspec FileSystemrr=rzrb) r.rTrCrDrrrrrrCrDrqrr{s r)test_invalid_filesystemz.TestParquetFastParquet.test_invalid_filesystems I& \\aV1a&9 : __  H$"U H d9 G H H __  G$ LL  * *6 2"U GT)F G G G  H H H H G G G Gr}c tjd}tjddgddgd}t j 5}tj td5|j|d|jd d i ddddddt j 5}tj|jd tj td5t|d|jd d i ddddddy#1swYxYw#1swYxYw#1swY*xYw#1swYyxYw) Nz pyarrow.fsrr:r>r.z8storage_options not supported with a pyarrow FileSystem.rr r=re)r*rrrb)r.rTrCrDrrrrErLocalFileSystemrCrDrqr)rpa_fsr]rs r).test_unsupported_pa_filesystem_storage_optionszETestParquetFastParquet.test_unsupported_pa_filesystem_storage_optionss*##L1 \\aV1a&9 : __  $#P  $$446%*EN   __  $ LL  * *6 2#P $$446%*EN            sID*#&D D*-AE- D6 ED' #D**D36D? ;EE c Ld}tjdttddi}t j d5}|j |tjt|5t|dddddddy#1swYxYw#1swYyxYw) NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rqr:rIz tmp.parquetrnumpy)rM) rCrDrXrYrrrr.rrr)rr*rr]rs r)test_invalid_dtype_backendz1TestParquetFastParquet.test_invalid_dtype_backends % \\5$uQ{"34 5 __] + :t MM$ z5 :T9 : : : : : : :s$-B/B=BB BB#ctjtjgdd}tjtjgdd}t|||y)Nrr*r$r#rr+rhs r)r,z)TestParquetFastParquet.test_empty_columnssB\\} M N<rsu  .-'M KKST KKG   ""))P"#6tDOQ#  !!&(; "    ++$$!!*D% ! 232   TT".h//334h//334h//334""#=?TU""#=?TU""#=?TU""#=?TU       ?D 9 2 $$$F((#V 6 6(" ? ?p-p-f c0c0db4Tb4e$Ms$$L+LLLL)(L)