K iƱddlZddlmZddlmZddlZddlmZddlZddl m Z ddl Z ddl Z ddl Z ddlZ ddlZddlZddlmZmZmZiaiaiadZdZd Zd Zd Zd Z d+d Z!dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+d+dZ, d,dZ-dZ.d-dZ/dZ0 d.dZ1hdZ2dZ3d Z4d!Z5d+d"Z6 d+d#Z7d$Z8d%Z9d&Z:d'Z;d(Zy#e$rdZYwxYw)/N)Sequence)futures)deepcopy) zip_longest) _pandas_api frombytesis_threading_enabledc |ts0tjitjjdtjj dtjj dtjjdtjjdtjjdtjjdtjjdtjjd tjjd tjjd tjjd tjj d tjj"dtjj$dtjj&dtjj(dtjj*dtjj,dtjj.ditS)Nemptyboolint8int16int32int64uint8uint16uint32uint64float16float32float64datetimebytesunicode)_logical_type_mapupdatepalibType_NA Type_BOOL Type_INT8 Type_INT16 Type_INT32 Type_INT64 Type_UINT8 Type_UINT16 Type_UINT32 Type_UINT64Type_HALF_FLOAT Type_FLOAT Type_DOUBLE Type_DATE32 Type_DATE64 Type_TIME32 Type_TIME64 Type_BINARYType_FIXED_SIZE_BINARY Type_STRING[/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pyarrow/pandas_compat.pyget_logical_type_mapr7.s   " FFNNG" FF  f"  FF  f"  FF  w " FF  w " FF  w "  FF  w"  FF  "  FF  "  FF  "  FF " "I"  FF  y"  FF   "  FF  "  FF  " FF  !" " FF  #" $ FF   FF ) )7 FF   )"  , r5ct} ||jS#t$rt|tj j rYyt|tj jrdt|jdcYSt|tj jr|jdcYSdcYStjj|rYyYywxYw)N categoricalzlist[] datetimetzdatetimedecimalobject)r7idKeyError isinstancerrDictionaryTypeListTypeget_logical_type value_type TimestampTypetztypes is_decimal) arrow_typelogical_type_maps r6rDrDKs+-  ..   j"&&"7"7 8  BFFOO 4+J,A,ABC1E E  BFF$8$8 9#-==#<< L* L XX  , s'-C% =C% 2C%=C%C%$C%ctstjtjdtjdtj dtj dtjdtjdtjdtjdtjd tjd tjd d d tjdtjditS)Nr r rrrrrrrrr datetime64[D]rstringr)_numpy_logical_type_maprnpbool_r rrrrrrrrrstr_bytes_r4r5r6get_numpy_logical_type_maprT\s "&& HHf GGV HHg HHg HHg HHg IIx IIx IIx JJ JJ V GGX IIw(  #"r5cJt} ||jjS#t$rtt |jdrYyt |jj drt |jcYStj|}|dk(rYy|cYSwxYw)NrGr; datetime64rNr) rTdtypetyper@hasattrstr startswithr infer_dtype)pandas_collectionnumpy_logical_type_mapresults r6get_logical_type_from_numpyr`rs79 %&7&=&=&B&BCC   $**D 1  && ' 2 2< @(../ /(():; X   s!%B"9B"B"B"!B"c~|j}t|dk(rVt|d|}|Jt|j|j d}t|j j}||fSt|dr1dtjj|ji}d}||fSd}t|}||fS)Ncategorycat)num_categoriesorderedrGtimezonedatetime64[ns]) rWrZgetattrlen categoriesrecodesrYrrtzinfo_to_stringrG)columnrWcatsmetadataphysical_dtypes r6get_extension_dtype_inforqs LLE 5zZvuf-!$//2|| TZZ--. 8 ##   7 7 AB) 8 ##U 8 ##r5ct|}t|\}}|dk(r|j|jd}d}|Yt |t rt j|s4t |ts$td|dt|jt |tsJtt||||||dS)aConstruct the metadata for a given column Parameters ---------- column : pandas.Series or pandas.Index name : str arrow_type : pyarrow.DataType field_name : str Equivalent to `name` when `column` is a `Series`, otherwise if `column` is a pandas Index then `field_name` will not be the same as `name`. This is the name of the field in the arrow Table's schema. Returns ------- dict r=) precisionscaler>z)Column name must be a string. Got column z of type name field_name pandas_type numpy_typero) rDrqrsrtrAfloatrPisnanrZ TypeErrorrX__name__)rmrvrJrw logical_type string_dtypeextra_metadatas r6get_column_metadatars"$J/L#;F#C L.y #--%%   D%(RXXd^4%7vYDz""# %  j# &=D,<(== & #""  r5c ||Dcgc] }t|}}t|D cgc]} t| ts| c} } t|} |d| | z } || | z d} g}t |||| D]'\}}}}t ||||}|j |)g}|durMg}t || |D]\}}}t|tr|j5t|jts|j |jt |t|j||}|j |t|dkDr tjd|dtdg}t|jd |jg}t|jd |jjg}t ||D]"\}}t||}|j |$ngx}x}}d tj ||||zd t"j$d t&j(dj+diScc}wcc} w)aReturns a dictionary containing enough metadata to reconstruct a pandas DataFrame as an Arrow Table, including index columns. Parameters ---------- columns_to_convert : list[pd.Series] df : pandas.DataFrame column_names : list[str | None] column_field_names: list[str] index_levels : List[pd.Index] index_descriptors : List[Dict] preserve_index : bool types : List[pyarrow.DataType] Returns ------- dict N)rvrJrwFrz&The DataFrame has non-str index name `z@` which will be converted to string and not roundtrip correctly. stacklevellevelsnamesspandaspyarrow)libraryversion) index_columnscolumn_indexescolumnscreatorpandas_versionutf8)rZrirAdictziprappendrv_column_name_to_stringswarningswarn UserWarningrhr_get_simple_index_descriptorjsondumpsr __version__rrencode)columns_to_convertdf column_names index_levelsindex_descriptorspreserve_indexrHcolumn_field_namesrvdescrnum_serialized_index_levelsntypesdf_types index_typescolumn_metadatacolrwrJroindex_column_metadatanon_str_index_nameslevel descriptorrrrs r6construct_metadatars*!5AADc$iAA"%:K'G.8.E(-'G#HZF:f::;H!<<=>KO-01C\1CX.O))T:z&s2<2<> x( )U" -0{1B.D 3 )E:z*d+zz%jS.I#**5::6*,UZZ8%% H " ( ( 2! 3$ " #a ' MM89L8MN00  + X |< Gbjjoo->?vu- ,KE43E4@H  ! !( + ,FHGG1N 4::.,&)>>$>>*11    6&>  qB'Gs IIct|\}}t|}d|vrtjdtd|dk(r|rJddi}|||||dS) NmixedzlThe DataFrame has column names of mixed type. They will be converted to strings and not roundtrip correctly.rrrencodingUTF-8ru)rqr`rrr)rrvrrrxs r6rr$sp#;E#B L.-e4K+  @ A 'i!!!$g."""  r5cjt|tr|St|tr|jdSt|tr"tt t t |St|tr td|%t|trtj|r|St|S)a!Convert a column name (or level) to either a string or a recursive collection of strings. Parameters ---------- name : str or tuple Returns ------- value : str or tuple Examples -------- >>> name = 'foo' >>> _column_name_to_strings(name) 'foo' >>> name = ('foo', 'bar') >>> _column_name_to_strings(name) "('foo', 'bar')" >>> import pandas as pd >>> name = (1, pd.Timestamp('2017-02-01 00:00:00')) >>> _column_name_to_strings(name) "('1', '2017-02-01 00:00:00')" rz%Unsupported type for MultiIndex level) rArZrdecodetuplemaprrr|rzrPr{rvs r6rr8s2$ D% {{6"" D% 54d;<== D( #?@@ *T51bhhtn t9r5cn|j#|j|vrt|jSd|ddS)zReturn the name of an index level or a default name if `index.name` is None or is already a column name. Parameters ---------- index : pandas.Index i : int Returns ------- name : str __index_level_d__)rvr)indexirs r6_index_level_namer_s9 zz%**L"@&uzz22!uB''r5cTt|||}|jjs!tdt |j| t |||Sg}g}|durt |jng}g}g}|D]} || } t| } tj| rtd| d|j| |jd|j| |jt| g} g} t|D]\} }t|| |} t!|tj"j$r| t'|}n5|j||jd| }| j| | j||| z}|||| | |||fS)NzDuplicate column names found: FSparse pandas data (column ) not supported.)_resolve_columns_of_interestr is_unique ValueErrorlist$_get_columns_to_convert_given_schema_get_index_level_valuesrrr is_sparser|rrZ enumeraterrApd RangeIndex_get_range_index_descriptor)rschemarrrrrrconvert_fieldsrvrrindex_column_namesr index_levelr all_namess r6_get_columns_to_convertrrs*2vw?G ::  ,T"**-=,> ?  3BOOL.<5-H)  N -h&t,   %-dV3CDF F !!#&d#D!!!#d), -#L1 (; a> {KNN$=$= >&/  TF#''((H- s*C E C*)E*D  A EEc|}||jjvr"t|rt|t dd}|jj |S)z_ Get the index level of a DataFrame given 'name' (column name in an arrow Schema). r)rr_is_generated_index_nameintriget_level_values)rrvkeys r6rrsO C 288>>!&>t&D$s+,R01 88 $ $S ))r5cf tj||S#t$rt|cYSwxYwN)rrr|rZrs r6 _level_namers1 4 4ys 00cdt|jtj|dtj|dtj|ddS)Nrangestartstopstep)kindrvrrr)rrvrget_rangeindex_attribute)rs r6rrsMEJJ'55eWE44UFC44UFC  r5ctt|d|g}t|Dcgc]}|j|c}Scc}w)Nr)rirhrr)rnrs r6rrs: GE8eW -.A/4Qx 8!E " "1 % 88 8sAc| | td||j}|S| |Dcgc]}||jvs|}}|S|j}|Scc}w)NzJSchema and columns arguments are mutually exclusive, pass only one of them)rrr)rrrcs r6rrsv g1<= =  ,, N  %9bjj199 N** N :s AAc t|d||\}}}}}}} }g} | D]%} | j} tj| r"t j | dj } ntj| r\t| tjjr| jdn| dd}t j |dj } n]t| | jd\} } tjj| | } | !t j | dj } | j!| (t#| |||||| |}|| |fS)NT) from_pandasrr)rvaluesris_categoricalrarrayrXis_extension_array_dtyperArSeriesheadget_datetimetz_typerWr_ndarray_to_arrow_typerr)rrrrrr_rrrrHrrtype_r ros r6dataframe_to_typesr&sD ""dNG DY E    % %f -HHQD166E  1 1& 9!+;>>(("*AFF1I/0!u HHU5::E/FMFEFF11&%@E}5:: U "B m=N2DH eX %%r5c Ht||||\}}}} } } } } |Dt|t|j}}||dzkDr|dkDrtj}nd}t sd}fd}d}|dk(r&t | | Dcgc]\}}|||}}}ng}tj|5}t | | D]R\}}||jr|j|||1|j|j|||T dddt|D]3\}}t|tjs!|j||<5|Dcgc]}|j }}|Pg}t ||D]*\}}|jtj"||,tj$|}t'| ||| | |||}|j(rt+|j(n t-}|j/||j1|}d}t|dk(r< | dd} | dk(r.| dd }!| dd }"| dd }#tt3|!|"|#}|||fScc}}w#1swYmxYwcc}w#t4$rY(wxYw) Ndc|d}d}n|j}|j} tj||d}|s+|jdkDrtd|d|jd|S#tjtj tj f$r7}|xjd|jd|jfz c_|d}~wwxYw) NT)rXrsafezConversion failed for column z with type rzField z( was non-nullable but pandas column had z null values) nullablerXrr ArrowInvalidArrowNotImplementedErrorArrowTypeErrorargsrvrW null_countr)rrfield_nullablerr_ers r6convert_columnz+dataframe_to_arrays..convert_columnbs =!NE"^^NJJE XXc4dKF&"3"3a"7veW-$$*$5$5#6lDE E ++!!#  FF/zSYYKPS SFG  sA**2C2CCct|tjxrF|jjxr.t |j jtjSr) rArPndarrayflags contiguous issubclassrWrXinteger)arrs r6_can_definitely_zero_copyz6dataframe_to_arrays.._can_definitely_zero_copywsB3 +7 $$7399>>2::6 8r5rrrrrrr)rrirr cpu_countr rrThreadPoolExecutorrrsubmitrrAFuturer_rXrrrrorrr with_metadatarr)$rrrnthreadsrrrrrrrrrrnrowsncolsrrrfarraysexecutorr maybe_futxrHfieldsrvrpandas_metadataron_rowsrrrrs$ ` r6dataframe_to_arraysr!Is/r6>/68Y 2wBJJu 53; 519||~HH  !*8 1}!"4nEGa!A&GG  ' ' 1 IX.? I1,QXX6MM.A"67MM(//.!Q"GH  I I&f- /LAy)W^^4%,,.q  /$ $QVV $E $ ~y%0 1KD% MM"((4/ 0 16"(B m=N2DO-3OOx(H OOO$  ! !( +FF 6{a $Q'/Dw)!,W5(+F3(+F3U5$56 66 !![G I I %6   s+I=5A"J%J=;JJ  J! J!c4|jjtjk7r||fSt j |r4|2|j }|j}tj||}||fS|tj|j}||fSr) rWrXrPrVr is_datetimetzrGunitr timestampfrom_numpy_dtype)rrWrrGr$s r6rrs ||BMM)u}  'EM XXzz T2& 5= ##FLL1 5=r5cddlmcm}|jdd}|d}d|vr)tj j ||d|d}nd|vrtj|j\}} t||d} t jr2tjj|jd | d }nv|}|rr|j|||j | } | Sd |vrK|d }t#|dk(sJ||d} || } t%| ds t'd| j)|}n|}|r|j||S||fS)a Construct a pandas Block from the `item` dictionary coming from pyarrow's serialization or returned by arrow::python::ConvertTableToPandas. This function takes care of converting dictionary types to pandas categorical, Timestamp-with-timezones to the proper pandas Block, and conversion to pandas ExtensionBlock Parameters ---------- item : dict For basic types, this is a dictionary in the form of {'block': np.ndarray of values, 'placement': pandas block placement}. Additional keys are present for other types (dictionary, timezone, object). columns : Column names of the table being constructed, used for extension types extension_columns : dict Dictionary of {column_name: pandas_dtype} that includes all columns and corresponding dtypes that will be converted to a pandas ExtensionBlock. Returns ------- pandas Block rNblock placement dictionaryre)rjrerfrF)rWcopy)r)klassrWpy_arrayr__from_arrow__zGThis column does not support to be converted to a pandas ExtensionArray)r))pandas.core.internalscore internalsgetrcategorical_type from_codesrP datetime_datarWmake_datetimetz is_ge_v21rrview make_blockDatetimeTZBlockrirYrr.)itemrextension_columns return_block_int block_arrr)rr$rrWr(rv pandas_dtypes r6_reconstruct_blockrAs8)($'I[!It**55 $|"4O6% t ""9??3ad:&67  "..&&w'u5'CC Y.2.B.B.3(5 t :9~"""y|$(. |%56:; ;))#.si88I~r5ctjrd}tjj |}tj ||S)NnsrG)ris_v1rrstring_to_tzinfodatetimetz_type)r$rGs r6r6r6s:   $B  & &t 33r5c rg}g}|jj}|sK|I|d}|jdg}|d}t||}t ||||\}} t |||||} n8t jj|j} t |g|||} t|t|||} |j} tjj|||t!| j#} t j$r/ddlm}| Dcgc]}t+|| | d}}||| | }|Sdd lm}dd lm}| Dcgc]}t+|| | }}| | g}|||}t j4r|j7||j8}|S||}|Scc}wcc}w) Nrrrr)create_dataframe_from_blocksF)r=)rr) BlockManager) DataFrame)rrr2_add_any_metadata_reconstruct_index_get_extension_dtypesrrrnum_rows'_check_data_column_metadata_consistency_deserialize_column_indexrrrtable_to_blocksrkeysis_ge_v3pandas.api.internalsrIrAr/rJpandasrKr7 _from_mgraxes)optionstablerjignore_metadata types_mapper all_columnsrrrrext_columns_dtypesrrr_rIr;blocksrrJrKrXmgrs r6table_to_dataframera sKNll22O :%i0 (,,-=rB+O<!%9)%1B*5|E u2 ; gzC))%..92 2|Wj ,K8'{NKG%%L VV # #GUJ$();)@)@)B$CEFE   l$6U L  *&w O 6$  t\3E F  64(  "$$S#((3B 3B +  s F/F4> r r rrrrr>rrrrrrc|d}|xsg}i}tj|S|r7|jD](}|j}||} | | ||j<*|jD]X}|j}|j|vst |t js9 |j} | ||j<Z|D]} | d} | d} | |vs| tvstj| } t | tjsLt | tjjrL|s| |vrw t jj!|jj#| jr t%| ds| || <tj&r|s|jD]}|j|vst jj)|jsSt jj+|js*t jj-|js|j|vstjjt.j0||j<|S#t$rY7wxYw#t$r | d} YwxYw#t$rYCwxYw)a Based on the stored column pandas metadata and the extension types in the arrow schema, infer which columns should be converted to a pandas extension dtype. The 'numpy_type' field in the column metadata stores the string representation of the original pandas dtype (and, despite its name, not the 'pandas_type' field). Based on this string representation, a pandas/numpy dtype is constructed and then we can check if this dtype supports conversion from arrow. strings_to_categoricalrwrvryr.)na_value)rextension_dtyperrXrvrArBaseExtensionTypeto_pandas_dtypeNotImplementedErrorr@_pandas_supported_numpy_typesr@r StringDtyperH is_dictionaryrrYuses_string_dtype is_stringis_large_stringis_string_viewrPnan) rZcolumns_metadatar\rYrjrc ext_columnsrtypr@col_metarvrWs r6rNrNMs%%=>!rJK""*\\ 7E**C',L'*6 EJJ'  77jj ::[ (ZR=Q=Q-R 7"224 +7 EJJ'7%5 $L)D& { "u4Q'Q'33E:L, (C(CDlKNN,F,FG .1C 8811%,,2D2DT2J2O2OP$Q<)9:(4K%55:$$&/E\\ VEzz,""5::.88++EJJ788**5::6**J.*5..*D*Dbff*D*U EJJ'  V Y'   $F#D $($s7JJ :AJ5 JJ J21J25 KKc,td|DsJy)Nc3HK|]}|dduxrd|vxs|dduyw)rvNrwr4).0rs r6 z:_check_data_column_metadata_consistency..s<  6d  0|q0JQvYd5JJs ")all)r]s r6rPrPs#   r5c T|rY|Dcic]$}|jdt|d|d&}}|jDcgc]}|j||}}n |j}t|dkDr^tj j jtttj||Dcgc]}|d c}}n+tj j||r|ddnd}t|dkDr t||}|Scc}wcc}wcc}w)Nrwrvrrrr)r2rrrirr MultiIndex from_tuplesrrast literal_evalIndex"_reconstruct_columns_from_metadata) block_tabler]rrcolumns_name_dictrvcolumns_values col_indexrs r6rQrQs8!  EE, 7& B CQvY N  ;F:R:R 26  ! !$ -  %11 >Q..++77 S%%~6 76DE9V$E8  ..&& n!26!:RV'   >Q4WnM N9  Fs)DD ? D% c|Dcic]}|jd|d|}}g}g}|}|D]} t| trt||| ||\}} } | c)| ddk(rI| d} tj j | d| d| d| } t| t|k7rztd | d|j| |j| tj } t|d kDr!| jj|| } || fSt|d k(r5|d } t| | js| j| |d  } || fS| j |j} || fScc}w)Nrwrvrrrrr)rrvzUnrecognized index kind: rr{rr)r2rArZ_extract_index_levelrrrrirrr| from_arraysrrO)rZrr]r\rfield_name_to_metadata index_arrays index_names result_tablerr index_namerrs r6rMrMs  lAfI&)LKL"' eS !4H|U,BL5R 1L+z" 6]g %vJ%..33E'N49&M9>v9C4EK;3u:-8vHI IK(:&''* B <1 )),k)J   \ a Q%*HHUQH8E   enn-  YsE<c&||d}t||}|jj|}|dk(r|ddfS|j|}|j |} d| _|j |jj|}|| |fS)Nrv)r\) _backwards_compatible_index_namerget_field_indexrm to_pandasrv remove_column) rZrrwrr\ logical_namerrrrs r6rrs)*5f=L1*lKJ $$Z0ABwT4'' ,,q/C--\-:KK--++J7L j 00r5c(||k(r t|ry|S)a1Compute the name of an index column that is compatible with older versions of :mod:`pyarrow`. Parameters ---------- raw_name : str logical_name : str Returns ------- result : str Notes ----- * Part of :func:`~pyarrow.pandas_compat.table_to_blockmanager` N)r)raw_namers r6rrs$<$r)rrNrr{)rWrv)rrrhrr2rZrWoperator methodcallerrrPrSrrrrF to_datetime tz_convertrTas_unitr5rr=Decimalrastyperir|rrv)rrrrlabelsrr levels_dtypes new_levelsencoderr@ numpy_dtyperWrGrs r6rr[sj, BWh - :'F Wgt , 6F !, Nb!  E9  mS-=> |T * ,MJ##Hg6G,9'!(|[*<8 BII IIg&E \ )((q!*-j9;BNN5dN3>>rBE##% b&6&6u&=a&@A Y &NN((e)L'//!*<)LME KK5 [H%<L(LK%9%MM,.88F3K4D4D4A,COC(,9$S)=:@ q s5<<() /A$$/23 oa01uQx( ell1o.  /xx##GBIIf4E#FF e2sIctjj|}|jj djj |}|S)zB Make a datetime64 Series timezone-aware for the given tz r)rrrFdt tz_localizer)seriesrGs r6 make_tz_awarersA   $Bii##E*R 2 Mr5r)rNT)NNT)NFN)?r~collections.abcr concurrentrconcurrent.futures.threadr+rr= itertoolsrrrrrnumpyrP ImportErrorrr pyarrow.librrr rrOrr7rDrTr`rqrrrrrrrrrrrrrr!rrAr6rarirNrPrQrMrrrrrrrLrr4r5r6rsQ& $!!  DD:"#,"$&,b+/\~($N(&?QD;Q| *9  &FIM!a"H &BJ4JN4r!PfB2l?C1&0/ $&%.TUn:Ba& BsCCC