gL iHddlmZddlmZmZddlmZmZmZm Z ddl Z ddl m Z ddl Z ddlmZmZmZmZmZddlZddlZddlmZddlmZmZmZdd lmZdd lm Z dd l!m"Z"m#Z#m$Z$dd l%m&Z&dd l'm(Z(m)Z)ddl*m+Z+m,Z,erddl-m.Z.m/Z/m0Z0ddl1m2Z2m3Z3dZ4Gdde+Z5GddejZ6Gdde5Z7ddZ8ddZ9y)) annotations)abc defaultdict)HashableIteratorMappingSequenceN)StringIO)IO TYPE_CHECKING DefaultDictLiteralcast)lib)EmptyDataError ParserError ParserWarning)cache_readonly)find_stack_level) is_bool_dtype is_integeris_numeric_dtype) is_dict_like) dedup_namesis_potential_multi_index) ParserBaseparser_defaults) ArrayLike ReadCsvBufferScalar)Index MultiIndexucneZdZUded<dfd Zed dZd!dZ d" d#dZ d$dZ d" d%dZ d&d Z ed'd Z d(d Z ed Z d)d Zd*dZd+dZd,dZd*dZd-dZd.dZd/dZd/dZd/dZ d0dZd/dZd1dZ d2dZd3dZd"d4dZd5dZd6dZ xZ!S)7 PythonParserset[int]_no_thousands_columnsc t|d_g_d_d_|d_tj rj _n fd_t|d_ |d_ |d_ tjtrtj_ |d_|d _|d _|d _|d _|d _d_d|vr |d_|d_|d_|d_|d_t|t4rt7t8t|_n$t;|dsJj=|_d_jA\}_!_"jG|jH\_%_$_&}t5jJ_'jPs8jS\}_'_%d_*jH|_$j>,t5tWtYjJ_j[jJ_.j__0tYj0dk7r tcdy)zN Workhorse function for processing nested list into DataFrame Nrskiprowsc |jvSN)r()xselfs e/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pandas/io/parsers/python_parser.pyz'PythonParser.__init__..Zsa4==&8 skipfooter delimiter quotechar escapechar doublequoteskipinitialspacelineterminatorquotingskip_blank_linesFhas_index_namesverbose thousandsdecimalcommentreadlineTz'Only length-1 decimal markers supported)2super__init__databufposline_posr(callableskipfunc_validate_skipfooter_argr0r1r2 isinstancestrr3r4r5r6r7r8r9r:r;r<r=listrrhasattr _make_reader _col_indices_infer_columnsnum_original_columns unnamed_cols_extract_multi_indexer_columns index_namescolumns col_names orig_names_has_complex_date_col_get_index_name_name_processedrangelen_validate_parse_dates_presence_parse_date_cols_set_no_thousand_columnsr& ValueError)r,fkwdsrT_rS __class__s` r-rAzPythonParser.__init__Js *.  Z( DMM " MMDM8DM24 3EFk*k* dnnc * 0DN|, . $%7 8"#34I $%7 8$  $#'(9#:D I k*I I  a Xc]A.DI1j) ))))!,DI/3    !    %    / /      L   N +/t||*< ));?;O;O;Q 8[$/4<#'D '#.    $ $U3t||+<%= >D  $ C CDLL Q%)%B%B%D" t||  !FG G "r/ctj|j}|jd|d}n(tj|j}d|d|d}tj|S)Nz^[\-\+]?[0-9]*(z [0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$z^[\-\+]?([0-9]+z |[0-9])*()reescaper<r;compile)r,r<regexr;s r-numzPythonParser.numsm))DLL) >> !&wi/OPE $..1I"9+Ywi@,- zz%  r/c j t dk(rjr tdGfddtj }|} |_n[j }j|ggd}jjs|sZxjdz c_ j }j|ggd}jjrW|sZttt|}|d}xjdz c_ xjdz c_ t jj|}|j|_t j t#||}j$j't|t j |d} | S fd } | } | S) Nr?z.MyDialect N) __name__ __module__ __qualname__r1r2r3r4r5r7r6r,sr- MyDialectrlsC NN  NN !__ ".. #'#8#8 ,,!%r/rrr)dialectT)rsstrictc3Kj}tj}|j|j D]#}|j|j %ywr*)r>rergsplitstrip)linepatr`seps r-_readz(PythonParser._make_reader.._readsXzz|jjoii --2D))DJJL112sA0A3)r1r[r6r_csvDialectr>_check_commentsrGrDrrKrJrESniffersniffreaderr rCextend) r,r`rrdiarxlines lines_strsniffedline_rdrrr{rzs `` @r-rMzPythonParser._make_readersnn ;#c(a-"" R &CKK &C # zz|,,tfX6q9mmDHH-UHHMH::>Dyydy##r/c|j|j}|j|j}i}i}t|jt rg|jD]W}|j|}|j |}t|tr||jvr|j|}|||<|||<Yn|j}|j }|j||||j||Sr*) _clean_mapping convertersrrI na_valuesdict na_fvaluesintrV_convert_to_ndarraysr:) r,rB clean_conv clean_dtypesclean_na_valuesclean_na_fvaluescolna_value na_fvalues r-rzPythonParser._convert_dataPs ((9 **4::6  dnnd +~~ 2>>#. OOC0 c3'Ct,F//#.C'/$(1 % 2#nnO# ((    LL     r/c|jy|j}t|tttj frt |dkDSy)NFr?)headerrIrKtuplenpndarrayr[)r,rs r-_have_mi_columnszPythonParser._have_mi_columnsss> ;;  ftUBJJ7 8v;? "r/c H |j}d}d}t}|j|j}|j}t |t t tjfr|rt ||ddzgz}n|g}g}t|D]u\}} |j} |j| kr |j} |j| kr g}g}t| D]N\}}|d k(r3|r d|d|}nd|}|j)||j)|>|j)|P|st-t.}t1t#|Dcgc]}||vr| c}|z}|D]}||}|}||}|dkDr|dkDr$|dz||<|d|}||vr|dz }n||}|dkDr$|j2t5|j2rl|j2j7|Q|j2j7|6|j2j9||j2j7|i|||<|dz||<nj|rh| |dk(r`t#|}|j:}| t#|nd}t#|}||k7r||z |kDs|dk(rd}dg|z}|j<dg|_|j)||j9|Dchc]}|| c}t#|dk(skt#|}x|r|j'| |j}|dn t#|}t#|t#|dkDrt#||kDr t%dt#|dkDr t?d|j@|jC|||n t#|}|jDIt#|t#|jDk7r(tG|jDDcgc]}|| c}g}n|g}n|jC||d|}nt#|jH}|}|s,t t1|g}|jC||d|}n|j@t#||k\r |jC|g||}t#|}nZtK|j@s,t#|t#|j@k7r t%d|g}|jC||d||||fS#t$r} d|jcxkr| krknnh|r| |dk7r^t tt|r|ddn|} ddj!| dt#| d} t%d | d |jd | |rD| dkDr?|r|j'|j)dgt#|dz|||fcYd} ~ cS|js t+d | |jdd} Yd} ~ 'd} ~ wwxYwcc}wcc}w#t$rd}YwxYwcc}w)NrTr?[,z ], len of z, zPassed header=z but only z lines in fileNo columns to parse from filez Unnamed: _level_.FzHNumber of passed names did not match number of header fields in the filez*Cannot pass names with multi-index columns)&rsetrrrIrKrrrr_buffered_linerE _next_linermaprJjoinr[r_ _clear_bufferappendrrrrZrrgetupdaterrC TypeErrorusecols_handle_usecolsrNsorted _header_linerF)r,rrP clear_bufferrQrhave_mi_columnsrTlevelhrrxerrjoimsg this_columnsthis_unnamed_colsrccol_namecountscol_loop_orderrold_col cur_countlcsicic unnamed_count first_linelen_first_data_rowncolss r-rOzPythonParser._infer_columns~s[   +.5 ;; "[[F"33O&4 ";<"!&\VBZ!^,< -/BGm4KPRVWPW+0L,06B;L(, ~DH|,##>O$P\!_$PQw<1$+.|+<(Qh =T""$ &!%!2J+5*wO0O 0 !'$$Wgaj%@,l::y%)4==.B.+rVBZ/?#3s?F3BKPV#WX !#((3- 3s8*BG(,SE' nF ##'26' ..0vGBK0@'@A&(S? X X X 5 X? XB>W;X'W;;X XXc|jy |j}|S#t$r2}|js t d||jdd}Yd}~|Sd}~wwxYw)Nr)rrrrr)r,rxrs r-rzPythonParser._header_line7sd ;; " !&&(D   !::$%DE3N::a=D  !s! A'AAc |j>t|jr|j|j|}ntd|jDrpt |dkDr t dg}|jD]E}t |tr" |j|j|5|j|Gn<|jDcgc] }||k\s | }}|rtd|d|j}|D cgc]%}t|D cgc] \}} ||vs | c} }'}}}} t||_|S#t $r|j|j|YwxYwcc}wcc} }wcc} }}w)zb Sets self._col_indices usecols_key is used if there are string usecols. c3<K|]}t|tywr*)rIrJ).0us r- z/PythonParser._handle_usecols..Us>AZ3'>sr?z4If using multiple headers, usecols must be integers.z>>w@#ll & )0Etq!A4DEG!'{ 3D ) *T 88{ST #Fs< E E9%E9F! E>.E>2F%E65E6>Fcpt|jdkDr|jdS|jS)zH Return a line from buffer, filling buffer if required. r)r[rCrrqs r-rzPythonParser._buffered_linevs/ txx=1 88A; ??$ $r/cR|s|St|dts|S|ds|S|dd}|tk7r|S|d}t|dkDrR|d|jk(r@d}|d}|ddj |dz}|||}t||dzkDr|||dzdz }n|dd}|g}||ddzS)a- Checks whether the file begins with the BOM character. If it does, remove it. In addition, if there is quoting in the field subsequent to the BOM, remove it as well because it technically takes place at the beginning of the name, not the middle of it. rr?N)rIrJ_BOMr[r2r) r, first_row first_elt first_row_bomstartquoteendnew_row new_row_lists r-_check_for_bomzPythonParser._check_for_boms  )A,, | aLO   !!  }  !mA&6$..&HE!!$E#))%014C$E#.G=!C!G+=q33$AB'G&-Y im++r/c0| xstd|DS)z Check if a line is empty or not. Parameters ---------- line : str, array-like The line of data to check. Returns ------- boolean : Whether or not the line is empty. c3"K|]}|  ywr*)rr+s r-rz.PythonParser._is_line_empty..s31u3s )all)r,rxs r-_is_line_emptyzPythonParser._is_line_emptysx333d333r/ct|jtr|j|jrT|jt |jk\rn1|xjdz c_|j|jrT |j |j|jgd}|xjdz c_|js/|j|j|jdz s|rn|jr|j|g}|r|d}n|j|jrT|xjdz c_|jJt|j|j|jrT |j|jdz}|xjdz c_|O|j |gd}|jr|j|g}|r|d}n|j|s|rn|jdk(r|j|}|xjdz c_|j j#||S#t$rtwxYw)Nr?rrow_num)rIrBrKrGrDr[r~r8r_remove_empty_lines IndexErrorrnext_next_iter_liner rErCr)r,rxret orig_lines r-rzPythonParser._next_lines dii &--)88s499~-A --) (//4881D0EFqIDHHMH00++DIIdhhl,CD,,"66v>#&q6D!"--)A yy,,,TYY --)  00A0F A (// #&q6D!,,Y74& 88q=&&t,D    C"(''(s A:I8 %I88Jc|j|jjk(r t||j|jjk(r,t j d|d|dttyy)a  Alert a user about a malformed row, depending on value of `self.on_bad_lines` enum. If `self.on_bad_lines` is ERROR, the alert will be `ParserError`. If `self.on_bad_lines` is WARN, the alert will be printed out. Parameters ---------- msg: str The error message to display. row_num: int The row number where the parsing error occurred. Because this row number is displayed, we 1-index, even though we 0-index internally. zSkipping line z: rm) stacklevelN) on_bad_linesBadLineHandleMethodERRORrWARNwarningswarnrr)r,rrs r-_alert_malformedzPythonParser._alert_malformedso"    8 8 > > >c" "    8 8 = = = MM  C53+-  >r/c |jJt|j}t|tsJ|S#tj $r}|j |jj|jjfvr@t|}d|vsd|vrd}|jdkDr d}|d|zz }|j||Yd}~yd}~wwxYw)a[ Wrapper around iterating through `self.data` (CSV source). When a CSV error is raised, we check for specific error messages that allow us to customize the error message displayed to the user. Parameters ---------- row_num: int The row number of the line being parsed. Nz NULL bytezline contains NULzNULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' insteadrzError could possibly be due to parsing errors in the skipped footer rows (the skipfooter keyword is only applied after Python's csv library has parsed all rows).. ) rBrrIrKr|ErrorrrrrrJr0r")r,rrxerreasons r-rzPythonParser._next_iter_lines! 99( (( ?DdD) ))Kyy   ((..((--%!f#%)<)C???Q&%4&=(C%%c735 s69C A8C  Cc`|j|Sg}|D]}g}|D]~}t|tr|j|vs||jvr|j |A|d|j |j}t |dkDr|j |n|j ||Sr)r=rIrJrrfindr[)r,rrrxrlr+s r-r~zPythonParser._check_commentsEs << L DB "1c*||1,DNN*IIaL0AFF4<<01A1vz !   JJrN  r/c|Dcgc]F}t|dkDs4t|dk(r(t|dtr|djr|H}}|Scc}w)a Iterate through the lines and remove any that are either empty or contain only one whitespace value Parameters ---------- lines : list of list of Scalars The array of lines that we are to filter. Returns ------- filtered_lines : list of list of Scalars The same array of lines with the "empty" ones removed. r?r)r[rIrJrw)r,rrxrs r-rz PythonParser._remove_empty_linesZs]$ D A t9>#DGS1T!W]]_     sA AcZ|j|S|j||jdS)Nrrsearchreplace)r;_search_replace_num_columnsr,rs r-_check_thousandszPythonParser._check_thousandsus4 >> !L//0  r/c`g}|D]}g}t|D]\}}t|tr;||vs7||jvs)|jj |j s|j|c|j|j|||j||Sr*) rrIrJr&rir.rwrr/) r,rr.r/rrxr*rr+s r-r0z(PythonParser._search_replace_num_columns}s DB!$ :1"1c*QD66688??17795IIaLIIaii89 : JJrN  r/cn|jtdk(r|S|j||jdS)Nr<rr-)r<rr0r1s r-_check_decimalzPythonParser._check_decimals; <SY1J1J%JJ%)%D *:%;DN#xx|DH%d^-q!,-"&gJ03G D-W44  "#'D ~~%!%e,?&@!AJ/3.E.E/ +ZDN:w..i!    I s#FF( F%$F%( F76F7c|j}|jr|t|jz }t d|D}||kDr|jdur|j |j r |j nd}g}t|}t|}g}|D]\}} t| } | |kDrt|jr&|j| } | ?|j| Q|j|jj|jjfvs|j||z |zz } |j| | f|j|jjk(sn|j| |D]s\} } d|d| dzd| } |jr?t|jdkDr'|j t"j$k7r d}| d |zz } |j'| | dzut)t+j,|| j.}|j r|j0J|j0}|jrQt|Dcgc]9\}}|t|jks|t|jz |vr|;}}}|St|Dcgc] \}}||vs |}}}|Scc}}wcc}}w) Nc32K|]}t|ywr*)r[)rrows r-rz-PythonParser._rows_to_cols..s23c#h2sFrz Expected z fields in line r?z, saw zXError could possibly be due to quotes being ignored when a multi-char delimiter is used.r$) min_width)rPrr[rmaxrr0rrFrrrrrrDr1r7r| QUOTE_NONEr"rKrto_object_arrayTrN)r,rcol_lenmax_lenfooters bad_lines iter_content content_lenr_content actual_lennew_lrrr'zipped_contentras r-rzPythonParser._rows_to_colss++    s4>>* *G2'22 g e+ $)-dooaGI$W-Lg,KG+ - 8 ] ' 1 12 $ 1 1( ; ,#NN51**00660055/#'((kAo.G"H!((':)>?,,0H0H0N0NN!NN8,% -((1 8#y(81 V!l$ NNDNN+a/ 6G4&=(C%%c7Q;7# 8(c11'WMOOP <<$$0 00++K##!*. 9"1C//s4>>22kA ""#,N";"!QqK?OA""""s(>K9 KKc|j}d}|Ut|j|k\r%|jd||j|dc}|_n|t|jz}|t|jtr|j t|jkDrt |/|j|j d}t|j}n5|j|j |j |z}|j |z}|j|}|j|||_ng} |d}d}|j |j nd}||krX|jJt|j}|j||zs|dz }|dz }|j|||krXt|} |j|}|j|n>d} |j|j |zdz} |dz }| |j| ; |xj | z c_g|_n|}|jr|d|j }|j|}|jr|j!|}|j#|}|j%|S#t $r?t|} |j|}|j|t|dk(rYwxYw)Nrr?r)rCr[rIrBrKrDr_remove_skipped_rowsrrrGrrr0r~r8rr2r5) r,rrnew_rowsnew_pos row_indexrow_ctrr  len_new_rowsnext_rows r-rzPythonParser._get_lines?s  488}$%)XXet_dhhtuo"$(DHH %  $))T*88c$))n,''<#yy4H!$))nG#yyDHHtODH"hhoG44X> X&""'$% !"-1XX-Aq$tm$(99#88#8&*499oG#'==)1C#D &! %NI$OOG4%tm(+8} #'#<# 03!$**d3B"&4040 40l! B6"&$$ $! +!  &! Fw; Bw;r  .*.)." . # .`%3,j 46p4.`*6 '14?B & L/ IL/\TlO*b$r/r$c`eZdZdZ d ddZd d dZ d d dZd dZy)FixedWidthReaderz( A reader of fixed-width lines. Nc J||_d|_|rd|znd|_||_|dk(r|j |||_n||_t |j ttfs!tdt|j|j D]}t |ttfrit|dk(r[t |dttjtdfr.t |dttjtdfrtd y) Nz z infer) infer_nrowsr(z;column specifications must be a list or tuple, input was a rrr?zEEach column specification must be 2 element tuple or list of integers)r`bufferr1r=detect_colspecscolspecsrIrrKrtypernr[rrinteger)r,r`rtr1r=r(rqcolspecs r-rAzFixedWidthReader.__init__s '+ /8)+i w  00'(1DM%DM$--%7#H~6679  }} G7UDM2LA%wqzCT$Z+HIwqzCT$Z+HI: r/c| t}g}g}t|jD];\}}||vr|j||j|t ||k\s;nt ||_|S)a Read rows from self.f, skipping as specified. We distinguish buffer_rows (the first <= infer_nrows lines) from the rows returned to detect_colspecs because it's simpler to leave the other locations with skiprows logic alone than to modify them to deal with the fact we skipped some rows here as well. Parameters ---------- infer_nrows : int Number of rows to read from self.f, not counting rows that are skipped. skiprows: set, optional Indices of rows to skip. Returns ------- detect_rows : list of str A list containing the rows to read. )rrr`rr[iterrr)r,rqr( buffer_rows detect_rowsrr@s r-get_rowszFixedWidthReader.get_rowss2  uH  ' FAs ""3'   s #;;.   ;' r/c dj|jDcgc]}d| c}}tjd|d}|j ||}|s t dt tt|}tj|dzt}|j+|D cgc] } | j|jd"}} |D]9} |j| D]#} d|| j| j!%;tj"|d} d| d<tj$|| z dk(d} t't)| ddd | ddd } | Scc}wcc} w) Nr\z([^z]+)z(No rows from which to infer column widthr?)rrr)rr1rergr|rrBrr[rzerosrr= partitionfinditerrrrollwhererKr])r,rqr(r+ delimiterspatternrrGmaskr@mshiftededges edge_pairss r-rsz FixedWidthReader.detect_colspecss]WW?11#h?@ **s:,c23}}[(3 !KL Lc#tn%xx! 3/ << #>BCsCMM$,,/2CDC .C%%c* .,-QWWY) . .''$" $.Q./2#eCaCj%1+67 !@Ds E-,%E2cJ|j t|j}nt|j}|jDcgc]#\}}|||j |j %c}}S#t$rd|_t|j}YbwxYwcc}}wr*)rrrrr`rtrwr1)r,rxfrom_tos r-__next__zFixedWidthReader.__next__s ;; " $DKK( .Gs(E1z!S))6QWWY6Es-/)r)r,rrxs r-rz)FixedWidthFieldParser._remove_empty_lines=s2 EEE    s#N)r`zReadCsvBuffer[str]r`ra)r`rbr`rnri)rnrorprrArMrrr/r-rr's /    r/rc&td|DS)Nc30K|]}|dk(s| dyw)rNr?r)rvs r-rz#count_empty_vals..Ls7Q!r'QYq7s )sum)valss r-rrKs 7$7 77r/cRt|s td|dkr td|S)a Validate the 'skipfooter' parameter. Checks whether 'skipfooter' is a non-negative integer. Raises a ValueError if that is not the case. Parameters ---------- skipfooter : non-negative integer The number of rows to skip at the end of the file. Returns ------- validated_skipfooter : non-negative integer The original input if the validation succeeds. Raises ------ ValueError : 'skipfooter' was not a non-negative integer. zskipfooter must be an integerrzskipfooter cannot be negative)rr_)r0s r-rHrHOs3* j !899A~899 r/)r`r)r0rr`r): __future__r collectionsrrcollections.abcrrrr r|ior retypingr r r rrr numpyr pandas._libsr pandas.errorsrrrpandas.util._decoratorsrpandas.util._exceptionsrpandas.core.dtypes.commonrrrpandas.core.dtypes.inferencerpandas.io.commonrrpandas.io.parsers.base_parserrrpandas._typingrrr pandasr!r"rr$rnrrrHrr/r-rs"    34 6  g$:g$T#sWs||sWl! L! H8r/