gL i_g ddlmZddlZddlZddlZddlmZddlmZm Z m Z m Z m Z dZdZdZdZd Zd Zd Zd Zd ZdZdZdZdZej6j9dddgdZdZdZdZ dZ!ej6j9ddddgfdddgfgdZ"dZ#d Z$ej6j9d!gd"fgd#fgd$fgd%fgd&d"fgd&d%fgd&d$fgd&d%fgd'Z%d(Z&d)Z'd*Z(d+Z)d,Z*d-Z+y).)datetimeN) ArrowDtype) DataFrameIndex MultiIndexSeries_testingctdtjdg|}tjt d5|j jdddddy#1swYyxYw)NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)rnpnanpytestraises ValueErrorstrextract)any_string_dtypevaluess g/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raisesrsW %rvvu5=M NF z)G H> /=>>>s A!!A*cbtdtjdg|}tdtjtjg|}|jj d}t j|||jj dd}t j||tddgtjtjgtjtjgg|}|jj d d }t j||y) Nr r r BAD__z .*(BAD[_]+).*TrBADrF)rrrrrrtmassert_frame_equal)rsexpectedresults rtest_extract_expand_kwargr's "&&%08HIA'2662662:JKH UU]]? +F&(+ UU]]?4] 8F&(+ 5 BFFBFF+bffbff-=>FVHUU]]/] >F&(+c tdtjddtjddddg }|j j dd }tjtjg}td d g|d d g||||||g t }tj|||j j d d }td tjd tjtjtjdtjtjg t }tj||y)NaBAD_BAD BAD_b_BADTr @rFrBAD_r!r z.*(BAD[_]+).*BAD) rrrrtodayrrrobjectr"r#assert_series_equal)serr&err%s r&test_extract_expand_False_mixed_objectr4*s  RVV[$0@%qRUV C WW__1%_ @F &&"&& B %"vuor2r2r2FfH&(+WW__/_ >F rvvrvvNH68,r(ctgd}d}tjt|5|jj dddddy#1swYyxYw)N)A1A2A3A4B5z,only one regex group is supported with Indexr ([AB])([123])Fr)rrrrrr)idxmsgs r test_extract_expand_index_raisesr>@sK . /C 8C z -7 6777s AAcT|gd|}d}tjt|5|jj dddddtjt|5|jj dddddy#1swYKxYw#1swYyxYw) Nr6B2C3r "pattern contains no capture groupsr [ABC][123]Fr (?:[AB]).*rrrrrindex_or_seriesrs_or_idxr=s r,test_extract_expand_no_capture_groups_raisesrJJs19IJH .C z -9 \%89 z -9 \%899 9999B+BBB'c|ddg|}|jjdd}|ddgd| }|tk(rtj||ytj ||y) Nr6r7r (?PA)\dFrAunonamer)rrrr"r1assert_index_equalrHrrIr&r%s r(test_extract_expand_single_capture_grouprTWsgd|3CDH \\ ! !/% ! @FSz=MNH&  vx0 fh/r(c tgd|}|jjdd}ttjtjtjg|}t j |||jjdd}ttjtjgtjtjgtjtjgg|}t j|||jjdd}tdd tjg|}t j |||jjd d}tdd gd d gtjtjgg|}t j|||jjd d}tdd tjgd|}t j |||jjdd}tdd gd d gtjtjggddg|}t j|||jjdd}tdd gd d gtjtjggddg|}t j|||jjdd}tdd tjg|}t j ||tgd|}|jjdd}tdd gd d gtjtjgg|}t j||tgd|}|jjdd}tdd gd d gtjdggddg|}t j||tgd|}|jjdd}tdd gd d gdtjggddg|}t j||y)Nr@r (_)Fr(_)(_) ([AB])[123]rNBr;12(?P[AB])letterrP!(?P[AB])(?P[123])numbercolumnsr([AB])(?P[123])r([AB])(?:[123])A11B22C33([AB])([123])(?:[123])r6rA3"(?P[AB])?(?P[123])rjr6rAC#(?P[ABC])(?P[123])?rm) rrrrrr"r1rr#rr$r&r%s r"test_extract_expand_capture_groupsrpcs!)9:A UU]]5] /Frvvrvvrvv.6FGH68,UU]]8E] 2F &&"&& BFFBFF+bffbff-=>FVH&(+UU]]=] 7FsC(0@AH68,UU]]?5] 9F sc3Z"&&"&&!12:JH&(+UU]]-e] u] MF sc3Z"&&"&&!128$H &(+UU]]4U] CF sc3Z"&&"&&!12H H &(+UU]],U] ;FsC(0@AH68, $,<=A UU]]3E] BF sc3Z"&&"&&!12:JH&(+  (89A UU]]?] NF sc3Z"&&#/8$H &(+  (89A UU]]@] OF sc3Z#rvv/8$H &(+r(cdgd}t|dk(rtjdt|t|kr)|jd}t|t|kr)|dt|}t |||}|j j dd}t d d tjg||}tj|||j j d d}td d gd d gdtjggddg||}tj||y)NrlrzTest requires len(index) > 0indexr(\d)FrrZr[(?P\D)(?P\d)?rNrYrmr]r_rartr) lenrskiprepeatrrrrrr"r1rr#)rtrdatar2r&r%s r(test_extract_expand_capture_groups_indexr|s  D 5zQ 23 e*s4y  Q e*s4y  +CI E U*: ;C WW__WU_ 3FsC(=MNH68, WW__=e_ LF sc3Z#rvv/8$ H &(+r(ctgdd|}|jjdd}tgdd|}tj||y) Na3b3c2bobrPz(?P[a-z])Frabcsue)rrrr"r1ros r,test_extract_single_series_name_is_preservedrsH!5EFA UU]],U] ;FoE9IJH68,r(cBtdtjdg|}|jj dd}t ddgtjtjgtjtjgg|}t j||y) Nr r r rTrr r!)rrrrrrr"r#ros rtest_extract_expand_Truersz "&&%08HIA UU]]/] =F 5 BFFBFF+bffbff-=>FVH&(+r(c Ntjtjg}tdtjddtjddddg }|j j dd}td d g|d d g||||||g t }tj||y) Nr*r+Tr r,r-rrr.r!r ) rrrrr/rrrr0r"r#)r3mixedr&r%s r%test_extract_expand_True_mixed_objectrs &&"&& B   FF   NN      EYY  3D  AF %"vuor2r2r2FfH&(+r(cT|gd|}d}tjt|5|jj dddddtjt|5|jj dddddy#1swYKxYw#1swYyxYw) Nr@r rCrrDTrrErFrGs r4test_extract_expand_True_single_capture_group_raisesrs 19IJH .C z -8 \$78 z -8 \$788 8888rKc|ddg|}|jjdd}tdddgi|}tj||y) Nr6r7r rMTrrOrN)rrrr"r#rSs r-test_extract_expand_True_single_capture_groupr sRd|3CDH \\ ! !/$ ! ?F%#s,4DEH&(+r(rQ series_namectgd||}|jjdd}ttj tj tj g|}t j|||jjdd}ttj tj gtj tj gtj tj gg|}t j|||jjdd}td d tj g|}t j|||jjd d}td d gd d gtj tj gg|}t j|||jjdd}tdd d tj gi|}t j|||jjdd}td d gd d gtj tj ggddg|}t j|||jjdd}td d gd d gtj tj ggddg|}t j|||jjdd}td d tj g|}t j||y)Nr@rPrVTrr rWrXrNrYr;rZr[r\r]r^r_r`rbrrcrrrrrrr"r#)rQrr$r&r%s rtest_extract_seriesrs !4DEAUU]]5] .F"&&"&&"&&19IJH&(+UU]]8D] 1F &&"&& BFFBFF+bffbff-=>FVH&(+UU]]=] 6F#sBFF+3CDH&(+UU]]?4] 8F sc3Z"&&"&&!12:JH&(+UU]]-d] ;F(S#rvv$67?OPH&(+UU]]>t] LF sc3Z"&&"&&!128$H &(+UU]]4T] BF sc3Z"&&"&&!12H H &(+UU]],T] :F#sBFF+3CDH&(+r(ctgd|}|jjdd}tddgdd gtj tj gg|}t j||tgd |}|jjd d}tddgdd gtj d ggd dg|}t j||tgd|}|jjdd}tddgdd gdtj ggd dg|}t j||y)Nrdr rhTrrNrZrYr[rirkrjr]r_r`rlrnrmrros rtest_extract_optional_groupsrKs5$,<=A UU]]3D] AF sc3Z"&&"&&!12:JH&(+  (89A UU]]?] MF sc3Z"&&#/8$H &(+  (89A UU]]@] NF sc3Z#rvv/8$H &(+r(cgd}t|t|kr"tjdt|d|dt|}t|||}|jj dd}t dd tjg||}tj|||jj d d}t d dgd d gd tjggddg||}tj||y)NrlzIndex needs more than z valuesrsruTrrZr[rvrNrYrmr]r_rw) rxrryrrrrrrr"r#)rtrr{r$r&r%s r+test_extract_dataframe_capture_groups_indexris D 5zCI ,SYKw?@ +CI Et5(89A UU]]74] 0F#sBFF+5@PQH&(+ UU]];D] IF sc3Z#rvv/8$ H &(+r(ctgdd|}|jjdd}tdgdi| }t j ||y) Nr~rrP(?P[a-z])Trr]rr )rrrrr"r#ros r'test_extract_single_group_returns_framersM ! =MNA UU]]/] =F(O4[a-z0-9]+) @ (?P[a-z]+) \. (?P[a-z]{2,4}) )userdomaintldr )rrr,rrrr)r)rr,)r)rr,)rrrNrnames)flags))singleDave)rToby)rMaude)multiple robAndSteve)rabcdef)nonemissing)remptyrs))rrr)rrr)rrr)rrr)rrr,)rrr)rrr,)rrrr)NNr)matches description)rrr)rrrr from_tuplesrr extractallreVERBOSEr"r#rtr) rr{expected_tuplespatexpected_columnsr$expected_indexr%r&mis rtest_extractallrs#1/   D O C1t+,A ++HN)9AQHUU  c  4F&(+     B t2%56A++ $ N)9AQHUU  c  4F&(+ t2%56A.AGGM>N)9AQHUU  c  4F&(+r(zpat,expected_namesrkr]r_z([AB])?(?P[123])ctgd|}|jj|}tdtj dftj dfgt jgdd|| }tj||y) N)rr632r )rNrZrjr[)rrrrr,rr)rtrar) rrrrrrrrr"r#)rexpected_namesrr$r&r%s rtest_extractall_column_namesrss '78A UU  c "F bffc]RVVSM2$$%=_U H &(+r(c^tgdd|}tjgdd}|jj d}t dgd i|| }t j|||jj d }t gd || }t j||y) Nrrd4c2rrPrrrrrrrr])rrrrrs([a-z]))rrrrrrr"r#)rr$rr&r%s rtest_extractall_single_grouprs#-?OPA++(N UU  2 3F '(FVH&(+UU  j )FN:JH&(+r(ctgdd|}|jjd}tgdt j gdd| }t j||y) N)ab3abc3d4cd2rrPz([a-z]+))ababcrcdrrrrs)rrrrrrr"r#ros r,test_extractall_single_group_with_quantifierr s_ 'mCSTA UU  k *F $$ ,O  H&(+r(z data, names)N)i1)Ni2)rrrc t| t|dk(rtt |d}n+ fdt D}tj||}t |d||}tjg|dz}|j jd }tdg|| }tj|||j jd }tddg|| }tj|||j jd }td g|| }tj|||j jd}td dg|| }tj|||j jd}tddg|| }tj||y)Nr,rrQc3BK|]}t|gdz zyw)r,N)tuple).0ins r z-test_extractall_no_matches..0s 91%q1u &9srrrQrtrrz(z)rwz(z)(z)z (?Pz)firstz(?Pz)(?Pz)secondz(z)(?Pz)) rxrrangerrrrrrr"r#) r{rrrttuplesr$rr&r%rs @rtest_extractall_no_matchesrs D A 5zQeAhU1X.9a9&&vU;t-uAQH&(+UU  0 1FH ^;KH&(+r(ctgdd|}|jjd}tdgdit j gdddg | }t j|||d k(r]tgdt tgddtfD]3}|jjd}t j||5tgdd tgdd|}|jjd}tdgdit j gdddg | }t j||y)N)a1a2b1c1xxxrPz[ab](?P\d)digit)rZr[rZ)r)rr,rrrrsr0r s_name)XXyyzzidx_namerr))rr)rr,)rr) rrrrrrr"r#rr0)rr$r&r%r<s rtest_extractall_stringindexrUs5#%7GHA UU  2 3F /"$$%=dG_UH &(+8# &f 5 &U& A  4CWW''(<=F  ! !&( 3  4  &Z8  A UU  2 3F /"$$ -j'5J  H&(+r(ctgdd|}tjtd5|jj ddddy#1swYyxYw)NrrrPzno capture groupsrz[a-z])rrrrrr)rr$s r(test_extractall_no_capture_groups_raisesrzsJ #-?OPA z)< =# "###s AAc\tgdgdd}|jjjdd}t gd}t j |||jjjd d}gd }t |d d g }t j ||y)Nr)r8B3D4r)rtrQz([A-Z])Tr)rNrYDz!(?P[A-Z])(?P[0-9])))rNrj)rYrj)r4r]r)ra)rrtrrrr"r#)r$rre_lists r!test_extract_index_one_two_groupsrs#+=MRA  Jt4A/"A!Q  @NA 1F&8W"56A!Qr(c<tgdd|}d}|jj|d}|jj|}|j dd }t j ||d }|jj|d}|jj|}|j dd }t j ||d }|jj|d} |jj|}|j dd }t j | |d } |jj| d} |jj| }|j dd }t j | |y) Nr~rrP([a-z])([0-9])Trrrlevel!(?P[a-z])(?P[0-9])(?P[a-z])r)rrrrxsr"r#) rr$pattern_two_nonameextract_two_nonamehas_multi_indexno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonames rtest_extractall_same_as_extractrsv! =MNA*'9$Gee&&'9:O$'''9N,n=< &7 Eee&&'89O$'''9N+^<0 &7 Eee&&'89O$'''9N+^<#'9$Gee&&'9:O$'''9N,n=r(cptjgdd}tgd|d|}d}|jj |d }|jj |}|j d d }tj||d }|jj |d }|jj |}|j d d }tj||d} |jj | d } |jj | }|j d d }tj| |d} |jj | d } |jj | }|j d d }tj| |y)N))rNr)rYr)rmthird)capitalordinalrr~r)rtrQrrTrrrrr r r) rrrrrrr r"r#) rrr$r r has_match_indexno_match_indexrrrrrrs r-test_extractall_same_as_extract_subject_indexrs   9$ B !-GWXA*'9$Gee&&'9:O$'''9N,n=< &7 Eee&&'89O$'''9N+^<0 &7 Eee&&'89O$'''9N+^<#'9$Gee&&'9:O$'''9N,n=r(ctjd}tddgt|j j j d}|jddk(sJy)Npyarrowrrr z(ab)rzstring[pyarrow])r importorskiprrstringrrdtypes)par&s rtest_extractall_preserves_dtyper$sY   Y 'B UDMBIIK)@ A E E P PQW XF == 0 00 0r(),rrnumpyrrpandas.core.dtypes.dtypesrpandasrrrrr r"rr'r4r>rJrTrpr|rrrrrmark parametrizerrrrrrrrrrrrrrr$r(rr+s|  0>,"-,7 9 0P,f,6-,,. 8,$ !676,86,r,<,4,R,j .(/CD #a]3  , ,,* ,  W W \ \ w' |, |, |,  (, (,V",J#  >8>B1r(