gL i ddlmZddlmZddlmZmZddlZddlm Z ddl Z ddl Z ddl m Z ddlZddlZddlmZddlmcmZddlZddlmZmZmZmZmZmZmZm Z m!Z!ddl"m#Z$dd l%m&Z&ejNgd  d Z(d Z)dZ*dZ+dZ,ejNejZdej\dej\dgejZdej\dg dZ/GddZ0y))Iterator)partial)BytesIOStringION)Path)URLError)is_platform_windows) NA DataFrame MultiIndexSeries Timestamp date_rangeread_csv read_html to_datetime)file_path_to_url)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc,|ddd|jS)z6Parametrized fixture for HTML encoding test filenames.iodata html_encoding)param)requestdatapaths _/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/pandas/tests/io/test_html.pyhtml_encoding_filer$s D&/7== AAc6t|t|k(sJdt|dt|d}ttd||}|sJ|t||D]2\}}t j ||g|i||j s-Jdy)Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramescFt|txrt|tSN isinstancer )xys rz(assert_framelist_equal..:sAy1NjI6Nrzframes are both empty)lenallmapziptmassert_frame_equalempty)list1list2argskwargsmsg both_framesframe_iframe_js rassert_framelist_equalr61s u:U # U %U  & # 1C N   K ;u-: gw@@@==9"99 :rc  tjd}tjd|j|ddtjtd5t |ddd d d dddy#1swYyxYw) Nbs4html5lib __version__z4.2zPandas requires versionmatchrrhtml spam.htmlflavor)pytest importorskipsetattrraises ImportErrorr) monkeypatchrr8s rtest_bs4_version_failsrGEso   e $C  #]E2 {*C DM(4=eLMMMs A99Bcd}d}d|zdz}tjt|5tt |d|dddy#1swYyxYw)Nz google.comzinvalid flavorz\{z \} is not a valid set of flavorsr;googler<r@)rArD ValueErrorrr)urlr@r2s rtest_invalid_flavorrMNsR C F &.> >C z -@(3-x?@@@s A  Actjdtjdtjd|dddd}t|ddg }t|ddg }t||y) Nr8lxmlr9rrr=valid_markup.htmlr) index_colr@)rArBrr6)rfilenamedfs_lxmldfs_bs4s rtest_same_orderingrUWsg    #ff.ABHQx@HAug>G8W-rr8r9)marksrOc8tt|jS)Nr?)rrr)rs rflavor_read_htmlrXbs 9W]] 33rc eZdZdZej dZej dZdZdZ ejjejjdZ ejjejjdZejjdZd Zd Zd Zd Zd ZdZdZdZdZdZdZdZdZdZdZdZ dZ!dZ"dZ#dZ$ejjejjdZ%ejjejjejjdZ&ejjdZ'ejjd Z(ejjd!Z)ejjd"Z*ejjd#Z+ejjd$Z,ejjd%Z-ejjd&Z.ejjd'Z/d(Z0ej d)Z1ejjejjd*Z2ejjejjd+Z3d,Z4d-Z5d.Z6d/Z7d0Z8d1Z9ejjd2Z:ejjd3Z;d4Zd7Z?d8Z@d9ZAd:ZBd;ZCd<ZDd=ZEd>ZFd?ZGd@ZHejjdAdBdCgdDZJdEZKdFZLdGZMdHZNdIZOdJZPdKZQejjdLZRdMZSdNZTejjdOdBeUdPgdQfdCeUdRgeUdPgfgdSZVejjdTdBdCgdUZWeXjejjdVdWZ[dXZ\dYZ]ejjejjdZZ^d[Z_d\Z`ejjdAgd]d^Zad_Zbd`ZcdaZddbZeyQ)c TestReadHtmlcxd}tjt|5|ddddy#1swYyxYw)NzPassing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.r;a
A B
1 2
3 4
)r+assert_produces_warning FutureWarning)selfrXr2s rtest_literal_html_deprecationz*TestReadHtml.test_literal_html_deprecationms@ >  ' ' S A      s 09c|ddddS)Nrrr=r>r^rs r spam_datazTestReadHtml.spam_datasffk::rc|ddddS)Nrrr= banklist.htmlrarbs r banklist_datazTestReadHtml.banklist_datasffo>>rcttjjdjdt j t djdjjt}|j}|t|ddid d}tj||y) N)abccolumnsz{:.3f}class dataframer)attrsrQ)r nprandom default_rngpdIndexlistr)formatastypefloatto_htmlrr+r,)r^rXdfoutress rtest_to_html_compatz TestReadHtml.test_to_html_compats  %%a(//7e-  S !&&- jjl SM';!71   c2&rc @ttdtjdgdtgddtdtjdgdtgd dgd gd gd gd d}|j d}t j d|5|t||d}ddd|dk(r9tjd}t j|j}nt j|}ttdtjdgdtgddtdtjdgdtgd dtddtgdtgd dtgd |tgd |d} |dk(rHddl}ddlm} t| j"D cic]} | | |j%| | d!c} } t'j(| dy#1swYbxYwcc} w)NrjInt64)dtype)rrhrj?@Float64)rg@r)TFN)TFTabc)rrN)rrrdefghFindexzmode.string_storage dtype_backendrpyarrowTboolean)ArrowExtensionArray) from_pandas)check_column_type)r r rqnanrzrtoption_contextrrArB ArrowDtypestring StringDtyper r pandas.arraysrrmarrayr+r,) r^string_storagerrXr{r|resultpa string_dtypeexpectedrcols rtest_dtype_backendzTestReadHtml.test_dtype_backends QN':IW5S"&&#.i@O9=(($%   jjuj%   4n E U%hsm=QRSTF U I %$$Y/B==5L>>.9LQN':IW5S"&&#.i@O9=T5"-Y?/yAO<@,LA    I % 9  (//,RXXhsmQUX-VWWH fh%HI U U8sH $HHct|d5}|j|j||jd}||jd}dddt y#1swYxYw)Nutf-8encodingcontentFirst Federal Bank of Floridar; Metcalf Bankopen serve_contentreadrLr6)r^ httpserverrfrXrdf1df2s rtest_banklist_urlzTestReadHtml.test_banklist_urlsr-' 2 a  $ $QVVX $ 6"5C #$C  sC(   A A,,A5ct|d5}|j|j||jd}||jd}dddt y#1swYxYw)Nrrr .*Water.*r;Unitr)r^rrcrXrrrs r test_spam_urlzTestReadHtml.test_spam_urlsl)g . A!  $ $QVVX $ 6":>>EC":>>@C A sC(  A ArcP||dddi}||dddi}t||y)Nz .*Florida.*idtabler<rprr6)r^rfrXrrs r test_banklistzTestReadHtml.test_banklists: tWo  g  sC(rc||d}||d}t|||djddk(sJ|djddk(sJy)Nrr;rrrr ProximatesNutrient)r6ilocrmr^rcrXrrs r test_spamzTestReadHtml.test_spamsZy <y7sC(1v{{4 L0001v~~a J...rcF||}|D]}t|trJyr!r")r^rcrXdfsr{s rtest_spam_no_matchzTestReadHtml.test_spam_no_match s+y) -Bb), ,, -rcN||ddi}|D]}t|trJy)Nrr)rpr")r^rfrXrr{s rtest_banklist_no_matchz#TestReadHtml.test_banklist_no_matchs1}T7OD -Bb), ,, -rcd||ddd}|jddk(sJ|jrJy)Nrrhr<headerrr)rmr-)r^rcrXr{s rtest_spam_headerzTestReadHtml.test_spam_headers9 i{1 Ea Hzz!} ,,,88|8rcH||dd}||dd}t||yNrrr<skiprowsrrrs rtest_skiprows_intzTestReadHtml.test_skiprows_int(y aHyCsC(rcl||dtd}||dtd}t||yNrrhrr)ranger6rs rtest_skiprows_rangez TestReadHtml.test_skiprows_range0y eAhOyqJsC(rcP||dddg}||dddg}t||yNrrrhrrrrs rtest_skiprows_listzTestReadHtml.test_skiprows_list$0y q!fMy!QHsC(rcP||dddh}||dddh}t||yrrrs rtest_skiprows_setzTestReadHtml.test_skiprows_set*rrcH||dd}||dd}t||yrrrs rtest_skiprows_slicez TestReadHtml.test_skiprows_slice0rrcl||dtd}||dtd}t||yrslicer6rs rtest_skiprows_slice_shortz&TestReadHtml.test_skiprows_slice_short6rrc r||dtdd}||dtddd}t||y) Nrrhrrrirrrs rtest_skiprows_slice_longz%TestReadHtml.test_skiprows_slice_long<s6y eAqkRyq!RQsC(rc||dtjd}||dtjd}t||yr)rqaranger6rs rtest_skiprows_ndarrayz"TestReadHtml.test_skiprows_ndarrayBs9y biiPQlSy1NsC(rcztjtd5||dddddy#1swYyxYw)Nz%is not a valid type for skipping rowsr;rasdfr)rArD TypeError)r^rcrXs rtest_skiprows_invalidz"TestReadHtml.test_skiprows_invalidHs6 ]]9-T V L YkF K L L Ls 1:cH||dd}||dd}t||yNrrr<rQrrrs r test_indexzTestReadHtml.test_indexLs(y qIy!DsC(rcL||ddd}||ddd}t||yNrrr)r<rrQrrrs rtest_header_and_index_no_typesz+TestReadHtml.test_header_and_index_no_typesQ-y AQRSyqANsC(rcL||ddd}||ddd}t||yrrrs r test_header_and_index_with_typesz-TestReadHtml.test_header_and_index_with_typesVrrcH||dd}||dd}t||yrrrs rtest_infer_typeszTestReadHtml.test_infer_types[s(y qIy!DsC(rc0t|d5}t|j}dddt|d5}t|j}ddd|d}|d}t||y#1swYYxYw#1swY6xYwNzUTF-8rrr;r)rrrr6)r^rcrXrdata1data2rrs rtest_string_iozTestReadHtml.test_string_ioas )g . '!QVVX&E ')g . '!QVVX&E 'uK8uF3sC( ' ' ' 'sBB B  Bct|d5}|j}ddd|td}|t|d}t||y#1swY ?  :4BU    5>cb||dddiddgd}t|jtsJy)NMetcalfrrrr)r<rprr#rmr r^rfrXr{s rtest_multiindex_headerz#TestReadHtml.test_multiindex_headers;  4/1a&  "**j111rcb||dddiddgd}t|jtsJy)Nrrrrr)r<rprQ)r#rr rs rtest_multiindex_indexz"TestReadHtml.test_multiindex_indexs<  4/aQRV  "((J///rc||dddiddgddgd}t|jtsJt|jtsJy)Nrrrrr)r<rprrQ)r#rmr rrs rtest_multiindex_header_indexz)TestReadHtml.test_multiindex_header_indexs[  /q6!f   "**j111"((J///rcd||dddiddgdd}t|jtsJyNrrrrr)r<rprrrrs r&test_multiindex_header_skiprows_tuplesz3TestReadHtml.test_multiindex_header_skiprows_tuplesC  /q6   "**j111rcd||dddiddgdd}t|jtsJyr&rrs rtest_multiindex_header_skiprowsz,TestReadHtml.test_multiindex_header_skiprowsr(rc||dddiddgddgdd}t|jtsJt|jtsJy)Nrrrrr)r<rprrQr)r#rr rmrs r%test_multiindex_header_index_skiprowsz2TestReadHtml.test_multiindex_header_index_skiprowss^  /q6!f    "((J///"**j111rc|}|ttjj|t j t j dddi}t |tsJ|D]}t |trJy)NFloridarrr) rrrrrecompiler#rvr rs rtest_regex_idempotencyz#TestReadHtml.test_regex_idempotencyss RWW__S1 2**RZZ 23/  #t$$$ -Bb), ,, -rc~d}tjt|5||dddddy#1swYyxYw)Nz\(you passed a negative value\)r;Waterrrr)r^rcrXr2s rtest_negative_skiprowsz#TestReadHtml.test_negative_skiprowss:0 ]]:S 1 D Yg C D D Ds 3<cy)Naf

Indices and tables:

rar^s r python_docszTestReadHtml.python_docss- rcp|j|||jd}t|dkDsJy)NrPythonr;r)rrLr')r^r7rrXrs rtest_multiple_matchesz"TestReadHtml.test_multiple_matchess5    5z~~X>3x!||rc|j|||jd}|Dcgc]}|jddd}}t|ddgk(sJycc}w) Nrr9r;rrriPythWhat)rrLrsorted)r^r7rrXrr{zzs rtest_python_docs_tablez#TestReadHtml.test_python_docs_table%sc    5z~~X>+. /RbggdmAa / /bzff----0sAcJd}|t|}t|dk(sJy)z@ Make sure that read_html ignores empty tables. a
A B
1 2
rN)rr')r^rXr=rs rtest_empty_tableszTestReadHtml.test_empty_tables-s+ ("(4.16{arc|tdd}tddgddggddg }tj||y) Na
A B
1 2
3 4
rrrhrjriABrrmrr r+r,r^rXrrs rtest_multiple_tbodyz TestReadHtml.test_multiple_tbodyHsT"   . / 2Aq6Aq6"2S#JG fh/rcx|tdd}tddidg}tj||y)zt Don't fail with bs4 when there is a header and only one column as described in issue #9178 a3
Header
first
rHeaderfirstrrNrGrHs rtest_header_and_one_columnz'TestReadHtml.test_header_and_one_columnhsJ "      "8W"5aSA fh/rc||tdd}tgdggd}tj||y)zK Ensure parser adds within on malformed HTML. a
Country Municipality Year
Ukraine Odessa 1944
r)UkraineOdessa)Country MunicipalityYearrFNrGrHs rtest_thead_without_trz"TestReadHtml.test_thead_without_trsL"   & ' *-.7  fh/rcFd}tddggddg}tddgddggddg}|jd }|jd }|t|d }|t|d }tj||tj||y )zh Make sure that read_html reads tfoot, containing td or th. Ignores empty tfoot a {footer}
A B
bodyA bodyB
bodyAbodyBrDrErFfootAfootB)footerz%footAfootBrN)r rwrr+r,) r^rX data_template expected1 expected2rrresult1result2s rtest_tfoot_readzTestReadHtml.test_tfoot_reads  $Wg$6#7#sL G$w&89C: $$B$/$$,S$T"8E?3A6"8E?3A6 gy1 gy1rc||tddd}tddggd}tj||y)Na 
S I
text 1944
rrtextrR)SIrlrGrHs r&test_parse_header_of_non_string_columnz3TestReadHtml.test_parse_header_of_non_string_columnsO"     ! $vtn-zB fh/rc ddlm  fd}||dddid}t|dd d d ttd  }|j|jk(sJgd}gd}|j |j ||} |j |} | } ddg} | | jt| | <tj| | y)Nr)_remove_whitespacec8 |S#t$r|cYSwxYwr!)AttributeError)r$rks r try_remove_wsz8TestReadHtml.test_banklist_header..try_remove_wss' )!,,!  s  rrrrrrcsvz banklist.csv) Updated Date Closing Date converters) z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.) zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings Bankrqrp) pandas.io.htmlrkrrshaper)replaceapplyrr+r,)r^rfrrXrnr{ ground_trutholdnewdfnewgtnew converted date_colsrks @rtest_banklist_headerz!TestReadHtml.test_banklist_headers5  m9T7O TUV W T65. 9(19M xx<-----    }%--c37  / #^4 (399+F ) i/rcd}t|d5}|j}ddd|vsJ||dddid}||jvsJy#1swY4xYw)Nz Gold Canyonrrrrrr)rr to_string)r^rfrXgcrraw_textr{s rtest_gold_canyonzTestReadHtml.test_gold_canyonst  -' 2 avvxH X~~  tWo  R\\^###  s AAc|tddd}|tddd}tj||y)Na
C_l0_g0 C_l0_g1 C_l0_g2 C_l0_g3 C_l0_g4
R_l0_g0 0.763 0.233 nan nan nan
R_l0_g1 0.244 0.285 0.392 0.137 0.222
rrQa
C_l0_g0 C_l0_g1 C_l0_g2 C_l0_g3 C_l0_g4
R_l0_g0 0.763 0.233
R_l0_g1 0.244 0.285 0.392 0.137 0.222
)rr+r,)r^rXrrs rtest_different_number_of_colsz*TestReadHtml.test_different_number_of_colssh#   @C" D E" H"  := > ? B fh/rc||tdd}tgdggd}tj||y)NaZ
A B C
a b c
rr)rDrECrlrGrHs rtest_colspan_rowspan_1z#TestReadHtml.test_colspan_rowspan_1esD!     " # &o.H fh/rc|tddd}tgdggd}tj||y)Na
X Y Z W
A B C
rre)rDrErEZr)XzX.1YrWrFrGrHs r test_colspan_rowspan_copy_valuesz-TestReadHtml.test_colspan_rowspan_copy_values~sQ"   "% & ' *+,6Q  fh/rc|tddd}tgdggd}tj||y)Na(
A B C
D
rre)rDrErErED)rDrEzB.1zB.2rrFrGrHs rtest_colspan_rowspan_both_not_1z,TestReadHtml.test_colspan_rowspan_both_not_1sQ"     ! $+,6S  fh/rc|tddd}tddggddg}tj||y)Nz
A B
C
rrerrErDrFrGrHs rtest_rowspan_at_end_of_rowz'TestReadHtml.test_rowspan_at_end_of_rowsR"       "C:,c C fh/rc|tddd}tddgddggddg}tj||y)Nz
A B
rrerDrErFrGrHs rtest_rowspan_only_rowsz#TestReadHtml.test_rowspan_only_rowssY"        C:Sz":S#JO fh/rc|tdd}tddgddggddgddgg}tdd gg| }tj||y) Nam
A B
a b
1 2
rrDrErrrlevelscodesrhrFrr r r+r,r^rXrrmrs r+test_header_inferred_from_rows_with_only_thz8TestReadHtml.test_header_inferred_from_rows_with_only_thsn!    & ' *c3Z#s$.AFFH rc4t|jSr!)rtimers rr&z7TestReadHtml.test_parse_dates_combine..rr)rrdatetimerrhrr)r rr r)rrzr+r,)r^rX raw_datesr{r}newdfs rtest_parse_dates_combinez%TestReadHtml.test_parse_dates_combines:j"=> ! &=>! &=>    RZZ\ "aV0DPQ :y12 eSV,rc|dddd}tjj|sJt|dtjj |sJt|d||dd d }|j d k(sJd |j d vsJ|djtjdk(sJtj|jddsJy)Nrrr=wikipedia_states.htmlz is not a filez is an empty fileArizonarrr)< Unnamedrsq mifloat64)rrHzPN$A) rrisfilereprgetsizervrmrrqallcloselocr^rrXrrs rtest_wikipedia_states_tablez(TestReadHtml.test_wikipedia_states_table%sff.EFww~~d#BT |>%BB#wwt$Fd 4E&FF$!$iB1E||x'''FNN2....g$$(;;;;{{6::j19===rc|dddd}||ddd}|jdk(sJd |jd d vsJ|jjd k(sJtj|j d dsJy)Nrrr=rrrr)r rrrrh)Alaska)z Total area[2]rr)rvrmnlevelsrqrrrs r test_wikipedia_states_multiindexz-TestReadHtml.test_wikipedia_states_multiindex/sff.EF!$i1EaH||x'''FNN2.q1111~~%%***{{6::&JKYWWWrc|tdddg}tddggtjddg }t j |d|y) NaK
AB
ab
rrrerr)Unnamed: 0_level_0rD)zUnnamed: 1_level_0rErl)rr r from_tuplesr+r,rHs r%test_parser_error_on_empty_header_rowz2TestReadHtml.test_parser_error_on_empty_header_row7sd!   q6  3ZL**,.IJ  fQi2rc|tddd}tddidg}|djtjdk(sJt j ||y) Na
Header
1100#101
#)decimalrrKgClg0@rMr)rr rrqr+r,rHs rtest_decimal_rowszTestReadHtml.test_decimal_rowsPso!  $' ( ) ,8X"6qcBh%%))<<<< fh/rargTFctjd}tjt|5|||dddy#1swYyxYw)NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr;re)r/escaperArDr)r^rcrrXr2s rtest_bool_header_argz!TestReadHtml.test_bool_header_argmsFii   ]]9C 0 4 Ys 3 4 4 4s AAc|tddtid}tdddgi}tj||y)Na
a
0.763
0.244
rrrrz0.763z0.244)rrr r+r,rHs rtest_converterszTestReadHtml.test_convertersxsR!  "Sz% & ' *cGW#567 fh/rc|tddgd}tddtjgi}t j ||y)Na
a
0.763
0.244
gZd;?) na_valuesrrg"~j?rr rqrr+r,rHs rtest_na_valueszTestReadHtml.test_na_valuessS!  "g% & ' *cE266?34 fh/rc,d}tdddgi}|t|dd}tj||tdtj tj gi}|t|dd}tj||y) Na
a
N/A
NA
rzN/Ar F)keep_default_narT)r rr+r,rqr)r^rX html_data expected_dfhtml_dfs rtest_keep_default_naz!TestReadHtml.test_keep_default_nas  udm 45 "8I#6NqQ k73rvvrvv&6 78 "8I#6MaP k73rc|tdd}tddgtjtjggddg}t j ||y)Nak
A B
a b
rrrrDrErFrrHs rtest_preserve_empty_rowsz%TestReadHtml.test_preserve_empty_rowss\!    & ' *C:/?"@3PS*U fh/rc|tdd}tddgddggddgddgg}tdd gg| }tj||y) NaU
AB
ab
12
rrDrErrrrrhrFrrs r,test_ignore_empty_rows_when_inferring_headerz9TestReadHtml.test_ignore_empty_rows_when_inferring_headersn!       "c3Z#s$215 k73rc||dddd}||d}t|tsJt|dtsJy)Nrrr=rPrr)r#rvr )r^rrXrRrs rtest_works_on_valid_markupz'TestReadHtml.test_works_on_valid_markupsCD&&2EFx15#t$$$#a&),,,rc4|dddd}||dddgy) Nrrr=rerrOr9rJra)r^rrXrfs rtest_fallback_successz"TestReadHtml.test_fallback_success s# vvG k6:BVWrctdd}ttjj dj d|}|j }d|vsJy)Nz 2000-01-01rrrh)rrir)rr rqrrrsstandard_normalrz)r^rngr{rs rtest_to_html_timestampz#TestReadHtml.test_to_html_timestampsNr2 ryy,,Q/??HPS Tv%%%rc^tdddg}|j}|jd}|jd}|jd}|jd}|jd}d|vsJ||k(sJ||k(sJ||k7sJd |vsJd |vsJd |vsJ||k(sJy) NrrhrDrET)borderrFz border="1"z border="2"z border="0"z border)r rz)r^r{out_border_defaultout_border_trueout_border_explicit_defaultout_border_nondefaultout_border_zeroout_border_falses rtest_to_html_borderlessz$TestReadHtml.test_to_html_borderlesss aa() *ZZ\**D*1&(jjj&:# " ! 4**A*.::U:3 2222"4444!%@@@@!%:::: 5555O333 0000"2222rzdisplayed_only,exp0,exp1fooNzfoo bar baz quxcd}|t||}tj|d||tj|d|yt|dk(sJy)Na
foo bar baz qux
foo
displayed_onlyrr)rr+r,r')r^rexp0exp1rXrrs rtest_displayed_onlyz TestReadHtml.test_displayed_only+sY(x~nM c!fd+    ! !#a&$ /s8q= =rrcd}|t||d}tddgddgd}tj||y) NaW
A B
1 2
4 5
rrrrirhrrrG)r^rrX html_tablerrs r&test_displayed_only_with_many_elementsz3TestReadHtml.test_displayed_only_with_many_elementsPsM "(:"6~V  Aq6A78 fh/rz\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningctjj|}tjj|d}|j d\}} t |d5}||j |dj}dddt |d5}|t|j |dj} ddd|||dj} tj tj|| y#1swYxYw#1swY[xYw#t$r(trd|vsd|vrtjwxYw)Nr_rb)rrQ1632)rrbasenamesplitextsplitrrpoprr+r, Exceptionr rAskip) r^rrX base_pathrootrrfobj from_stringfrom_file_like from_filenames r test_encodezTestReadHtml.test_encodehsE GG$$%78 ww *1-jjo 8 ($/ 4.IIK(a#%  ($/ 4!1DIIK(8q"#%  -"Xce   ! !+~ >  ! !+} =      "$8#tx'7KKM   s= D6#(D D61D*A D6D'#D6*D3/D661E'c|jjddk(rtjdGddt}|d}||sJtj t d5||dddy#1swYyxYw) Nr@rOzNot applicable for lxmlceZdZdZy)FTestReadHtml.test_parse_failure_unseekable..UnseekableStringIOcyNFrar6s rseekablezOTestReadHtml.test_parse_failure_unseekable..UnseekableStringIO.seekablesrN)__name__ __module__ __qualname__r$rarrUnseekableStringIOr!s rr(z?
spameggs
z#passed a non-rewindable file objectr;)keywordsgetrArrrDrK)r^rXr(bads rtest_parse_failure_unseekablez*TestReadHtml.test_parse_failure_unseekables  $ $ ( ( 2f < KK1 2  ! B   $$$ ]]:-R S " S ! " " "s 0 BB c`Gdd}|d}|d}||sJ||sJy)Nc:eZdZd dZd dZdZdZdZdefdZ y) 9TestReadHtml.test_parse_failure_rewinds..MockFilereturnNc ||_d|_yr#)rat_end)r^rs r__init__zBTestReadHtml.test_parse_failure_rewinds..MockFile.__init__s  # rcH|jrdn |j}d|_|S)Nr\T)r2r)r^sizers rrz>TestReadHtml.test_parse_failure_rewinds..MockFile.reads![[rdii"  rcd|_yr#)r2)r^offsets rseekz>TestReadHtml.test_parse_failure_rewinds..MockFile.seeks # rcy)NTrar6s rr$zBTestReadHtml.test_parse_failure_rewinds..MockFile.seekablesrcyr!rar6s r__next__zBTestReadHtml.test_parse_failure_rewinds..MockFile.__next__src|Sr!rar6s r__iter__zBTestReadHtml.test_parse_failure_rewinds..MockFile.__iter__s  r)r0Nr!) r%r&r'r3rr8r$r;rr=rarrMockFiler/s' $   $   ( rr>z/
spam
eggs
z2
spameggs
ra)r^rXr>goodr+s rtest_parse_failure_rewindsz'TestReadHtml.test_parse_failure_rewindss@  2IJKL%%%$$$rcGddtj}|dddd}|||f}|||f}|j|j|js|jr# |jr|jr#d|jcxur|jusJJy)NceZdZfdZxZS)@TestReadHtml.test_importcheck_thread_safety..ErrorThreadcl t|d|_y#t$r}||_Yd}~yd}~wwxYwr!)superrunerrr)r^rG __class__s rrFzDTestReadHtml.test_importcheck_thread_safety..ErrorThread.runs5$GKM $DH!#"DHH#s 3.3)r%r&r'rF __classcell__)rHs@r ErrorThreadrCs  $ $rrJrrr=rP)targetr0) threadingThreadstartis_aliverG)r^rrXrJrRhelper_thread1helper_thread2s rtest_importcheck_thread_safetyz+TestReadHtml.test_importcheck_thread_safetys  $)** $D&&2EF$,+B+B+D %%'>+B+B+D~))?^-?-??????rc|dddd}t|}||d}||d}tj||y)Nrrr=r>r)rr+r,)r^rrXfile_path_string file_pathrrs rtest_parse_path_objectz#TestReadHtml.test_parse_path_objectsM#D&&+F)* /03y)!, c3'rcx|tdd}tdggdg}tj||y)Nz
A
word1
word2
rz word1 word2rDrFrGrHs rtest_parse_br_as_spacez#TestReadHtml.test_parse_br_as_spacesH!         M?"3cUC fh/r)r(bodyrr]cnd}gdgdgdgdgdgdd}|d }|d }|d }|d k(r|d }|d}|d}n |dk(r|d }n|dk(r|d}n |dk(r|d}|t||d}t||g|} | jtj} t j || y)Na
HTTP FTP Linkless
Wikipedia SURROUNDING Debian TEXT Linkless
Footer Multiple links: Only first captured.
)HTTPFTPLinkless))r[N)r\N)r]z'https://en.wiktionary.org/wiki/linkless) WikipediaSURROUNDING Debian TEXTr]))r^zhttps://en.wikipedia.org/)r_zftp://ftp.us.debian.org/)r]N)Footer$Multiple links: Only first captured.N))r`z)https://en.wikipedia.org/wiki/Page_footer)ra1N) head_ignore head_extract body_ignore body_extract footer_ignorefooter_extractrergrcr(rfrhrdrYr]r extract_linksrrl)rr fillnarqrr+r,) r^rrX gh_13141_datagh_13141_expecteddata_expfoot_exphead_exprrs rtest_extract_linkszTestReadHtml.test_extract_linkss 07 P  % 2%]3$_5$]3 %<(8H()9:H(8H F](8H H_()9:H H_(8H!(="9MaPh18D??266* fh/rcd}tjt|5t|ddddy#1swYyxYw)NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r; incorrectrirArDrKr)r^rcr2s rtest_extract_links_badz#TestReadHtml.test_extract_links_bad:s< I ]]:S 1 < i{ ; < < Google.com r(rir)z Google.comzhttps://google.comrGr^rXrrrs r test_extract_links_all_no_headerz-TestReadHtml.test_extract_links_all_no_headerBsA "(4.FqICDEF fh/rcd}tjt|5tdddddy#1swYyxYw)NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r;testnumpyrrt)r^r2s rtest_invalid_dtype_backendz'TestReadHtml.test_invalid_dtype_backendQs; % ]]:S 1 5 fG 4 5 5 5rcd}|t|d}tddgddggddg }tj||y) Na
A B
A1 B1
A2 B2
rA1B1A2B2rDrErFrGrws rtest_style_tagzTestReadHtml.test_style_tagYsK &"(4.1!4D$<$">c S fh/r)fr%r&r'r_rAfixturercrfr~rmarknetwork single_cpurrslowrrrrrrrrrrrrrrrrrrrrrr rrrr r"r$r'r*r,r1r4r7r:r@rBrIrNrVrcrirrrrrrrrrrrrrrr parametrizerrrrrrrrrrrr r r tdskip_if_windowsfilterwarningsrr,r@rRrVrXrqrurxr|rrarrrZrZls@ ^^;; ^^?? '4Il [[ [[))  [[ [[)) [[))/- -  ) ) ) ) ) ) ) ) L) ) ) ) ))) [[ [[DD  [[ [[ [[@@  [[-- [[ [[22  [[00  [[ 0 0 [[22 [[22 [[ 2 2 [[ - -D  ^^. . ` [[ [[  [[ [[..  60@040@$2L02 [[-0-0^ [[ $ $F0P02!0F0@0:0*08* ->X320: [[UT5M2434060642040. 4-  [[XX &3& [[" 9eW%t , I345y%7I J !!< [[-e}=0>0.R [[ &  8"( %D [[ [[@@.(0* [[U$GHB0IB0H< 050rrZ)1collections.abcr functoolsrrrrrpathlibrr/rL urllib.errorrr{rqrA pandas.compatr pandas.util._test_decoratorsutil_test_decoratorsrpandasrtr r r r rrrrrpandas._testing_testingr+pandas.io.commonrrrr6rGrMrUr skip_if_norXrZrarrrs$  ! -))   - BB :(M@. U=2==#7z9R"ST V=2==#89  4  4D0D0r