[L i UdZdZddlZddlmZddlmZddlZddlmZm Z ddl m Z ddl m Z mZmZmZmZmZerdd lmZddlZddlZddlZddlZddlZddlZd!d Zd"d d dede d dfdZGddeZd ed dfdZ dZ!ee"d<dZ#ee"d<d#de$d efdZ%d$de$d efdZ&d%de$d efdZ'd&de$d dfdZ(d'de$ded dfdZ)e*d k(r!eejVjYyy)(z=Diagnostic functions, mainly for use when doing tech support.MITN)BytesIO) HTMLParser) BeautifulSoup __version__)builder_registry)AnyIOListOptionalTuple TYPE_CHECKING)_IncomingMarkupdatarreturnc tdtztdtjzgd}|D]F}tj D]}||j vs'|j|td|zHd|vrM|jd ddl m }td d jtt|jzd |vr dd l}td|jzt#|dr|j%}|D]V}td|zd} t'||}d}|r'td|ztj/tdXy #t$rtd YwxYw#t$rtdYwxYw#t($r%td|zt+j,YwxYw)zDiagnostic suite for isolating common problems. :param data: Some markup that needs to be explained. :return: None; diagnostics are printed to standard output. z'Diagnostic running on Beautiful Soup %szPython version %s) html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.rzlxml-xmlretreezFound lxml version %s.z.lxml is not installed or couldn't be imported.rNzFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printrsysversionrbuildersrremoveappendrrjoinmapstr LXML_VERSION ImportErrorrhasattrrr Exception traceback print_excprettify) r basic_parsersnamebuilderrrparsersuccesssoups R/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/bs4/diagnose.pydiagnoser3 s  4{ BD  ,.7M'00 Gw'''    & NQUU  Z( D " *SXXc#u?Q?Q6R-SS U]" H  .1E1EE GtVyy{  4v=? " 7DG  86A C 4==? $ x  D B C D H F G H " 3f< >    ! "s6;E(F"F(E?>E?FF+G  G htmlkwargsc 8ddlm}|jdd}t|tr|j d}t|t s t|}|jf||d|D]-\}}t|d|jdd|j/y ) aPrint out the lxml events that occur during parsing. This lets you see how lxml parses a document when no Beautiful Soup code is running. You can use this to determine whether an lxml-specific problem is in Beautiful Soup's lxml tree builders or in lxml itself. :param data: Some markup. :param html: If True, markup will be parsed with lxml's HTML parser. if False, lxml's XML parser will be used. rrrecoverTutf8)r4r7z, z>4N) rrpop isinstancer$encoder r iterparsertagtext)rr4r5rr7readereventelements r2 lxml_tracerBXsjjD)G${{6" dB )%//&WtWWPVWDw w{{GLLACDc eZdZdZdeddfdZ ddedeeeeefde ddfd Z dded e ddfd Z d eddfd Z deddfdZ deddfdZd eddfdZd eddfdZd eddfdZd eddfdZy)AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing anything else. You can use this to get a picture of how html.parser sees a given document. The easiest way to do this is to call `htmlparser_trace`. srNct|y)N)r)selfrFs r2_pzAnnouncingParser._pws  arCr-attrshandle_empty_elementc2|j|d|dy)N z STARTrI)rHr-rJrKs r2handle_starttagz AnnouncingParser.handle_starttagzs 4&%'(rCcheck_already_closedc,|jd|zy)Nz%s ENDrN)rHr-rPs r2 handle_endtagzAnnouncingParser.handle_endtags 4 rCrc,|jd|zy)Nz%s DATArNrHrs r2 handle_datazAnnouncingParser.handle_data  D !rCc,|jd|zy)Nz %s CHARREFrNrHr-s r2handle_charrefzAnnouncingParser.handle_charref  t#$rCc,|jd|zy)Nz %s ENTITYREFrNrXs r2handle_entityrefz!AnnouncingParser.handle_entityrefs %&rCc,|jd|zy)Nz %s COMMENTrNrTs r2handle_commentzAnnouncingParser.handle_commentrZrCc,|jd|zy)Nz%s DECLrNrTs r2 handle_declzAnnouncingParser.handle_declrVrCc,|jd|zy)Nz%s UNKNOWN-DECLrNrTs r2 unknown_declzAnnouncingParser.unknown_decls !D()rCc,|jd|zy)Nz%s PIrNrTs r2 handle_pizAnnouncingParser.handle_pis $rCT)__name__ __module__ __qualname____doc__r$rIr r r boolrOrRrUrYr\r^r`rbrdrCr2rErEosCD&* ))E#x},-.)# )  )!#!T!T!"""%3%4%'S'T'%3%4%"""*** c d rCrEc:t}|j|y)zPrint out the HTMLParser events that occur during parsing. This lets you see how HTMLParser parses a document when no Beautiful Soup code is running. :param data: Some markup. N)rEfeed)rr/s r2htmlparser_tracerns F KKrCaeiou_vowelsbcdfghjklmnpqrstvwxyz _consonantslengthcd}t|D]/}|dzdk(rt}nt}|tj|z }1|S)zzrsentence..s GAE&..A./Gs)+)r"rw)rss r2 rsentencers 88GvG GGrC num_elementsc gd}g}t|D]}tjdd}|dk(r*tj|}|j d|zH|dk(r/|j t tjdd||dk(stj|}|j d|zd d j |zd zS) zDRandomly generate an invalid HTML document. :meta private: )pdivspanrzbscripttablerz<%s>rrvzz z)rwrxrryr!rr")r tag_nameselementsrzrytag_names r2rdocrs BIH < 01% Q;}}Y/H OOFX- . q[ OOIfnnQ&:; < q[}}Y/H OOGh. / 0 dii) )I 55rCctdtzt|}tdt|zdddgddfD]Q}d} t j}t ||t j}d}|s?td |z fzSd d l m }t j}|j|t j}td ||z zd dl }|j}t j}|j|t j}td||z zy#t $r%td |ztjYwxYw)z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).rr4rrFTrz"BS4+%s parsed the markup in %.2fs.rrz$Raw lxml parsed the markup in %.2fs.Nz(Raw html5lib parsed the markup in %.2fs.)rrrlentimerr(r)r*rrHTMLrrparse) rr parser_namer0arrrr/s r2benchmark_parsersrs? > LN  D @3t9 LN 0*mL Q  " A $ , AG  7;A:NN P Q A JJt A 1QU ;=  "F A LL A 5Q ?A' " 3kA C    ! "s6E+E.-E.r/ctj}|j}t|}t t ||}t jd|||tj|}|jd|jddy)z7Use Python's profiler on a randomly generated document.)bs4rr/zbs4.BeautifulSoup(data, parser) cumulativez _html5lib|bs42N) tempfileNamedTemporaryFiler-rdictrcProfilerunctxpstatsStats sort_stats print_stats)rr/ filehandlefilenamervarsstatss r2profilersp,,.JH  D Cd6 2D OO5tT8L LL "E \" or*rC__main__)rrrNre))r)i)順)rr)-ri __license__rior html.parserrrrr bs4.builderrtypingr r r r r r bs4._typingrrrxrrr)rr3rjrBrEr$rnrp__annotations__rrintr|rrrrrfstdinrrkrCr2rs7C " *(+   5pD&DdDSDTD.) z) X 3 4 * S*  #  c  HcH#H6s6c6, BC BT BF +# + + +  z SYY^^ rC