[L iE2dZddlmZdZdgZddlmZddlmZm Z m Z m Z m Z m Z mZmZmZmZmZddlmZmZmZmZmZmZddlmZmZdd lmZmZm Z m!Z!dd l"m#Z#erdd l$m%Z%dd lm&Z&dd l'm(Z(m)Z)m*Z*dZ+e e e,e,fe,e,gdfZ-GddeeZ.Gdde Z/y)zCUse the HTMLParser library to parse HTML files that aren't too bad.) annotationsMITHTMLParserTreeBuilder) HTMLParser) AnyCallablecastDictIterableListOptional TYPE_CHECKINGTupleTypeUnion) AttributeDictCDataComment DeclarationDoctypeProcessingInstruction)EntitySubstitution UnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup) BeautifulSoup)NavigableString) _Encoding _Encodings _RawMarkupz html.parserNceZdZUdZded<dZded< ed dd Zd ed <d ed <ded<ddZ ddZ d ddZ dddZ d dZ d!dZ d!dZ d dZd"dZd dZd dZy)#BeautifulSoupHTMLParserreplacestrREPLACEignoreIGNOREon_duplicate_attributesoupr r-&Union[str, _DuplicateAttributeHandler]c||_||_|jj|_t j |g|i|g|_|jyN)r.r-builderattribute_dict_classr__init__already_closed_empty_element_initialize_xml_detector)selfr.r-argskwargss ]/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/bs4/builder/_htmlparser.pyr4z BeautifulSoupHTMLParser.__init__TsS &<#$(LL$E$E!D24262-/) %%'z List[str]r5ct|r1r)r7messages r:errorzBeautifulSoupHTMLParser.erroros#7++r;cN|j||d|j|y)zHandle an incoming empty-element tag. html.parser only calls this method when the markup looks like . F)handle_empty_elementN)handle_starttag handle_endtag)r7tagattrss r:handle_startendtagz*BeautifulSoupHTMLParser.handle_startendtags' S%eD 3r;cV|j}|D]_\}}|d}||vrM|j}||jk(r*|d|jfvr|||<@t t |}||||[|||<a|j jjr|j\}} ndx}} |j j|dd||| } | <| jr0|r.|j|d|jj||j|j!|yy)zHandle an opening tag, e.g. '' :param handle_empty_element: True if this tag is known to be an empty-element tag (i.e. there is not expected to be any closing tag). N) sourceline sourceposF)check_already_closed)r3r-r+r)r _DuplicateAttributeHandlerr.r2store_line_numbersgetposrAis_empty_elementrBr5append_root_tag_name_root_tag_encountered) r7rCrDr@ attr_dictkeyvalueon_duperHrItagObjs r:rAz'BeautifulSoupHTMLParser.handle_starttagsC$(#<#<#>  'JC}i55dkk)t|| 44%*IcN"#=wGGIsE2!& #% ', 99   / /$(KKM !J %) )J** tY:+   &"9"9>R   s  ?  - - 4 4S 9    &  & &s + 'r;c|r*||jvr|jj|y|jj|y)zHandle a closing tag, e.g. '' :param tag: A tag name. :param check_already_closed: True if this tag is expected to be the closing portion of an empty-element tag, e.g. ''. N)r5remover.rB)r7rCrJs r:rBz%BeautifulSoupHTMLParser.handle_endtags< C4+L+L$L  - - 4 4S 9 II # #C (r;c:|jj|y)z4Handle some textual data that shows up between tags.N)r. handle_datar7datas r:rZz#BeautifulSoupHTMLParser.handle_datas d#r;c|jdrt|jdd}n8|jdrt|jdd}n t|}d}|dkr<|jjdfD]!}|s t |gj |}#|s t|}|xsd}|j|y#t$rYUwxYw#ttf$rY8wxYw)zHandle a numeric character reference by converting it to the corresponding Unicode character and treating it as textual data. :param name: Character number, possibly in hexadecimal. xXNz windows-1252u�) startswithintlstripr.original_encoding bytearraydecodeUnicodeDecodeErrorchr ValueError OverflowErrorrZ)r7name real_namer\encodings r:handle_charrefz&BeautifulSoupHTMLParser.handle_charrefs ??3 DKK,b1I __S !DKK,b1ID I s? "YY88.I $i[188BD   9~22 *  .  s$ C, C CCC0/C0cxtjj|}||}nd|z}|j|y)zHandle a named entity reference by converting it to the corresponding Unicode character(s) and treating it as textual data. :param name: Name of the entity reference. Nz&%s)rHTML_ENTITY_TO_CHARACTERgetrZ)r7rl characterr\s r:handle_entityrefz(BeautifulSoupHTMLParser.handle_entityref s>'??CCDI  D4rErArBrZrortrwr{rrr;r:r&r&=sGSFC $JQ (((!G (  (.CB"++ ,    >    &&* <, <,/<,# <,  <,|)$$&P&## 1r;r&ceZdZUdZdZded<dZded<eZded<ee e gZ d ed <d ed <dZ ded < d dfd Z d ddZddZxZS)rzA Beautiful soup `bs4.builder.TreeBuilder` that uses the :py:class:`html.parser.HTMLParser` parser, found in the Python standard library. Fris_xmlT picklabler(NAMEz Iterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]] parser_argsTRACKS_LINE_NUMBERSc t}dD]}||vs|j|}|||<tt|di||xsg}|xsi}|j |d|d<||f|_y)aConstructor. :param parser_args: Positional arguments to pass into the BeautifulSoupHTMLParser constructor, once it's invoked. :param parser_kwargs: Keyword arguments to pass into the BeautifulSoupHTMLParser constructor, once it's invoked. :param kwargs: Keyword arguments for the superclass constructor. r,Fconvert_charrefsNr)dictpopsuperrr4updater)r7r parser_kwargsr9extra_parser_kwargsargrT __class__s r:r4zHTMLParserTreeBuilder.__init__[s$#f. 1Cf} 3+0#C( 1 #T3=f=!'R %+ 01,1 ()'7r;c#@Kt|tr |dddfyg}|r|j|g}|r|j|t|||d|}|j t d|j|j |j|jfyw)a2Run any preliminary steps necessary to make incoming markup acceptable to the parser. :param markup: Some markup -- probably a bytestring. :param user_specified_encoding: The user asked to try this encoding. :param document_declared_encoding: The markup itself claims to be in this encoding. :param exclude_encodings: The user asked _not_ to try any of these encodings. :yield: A series of 4-tuples: (markup, encoding, declared encoding, has undergone character replacement) Each 4-tuple represents a strategy for parsing the document. This TreeBuilder uses Unicode, Dammit to convert the markup into Unicode, so the ``markup`` element of the tuple will always be a string. NFT)known_definite_encodingsuser_encodingsis_htmlexclude_encodingszPCould not convert input to Unicode, and html.parser will not accept bytestrings.) isinstancer(rOrunicode_markuprredeclared_html_encodingcontains_replacement_characters)r7markupuser_specified_encodingdocument_declared_encodingrrrdammits r:prepare_markupz$HTMLParserTreeBuilder.prepare_markupys2 fc "4u- - 57 " % + +,C D*, %  ! !"< = %=)/    ('b  %%((--66  sBBc|j\}}t|tsJ|jJt |jg|i|} |j ||j g|_ y#t$r}t|d}~wwxYwr1) rrr(r.r&feedcloseAssertionErrorrr5)r7rr8r9parseres r:rzHTMLParserTreeBuilder.feeds'' f&#&&& yy$$$(DTDVD * KK  LLN /1+  *'q) )  *s !A33 B < BB )NN)rzOptional[Iterable[Any]]rzOptional[Dict[str, Any]]r9r)NNN) rr$rOptional[_Encoding]rrrzOptional[_Encodings]rzDIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]])rr$rr)rrr__doc__rrr HTMLPARSERrrrrrr4rr __classcell__)rs@r:rrJs FDItD##T62Hm255!%$04268,808 8B8<:>26 FF"5F%8 F 0 F N FP1r;)0r __future__r __license____all__ html.parserrtypingrrr r r r r rrrr bs4.elementrrrrrr bs4.dammitrr bs4.builderrrrrbs4.exceptionsrbs4r r! bs4._typingr"r#r$rr(rKr&rrr;r:rsI"  #    90!+  %tCH~sC&@$&FGJ1j*@J1ZP1OP1r;