L ij0ddlmZddlmZddlmZddlmZddlm Z ddl m Z m Z m Z mZddlmZmZdd lmZmZmZGd d ZGd d ZeeefZe eZGddZy)) annotations)aliases)sha256)dumps)sub)AnyIteratorListTuple)RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE) iana_nameis_multi_byte_encoding unicode_rangeceZdZ d ddZddZddZeddZddZddZ ddZ edd Z ed d Z ed!d Z ed!d Zed d ZeddZeddZeddZeddZeddZed"dZed#dZed!dZed dZed dZd$d%dZeddZy)& CharsetMatchNc||_||_||_||_||_d|_g|_d|_d|_d|_ ||_ ||_ y)N) _payload _encoding_mean_mess_ratio _languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom languagesdecoded_payloadpreemptive_declarations _/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__s_ ' .'6,5%315+- ,/"-1,0#2 3I$ct|ts)t|trt||jk(Sy|j|jk(xr|j |j k(S)NF) isinstancerstrrencoding fingerprintr"others r*__eq__zCharsetMatch.__eq__*sV%.%% '4==88}}.X43C3CuGXGX3XXr,ct|tstt|j|jz }t|j |j z }|dkr|dkDr|j |j kDS|dkrS|dkrNt |jtk\r|j|jkS|j|jkDS|j|jkS)zQ Implemented to make sorted available upon CharsetMatches items. g{Gz?g{Gz?) r.r ValueErrorabschaos coherencelenrrmulti_byte_usage)r"r3chaos_differencecoherence_differences r*__lt__zCharsetMatch.__lt__1s%. "%djj5;;&>"?&)$..5??*J&K d "';d'B>>EOO3 3  $)=)E4==!%55zzEKK//((5+A+AA AzzEKK''r,c\dtt|t|jz z S)Ng?)r:r/rawr"s r*r;zCharsetMatch.multi_byte_usageGs"c#d)ns488}455r,c~|j&t|j|jd|_|jS)Nstrict)r r/rrrAs r*__str__zCharsetMatch.__str__Ks. << t}}dnnhGDL||r,c<d|jd|jdS)Nz)r0r1rAs r*__repr__zCharsetMatch.__repr__Qs" x8H8H7ILLr,ct|tr||k(r$tdj|jd|_|j j|y)Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r.rr6format __class__r rappendr2s r* add_submatchzCharsetMatch.add_submatchTsO%.%4-MTTOO    E"r,c|jSN)rrAs r*r0zCharsetMatch.encoding_s ~~r,cg}tjD]G\}}|j|k(r|j|'|j|k(s7|j|I|S)z Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855. )ritemsr0rJ)r" also_known_asups r*encoding_aliaseszCharsetMatch.encoding_aliasescs^ $& MMO (DAq}}!$$Q'!#$$Q'  ( r,c|jSrMrrAs r*bomzCharsetMatch.bomp###r,c|jSrMrUrAs r*byte_order_markzCharsetMatch.byte_order_marktrWr,cF|jDcgc]}|d c}Scc}w)z Return the complete list of possible languages found in decoded sequence. Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'. rr)r"es r*r'zCharsetMatch.languagesxs #oo.!...s c|jshd|jvryddlm}m}t |j r||j n||j }t|dk(sd|vry|dS|jddS)z Most probable language found in decoded sequence. If none were detected or inferred, the property will return "Unknown". asciiEnglishr)encoding_languagesmb_encoding_languagesz Latin BasedUnknown)rcould_be_from_charsetcharset_normalizer.cdr`rarr0r:)r"r`rar's r*languagezCharsetMatch.languages $444  X*$--8&dmm4' 6  9~"my&@ Q< q!!$$r,c|jSrM)rrAs r*r8zCharsetMatch.chaoss$$$r,c@|jsy|jddS)Nrrr r[rAs r*r9zCharsetMatch.coherences q!!$$r,c6t|jdzdSNd)ndigits)roundr8rAs r* percent_chaoszCharsetMatch.percent_chaossTZZ#%q11r,c6t|jdzdSri)rmr9rAs r*percent_coherencezCharsetMatch.percent_coherencesT^^c)155r,c|jS)z+ Original untouched bytes. )rrAs r*r@zCharsetMatch.raws }}r,c|jSrM)rrAs r*submatchzCharsetMatch.submatchs ||r,c2t|jdkDSNr)r:rrAs r* has_submatchzCharsetMatch.has_submatchs4<< 1$$r,c|j |jSt|Dcgc] }t|}}tt |Dchc]}|s| c}|_|jScc}wcc}wrM)rr/rsortedlist)r"chardetected_rangesrs r* alphabetszCharsetMatch.alphabetssk    +'' 'MPQUY,WT]4-@,W,W%d+L!!A+L&MN###-X+LsA0A5 A5 cp|jg|jDcgc]}|jc}zScc}w)z The complete list of encoding that output the exact SAME str result and therefore could be the originating encoding. This list does include the encoding available in property 'encoding'. )rrr0)r"ms r*rcz"CharsetMatch.could_be_from_charsets,t||"D!1::"DDD"Ds3c6jj|k7rr|_t}j=jjdvr!t t fd|ddd}||ddz}|j |d_jS)z Method to get re-encoded bytes payload using given target encoding. Default to UTF-8. Any errors will be simply ignored by the encoder NOT replaced. N)zutf-8utf8utf_8c|j|jd|jdj|jdt j jddS)Nrr _-)stringspanreplacegroupsrr)rr"s r*z%CharsetMatch.output..sYahhqvvx{QVVXa[AII 1 !$"7"78@@cJr,i r )countr)rr/r!lowerrr encoder)r"r0decoded_stringpatched_headers` r*outputzCharsetMatch.outputs  (D,A,AX,M$,D ! YN,,80066812"%3#5D)""0.2G!G#1#8#89#MD ###r,cPt|jjS)zw Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one. )rr hexdigestrAs r*r1zCharsetMatch.fingerprints dkkm$..00r,)NN)r#bytesr$r/r%floatr&boolr'CoherenceMatchesr( str | Noner)r)r3objectreturnr)rrrr/)r3rrNone)r list[str]rr)rr)rzlist[CharsetMatch])r)r0r/rr)__name__ __module__ __qualname__r+r4r>propertyr;rDrFrKr0rSrVrYr'rer8r9rnrpr@rsrvr}rcrr1r,r*rr s'+-1JJJ J  J $ J$J!+J8Y(,66 M #  $$$$//%%6%%%% 2266 %%$$EE$:11r,rcReZdZdZd d dZd dZddZddZddZddZ dd Z dd Z y)CharsetMatchesz Container with every CharsetMatch items ordered by default from most probable to the less one. Act like a list(iterable) but does not implements all related methods. Nc8|rt||_yg|_yrM)rx_results)r"resultss r*r+zCharsetMatches.__init__s?FF7O B r,c#8K|jEd{y7wrMrrAs r*__iter__zCharsetMatches.__iter__s==  s ct|tr|j|St|tr/t |d}|jD]}||j vs|cSt )z Retrieve a single item either by its position or encoding name (alias may be used here). Raise KeyError upon invalid index or encoding not present in results. F)r.intrr/rrcKeyError)r"itemresults r* __getitem__zCharsetMatches.__getitem__s` dC ==& & dC T5)D-- "6777!M "r,c,t|jSrMr:rrAs r*__len__zCharsetMatches.__len__s4==!!r,c2t|jdkDSrurrAs r*__bool__zCharsetMatches.__bool__s4==!A%%r,ct|ts-tdjt |j t |jtkrW|jD]H}|j|jk(s|j|jk(s7|j|y|jj|t|j|_ y)z~ Insert a single match. Will be inserted accordingly to preserve sort. Can be inserted as a submatch. z-Cannot append instance '{}' to CharsetMatchesN)r.rr6rHr/rIr:r@rrr1r8rKrJrx)r"rmatchs r*rJzCharsetMatches.appends $ -?FF'  txx=+ + $$(8(88U[[DJJ=V&&t,  T"t}}- r,c:|jsy|jdS)zQ Simply return the first match. Strict equivalent to matches[0]. NrrrAs r*bestzCharsetMatches.best)s}}}}Qr,c"|jS)zP Redundant method, call the method best(). Kept for BC reasons. )rrAs r*firstzCharsetMatches.first1syy{r,rM)rzlist[CharsetMatch] | None)rzIterator[CharsetMatch])rz int | strrr)rrr)rrrr)rzCharsetMatch | None) rrr__doc__r+rrrrrJrrrr,r*rrs0 O! "&.( r,rcZeZdZ ddZeddZddZy)CliDetectionResultc ||_| |_||_||_||_||_||_||_||_| |_ | |_ yrM) path unicode_pathr0rSalternative_encodingsrer}r&r8r9 is_preferred) r"rr0rSrrer}r&r8r9rrs r*r+zCliDetectionResult.__init__=sV (4$, +;0E"% $-$2! )".r,c |j|j|j|j|j|j |j |j|j|j|jd S)N rr0rSrrer}r&r8r9rrrrAs r*__dict__zCliDetectionResult.__dict__WseII $ 5 5%)%?%? "11ZZ -- --  r,c2t|jddS)NT) ensure_asciiindent)rrrAs r*to_jsonzCliDetectionResult.to_jsongsT]]a@@r,N)rr/r0rrSrrrrer/r}rr&rr8rr9rrrrr)rzdict[str, Any]r)rrrr+rrrrr,r*rr<s///$ / ) /  /////!//4    Ar,rN) __future__rencodings.aliasesrhashlibrjsonrrertypingrr r r constantr rutilsrrrrrr/rCoherenceMatchrrrr,r*rsd"%--GCCe1e1P@@FsEz"',A,Ar,