`L i dZddlZddlZddlZddlZddlZddlmZddlm Z e e fZ ejjejjeZd\ZZZZZZZZdodZdodZeZeZd ZGd d eZ Gd d e!Z"dZ#d\Z$Z%Z&Z'Z(Z)Z*Z+Z,Z-Z.Z/Z0Z1dZ2ejfdZ4dxZ5Z6e7dxZ8Z9ejfdZ:ejfdZ;ejfddjydzdzZ=e7de7de7de7de7de7de7d e7d!e7d"d# Z>e>jDcgc]8}|D]1}d$jy|Dcgc]}ej|c}3:c}}}ZAejfd%djyeAzZAejfd&ZBd'd(d)d*d+d,d-d.ZCd/ZDe5e8eCd0fd1ZEdpd2ZFGd3d4e ZGGd5d6ZHGd7d8e"eHZIGd9d:e"eHZJejfd;ZKejfd<ZLejfd=ZMGd>d?e eHZNd@ZOdAZPdB\Z$Z%ZQZRejfdCZSdDZTGdEdFeUZVGdGdHe ZWejfdIZXdqdJZY drdKZZdLZ[dMZ\dNZ]dOZ^dPZ_dQejfdRe\zdSze_zdze^zdTze\zdUzfdVejfdWe_zdTze]zdXzfdVejfdYfdZejfd[fd\ejfd]e_zdze^zdTze^zdUzfd^ejfdRe_zd_zfgdQejfdRe\zdSze_zdze^zdTze\zd`ze_zdze^zdazfdVejfdWe_zdTze]zd`ze_zdbzfdVejfdYfdZejfd[fd\ejfd]e_zdze^zdTze^zdUzfd^ejfdRe_zd_zfggZ`e`djdce`djdde`dcjdce`dcjdddsdeZcdfZddgxZeZfGdhdiZgdjZhGdkdle ZiGdmdne Zjy#dZY]xYwcc}wcc}}}w)tznThis file is adapted from the pattern library. URL: http://www.clips.ua.ac.be/pages/pattern-web Licence: BSD N)chain) ElementTree)&slash;wordpart-of-speechchunk prepositionrelationanchorlemmact|tr|ffdz}t|tr|D]} |j|cS|St |S#Y'xYw)z:Returns the given value as a Unicode string (if possible).)z windows-1252)utf-8ignore) isinstance basestringbytesdecodestrvencodinges T/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/textblob/_text.py decode_stringr"sc(J'K>$LL!U A qxx|#   q6M  A  Act|tr|ffdz}t|tr|D]} |j|cS|St|S#Y'xYw)z>Returns the given value as a Python byte string (if possible).r)rrrencoders r encode_stringr 0sc(J'K>$LL!S A qxx|#   q6M rc: t|y#t$rYywxYw)NFT)float ValueError)strgs r isnumericr%Bs' d   s  cleZdZdZdZdZdZdZdZdZ dZ d Z d Z d Z d Zd ZdZdZdZy)lazydictcyNselfs rloadz lazydict.loadP c tj|dk(r?|jt||t j t t||t t||g|S)zIf the dictionary is empty, calls lazydict.load(). Replaces lazydict.method() with dict.method() and calls it. r)dict__len__r-setattrtypes MethodTypegetattrr,methodargss r_lazyzlazydict._lazyUW <<  " IIK D&%"2"2743H$"O P$wtV$T1D11r/c$|jdSN__repr__r:r+s rr>zlazydict.__repr__^zz*%%r/c$|jdSNr2r?r+s rr2zlazydict.__len__azz)$$r/c$|jdSN__iter__r?r+s rrFzlazydict.__iter__dr@r/c(|jdg|SN __contains__r?r,r9s rrIzlazydict.__contains__gtzz.0400r/c(|jdg|S)N __getitem__r?rJs rrMzlazydict.__getitem__jtzz-/$//r/c(|jdg|S)N __setitem__r?rJs rrPzlazydict.__setitem__mrNr/c(|jdg|S)N setdefaultr?rJs rrRzlazydict.setdefaultpstzz,...r/c(|jdg|S)Ngetr?r,r9kwargss rrTz lazydict.getstzz%'$''r/c$|jdS)Nitemsr?r+s rrYzlazydict.itemsvszz'""r/c$|jdS)Nkeysr?r+s rr[z lazydict.keysyszz&!!r/c$|jdS)Nvaluesr?r+s rr]zlazydict.values|szz(##r/c(|jdg|S)Nupdater?rJs rr_zlazydict.updatetzz(*T**r/c(|jdg|SNpopr?rJs rrcz lazydict.poprWr/c(|jdg|S)Npopitemr?rJs rrezlazydict.popitemstzz)+d++r/N)__name__ __module__ __qualname__r-r:r>r2rFrIrMrPrRrTrYr[r]r_rcrer*r/rr'r'OsR 2&%&100/(#"$+(,r/r'cNeZdZdZdZdZdZdZdZdZ dZ d Z d Z d Z y ) lazylistcyr)r*r+s rr-z lazylist.loadr.r/c tj|dk(r?|jt||t j t t||t t||g|S)zyIf the list is empty, calls lazylist.load(). Replaces lazylist.method() with list.method() and calls it. r)listr2r-r3r4r5r6r7s rr:zlazylist._lazyr;r/c$|jdSr=r?r+s rr>zlazylist.__repr__r@r/c$|jdSrBr?r+s rr2zlazylist.__len__rCr/c$|jdSrEr?r+s rrFzlazylist.__iter__r@r/c(|jdg|SrHr?rJs rrIzlazylist.__contains__rKr/c(|jdg|S)Ninsertr?rJs rrszlazylist.insertr`r/c(|jdg|S)Nappendr?rJs rruzlazylist.appendr`r/c(|jdg|S)Nextendr?rJs rrwzlazylist.extendr`r/c(|jdg|S)Nremover?rJs rryzlazylist.remover`r/c(|jdg|Srbr?rJs rrcz lazylist.poprWr/N)rfrgrhr-r:r>r2rFrIrsrurwryrcr*r/rrjrjs9 2&%&1++++(r/rj universal)NNVBJJRBPRDTPPrNOCJUHPT.Xc|jdr*|djt|jddfS|dvr|tfS|dvr|tfS|dvr|t fS|dvr|t fS|d vr|tfS|d vr|tfS|d vr|tfS|d vr|tfS|d vr|tfS|dvr|tfS|dvr|tfS|dvr|tfS|tfS)zLReturns a (token, tag)-tuple with a simplified universal part-of-speech tag.)zNNP-zNNPS-z{}-{}-)r|NNSNNPNNPSNP)MDr}VBDVBGVBNVBPVBZ)r~JJRJJS)rRBRRBSWRB)PRPzPRP$WPzWP$)rPDTWDTEX)IN)CD)CC)r)POSRPTO) SYMLSr!?,:()"#$) startswithformatNOUNsplitVERBADJADVPRONDETPREPNUMCONJINTJPRTPUNCrtokentags rpenntreebank2universalrs ~~'(w~~dCIIcN2,>?@@ 00t} ==t} ""s| ))s| **t} ((s| g~t} g~s| g~t} g~t} !!s| MMt} 1:r/z(\S+)\sz.,;:!?()[]{}`''"@#$^&*+-|=~_)%za.zadj.zadv.zal.za.m.zc.zcf.zcomp.zconf.zdef.zed.ze.g.zesp.zetc.zex.zf.zfig.zgen.zid.zi.e.zint.zl.zm.zMed.zMil.zMr.zn.zn.q.zorig.zpl.zpred.zpres.zp.m.zref.zv.zvs.zw/z ^[A-Za-z]\.$z^([A-Za-z]\.)+$z^[A-Z][|bcdfghjklmnpqrstvwxzz]+.$)z<3u♥) z>:Dz:-Dz:Dz=-Dz=DzX-Dzx-DXDxDz8-D) z>:Pz:-Pz:Pz:-pz:pz:-bz:bz:c)z:o)z:^)) z>:)z:-)z:)z=)z=]z:]z:}z:>z:3z8)z8-)) z>;]z;-)z;)z;-]z;]z;Dz;^)z*-)z*)) z>:oz:-Oz:Oz:oz:-oo_Ozo.Ou°O°u°o°) z>:/z:-/z:/z:\z>:\z:-.z:-sz:sz:Sz:-Sz>.>) z>:[z:-(z:(z=(z:-[z:[z:{z:-zfind_tokens..s Dq18 Dsc3\K|]$}t|dkDsdj|&yw)rrN)lenjoinrss rrzfind_tokens..s">3q6A:!>s,,c3HK|]}tjd|yw)(!)N) RE_SARCASMsubrs rrzfind_tokens..s=aq)=s "ch|jdjdd|jdzS)Nrrrr)groupreplace)ms rzfind_tokens..s(1771:#5#5c2#>#Kr/)tuplerrmrYrerrrrTOKENfindallrrruendswithrstripRE_ABBR1matchRE_ABBR2RE_ABBR3rwreversedcount RE_EMOTICONS)string punctuation abbreviationsr linebreakabtokensrtail sentencesijrs r find_tokensr^s ++C45KW]]_%&11f%&&# K WUG $ WUG $ WUG $ WUG $ WS% WS%  VVFD& )F VVIv|V 4F VVFC (F F ]]6C< (* q6A:D,,{+0@<< ,MM!A$'!"A ,,{+0@ **[612q7G::k*KK"&#2A::e$KK&#2 c*A::c?]*#>>!,8#>>!,8#>>!,8 AbE*crF)**[612q7G*Bw a MM(4. )?*@dAq!qI c&k/ !9S#s3 3c&k/fQi 4 '!9 *y}/B/B6!9/MPQ/QUV/VQc&k/fQi 4 ' bM DF1QK D D   R A Q+ c&k/,bM!%>i>I=9=I  KQOI  sO;c#$K|r t|tr-tjj |rt |d}nNt|tr|j }n-t|dr|jj }n|}t|D]p\}}|dk(r/t|tr|jtjn|}|j}t|}|r|r|j|rm|ryw)zReturns an iterator over the lines in the file at the given path, stripping comments and decoding each line to Unicode. r)rreadrN)rrospathexistsopen splitlineshasattrr  enumeraterstripcodecsBOM_UTF8 decode_utf8r)r rcommentfrlines r_readrs dJ 'BGGNN4,@TG,A j )!A T6 " &&(AA | GAt6ju5 6??+  :: !$T ;t'2  H#> r/cdtj|dt|jDy)Nc3fK|])}|js|jddd+yw)rNr)rrrxs rrzLexicon.load..s'T!'')1773<+Ts11)r1r_rrr+s rr-z Lexicon.loads DTU4::5FTUr/c|jSr)rr+s rr z Lexicon.path zzr/c|jSr)rr+s rr%zLexicon.language ~~r/)rNNNrN)rfrgrhr&r-propertyr r%r*r/rrrsI ?$Vr/rceZdZddZdZy)RulesNc2|i}|i}||c|_|_yr))lexiconcmd)r,r5r6s rr&zRules.__init__s& ;C ?G!(# dhr/c|S)z6Applies the rule to the given token or list of tokens.r*r,r*s rapplyz Rules.applysr/NN)rfrgrhr&r9r*r/rr3r3s .r/r3cHeZdZd dZedZdZd dZd dZdZ d dZ y) rNc|i}d}tj|d}|jdt|j Dt j |||||_y)z:A list of rules based on word morphology (prefix, suffix).N) charhasprefhassufaddprefaddsuf deletepref deletesufgoodleft goodrightTc30K|]\}}d|z|fyw)rNr*rkrs rrz&Morphology.__init__..-s>DAqC!GQ<>)r1fromkeysr_rmrYr3r&rr,r5r r6s rr&zMorphology.__init__sY ?G  mmC& >D,=>> tWc* r/c|jSr)r,r+s rr zMorphology.path1r-r/cdtj|dt|jDy)Nc3<K|]}|jywr)rr)s rrz"Morphology.load..7@1779@rmrwrrr+s rr-zMorphology.load5 D@eDJJ.?@Ar/c|d}|D]}}|d|jvr+td|d|d|djf\}}}} |d|jvr:td|d|d|djjdf\}}}} r |d|dk7r dk(r|vs| dk(r|j s| dk(r|j s| d k(r|z|j vs| d k(r|z|j vs| d k(r+|j r|t|d |j vsO| d k(r,|j r|d t| |j vs| dk(r|dk(s| dk(so|dk(sy|d<|S)zFApplies lexical rules to the given token, which is a [word, tag] list.rrrrr=r>r?r@rArBNrCrDrE)r6boollowerlstriprrr5r) r,rpreviousnextwrrr*posr6s rr9zMorphology.apply9s !H Attxx!%a!A$"qtzz|!C1c3ttxx!%a!A$"qtzz|7J7J37O!O1c3U1X1%169$a8O 1 9$Q$,,)>8OA(=<' Q#a&( t||3;& 1 )SVG  4:%!tAw,;&1 +;a5 6 r/cF|jdr|jdr|ddd}}|jdr|ddd}}|jdr|ddd}}|r||d|jdz|d g}n||jd|d g}tj |||y ) zInserts a new rule that assigns the given tag to words with the given affix, e.g., Morphology.append("RB", "-ly"). rrrr=rr?r>rr*N)rrrXrjrs)r,rraffixr6taggedr\s rrszMorphology.insertYs   C U^^C%8r"v3E   C r"x3E >># r"y3E cjjo 5sC@A 3c2Aa#r/cH|jt|dz g|i|yNrrsrrUs rruzMorphology.appendi" CIM3D3F3r/c8|g}|D]}|j|yr)rur,rulesr\s rrwzMorphology.extendl( =E A DKKO r/Nrr:r:)r?Nr) rfrgrhr&r1r r-r9rsrurwr*r/rrrs6(B@$ 4r/rcFeZdZd dZedZdZdZd dZdZ d dZ y) r!Nct|i}d}tj||tj|d||_y)zAA list of rules based on context (preceding and following words).N)prevtagnexttagprev2tagnext2tag prev1or2tag next1or2tagprev1or2or3tagnext1or2or3tag surroundtagcurwdprevwdnextwd prev1or2wd next1or2wd next1or2or3wd prev1or2or3wd prevwdtag nextwdtag wdprevtag wdnexttag wdand2aft wdand2tagbfr wdand2tagaftlbigramrbigram prevbigram nextbigramT)r3r&r1rJrrKs rr&zContext.__init__ys7 ?G : tWdmmC&>? r/c|jSr)r,r+s rr z Context.pathr-r/cdtj|dt|jDy)Nc3<K|]}|jywr)rOr)s rrzContext.load..rPrQrRr+s rr-z Context.loadrSr/cdgdz}||z|z}t|D]U\}}|D]I}|ddk(r |d|dk7r |ddk7r!|d|dt|dkDr|dnd } }}|j}|d k(r|||dz dk(s|d k(r|||dzdk(s|d k(r|||dz dk(s|d k(r|||dzdk(s|dk(r|||dz d||dz dfvs||dk(r|||dzd||dzdfvs^|dk(r#|||dz d||dz d||dz dfvs6|dk(r#|||dzd||dzd||dzdfvs|dk(r|||dz dk(r| ||dzdk(s|dk(r|||dzdk(s|dk(r|||dz dk(s|dk(r|||dzdk(s|dk(r|||dz d||dz dfvs|dk(r|||dzd||dzdfvst|dk(r|||dz dk(r| ||dz dk(sR|dk(r|||dzdk(r| ||dzdk(s0|dk(r|||dz dk(r| ||dzdk(s|dk(r|||dzdk(r| ||dzdk(s|dk(r|||dzdk(r| ||dzdk(s|dk(r|||dz dk(r| ||dzdk(s|dk(r|||dzdk(r| ||dzdk(s|dk(r|||dz dk(r| ||dzdk(si|d k(r|||dzdk(r| ||dzdk(sH|d!k(r|||dz dk(r| ||dz dk(s'|d"k(s|||dzdk(s*| ||dzdk(s:||d|dg||<LX|t|t| S)#znApplies contextual rules to the given list of tokens, where each token is a [word, tag] list. )STAARTrrrr*rrrorprqrrrsrtrurvrwrxryrzr{r|rrrrrrrrrrr)rrrW) r,rorrrr\r6r*ys rr9z Context.applys" "Q & JN!! ) +HAu( +8x'8qt#! aD!A$A !QiikI%!qQx{*:y(Q!AE(1+-=z)a1QU8A;.>z)a1QU8A;.>},qQx{Aa!eHQK6P1P},qQx{Aa!eHQK6P1P//!AE(1+qQx{Aa!eHQK!HH//!AE(1+qQx{Aa!eHQK!HH},aAhqk1Aa1QQRU8TU;FVw1!a% +;xA1q5!,<xA1q5!,<|+aAhqk1QU8A;5O0O|+aAhqk1QU8A;5O0O{*qAa!eHQK/?A1q5RSDT{*qAa!eHQK/?A1q5RSDT{*qAa!eHQK/?A1q5RSDT{*qAa!eHQK/?A1q5RSDT{*qAa!eHQK/?A1q5RSDT~-!qQx{2BqAaRSeHUVKGW~-!qQx{2BqAaRSeHUVKGWy(Q!AE(1+-=!qQxPQ{BRy(Q!AE(1+-=!qQxPQ{BR|+Qq1uXa[0@Q!APQE(ST+EU|+Qq1uXa[0@Q!APQE(ST+EUaDGQqT?AaDQ( +) +TQ3q6'""r/c d|vr|s|s|jd\}}d}d|vr|s|s|jd\}}d}tj||||||xsd|xsdgy)zInserts a new rule that updates words with tag1 to tag2, given constraints x and y, e.g., Context.append("TO < NN", "VB") z < roz > rprN)rrjrs)r,rtag1tag2r6r*rs rrszContext.insertsk D=1jj'GD!C D=1jj'GAtCa$c17AG!DEr/cH|jt|dz g|i|yrbrcrUs rruzContext.appendrdr/c8|g}|D]}|j|yr)rfrgs rrwzContext.extendrir/rj)roNNr)rlr*r/rr!r!xs8"HB0#d F4r/r!z^http://z#^www\..*?\.[com|org|net|edu|de|uk]$z#^[\w\-\.\+]+@(\w[\w\-]+\.)+[\w\-]+$c>eZdZddZedZdZdZd dZdZ y) r#NcZ|i}d}tj|||||_||_y)zA dictionary of named entities and their labels. For domain names and e-mail adresses, regular expressions are used. N)perslocorg)r3r&rr)r,r5r rr6s rr&zEntities.__init__s5 ?G  tWc* r/c|jSr)r,r+s rr z Entities.pathr-r/ct|jD]W}|jDcgc]}|jc}}tj ||dgj |Yycc}wNr)rr rrWr1rRrur8s rr-z Entities.loadsZtyy! 6A$%GGI.q.A OOD!A$ + 2 21 5 6.sA1cd}|t|kr?||dj}tj|s*tj|st j|r|j ||d<||vr||D]}|d|jvr|ddd|djzfn|df\}}d}t|D]6\}}||zt|k\s|||zdj|k7s4d}n|s|||zdzD]%}|dd k(xr|dxs |j |z|d<'||z }n|dz }|t|kr?|S) zyApplies the named entity recognizer to the given list of tokens, where each token is a [word, tag] list. rrrNrrTFr) rrW RE_ENTITY1r RE_ENTITY2 RE_ENTITY3rr6upperr) r,rrr[rrrrrs rr9zEntities.applys #f+oq ! ""$A"j&6&6q&9Z=M=Ma=P#xxq ! DyaA:;2$((9J3Bqu{{}!45QRTVPWAsA )! "1q5CK/6!a%=3C3I3I3Kq3P %A!"%+AA %:$E %aF 2 ?uQx K488 #($E!H$Q!" FA-#f+o. r/c|jd|gzDcgc]}|j}}|j|dgj|ycc}w)zbAppends a named entity to the lexicon, e.g., Entities.append("Hooloovoo", "PERS") rrN)rrWrRru)r,entitynamerrs rruzEntities.append/sO!' S 1TF : ;1QWWY ; ; !b!((+ A (polarity, subjectivity)-tuple with an assessments property.)r__new__r,polarity subjectivity assessmentss rrz Score.__new__[s#  K}}THl#;<.wsD9Ir/) rr _confidence_synset_synsetslabelerrTrrrrr)r,r r%synset confidencerVs rr&zSentiment.__init__hsy     K=K1NOK9 :/IJ r/c|jSr)r,r+s rr zSentiment.pathyr-r/c|jSr)r/r+s rr%zSentiment.language}r0r/c|jSr))rr+s rrzSentiment.confidencesr/c |s |j}tjj|syiii}}}t j |}|j }|jdD]}|j3|jt|jjddksC|jjd|jjd|jjdd|jjdd|jjd d |jjd |jj|jf\}}}} } } } t|t| t| f} |r1|j|ij|gj| |r| r| ||<| sp|j| gj| |jjd |j|_|D](}t!d ||j#D||<*t%|j#D]8\}}t'|j)Dcgc] }t+|c}||d<:|j#D]'\}} t'| Dcgc] }t+|c}||<)t j-||t j-|j.|t j-|j0|ycc}wcc}w)zLoads the XML-file (with sentiment annotations) from the given path. By default, Sentiment.path is lazily loaded. Nrrformr]rr intensityrlabelr%c3nK|](\}}|t|Dcgc] }t|c}f*ycc}wwr))zipr)rr]psieachs rrz!Sentiment.load..s7CS#Y7Ts4y787s 505)rr r r rparsegetrootrrr"attribrTrrRrurr1rYrmrr]rr_rr)r,r wordssynsetslabelsxmlr[r]prrrrrrids rr-zSentiment.loads ::Dww~~d# !#Rw%kkmV$ ?A'4+;+;u \3/@,HHLL(HHLL'HHLLS1HHLL5HHLLc2HHLL)HHLL.2.31aQxq584$$Q+66sB?FFsK %F1I&&vr299#>' ?( DNNC A %a 0E!H  5;;=) HFAs474FGDc$iGE!HTN H}} (-0.6, 1.0, 1.0). )n-v-a-r-rrrrr)rrNr) rzfillrrr ADJECTIVEADVERBr1r2r-rrrT)r,rr]s rrzSentiment.synsets W]]1 }}56d{BYd{BYiBYf}BY <<  " IIKT]]&&r:6r:;;r/c 2dfd}tdr>jdf|jjjzdzg}nRt t r_tjrJtdr>jdf|jjjzdzg}nt t rG|jdd j|jjD|}ntd r.|jd tjD|}nRtd r%|jd jD|}n!tdrI|jj xsj"j%jddff|}ntdrB|jtjdD|}|j'dfdn~t t(rB|jtjdD|}|j'dfdn,t t*r|jdD|}ng}|j-dd}t/||D cgc] \}}}} ||f c} }}}|||D cgc] \}}}} ||f c} }}}||Scc} }}}wcc} }}}w)aqReturns a (polarity, subjectivity)-tuple for the given sentence, with polarity between -1.0 and 1.0 and subjectivity between 0.0 and 1.0. The sentence can be a string, Synset, Text, Sentence, Chunk, Word, Document, Vector. An optional weight parameter can be given, as a function that takes a list of words and returns a weight. cyrbr*rs rrz$Sentiment.__call__..r/cnd\}}|D]\}}||}|||zz }||z }|t|xsdz S)N)rrr)r")rweightedrnrscorer[s rrzSentiment.__call__..avgsRDAq +  uUOQYQ uQV!}$ $r/glossr)r]r)synonymsc3@K|]}|jdfywr)rWrr[s rrz%Sentiment.__call__..sPq!'')T"Psrrc3K|];}|jxs|jj|jddf=ywNrr rrWr]rs rrz%Sentiment.__call__..s;WW0 0!%%)<AAlemmatac3K|];}|jxs|jj|jddf=ywrrrs rrz%Sentiment.__call__..s3Ka!''-QXX^^-quuRay9Krr Nrtermsc3(K|] }|dfdf ywNr:r*rs rrz%Sentiment.__call__..#I!aY $=#Iweightc(j|dSr)rr[rs rrz$Sentiment.__call__..s!''!A$-r/c3(K|] }|dfdf ywrr*rs rrz%Sentiment.__call__.. rrc|dSrr*rs rrz$Sentiment.__call__.. s!AaD'r/c3$K|]}|df ywr)r*rs rrz%Sentiment.__call__..s!71d)!7scyrbr*rs rrz$Sentiment.__call__..rr/)rrr)rrrrr]rr RE_SYNSETrrrrrr from_iterablerr rrWrRr1rmrTr) r,rnegationrVrrrr[rr*s ` r__call__zSentiment.__call__s'2 % 1g **Q-!DKK!%%K$@@7JKA q* %)//!*<JAW**Q-!DKK!%%K$@@7JKA: &  PCHHT^^A5F,G,M,M,OPA Q $  "003 AQ "  K177KXAQ   177#>ahhnn.>bq "J!LhWA Q   ###Iq#II8A   h(? @ 4   ###Iq#II8A   h(9 : 4   !7Q!7BAAHk2155ZQ1a1a&5v>q99Aq!q!f96B  59s L (Lc |g}g}d}d}|D]\}}| ||vrk|||vrc|||\}} } |:|jt|g|| | d|jj|||ddj|t dt ||ddzd|dd <t dt | |ddzd|dd <| |dd<|jj||dd <|1|ddj d |d|ddz |dd<d|dd <d}d}|r||jvs,tt||j|jr||f}|sf||jvsv|}z|r||jvr|}n!|rt|jddkDrd}|F|D||jvs|j|d r"|ddj|d|dd <d}n|rt|dkDrd}|dk(rJt|d kDr<|ddjdt dt |dd dzd|dd <|dk(r%|jt|gddddt|j!dust|dks|t"vst$j'D]?\\} }} |td| vs|jt|g|dddt(t+t|D]=} || d}|| d }|| d } || d }|| d } ||d kr|dzn|| | f|| <?|S)zReturns a list of (chunk, polarity, subjectivity, label)-tuples for the given list of words: where chunk is a list of successive words: a known word optionally preceded by a modifier ("very good") or a negation ("not good"). Nr)r[rrrrr*rr[rrrrrr*rrrrrg?rrFc"|jSr)r)rs rrz'Sentiment.assessments..cs aggir/g)rur1rrTmaxminrsranymaprIrrrrIRONYisalpha PUNCTUATION EMOTICONSrYMOODrange)r,rr rrrr[r]rrr_typerr*s rrzSentiment.assessmentss =E   C "FAsyDySDG^q'#,1a9HHTQC1Q!t||?O?OPQ?RST=bE#J%%a(!$T3q1R5:~t+D!EAbE#J!$T3q1R5:~t+D!EAbE#J!"AbE#J!%!1!1!!4AbE#J=bE#J%%a+!$quSz!1AbE#J!#AbE#Jt~~-3tAw33T^^DECAT^^ 3AT^^ 3A3qwws|,q0AM .$--!2EbE#J%%a(!#AbE#JA3q6A:A8A bE#J%%c*!$T3quSzD/@$+G!HAbE#J:HHTQC3#UKLIIK5(SVq[Qk=Q)2):" A$7 ;;HHTQC1sa4%PQ!"AC "Hs1v 7A!S A!S A!S A!S A!S A1q5q4xaA6AaD 7r/cf|j|i}|||fx||<|d<|r||j|<yy)zAnnotates the given word with polarity, subjectivity and intensity scores, and optionally a semantic label (e.g., MOOD for emoticons, IRONY for "(!)"). N)rRr)r,rr]rrrrr[s rannotatezSentiment.annotatepsC OOD" %$lI>>#4 !&DLL  r/)rNNNr))T)NT)NrrrN)rfrgrhr&r1r r%rr-rrr rrr*r/rrrgsnK"  0,d'<&L \VrTX 'r/rz^[0-9\-\,\.\:\/\%\$]+$cNt|ttfr|\}}|jdrd}|jdrd}|jdr|jdsd}|jdsd |vrd }|jd rd }|jd rd}||gS)zHDefault morphological tagging rules for English, based on word suffixes.ingrrrr)isousssr) ablealfulibleientishivelessticrrr~edr)ateifyiseizer)rrmrrrs r _suffix_rulesr.s%$' s ~~e ~~d ~~c5>>2E#F  U  %< ~~d ~~23 3<r/c x|i}g} t|D]P\} } | j| |j| | dk(xr|j| jxsdgRt| D]\} \} } d\}}| dkDr| | dz }| t | dz kr| | dz}| | |duxr |j xsdvsO||j | dg||| | <j| jr|dk7r | |dg| | <tj|  | |dg| | <||j | |dg||| | <|dk(rt| |dg| | <| |dg| | <|||j | } ||j | } |-| D cgc] \} } t|| | xs| |dg"} } } | Scc} } w) aReturns a list of [token, tag]-items for the given list of tokens: ["The", "cat", "purs"] => [["The", "DT"], ["cat", "NN"], ["purs", "VB"]] Words are tagged using the given lexicon of (word, tag)-items. Unknown words are tagged NN by default. Unknown words that start with a capital letter are tagged NNP (unless language="de"). Unknown words that consist only of digits and punctuation marks are tagged CD. Unknown words are then improved with morphological rules. All words are improved with contextual rules. If a model is given, uses model for unknown words instead of morphology and context. If map is a function, it is applied to each (token, tag) after applying all rules. Nrrkrr*deren) rrurTrWrunknownr9istitlerrr.rm)rr5modelr r"r$defaultr%rrVr`rrrprevrZs r find_tagsr7s . Ff% 5 GKKqAv'L'++ekkm2L'TPTU V  %V,0q #GAJ/q 104u}v&' QWX:5#$s5#'>E71:+>>XX MYs %F6/z.NN|NNS|NNP|NNPS|NNPS?\-[A-Z]{3,4}|PR|PRP|PRP\$zVB|VBD|VBG|VBN|VBP|VBZz JJ|JJR|JJSz(? The/DT/B-NP nice/JJ/I-NP fish/NN/I-NP is/VBZ/B-VP dead/JJ/B-ADJP ././O rc36K|]\}}|tywr)) SEPARATOR)rrrs rrzfind_chunks..+s@:5#cU9+&@s)caesptfritrAroNrrr)rrKONz Conj(neven)zB-zI-ct|dkS)Nr)r)r*s rrzfind_chunks..Ds#a&1*r/OrzB-NPr~zB-ADVPr)rCHUNKSintfinditerstartrr>rrrrufilterrr)r`r%r*chunkedtagsrrulerrrrrHchink_wordr s r find_chunksrR$s !!Qq!G! 77@@ @D HB BC6 Tt$ 6A ARay)A   +A1a!e_ 6wqz?a'wqz?Q&Av'!*Q-3U"UQa ))$*5  ))$*5 6  66.,g6 S#,G"4+ E3 >>$ EVO3wc|D]}|jdt|D]\}}|djds|ddk(s$|t|dz ks6||dzdjds||dzddvs^d|d<d }||dzd D]V}|djds |ddvs|djdr|rd |d<|djdrPd |d<d }X|S) zThe input is a list of [token, tag, chunk]-items. The output is a list of [token, tag, chunk, preposition]-items. PP-chunks followed by NP-chunks make up a PNP-chunk. rGrrrr)rr)rrzB-PNPTNzI-PNPF)rurrr)rMchrr pps rfind_prepositionsrVPs  #g&#5 8  T "uRyC'73w!$6#b !!a%'*#BqENN<8BqE^>$/!(2"##" Nr/penncLeZdZd dZdZdZdZdZdZdZ d d Z y) ParserNc6|i}||_||_||_y)aA simple shallow parser using a Brill-based part-of-speech tagger. The given lexicon is a dictionary of known words and their part-of-speech tag. The given default tags are used for unknown words. Unknown words that start with a capital letter are tagged NNP (except for German). Unknown words that contain only digits and punctuation are tagged CD. The given language can be used to discern between Germanic and Romance languages for phrase chunking. N)r5r5r%)r,r5r5r%s rr&zParser.__init__s# ?G    r/c tt||jdt|jdt|jdt dS)z~Returns a list of sentences from the given string. Punctuation marks are separated from each word by a space. rrrr)rrrr)rrrTr ABBREVIATIONS replacements)r,rrVs rrzParser.find_tokenssE  K =+> **_mDJJy,7   r/c t||jd|j|jd|j|jd|j|jddS)zAnnotates the given list of tokens with part-of-speech tags. Returns a list of tokens, where each token is now a [word, tag]-list. r%r5r5rN)r%r5r5r)r7rTr%r5r5r,rrVs rr7zParser.find_tagssX  ZZ DMM:JJy$,,7JJy$,,7 5$'   r/c btt||jd|jS)zAnnotates the given list of tokens with chunk tags. Several tags can be added, for example chunk + preposition tags. r%r%)rVrRrTr%r_s rrRzParser.find_chunkss* ! J )N O  r/c t|S)zGAnnotates the given list of tokens with prepositional noun phrase tags.)rVr_s rrVzParser.find_prepositionss  ((r/c t|S)z?uq)**???s&c |r|j|fi|}t|ttfr5|Dcgc])}t|txr|j dxs|+c}}t|tr.|j dDcgc]}|j dc}}t t|D]} t t|| D]1} t|| | tst|| | ||| | <3|s|s|s|r|j|| fi||| <n|| D cgc]} | gc} || <|s|r|j|| fi||| <|r|j|| fi||| <|s|j|| fi||| <|jddr|jddr|Sdg} |r| jd|r| j!d |r| jd |r| jd t t|D]v} t t|| D]C} || | d j#d d|| | d <d j%|| | || | <Edj%|| || <xdj%|}t't)|| |jd|j*}|Scc}wcc}wcc} w)apTakes a string (sentences) and returns a tagged Unicode string (TaggedString). Sentences in the output are separated by newlines. With tokenize=True, punctuation is split from words and sentences are separated by . With tags=True, part-of-speech tags are parsed (NN, VB, IN, ...). With chunks=True, phrase chunk tags are parsed (NP, VP, PP, PNP, ...). With relations=True, semantic role labels are parsed (SBJ, OBJ). With lemmata=True, word lemmata are parsed. Optional parameters are passed to the tokenizer, tagger, chunker, labeler and lemmatizer. rrcollapseTrFrr)r r r r rr8rr%ra)rrrmrrrrrrrr7rRrergrTrurwrr TaggedStringrr%) r,rtokenizerNchunks relationsrrrVrrr[rs rrz Parser.parses,    -f-A a$ 'JKLQAz*;qwws|@q@LA a $'(wwt}5!5As1v 9A3qt9% ?ad1gu-+AaDGX>AaDG ?vg%t~~ad5f5!%&qT**!'t''!77!'t''!77!(t((188!# 9*zz*d+vzz'5/IH  MM* +  MM2 3  MM* %  MM' "s1v "A3qt9% ,qT!WQZ//Y?!Q ((1Q47+!Q ,88AaD>AaD  " IIaL  FFVZZ DMM%J eM5+s.KK* K )Nr|rrN)TTTFFr) rfrgrhr&rr7rRrVrergrr*r/rrYrYs@ !     )&@ Kr/rYrceZdZddZefdZy)rjNc|dg}t|tr$t|dr|j|j}}t|t rY|Dcgc]2}|Dcgc]"}|Dcgc]}|j ddc}$c}}4}}}}djd|D}tj||}t ||_||_|Scc}wcc}}wcc}}}w)zUnicode string with tags and language attributes. For example: TaggedString("cat/NN/NP", tags=["word", "pos", "chunk"]). rrNr8rrc3LK|]}djd|Dyw)rc3>K|]}dj|yw)r8Nr)rrs rrz1TaggedString.__new__...6s'GE'GsNrsrs rrz'TaggedString.__new__..6sXAsxx'GQ'GGXs"$) rrrrNr%rmrrr)r,rrNr%rrr*s rrzTaggedString.__new__'s <8D fc "wvv'>#[[&//(D fd # JKKU;!))C+;KFYYXQWXXF KKf %d <Ks* C C#C<CCCCcl|tk7rtj||St|dk(rgStj|dDcgc]P}|jdDcgc]1}|jdDcgc]}|j ddc}3c}}Rc}}}Scc}wcc}}wcc}}}w)zReturns a list of sentences, where each sentence is a list of tokens, where each token is a list of word + tags. rrrr8r)TOKENSrrrr)r,sepsentencerr*s rrzTaggedString.split<s &=99T3' ' t9>I  IIdD1   &^^C0 5:KK4DEq9c*E   E  s* B/#B);B$B)B/$B))B/r:)rfrgrhrrurr*r/rrjrj&s* r/rjcjeZdZdZd dZdZedZedZe d dZ dZ dZ dd Z d Zy )Spellingabcdefghijklmnopqrstuvwxyzc||_yr)r,)r,r s rr&zSpelling.__init__Ts  r/c t|jD]8}|j}tj ||dt |d:y)Nrr)rrrr1rPrIr8s rr-z Spelling.loadWsCtzz" 4A A   T1Q4QqT 3 4r/c|jSr)r,r+s rr z Spelling.path\r-r/c|jSr)r/r+s rr%zSpelling.language`r0r/c@i}tjd|jD]}||vxr||dzxsd||<dt|j D}dj |}t |d}|j||jy)zCounts the words in the given string and saves the probabilities at the given path. This can be used to generate a new model for the Spelling() constructor. z[a-z]+rc30K|]\}}|d|yw)rNr*rGs rrz!Spelling.train..ls>1A3as>rIrr[N) rrrWsortedrYrrwriteclose)r,rr r4r[rs rtrainzSpelling.trainds Haggi0 8AEz2eAhl7aE!H 8>u{{}(=> %  sO   r/c Htt|dzDcgc] }|d|||df}}|Dcgc]\}}|s ||ddzc}}|Dcgc](\}}t|dkDs||dz|dz|ddz*c}}|Dcgc](\}}tjD]}|s||z|ddz*c}}}|Dcgc]%\}}tjD] }||z|ddz'c}}}f\}}} } t ||z| z| zScc}wcc}}wcc}}wcc}}}wcc}}}w)z@Returns a set of words with edit distance 1 from the given word.rNrr)rrryALPHAset) r,r[rrrrcdelete transposerrss r_edit1zSpelling._edit1rs.*/s1vz):;A!BQ%12;;#( .41aAQ12Y .16 EA#a&1*Q1X!_qu $ E', J Jtq!hnn JQUQqrU] J] J', E Etq!hnn EQUQqrU] E] E . * 7F 6I%/&899< . E J Es-D D  D D&DD #D <*DcJtfdj|DS)z?Returns a set of words with edit distance 2 from the given wordc3XK|]!}j|D] }|vs| #ywr))r)re1e2r,s rrz"Spelling._edit2..s,W"RW2BRVJ2W2Ws* *)rr)r,r[s` r_edit2zSpelling._edit2sW AWWWr/Nc4|g}tfd|DS)z8Returns the given list of words filtered by known words.c3,K|] }|vs| ywr)r*)rr[r,s rrz"Spelling._known..s1qDy11s )r)r,rs` r_knownzSpelling._knowns =E1e111r/ct|dk(r|jt|dk(r|dfgS|tvr|dfgS|tjvr|dfgS|j ddj r|dfgS|j|gxsG|j|j|xs%|j|j|xs|g}|Dcgc]}|j|d|f}}ttd|Dxsdtfd|Dd }|jr&|Dcgc]\}}|j|f}}}|S|Dcgc] \}}||f }}}|Scc}wcc}}wcc}}w) zReturn a list of (word, confidence) spelling corrections for the given word, based on the probability of known words with edit distance 1-2 from the given word. rrrrrrc3&K|] \}}| ywr)r*)rrrs rrz#Spelling.suggest..s2GAta2sc32K|]\}}|z |fywr)r*)rrrrs rrz#Spelling.suggest..sEwq$a!eT]EsT)reverse)rr-rr whitespacerisdigitrrrrTr"rrr3title)r,r[ candidatesrrrrs @rsuggestzSpelling.suggests t9> IIK q6Q;H:  H:  !! !H:  99S"  % % 'H:  KK  {{4;;q>* {{4;;q>* s  6@@txx3'+@ @ #2z227a 8E*EtT 99;;EF44::<+FJF4>>44)>J>AG>sE;F(F)r)z spelling.txtr))rfrgrhrr&r-r1r r% classmethodrrrrrr*r/rryryQsb (E4    :X 2 r/ry)r)rz;;;)r|)NNNNNrnr1N)r1)k__doc__rr rrr4 itertoolsr xml.etreerrrrr dirnameabspath__file__MODULESLASHWORDrCHUNKPNPRELANCHORLEMMArr r encode_utf8r%r1r'rmrj UNIVERSALrrrrrrrADPrrrrrrrcompilerrrrr\rrrrrrr]escaperrr]rrrrr3rr!rrrr#rrrrrrrrrrr.r7r>r|r}r~rrHrsrcrRrVPTBPENNrYrurjry)rrrs000rrs  !5\  WW__RWW__X6 7F 40tS%c65    7,t7,t((t((p  MIdCdCsCtS$$B  :<; k!$&(!( T 2::o & 2::( ) 2::  hh   'JKMTUZ[SO/0'  .:C9I9I9K45UVPQEJJA.D  $.// rzz-#((<*@@A RZZ &            _Z <hL  T5TxrhrnRZZ $ RZZ> ? RZZ> ? D&xD&t 6dIv BJJ. / - 'E 'R'R'pRZZ)*4       BN  6  BJJ+,        zrzz)b.83b88CDE zrzz*%& zrzz+,- K",s2R7(BRG&PQR EBJ345)2  BJJ+,           $ zrzz)b.83b88CbH8STU zrzz*%& zrzz+,- K",s2R7(BRG&PQR EBJ3451/1 jq F1IMM!$%q F1IMM!$%)XzdKKf $ 3$ VUxU1 Fj /s#4R2#S<R< S2R9<S