JL iT*~ddlZddlmZGddZGddeZdZdZd Zed d d gZ Gd dZ y)N) namedtuplecteZdZdZd dZedZedZdZdZ eee Z dZ d Z d Z d Zd Zy) AlignedSenta# Return an aligned sentence object, which encapsulates two sentences along with an ``Alignment`` between them. Typically used in machine translation to represent a sentence and its translation. >>> from nltk.translate import AlignedSent, Alignment >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'], ... ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1')) >>> algnsent.words ['klein', 'ist', 'das', 'Haus'] >>> algnsent.mots ['the', 'house', 'is', 'small'] >>> algnsent.alignment Alignment([(0, 3), (1, 2), (2, 0), (3, 1)]) >>> from nltk.corpus import comtrans >>> print(comtrans.aligned_sents()[54]) 'So why should EU arm...'> >>> print(comtrans.aligned_sents()[54].alignment) 0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13 :param words: Words in the target language sentence :type words: list(str) :param mots: Words in the source language sentence :type mots: list(str) :param alignment: Word-level alignments between ``words`` and ``mots``. Each alignment is represented as a 2-tuple (words_index, mots_index). :type alignment: Alignment Ncz||_||_|tg|_yt |tusJ||_yN)_words_mots Alignment alignmenttype)selfwordsmotsr s X/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/nltk/translate/api.py__init__zAlignedSent.__init__/s:   &r]DN ?i/ //&DNc|jSr)rr s rrzAlignedSent.words8s {{rc|jSr)r rs rrzAlignedSent.mots<s zzrc|jSr) _alignmentrs r_get_alignmentzAlignedSent._get_alignment@s rcxtt|jt|j|||_yr)_check_alignmentlenrrr)r r s r_set_alignmentzAlignedSent._set_alignmentCs%TZZ#dii.)D#rcddjd|jDz}ddjd|jDz}d|d|d|jdS)z_ Return a string representation for this ``AlignedSent``. :rtype: str z[%s]z, c3&K|] }d|z ywz'%s'N.0ws r z'AlignedSent.__repr__..Os#D1FQJ#Dc3&K|] }d|z ywrr r!s rr$z'AlignedSent.__repr__..Ps"B!6A:"Br%z AlignedSent())joinrr r)r rrs r__repr__zAlignedSent.__repr__Is^ $))#D #DDE"Btzz"BBCeWBtfBt.ACCrc d}|dz }|dj|jDcgc] }d|d|d c}z }|dj|jDcgc] }d|d|d c}z }|dj|jDcgc](\}}d|j|d|j|d *c}}z }t t |jd z D]4}|d j |j||j|d zz }6t t |jd z D]4}|d j |j||j|d zz }6|d djd|jDzz }|d djd|jDzz }|dz }|Scc}wcc}wcc}}w)z< Dot representation of the aligned sentence zgraph align { znode[shape=plaintext] "z_source" [label="z"] z_target" [label="z _source" -- "z _target" z)"{}_source" -- "{}_source" [style=invis] z)"{}_target" -- "{}_target" [style=invis] z{rank = same; %s}  c3&K|] }d|z yw)z "%s_source"Nr r!s rr$z&AlignedSent._to_dot..vs.VQ}q/@.Vr%c3&K|] }d|z yw)z "%s_target"Nr r!s rr$z&AlignedSent._to_dot..ws.UQ}q/@.Ur%})r(rr rrangerformat)r sr#uvis r_to_dotzAlignedSent._to_dotTs  && RWW L1!-aS6L MM RWW K1!-aS6K LL RWW!OO AqDKKN#=A{K   s4;;'!+, A =DD A AE" A s4::*+ A =DD 1  1q5! A  "chh.V$++.V&V WW "chh.U$**.U&U VV S?MK sG G -G cZ|jjd}d} tjdd|zgtjtjtj}|j|\}}|jdS#t $r}t d|d}~wwxYw)zR Ipython magic : show SVG representation of this ``AlignedSent``. utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN) r8encode subprocessPopenPIPEOSError Exception communicatedecode)r dot_string output_formatprocesseouterrs r _repr_svg_zAlignedSent._repr_svg_}s\\^**62   W &&./ oo!! G&&z2Szz&!!  WNOUV V WsAB B* B%%B*cdj|jdddz}dj|jdddz}d|d|dS)zn Return a human-readable string representation for this ``AlignedSent``. :rtype: str r.Nz...z 'z'>)r(rr )r sourcetargets r__str__zAlignedSent.__str__sU $++&s+e3$**%cr*U2 vhb99rctt|j|j|jj S)zm Return the aligned sentence pair, reversing the directionality :rtype: AlignedSent )rr rrinvertrs rrUzAlignedSent.inverts( 4::t{{DOO4J4J4LMMrr)__name__ __module__ __qualname____doc__rpropertyrrrrr r)r8rNrSrUr rrrrsh>'$8I D'R"&:NrrcLeZdZdZdZedZdZdZd dZ dZ d Z d Z y) r ac A storage class for representing alignment between two sequences, s1, s2. In general, an alignment is a set of tuples of the form (i, j, ...) representing an alignment between the i-th element of s1 and the j-th element of s2. Tuples are extensible (they might contain additional data, such as a boolean to indicate sure vs possible alignments). >>> from nltk.translate import Alignment >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)]) >>> a.invert() Alignment([(0, 0), (1, 0), (2, 1), (2, 2)]) >>> print(a.invert()) 0-0 1-0 2-1 2-2 >>> a[0] [(0, 1), (0, 0)] >>> a.invert()[2] [(2, 1), (2, 2)] >>> b = Alignment([(0, 0), (0, 1)]) >>> b.issubset(a) True >>> c = Alignment.fromstring('0-0 0-1') >>> b == c True ctj||}|tgk7rtd|Dnd|_d|_|S)Nc3&K|] }|d ywrNr r"ps rr$z$Alignment.__new__..s+!+r%r) frozenset__new__max_len_index)clspairsr s rrbzAlignment.__new__s?  e,/3y}/DC+d++!   rclt|jDcgc] }t|c}Scc}w)a Read a giza-formatted string and return an Alignment object. >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5') Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)]) :type s: str :param s: the positional alignments in giza format :rtype: Alignment :return: An Alignment object corresponding to the string representation ``s``. )r split _giza2pair)rfr4as r fromstringzAlignment.fromstrings';A*Q-;<<;s1cp|js|j|jj|S)zN Look up the alignments that map from a given index or slice. )re _build_index __getitem__)r keys rrozAlignment.__getitem__s,{{    {{&&s++rc&td|DS)zI Return an Alignment object, being the inverted mapping. c3<K|]}|d|df|ddzyw)r-rNr r_s rr$z#Alignment.invert..s'>A1Q41,12.>s)r rs rrUzAlignment.inverts>>>>rNct}|js|j|s'tt t |j}|D]'}|j d|j|D)t|S)z Work out the range of the mapping from the given positions. If no positions are specified, compute the range of the entire mapping. c3&K|] \}}| ywrr )r"_fs rr$z"Alignment.range..s6tq!6r%)setrernlistr2rupdatesorted)r positionsimager`s rr2zAlignment.rangesl {{    U3t{{#345I 7A LL6t{{1~6 6 7e}rcdt|zS)M Produce a Giza-formatted string representing the alignment. z Alignment(%r))r{rs rr)zAlignment.__repr__s--rcDdjdt|DS)rr.c3,K|] }d|ddzyw)z%d-%dNrsr r_s rr$z$Alignment.__str__..s>A!BQ%>s)r(r{rs rrSzAlignment.__str__sxx>>>>rct|jdzDcgc]}gc}|_|D]#}|j|dj|%ycc}w)z Build a list self._index such that self._index[i] is a list of the alignments originating from word i. r-rN)r2rdreappend)r rvr`s rrnzAlignment._build_indexsR $)Q#78ar8  (A KK!  $ $Q ' (9s Ar) rVrWrXrYrb classmethodrlrorUr2r)rSrnr rrr r s>2  = =,? . ? (rr cV|jd\}}t|t|fSN-riint) pair_stringr7js rrjrjs(   S !DAq q63q6>rcX|jd\}}}t|t|fSrr)rr7rr`s r _naacl2pairr s*$GAq! q63q6>rct|tusJtfd|Ds tdtfd|Ds tdy)ab Check whether the alignments are legal. :param num_words: the number of source language words :type num_words: int :param num_mots: the number of target language words :type num_mots: int :param alignment: alignment to be checked :type alignment: Alignment :raise IndexError: if alignment falls outside the sentence c3DK|]}d|dcxkxrkncywr^r )r"pair num_wordss rr$z#_check_alignment..s">DqDG'i''>s z&Alignment is outside boundary of wordsc3VK|] }|dduxsd|dcxkxrknc"yw)r-Nrr )r"rnum_motss rr$z#_check_alignment.. s/PdtAw$9!tAw"9"99Ps&)z%Alignment is outside boundary of motsN)r r all IndexError)rrr s`` rrrsQ  ?i '' ' >I> >ABB PiP P@AA QrPhraseTableEntry trg_phraselog_probc(eZdZdZdZdZdZdZy) PhraseTablezs In-memory store of translations for a given phrase, and the log probability of the those translations c"t|_yr)dict src_phrasesrs rrzPhraseTable.__init__-s 6rc |j|S)a Get the translations for a source language phrase :param src_phrase: Source language phrase of interest :type src_phrase: tuple(str) :return: A list of target language phrases that are translations of ``src_phrase``, ordered in decreasing order of likelihood. Each list element is a tuple of the target phrase and its log probability. :rtype: list(PhraseTableEntry) rr src_phrases rtranslations_forzPhraseTable.translations_for0s ++rct||}||jvrg|j|<|j|j||j|jddy)z :type src_phrase: tuple(str) :type trg_phrase: tuple(str) :param log_prob: Log probability that given ``src_phrase``, ``trg_phrase`` is its translation :type log_prob: float )rrc|jSr)r)rKs rz!PhraseTable.add..Ls  rT)rpreverseN)rrrsort)r rrrentrys raddzPhraseTable.add?sf!JJ T-- -+-D  Z ( $++E2 $)).BD)Qrc||jvSrrrs r __contains__zPhraseTable.__contains__NsT----rN)rVrWrXrYrrrrr rrrr's " , R.rr) rA collectionsrrrar rjrrrrr rrrsZ"QNQNh_( _(D  B*0<2LM(.(.r