JL iIddlZddlZddlZddlmZddlmZmZmZm Z m Z ddl m Z ddl mZddlmZdZGdd e ZGd d eZGd d eZGddeZy)N)PIPE) _java_options config_java find_jar_iterfind_jars_within_pathjava)ParserIDependencyGraph)Treez2https://nlp.stanford.edu/software/lex-parser.shtmlcpeZdZdZdZdZdZdZdZ ddZ dZ dd Z dd Z dd Z dd Zdd ZddZy)GenericStanfordParserz Interface to the Stanford Parserz+stanford-parser-(\d+)(\.(\d+))+-models\.jarzstanford-parser\.jarz3edu.stanford.nlp.parser.lexparser.LexicalizedParserFNc jtt|j|ddt|dd}tt|j|ddt|dd} t j j|d } t| gt| z|_ ||_ ||_ ||_ ||_y) N)STANFORD_PARSERSTANFORD_CORENLPT)env_vars searchpathurlverboseis_regexc@tjj|SNospathdirname model_paths Y/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/nltk/parse/stanford.pyz0GenericStanfordParser.__init__..;277??:#>)key)STANFORD_MODELSrc@tjj|Srrrs r r!z0GenericStanfordParser.__init__..Hr"r#r)maxr_JAR _stanford_url_MODEL_JAR_PATTERNrrsplittupler _classpathr _encodingcorenlp_options java_options) self path_to_jarpath_to_models_jarrencodingrr0r/ stanford_jar model_jar stanford_dirs r __init__zGenericStanfordParser.__init__&s  @! ?   ''"@! ?  ww}}\215  .CL.Q QR$!.(r#c g}g}g}d}|jdD]}|dk(r|r|jt|g}d})|jr4|j|j dj |g}d}i|jt|j dj |gg}|j|d}t|S)NF T) splitlinesappenditer_DOUBLE_SPACED_OUTPUT _make_treejoin)r1output_res cur_lines cur_treesblanklines r _parse_trees_outputz)GenericStanfordParser._parse_trees_outputVs  &&u- DrzJJtI/ "I!E//$$T__TYYy5I%JK "I EJJtT__TYYy5I%J$KLM "I  &  Cyr#c |jd|jddd|jdddg }|j|j |dj d |D|S) a Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list where each sentence is a list of words. Each sentence will be automatically tagged with this StanfordParser instance's tagger. If whitespaces exists inside a token, then the token will be treated as separate tokens. :param sentences: Input sentences to parse :type sentences: list(list(str)) :rtype: iter(iter(Tree)) -model -sentencesnewline -outputFormat -tokenizedz-escaperz-edu.stanford.nlp.process.PTBEscapingProcessorr;c3>K|]}dj|yw) NrA).0sentences r z4GenericStanfordParser.parse_sents..sLhsxx1Ls _MAIN_CLASSr_OUTPUT_FORMATrH_executerAr1 sentencesrcmds r parse_sentsz!GenericStanfordParser.parse_sentsmsp     OO         ;  '' MMTYYL)LLg   r#c:t|j|g|S)a& Use StanfordParser to parse a sentence. Takes a sentence as a string; before parsing, it will be automatically tokenized and tagged by the Stanford Parser. :param sentence: Input sentence to parse :type sentence: str :rtype: iter(Tree) )nextraw_parse_sentsr1rSrs r raw_parsezGenericStanfordParser.raw_parsesD(((W=>>r#c|jd|jddd|jg}|j|j |dj ||S)aI Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list of strings. Each sentence will be automatically tokenized and tagged by the Stanford Parser. :param sentences: Input sentences to parse :type sentences: list(str) :rtype: iter(iter(Tree)) rJrKrLrMr;rUrYs r r_z%GenericStanfordParser.raw_parse_sentss^     OO       '' MM#tyy3W =  r#c:t|j|g|S)a0 Use StanfordParser to parse a sentence. Takes a sentence as a list of (word, tag) tuples; the sentence must have already been tokenized and tagged. :param sentence: Input sentence to parse :type sentence: list(tuple(str, str)) :rtype: iter(Tree) )r^tagged_parse_sentsr`s r tagged_parsez"GenericStanfordParser.tagged_parsesD++XJ@AAr#cd|jd|jddd|jdddd d d g}|j|j |d j fd |D|S)ad Use StanfordParser to parse multiple sentences. Takes multiple sentences where each sentence is a list of (word, tag) tuples. The sentences must have already been tokenized and tagged. :param sentences: Input sentences to parse :type sentences: list(list(tuple(str, str))) :rtype: iter(iter(Tree)) /rJrKrLrMrNz -tagSeparatorz-tokenizerFactoryz,edu.stanford.nlp.process.WhitespaceTokenizerz-tokenizerMethodnewCoreLabelTokenizerFactoryr;c3RK|]}djfd|D yw)rPc3@K|]}j|ywrrQ)rRtagged tag_separators r rTzEGenericStanfordParser.tagged_parse_sents...sOF]//7OsNrQ)rRrSrls r rTz;GenericStanfordParser.tagged_parse_sents..s( HHOhOOs$'rU)r1rZrr[rls @r rdz(GenericStanfordParser.tagged_parse_sentss     OO           :  * "'' MM $-    r#c|j}|jd|g|jr)|j|jjdj t }t |j|tjdd5}t|tr|r|j|}|j||j|jr7|j!dt#||j$|t&t&\}}n?|j)|j*t#||j$t&t& \}}|j-d d }|j-d d }|j/|}dddt1j2j*t |dS#1swY7xYw) Nz -encodingrP)optionsrwbF)modedeleter) classpathstdinstdoutstderr)rrrtrus  s)r.extendr/r+rArrr0tempfileNamedTemporaryFile isinstancestrencodewriteflush _USE_STDINseekrr-rr=namereplacedecoderunlink) r1r[input_rr4default_options input_filertrus r rXzGenericStanfordParser._executest>> K*+    JJt++113 4((=1 D--w? ( (d5 A -Z&#&8x0   V $    "!%"oo$ " :??+!%4??4"^^K6F^^K6F]]8,F3 -6 *//" OU; A - -s C ? . B% N,r#rc,eZdZdZdZfdZdZxZS)StanfordParsera >>> parser=StanfordParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) # doctest: +SKIP >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents(( ... "the quick brown fox jumps over the lazy dog", ... "the quick grey wolf jumps over the lazy fox" ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']), Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] pennc\tjdtdt||i|y)NzcThe StanfordParser will be deprecated Please use nltk.parse.corenlp.CoreNLPParser instead. stacklevelwarningswarnDeprecationWarningsuperr8r1argskwargs __class__s r r8zStanfordParser.__init__Fs/  R    $)&)r#c,tj|Sr)r fromstringr1results r r@zStanfordParser._make_treePsv&&r#rrrrrWr8r@ __classcell__rs@r rrs0dN*'r#rc,eZdZdZdZfdZdZxZS)StanfordDependencyParsera >>> dep_parser=StanfordDependencyParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) # doctest: +SKIP >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] conll2007c\tjdtdt||i|y)NzwThe StanfordDependencyParser will be deprecated Please use nltk.parse.corenlp.CoreNLPDependencyParser instead.rrrrs r r8z!StanfordDependencyParser.__init__s/  \    $)&)r#ct|dS)Nroottop_relation_labelr rs r r@z#StanfordDependencyParser._make_treev&AAr#rrs@r rrTs.`!N*Br#rcHeZdZdZdZdZdZdZdZdZ fdZ d dZ d Z xZ S) StanfordNeuralDependencyParserar >>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'), u'punct', (u'.', u'.'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy']), '.'])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])] conllz)edu.stanford.nlp.pipeline.StanfordCoreNLPz%stanford-corenlp-(\d+)(\.(\d+))+\.jarz,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarTctjdtdt||i||xj dz c_y)Nz}The StanfordNeuralDependencyParser will be deprecated Please use nltk.parse.corenlp.CoreNLPDependencyParser instead.rrz(-annotators tokenize,ssplit,pos,depparse)rrrrr8r/rs r r8z'StanfordNeuralDependencyParser.__init__sA  \    $)&)  JJr#ctd)z Currently unimplemented because the neural dependency parser (and the StanfordCoreNLP pipeline class) doesn't support passing in pre- tagged tokens. zxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.)NotImplementedError)r1rZrs r rdz1StanfordNeuralDependencyParser.tagged_parse_sentss " :  r#ct|dS)NROOTrr rs r r@z)StanfordNeuralDependencyParser._make_treerr#r)rrrrrWrVr(r*rr?r8rdr@rrs@r rrs;>N=K 3DHJ  K  Br#r)rrxr subprocessrnltk.internalsrrrrrnltk.parse.apir nltk.parse.dependencygraphr nltk.treer r)rrrrrr#r rsk #6D rGrj@'*@'F>B4>BB?B%:?Br#