JL ixRdZddlZddlmZddlmZGddej Zy)z Unit tests for nltk.tgrep. N)tgrep) ParentedTreeceZdZdZdZdZdZdZdZdZ dZ d Z d Z d Z d Zd ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ y) TestSequenceFunctionsz5 Class containing unit tests for nltk.tgrep. cVtjd}|j|gdy)z. Simple test of tokenization. %A .. (B !< C . D) | ![<< (E , F) $ G])A..(B!<C.D)|r [<|jtjd?gd@|jtjdAgdB|jtjdCgdD|jtjdEgdF|jtjdGgdH|jtjdIgdJ|jtjdKgdL|jtjdMgdN|jtjdOgdP|jtjdQgdR|jtjdSgdT|jtjdUgdV|jtjdWgdX|jtjdYgdZ|jtjd[gd\|jtjd]gd^|jtjd_gd`|jtjdagdb|jtjdcgdd|jtjdegdf|jtjdggdh|jtjdigdj|jtjdkgdl|jtjdmgdn|jtjdogdp|jtjdqgdr|jtjdsgdt|jtjdugdv|jtjdwgdx|jtjdygdz|jtjd{gd|y})~z8 Test tokenization of basic link types. zAB)r >r zA<3B)r <3r zA>3B)r >3r zA<,B)r <,r zA>,B)r >,r zA<-3B)r <-3r zA>-3B)r >-3r zA<-B)r <-r zA>-B)r >-r zA<'B)r <'r zA>'B)r >'r zA<:B)r <:r zA>:B)r >:r zA<>B)r >>r zA<<,B)r <<,r zA>>,B)r >>,r zA<<'B)r <<'r zA>>'B)r >>'r zA<<:B)r <<:r zA>>:B)r >>:r zA.B)r rr zA,B)r rr zA..B)r r r zA,,B)r ,,r zA$B)r rr zA$.B)r $.r zA$,B)r $,r zA$..B)r $..r zA$,,B)r $,,r zA!B)r r r(r zA!<3B)r r r)r zA!>3B)r r r*r zA!<,B)r r r+r zA!>,B)r r r,r zA!<-3B)r r r-r zA!>-3B)r r r.r zA!<-B)r r r/r zA!>-B)r r r0r zA!<'B)r r r1r zA!>'B)r r r2r zA!<:B)r r r3r zA!>:B)r r r4r zA!<>B)r r r5r zA!<<,B)r r r6r zA!>>,B)r r r7r zA!<<'B)r r r8r zA!>>'B)r r r9r zA!<<:B)r r r:r zA!>>:B)r r r;r zA!.B)r r rr zA!,B)r r rr zA!..B)r r r r zA!,,B)r r r<r zA!$B)r r rr zA!$.B)r r r=r zA!$,B)r r r>r zA!$..B)r r r?r zA!$,,B)r r r@r Nr$r%s r test_tokenize_link_typesz.TestSequenceFunctions.test_tokenize_link_typesDs --e4oF --e4oF --f57GH --f57GH --f57GH --f57GH --g68IJ --g68IJ --f57GH --f57GH --f57GH --f57GH --f57GH --f57GH --f57GH --f57GH --g68IJ --g68IJ --g68IJ --g68IJ --g68IJ --g68IJ --e4oF --e4oF --f57GH --f57GH --e4oF --f57GH --f57GH --g68IJ --g68IJ --f57KL --f57KL --g68MN --g68MN --g68MN --g68MN --h79OP --h79OP --g68MN --g68MN --g68MN --g68MN --g68MN --g68MN --g68MN --g68MN --h79OP --h79OP --h79OP --h79OP --h79OP --h79OP --f57KL --f57KL --g68MN --g68MN --f57KL --g68MN --g68MN --h79OP --h79OPr"c\|jtjdgd|jtjddg|jtjdgd|jtjdgd|jtjdgd |jtjd gd |jtjd gd |jtjdgd|jtjdgd|jtjdgd|jtjdgdy)zJ Test tokenization of the TGrep2 manual example patterns. NP < PP)NPrPP/^NP/ NP << PP . VP)rDrrErVPNP << PP | . VP)rDrrErrrHNP !<< PP [> NP | >> VP]) rDr rrErr(rDrr5rHrNP << (PP . VP))rDrr rErrHrNP <' (PP <, (IN < on))) rDr1r rEr+r INronrrS < (A < B) < C) Srr r rr rrrS < ((A < B) < C)) rPrr r r rr rrrrS < (A < B < C)) rPrr r rr rrrzA3B"3B"rrNr$r%s r test_tokenize_quotingz+TestSequenceFunctions.test_tokenize_quotings$   !? @ D r"cl|jtjddg|jtjddg|jtjddg|jtjddg|jtjdddg|jtjdgd |jtjd gd |jtjd gd y)z2 Test tokenization of node names. Robertz /^[Bb]ob/*__zN()N(rzN(0,))r[0rrzN(0,0))r[r\rr\rzN(0,0,))r[r\rr\rrNr$r%s r test_tokenize_nodenamesz-TestSequenceFunctions.test_tokenize_nodenamess --h7(D --k:[MJ --c2SE: --d3dV< --e4tSkB --g68MN --h79ST    +-L r"cR|jtjdgdy)z9 Test tokenization of macro definitions. z4@ NP /^NP/; @ NN /^NN/; @NP [!< NP | < @NN] !$.. @NN)@rDrF;r_NNz/^NN/r`z@NPrr rrDrr@NNrr r?rbNr$r%s r test_tokenize_macrosz*TestSequenceFunctions.test_tokenize_macross(   H    r"cltjd}|jtt j d|gddgg|jtt j d|g|d|dgg|jtt j d|ggdgy)z` Test a simple use of tgrep for finding nodes matching a given pattern. A(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))rarrgNN|JJ)rrirfrhNr fromstringrlistrtgrep_positions tgrep_nodesrtrees r test_node_simplez&TestSequenceFunctions.test_node_simples && R  e33D4&ABffEUDVW  ""4$0 1T$Zd4L3M   &&w7 8;S:T r"c ftjd}|jtt j d|gtt j d|g|jtt j d|gtt j d|gy)z9Test that the tgrep print operator ' is properly ignored.(S (n x) (N x))Nz'Nz/[Nn]/z'/[Nn]/Nrrmrrnrrorqs r test_node_printingz(TestSequenceFunctions.test_node_printings&&'89  &&sTF3 4 &&tdV4 5   &&x$8 9 &&y4&9 : r"c tjd}|jtt j d|gtt j d|g|jtt j d|gtt j d|g|jtt j d|gtt j d|gy)z] Test that tgrep search strings handles bytes and strs the same way. resNNrasNN|JJrjNrlrqs r test_node_encodingz(TestSequenceFunctions.test_node_encodings && R   &&utf5 6 &&utf5 6   ""54&1 2 ""4$0 1   &&x$8 9 &&w7 8 r"ctjd}|jtt j d|gdgg|jtt j d|gddggy)zI Test selecting nodes using case insensitive node names. ru"N"rizi@"N"rNrwrqs r test_node_nocasez&TestSequenceFunctions.test_node_nocasesg&&'89 e33ED6BCtfXN e33GdVDEt ~Vr"ctjd}|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gd ggy ) z? Test selecting nodes using quoted node names. z(N ("N" x) (N" x) ("\" x))r|z"\"N\""r~z"N\""r}z"\"\\\""rgNrwrqs r test_node_quotedz&TestSequenceFunctions.test_node_quoteds&&'DE e33ED6BCrdVL e33K$HITF8T e33HtfEF$Q e33NTFKLPTvhWr"ctjd}|jtt j d|gddggy)/ Test regex matching on nodes. $(S (NP-SBJ x) (NP x) (NNP x) (VP x))rFr~r}Nrwrqs r test_node_regexz%TestSequenceFunctions.test_node_regex)sA&&'MN e33GdVDEt ~Vr"ctjd}|jtt j d|gddgg|jtt j d|ggdgy)rz(S (SBJ x) (SBJ1 x) (NP-SBJ x))z/^SBJ/r~r}z/SBJ/)r~r}rNrwrqs r test_node_regex_2z'TestSequenceFunctions.test_node_regex_22sm&&'HI e33HtfEF$PTW  &&w7 8;M:N r"ctjd}tt|j Dchc]}|j |}}|j Dcgc] }||vs| }}|D]]}d|}ttj||g}|jt|dd|j|dd|_ycc}wcc}w)zE Test matching on nodes based on NLTK tree position. rrvrriN) rrmrangelenleavesleaf_treeposition treepositionsrnrror)rrrxleaf_positionstree_positionspositionnode_idros r test_node_tree_positionz-TestSequenceFunctions.test_node_tree_position>s&&'MN=B3t{{}CU=VW$003WW%)%7%7%9UQn=T!UU& >H(nG"5#8#84&#IJO   S!34a 8   _Q/2H =  >XUsC& C0Cc tjd}|jtt j d|gddgg|jtt j d|gdggy)zS Test node name matching with the search_leaves flag set to False. (S (A (T x)) (B (N x)))rrrrrirrFNrwrqs r test_node_noleavesz(TestSequenceFunctions.test_node_noleavesLsn&&'@A  &&sTF3 4 97M6N  e33C$GH2$Or"c tjd}|jtt j d|gdgg|jtt j d|gdgg|jtt j d|ggdg|jtt j d|gdgg|jtt j d |gd gg|jtt j d |gd gg|jtt j d |ggdg|jtt j d|ggdg|jtt j d|ggdg|jtt j d|gdd gg|jtt j d|gdd gg|jtt j d|gddgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|ggdgtjd}|jtt j d|gdgg|jtt j d|gddgg|jtt j d|ggdg|jtt j d|gdggtjd }|jtt j d!|gd gg|jtt j d"|ggd#gtjd$}|jtt j d%|ggd&g|jtt j d'|ggd(gy))*zC Test matching nodes based on dominance relations. rz* < Tr~z * < T > Sz* !< T)rrrrr}rirrz * !< T > Sr}z* > Arz* > Brz* !> B)rr~rrr}rz * !> B >> S)r~rr}z* >> S)r~rr}rz* >>, Sz* >>' Sz* << Trz* <<' Tz* <<1 Nz* !<< T)rrr}rrz(S (A (T x)) (B (T x) (N x )))z* <: Tz* !<: T)rrrr}rr)riri)ririrz * !<: T > Sz(S (T (A x) (B x)) (T (C x)))z* >: Tz* !>: T)rr~rrrkrrirr}rz=(S (A (B (C (D (E (T x)))))) (A (B (C (D (E (T x))) (N x)))))z* <<: T)r~rrrrrrrrrrrrirrr)rirrrrz* >>: A)rrrr)rrrrrrrrNrwrqs r tests_rel_dominancez)TestSequenceFunctions.tests_rel_dominanceVs+&&'@A e33GdVDExP e33K$HITF8T  &&x$8 9 = >  e33L4&IJdVHU e33GdVDEzR e33GdVDEzR  &&x$8 9 ; <   &&}tf= >AU@V   &&x$8 9 ) *   &&y4&9 :dF^>> SN)rrm assertRaisesrTgrepExceptionrnrorqs r test_bad_operatorz'TestSequenceFunctions.test_bad_operators?&&'@A   $(=(=i$(P r"ctjd}d}|jtt j ||gddggd}|jtt j ||gddggy)z` Test that comments are correctly filtered out of tgrep search strings. z(S (NN x) (NP x) (NN x))z= @ NP /^NP/; @ NN /^NN/; @NN r~rzg # macros @ NP /^NP/; @ NN /^NN/; # search string @NN Nrw)rrrsearch1search2s r test_commentsz#TestSequenceFunctions.test_commentss{ &&'AB  e33GdVDEt ~V  e33GdVDEt ~Vr"c$tjd}|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gddggy ) z7 Test matching sister nodes in a tree. (S (A x) (B x) (C x))z* $. Br~z* $.. Bz* $, Brz* $,, Bz* $ BNrwrqs r test_rel_sister_nodesz+TestSequenceFunctions.test_rel_sister_nodess&&'>? e33HtfEF$Q e33IvFG4&R e33HtfEF$Q e33IvFG4&R e33GdVDEt ~Vr"ctjd}|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d |gdgg|jtt j d |gdgg|jtt j d |gdgg|jtt j d |gdggtjd }|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdgg|jtt j d|gdggy)zP Test matching nodes based on their index in their parent node. rz* >, Sr~z* >1 Sz* >2 Sr}z* >3 Srz* >' Sz* >-1 Sz* >-2 Sz* >-3 SzE(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))z* <, Az* <1 Az* <2 Az* <3 Az* <' Az* <-1 Az* <-2 Az* <-3 ANrwrqs r tests_rel_indexed_childrenz0TestSequenceFunctions.tests_rel_indexed_childrens&&'>? e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33IvFG4&R e33IvFG4&R e33IvFG4&R&& V  e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33HtfEF$Q e33IvFG4&R e33IvFG4&R e33IvFG4&Rr"c^tjd}|jtt j d|ggdg|jtt j d|gddgg|jtt j d|ggdg|jtt j d |ggd g|jtt j d |gd d gg|jtt j d|ggdg|jtt j d|ggdg|jtt j d|ggdgy)zD Test matching nodes based on precedence relations. zV(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (X (PP x)) (Y (AP x)))) (NP (RC (NP (AP x)))))z* . X)r~rkrz* . Yrrz* .. X)r~rrrkrz* .. Y)r~rrrkrrrz* , Xrirririrrirz* , Y)rrgrrgrrrgrrrz* ,, X)rrrrrrz* ,, YNrwrqs r test_rel_precedencez)TestSequenceFunctions.test_rel_precedences&& &   &&w7 8;T:U   &&w7 8I|;T:U   &&x$8 9 9 :   &&x$8 9 R S   &&w7 8I|;T:U   &&w7 8 4 5   &&x$8 9 M N   &&x$8 9 4 5 r"ctjd}|jtt j d|gdggtjd}|jtt j d|gdggtjd}|jtt j d|gddggtjd }|jtt j d |gd d ggtjd }|jtt j d|gdggtjd}|jtt j d|gdggtjd}|jtt j d|gdggtjd}|jtt j d|gdgg|jtt j d|gdggy)zA Test the Basic Examples from the TGrep2 manual. z(S (NP (AP x)) (NP (PP x)))rCr}z$(S (NP x) (VP x) (NP (PP x)) (VP x))rGrz6(S (NP (AP x)) (NP (PP x)) (NP (DET x) (NN x)) (VP x))rIzX(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (NP (PP x)) (NP (AP x)))) (NP (RC (NP (AP x)))))rJrkrz:(S (NP (AP (PP x) (VP x))) (NP (AP (PP x) (NP x))) (NP x))rKr~ze(S (NP (DET a) (NN cat) (PP (IN on) (NP x))) (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x)) (NP x))rLz;(S (S (C x) (A (B x))) (S (C x) (A x)) (S (D x) (A (B x))))rOz/(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))rQrRNrwrqs r test_examplesz#TestSequenceFunctions.test_examplessC&&'DE e33IvFG4&R&&'MN e33OdVLMQUPVxX&& G   &&'84&A BdD\N && &   &&'AD6J Ki ! && K   &&'84&A BdVH &&    &&'@4&I JdVH && L   &&'84&A BdVH && =   &&':TFC Dvh  &&'84&A BdVH r"ctjd}|jtt j d|gddgg|j tjtt j d|gy)z8 Test defining and using tgrep2 macros. zi(VP (VB sold) (NP (DET the) (NN heiress)) (NP (NN deed) (PREP to) (NP (DET the) (NN school) (NN house))))z+@ NP /^NP/; @ NN /^NN/; @NP !< @NP !$.. @NNr})rgrgz,@ NP /^NP/; @ NN /^NN/; @CNP !< @NP !$.. @NNN)rrmrrnrrorrrqs r test_use_macrosz%TestSequenceFunctions.test_use_macrosfs&& 6   %%CdV  F^        ! !@4&  r"c|jtjdgd|jtjdgdy)z#Test tokenization of labeled nodes.!S < @SBJ < (@VP < (@VB $.. @OBJ))) rPr@SBJrr @VPrr @VBr?@OBJrrz%S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)))rPrr=srr rrvrr rr?rrrNr$r%s r test_tokenize_node_labelsz/TestSequenceFunctions.test_tokenize_node_labelssH   !D E  $   !H I  r"cR|jtjdgdy)z(Test tokenization of segmented patterns.z0S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v)rPrrrrrr rrrrr rr?rrr:z=sr z=vNr$r%s r test_tokenize_segmented_patternsz6TestSequenceFunctions.test_tokenize_segmented_patternss$   !S T  r"c d}tjd}tjd}|jdddz}d}|jt t j ||gd|jt t j ||gd|jt t j ||gd|jt t j ||gt t j ||g|jt t j ||gd|jt t j ||gd|jt t j ||gd|jt t j ||gt t j ||gy) zN Test labeled nodes. Test case from Emily M. Bender. z # macros @ SBJ /SBJ/; @ VP /VP/; @ VB /VB/; @ VPoB /V[PB]/; @ OBJ /OBJ/; # 1 svo S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =vz2(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))z2(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))z rrz-S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))N) rrmsplit assertTruernrror assertFalse)rsearchsent1sent2search_firsthalfsearch_rewrites r test_labeled_nodesz(TestSequenceFunctions.test_labeled_nodess @'' @ '' @ "<</25XXH U223CeWMNqQR U226E7CDQGH U22>E7KLQOP  &&vw7 8 &&~w? @  U223CeWMNqQR e33FUGDEaHI e33NUGLMaPQ  &&vw7 8 &&~w? @ r"ctjd}|jtt j d|gdgg|jtt j d|gddggy)zm Test that multiple (3 or more) conjunctions of node relations are handled properly. z'((A (B b) (C c)) (A (B b) (C c) (D d)))z(A < B < C < D)r}z (A < B < C)r~Nrw)rsents r test_multiple_conjsz)TestSequenceFunctions.test_multiple_conjssr &&'PQ  &&'84&A BdVH   &&}tf= >$ r"c`tjd}|jtt j d|gddgg|jtt j d|gddgg|jtt j d|gddggy)zp Test that semicolons at the end of a tgrep2 search string won't cause a parse failure. rerarfrhzNN;zNN;;Nrwrqs r test_trailing_semicolonz-TestSequenceFunctions.test_trailing_semicolons && R  e33D4&ABffEUDVW e33ED6BCvvFVEWX  &&vv6 766:J9K r"N)!__name__ __module__ __qualname____doc__r!r&rArTrVr]rcrsrxrzrrrrrrrrrrrrrrrrrrrrr"r rrs B AQF$YL   @   *WXW   >PU n W, WS2% NM ^ 4) V :& P    r"r)runittestnltkr nltk.treerTestCaserrr"r rs) "v  H--v  r"