K idZddlZddlZddlmZddlZddlmZddlm Z ddl m Z ddl m Z mZddlmZej"eZd Zd Zej,Zej.Zej0Zej2Zej4Zd Zd Zd ZdZdZdZ dZ!dZ"dZ#dZ$d#dZ%dZ&dZ'dZ(dZ)Gdde*Z+dZ,GddZ-Gdd Z.Gd!d"Z/y)$aa The diff parser is trying to be a faster version of the normal parser by trying to reuse the nodes of a previous pass over the same file. This is also called incremental parsing in parser literature. The difference is mostly that with incremental parsing you get a range that needs to be reparsed. Here we calculate that range ourselves by using difflib. After that it's essentially incremental parsing. The biggest issue of this approach is that we reuse nodes in a mutable way. The intial design and idea is quite problematic for this parser, but it is also pretty fast. Measurements showed that just copying nodes in Python is simply quite a bit slower (especially for big files >3 kLOC). Therefore we did not want to get rid of the mutable nodes, since this is usually not an issue. This is by far the hardest software I ever wrote, exactly because the initial design is crappy. When you have to account for a lot of mutable state, it creates a ton of issues that you would otherwise not have. This file took probably 3-6 months to write, which is insane for a parser. There is a fuzzer in that helps test this whole thing. Please use it if you make changes here. If you run the fuzzer like:: test/fuzz_diff_parser.py random -n 100000 you can be pretty sure that everything is still fine. I sometimes run the fuzzer up to 24h to make sure everything is still ok. N) namedtuple) split_lines)Parser) EndMarker) PythonTokenBOM_UTF8_STRING)PythonTokenTypesF)INDENT ERROR_DEDENTDEDENTcH|jdk(xr|jtvS)N error_leaf)type token_type_INDENTATION_TOKENSnodes W/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/parso/python/diff.py_is_indentation_error_leafr3s 99 $ O{{ "? :$:P:P:RS  kkG!%  ((DNN: &% &:$))DKK7G!.!8!8  7?dgo"7+H%a(3x=81q,ACL,PPF!!/2446&@F1IM1~~'A$..&)AA'?Bs 9AFDFFcn |j} |j}t||D]\}}t||t|t|k(s"Jdt|zdzt|zy#t$r J||fwxYw#t$rt|dr J||f|j|jk(s J||f|j|jk(s J||f|j |j k(s J||f|j |j k(s J||fYywxYw)Nr$r+) r$r0hasattrr1rr2r)zip_assert_nodes_are_equalr4repr)node1node2 children1 children2n1n2s rrArAys> )NN  )Ii+(BB'( y>S^ +\TDO-Cd-JTR[_-\\ +  ) (5%. (5 ) 5*-=u~=-{{ekk)9E5>9)zzUZZ'7%7'||u||+;eU^;+%//1AE5>A1 s B A55B B&D43D4ct|jd}tj||}tj||}ddl}d|j ddj |ddj |S)NTkeependsrz=There's an issue with the diff parser. Please report (parso vz ) - Old/New: z Actual Diff (May be empty): )rget_codedifflib unified_diffparso __version__join)module old_lines new_lines current_lines current_diff old_new_diffrPs r_get_debug_error_messagerYsh 1DAM'' =AL'' 9=L   bggl3RWW\5J Lrc|j}t|r|jdS|j}|jdk(r d|j vr|j ddzS|j dS)Nr endmarkerr+r") get_last_leaf_ends_with_newliner)rrr2r3) node_or_leaf last_leafns r_get_last_linerasw**,I)$""1%%  # # % 66[ TQXX%5$$Q'!+ +  ##rc|O|jdk(r@|jdk(r1|j}||jdk(r|jdk(r1|S)Nrr )rrrrs r_skip_dedent_error_leavesrcsQ  tyyL8T__PX=X%%'  tyyL8T__PX=X Krct|}|jdk(r|jj}n |j}|dk(xs$|j dxs|j dS)Nrnewliner+r,)rcrrlowerendswith)rsuffixtyps rr]r]sZ $T *D yyL oo##%ii )  Mvt4 M8MMrc2|D]}|jdvsyy)zg if, while, for and try might not be finished, because another part might still be parsed. )if_stmt while_stmtfor_stmttry_stmtFT) nonterminal pgen_grammarstack stack_nodes r_flows_finishedrts*   ! !%V V rc|jdk(r|jd}|jdvr|jd}|jdvxr|jdjdk(S)N decoratedr- async_funcdef async_stmtclassdeffuncdefsuite)rr$rs r_func_or_class_has_suiter~s` yyK}}R  yy33}}R  99/ / UDMM"4E4J4Jg4UUrct||syt|D]<}|jdk(ry|jdk(s$t|jdkDcSy)NF decoratorr}r"T)rtreversedror4nodesrps r_suite_or_file_input_is_validrs\ < /uo-  ! ![ 0  ! !W ,z''(1, ,- rc|jdk(r|jd} |jdj}|dvS#t$rYywxYw)Nryr"rF)ifforwhiletrywith)rr$r1r0)rr1s r _is_flow_nodersW yyL }}Q a && 9 99 s= A A c eZdZy)_PositionUpdatingFinishedN)__name__ __module__ __qualname__rrrrsrrc|D]} |j}t|||y#t$r"|xj|z c_||urtYJwxYwr)r$_update_positionsr0r<r)r line_offsetr_rr$s rrrsb @ @}}H h Y ? @ 0 II $Iy //! 0s #(A AcHeZdZdZdZdZdZdZdZdZ dZ d Z d d Z y ) DiffParserz An advanced form of parsing a file faster. Unfortunately comes with huge side effects. It changes the given module. c.||_||_||_yr) _pgen_grammar _tokenizer_module)selfrq tokenizerrSs r__init__zDiffParser.__init__s)# rcTd|_d|_t|j|_yNr) _copy_count _parser_count _NodesTreer _nodes_treers r_resetzDiffParser._resets#%dll3rc tjdd|j_||_|j t |}tjd||j}|j}tjdt |d||D]\}}}} } tjd||dz|| dz| | |k(r |ddk(r| dz} |d k(r| |z } |j| |dz|| ]|d k(r|j| u|d k(r|j| |d k(rJ|jjtr dj|} |jj!| k(sJt#|jt%|j&dj)|j+|} t-|j| |jj4d}||k7r*t7d|d|dt3|j||ztjd|jS#t.$r"t1t3|j||wxYw)a The algorithm works as follows: Equal: - Assure that the start is a newline, otherwise parse until we get one. - Copy from parsed_until_line + 1 to max(i2 + 1) - Make sure that the indentation is correct (e.g. add DEDENT) - Add old and change positions Insert: - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not much more. Returns the new module node. zdiff parser startNzline_lengths old: z; new: z!-> code[%s] old[%s:%s] new[%s:%s]r"r-rLequalreplace) until_lineinsertdeleteTerror_recoveryr(z != z) zdiff parser end)LOGdebugr _used_names_parser_lines_newrr4rNSequenceMatcher get_opcodes_copy_from_old_parser_parsercloseDEBUG_DIFF_PARSERrRrMr/rrparserrAAssertionErrorprintrYr3 Exception)rrTrU line_lengthsmopcodes operationi1i2j1j2rcodewithout_diff_parser_modulelast_poss rupdatezDiffParser.update sK %&#'  !* )n  $ $T9d6L6L M.." S^[QR)0 - %Ir2r2 II9aR!VR 9[ Yr]b%8aG# 2g **;QBGi' r *h& r * H,,,! -(   wwy)||,,.$666#DLL1-3&&#'.% 23+( 6PQ <<''* { ""*K8*4<<INO  #$||" .t||Y RS s BI+I;c|jjdj|k7r6tj ddj|dj|yy)NrLzparser issue: %s %s)rrMrRrwarning)rrT lines_news r_enabled_debuggingzDiffParser._enabled_debuggingVsH << "bggi&8 8 KK/1CRWWYEW X 9rcd}||jjkDr|jj|z }|j|dz}|*|j|jjdzn*|jj }|j |} |dk(r3|djjjtrg} n9|jjdz} |jj|| d||} | rh|xjdz c_ |jj} tjd| djd| dj ddz  | n(|j|jjdz||jjk7sJ||jj}||jjkDryy)Nr-r"rzcopy old[%s:%s] new[%s:%s])rparsed_until_line_get_old_line_stmtrr.r$indexget_first_leafr2r5r copy_nodesrrrr)r3) rrstart_line_olduntil_line_olduntil_line_newlast_until_lineparsed_until_line_old line_stmt p_childrenr copied_nodesfrom_tos rrz DiffParser._copy_from_old_parserZst//AAA$($4$4$F$F$T !//0E0IJI  D,,>>BC&--66 "((3!Q&&qM88:AALL_]$&L ,,>>BE#'#3#3#>#>"56*&#$L  $$)$));;BII:*1o77:*2.66q9A=ubJ KK 0 0 B BQ FG#d&6&6&H&HH Y/ YH"..@@OQt//AAArcJ|jj|dfd}t|r|j}|j d|k(rS|}|j j dvr%|j }|j j dvr%|jd|k\r|Sy)NrT)include_prefixes) file_inputr})rget_leaf_for_positionr]rr6r.rr))rold_linerrs rrzDiffParser._get_old_line_stmts||118Q-RV1W d #%%'D  ' ' )! , 8D++""*AA{{++""*AA ~~a H, rcdd}||jjkDr|j|}|j}|jj ||j |j #|j |jjd<tjd|djd|jj|jddz ||jjk7sJ||jj}||jjkDryy)zy Parses at least until the given line, but might just parse more until a valid state is reached. rNr-z/parse_part from %s to %s (to %s in part parser)r") rr_try_parse_partr$add_parsed_nodes_keyword_token_indents_replace_tos_indentindentsrrr6r3)rrrrrs rrzDiffParser._parses 4++==='' 3DMME    - -eT5P5P Q''3/3/G/G  ((, IIAa00215  22 Q!#  #d&6&6&H&HH Y/ YH"..@@O%4++===rc|xjdz c_|jj}|j|d}|j |||}t |j d|_|jj|S)z Sets up a normal parser that uses a spezialized tokenizer to only parse until a certain position (or a bit longer if the statement hasn't ended. r"N)rTr)tokens) rrrr_diff_tokenizerr_active_parserr)rrr lines_afterrs rrzDiffParser._try_parse_parts a!,,>>,,->-?@ $$  )%  %    ""(((77rc#Kd}|jj}t|}|j||dzdf||dk(}|jj }d|_i|_|D]q} | j} | tk(rt||kr t|x\} } } } } | ttfvr| tk(r| ddz|_nn9d| vsd| vrtjdd | } n7| dt| k\sJt| | dt| z dk(rd } tt d | | y| t"k(r| j$d|k\rd}nH|rFd}t||k(r6t'|j(|r tt d | j$d y| t*k(r0| j,d vr"t/||j| j$<| tyw) NFr"r)r)ris_first_tokenTr+r,z [^\n\r]+\ZrL)classdef)rrr4rrrrrrrr nextr resubrBr ENDMARKERNEWLINEr)rrNAMEstringlist)rlinesrr was_newlinerinitial_indentation_countrrrtokenrirr)r2s rrzDiffParser._diff_tokenizes ""**$'L! "Q*&!+ !  ##))#' &(#, E**Cf}w<";;AEfM6VY6<"88"l2$>$@ !## Hk+@  $$U+rcd}|jrQ|jd}t|j}|jd|jz}t ||r|dz}|t t|dz z }|r'|jds|jds|dz }|jr(t||jdj|S|S)Nrr-r"r+r,) rrrr3rr]r4rrgrmax get_last_line)rrhr<children_groupr_s rrz_NodesTreeNode.get_last_line4s  !2226N944I$$Q'.*D*DDD")V4  K'(1,, &//$/8M AID   tT004BB6JK K rcPd|jjd|jdS)N) __class__rr&rs r__repr__z_NodesTreeNode.__repr__Ls!^^44dnnEErr)rN) rrrrrrrr rrrrrrrrs9;=N' ./<=-1,0FrrcVeZdZdZedZdZdZdZdZ dZ dZ d d Z d Z y ) rct||_|jg|_||_d|_d|_dg|_y)NrLr)r _base_node_working_stackr_prefix_remainderr2r)rrSs rrz_NodesTree.__init__Qs<(0#/ !# s rcR|jdj|jSr)rrr2rs rrz_NodesTree.parsed_until_lineYs"""2&44T[[AArctt|jD]@}|j|ks||jdur|cS|jj Byr)rrrrpop)rrrs r_update_insertion_nodez!_NodesTree._update_insertion_node]sXT$"5"567 &D+-9L9LQ9O1O    # # % &rcX|j}|j|}|s||jz|_y|djdk7sJ|j|djd}|j jdvsJ|j |||j|d|y)Nrrer")r}rr-)r2_remove_endmarkerrr#r)r&r_update_parsed_node_tos)r tree_nodeskeyword_token_indents old_prefixrs rrz_NodesTree.add_parsed_nodescs[[ ++J7 $t{{2DK !}!!Y...**:a=+B+B1+EF~~""&==== J 3 $$Z^5JKrc|jdk(r|jjd}t|||jd}|j dt |j|jdj||jj||j|jd|yt|r |j|jd|yy)Nr}rr-rrL) rr.r$rr)rrrr r r&r~)rr&r(def_leafnew_toss rr&z"_NodesTree._update_parsed_node_tosrs >>W $ ''003H$1(2D2DEbIG  " "2tI,>,>'? @    # 2 27 ;    & &w /  ( (););B)?AV W %i 0  ( (););B)?AV W1rcn|dj}|jdk(}d|_|rj|j}t |j d|j d}|dkDr/|jd|dz|j|dzdc|_|_d|_|r|j|_|dd}|S)zE Helps cleaning up the tree nodes that get inserted. r-r[rLr+r,Nr")r\rrr2rrfind)rr'r_ is_endmarkerr2 separations rr%z_NodesTree._remove_endmarkersrN002  ~~4 !# %%FV\\$/d1CDJB $$_j1n5y7G7G UV7X9  $"8 #**DK#CRJrc#Kt|}|r t|t|}t|}|s||jvry||D]}t||k7ry|ywr)iterrr#r)rr' is_new_suite node_iterator first_nodeindentr`s r_get_matching_indent_nodesz%_NodesTree._get_matching_indent_nodesso Z( }% %-( !*-dll :  A"f,G sA"A$c|djdvrgSt|d}t|j}|j}|j }|j Dcgc] }||ks | c}|_|j ||jt|j||||j\} |_|_} | r|xj | z c_| S||_||_||_| Scc}w)zy Copies tree nodes from the old parser tree. Returns the number of tree nodes that were copied. rr error_node)rr#rrr2rr# _copy_nodes) rr'rrrold_working_stackr) old_indentsi new_nodes added_indentss rrz_NodesTree.copy_nodess a=  != =I&z!}5  !4!45[[ ll #'<<Da1 3CD  ##K0EIEUEU $$ %    KK F B 4& ]  LLM )L #4D $DK&DL#Es  C6(C6c\g}g}t|j||}d} |D]} | jd|kDrn| jdk(rn| jdk(r| jdvrnt | |kDrt | r|j| n | j} | } | jdk(r| jd} | jd vr| jd} | jd vr| jd} n| d} | jd vrn|j| |r|r|d}|jd vst|drKd} |j|r6|d}|jjd k(rn|j|r6nt|d kDr#|djdk(r|j |sg|||fS|d}|d}d}t |r|}|jdk7r|jd}|jdk7rt|}|j|t|t!|}|j#||gz|j||d\}}} }||z }t|dkr|jd} n|sJ|j%||}d}|rt'|djs/|s-|dj)j*}t-|dd} |rd|d}|jdk(r|jd}|jd vr|jd}|jdj}|dk(sJ|dj}|j/||||| }d|_||||fS#t$rYwxYw)N)r4rLrr[r)r r rvr-rwrzr:rer"r;Fr}r+T) is_nestedrJ:)rr8r)rrrar~r r$r0rr"r\r4r'rr#r<r r]rr2rrr)r working_stackrrrr2rDr@rA new_prefixrcr` suite_node last_nodetoshad_valid_suite_lastr}r7 suite_tos suite_nodesnew_working_stackaiplastrs rr<z_NodesTree._copy_nodess  T44 "5    ' #D~~a :-yyK'yyL(T__@Z-Zd#j0+D1$$T* MM66[( 2A66<< 2A6644!"BJ!"2J??&BB   T "O' #T %bM NN&BB(27"$JMMO#$-bM $22499YF!!  $ y>A%)B-*<*< *L MMO}fm; ;BbM $ #I .E**'r***',E2F   (&u:J9:UVI>B=M=M +U^^Z>N> :K*J R M;!#    y""9- 1 '+$ %im&A&A&CDMabM//188 )T:1= # }99 +==,D99 ??==,D(, b(9(G(G(I%,333(1" (C(C(E%    ;0E  F%'D "->>]"  s N N+*N+c|jj |jj}t |}t |j }t|j}t|dkDsJt|dk(rF|djtr|ddgk(r |dxxdzcc<|dxxt|dz cc<n*|dxxt|dz z cc<t|d|d<tdt||j|jz}|j|_|jj"j%|y#t$rddg}YwxYw)Nr"rr-rL)rrrr\rcrr3 IndexErrorrr2r4r5rrtuplerr.r$r )rr_r3rr[s rrz_NodesTree.close_s4   . 224I2)r`s6 "#&'>/g!8  " "  ,,  & & P  3"+'\]( $" NV":   @HHVHFHFVf0f0r