L iE ddlZddlZddlZddlZddlmZddlmZddlm Z ddl m Z m Z m Z ddlZddlmZddlmZddlmcmZddlmcmZddlmZmZddlmZddlm Z m!Z!gd Z"d e#d e$e#e#ffd Z%d ee&dejNde(e#e ffdZ)dejNde(e#e fdej$jTfdZ+d)dej$jTd ej$jTfdZ,dejTd ejTfdZ-dejTde.ejNde.ejNde.ejNfdZ/ej`ejbejdejfejhejjejlejnejpejrejnejtejvg Z (`foo.bar`, `baz`) .r)rsplit)rparentnames h/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/fx/experimental/optimization.py _parent_namer$%s3 MM#q)MVT6!9 ,,B ,,patternnodemodulescjt|jdk(ry|jd|f}t||D]z\}}t|tj sy|j dk7ryt|jtsy|j|vryt||j|uszyy)NrF call_moduleT) lenargszip isinstancefxNodeoprstrtype)r&r'r(nodes expected_type current_nodes r#rr/s 499~"&))A,!5E'*7E': # |,0 ??m +,--s3   g -  ++, -] B  r% new_modulect|jtsJt|j\}}|||j<t ||||yN)r.rr2r$setattr)r'r(r7 parent_namer"s r#rrCsJ dkk3 '' '$T[[1K%GDKK GK $ 3r%modelctjtjftjtjftj tj ftjtjfg}|stj|}|r$t|tjjstj|}n|}t|j!}tj|j"}|D]}|j$D]}t'|||st)|j*dj,dkDr8||j*dj.} ||j.} | j0sp|dtjtjtj fvr t3| | } n t5| | } t7|j*d|| |j9|j*d|j;|tj||S)z Fuses convolution/BN and linear/BN layers for inference purposes. Will deepcopy your model by default, but can modify the model inplace as well. rr)nnConv1d BatchNorm1dConv2d BatchNorm2dConv3d BatchNorm3dLinearcopydeepcopyr.torchr/ GraphModulesymbolic_tracedict named_modulesgraphr4rr+r,usersrtrack_running_statsr r rreplace_all_uses_with erase_node) r<inplaceno_tracepatternsfx_modelr( new_graphr&r' first_layerbn fused_layers r#rrLs BNN# BNN# BNN# BNN# H  e$ :eUXX-A-AB$$U+8))+,G hnn-I+OO +D%gtW=tyy|))*Q.%diil&9&9: T[[)--1:"))RYY !BB"3K"DK"5k2"FK#DIIaL';G**499Q<8$$T* ++" >>(I ..r%ctj|}Gddtjj}||j S)z5 Removes all dropout layers from the module. cDeZdZdedeedfdeeefdeffd Z xZ S)&remove_dropout..DropoutRemoverrr,.kwargsrct|j|tjrt |dk(sJ|dSt ||||S)Nrr)r. submodulesr>Dropoutr+superr*)selfrr,r] __class__s r#r*z2remove_dropout..DropoutRemover.call_module{sI$//&12::>4yA~%~Aww*64@@r%) __name__ __module__ __qualname__r tupler rKr2rr* __classcell__)rcs@r#DropoutRemoverr\zsE A  A(-hm(< AFJ3PS8n A  A Ar%ri)r/rJrH Transformer transform)r<rUris r#rrtsB  'HA--A ( # - - //r% orig_moduler4inputsoutputscZ tj}i |D]"}|j|j}| |<$|D]}|j | fd}| |<|j |Dcgc]} | c}|j tj||Scc}w)z Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph. c|Sr9)xenvs r#z"extract_subgraph..s s1vr%)r/Graph placeholderr" node_copyoutputlintrI) rlr4rmrnrVinputnew_noder'rxrss @r#rrs I"$C((4E &&t-=>D 8fc&k89 NN >>+y 119s/ B(c,tj|Sr9) th_mkldnnMkldnnBatchNorm)a_s r#rtrts!:!:1!=r%ci}|D]}|jdk(st|jtsJ||j}t |t vsPt t ||t j}t|tjsJtj|||<t||||S)z For each node, if it's a module that can be preconverted into MKLDNN, then we do so and create a mapping to allow us to convert from the MKLDNN version of the module to the original. r*) r1r.rr2r3 mkldnn_maprHfloatr>ModulerFrGr)r4r( old_modulesr' cur_moduler7s r#rrs /1K? 77m #dkk3/ // -JJ:-'Z(89*ekkR !*bii888*.-- *C J'#D':>? r%rc|D]R}|jdk(st|jtsJ||j}||vsCt ||||Ty)za Maps each module that's been changed with `modules_to_mkldnn` back to its original. r*N)r1r.rr2r)r4r(rr'rs r#rrs\L 77m #dkk3/ // -J[(#D';z3JK Lr%c,eZdZdejfdZy)rfx_graphc<||_g|_g|_g|_yr9)rr4 start_nodes end_nodes)rbrs r#__init__zMklSubgraph.__init__s  $& *,(*r%N)rdrerfr/rurrqr%r#rrs++r%rcDdddtdtffd }|S)aW This generates a heuristic that can be passed into `optimize_for_inference` that determines whether a subgraph should be run in MKL by running it with the example_inputs. Example usage: heuristic = gen_mkl_autotuner(example_inputs, iters=10) fast_model = optimization.optimize_for_inference(model, heuristic) NrMrc|j} F|jj |jj t j |Dcgc]!}t j|j#c}tttj|jDcgc]}|jdc}}t |j || fd}|fd}t#j$j t'j) |fd}||kScc}wcc}w)NrctD] }| tj}tD] }| tj|z Sr9)rangetime)frbeginiterswarmups r# benchmarkz?gen_mkl_autotuner..use_mkl_heuristic..benchmarksO6]  IIKE5\  99;& &r%cDcgc]}|jc}Dcgc]}|jc}Scc}wcc}wr9) to_mkldnnto_dense)i sample_inputs submodules r#rtz>gen_mkl_autotuner..use_mkl_heuristic..s<&/1WA!++-1W&X!" 1Ws ?AcSr9rq)rrsr#rtz>gen_mkl_autotuner..use_mkl_heuristic.. s =(Ar%)rr owning_modulerr propagaterHrandnshaperlistr/r0rr,rr4rrMrKrL)rM input_nodesr' output_argsrmkl_time no_mkl_timerrexample_inputsrUrrrs @@r#use_mkl_heuristicz,gen_mkl_autotuner..use_mkl_heuristics''  ~~33H..44K h  ) ). 9=HITTZZ0I 4=EOO*TD499Q<*TU $Xu{{KU  '    OO ! !4 (?(?(A#BK  AB +%%-J*Ts &D>.E )rbool)rrrrrUrs``` @@r#rrs/HK&&&&> r%rMc2t|jdkDS)z This is a heuristic that can be passed into `optimize_for_inference` that determines whether a subgraph should be run in MKL by checking if there are more than 2 nodes in it )r+r4)rMs r#rrs u{{ a r%c>eZdZdZdefdZdedefdZdedefdZy ) rc0dg|z|_dg|z|_y)Nrr!size)rbns r#rzUnionFind.__init__s,06A: !sQw r%vc@||j|<d|j|<y)Nrr)rbrs r#make_setzUnionFind.make_set s A ! r%rc|j|}||k(r|S|J|j||j|<tt|j|Sr9)r!findrint)rbrpars r#rzUnionFind.find$sOkk!n 8H3 ACQ((r%rbc|j||j|}}||k(r|S|j||j|kr||}}||j|<|j|xx|j|z cc<yr9)rrr!)rbrrs r#joinzUnionFind.join,soyy|TYYq\1 6H 99Q<$))A, &aqA A !  ! $ r%N)rdrerfrrrrrrqr%r#rrs9'#)c)c)%c%c%r%r pass_configtracercdddtid}|i}|j||dr t|}|dr t|}|ddur|St |dt s t d d|dvr t d |dd}|}|jtj|tj|jt |j}Gd d t}tj D]}|j"} |j$d k(r||j&} t)| t*vr|j,} t/| j1d} | | j2t4j6k(sJd| j8t5j8dk(sSJd|j$dk(r=|j&t*vr |j,} n|j&t:vr |j<} | |j"k7s| |j<k(rt?d|j@Ds>jC|5tjD|j@fd} dddtGtHtjJjL |_ jO|5jQdd|f} |jS| |f| _ dddtUtj |}|_+j D]}|j$dk(s|j&dk(s#|j@d}t|jX}|D]D}|j$dk(s|j&dk(s#|jS|j[|Ft]|jXdk(sj[|t]j }t_|fd}taj D]%\}}|j$dk(r(|j&dk(r||_1je|>|j$dk(rA|j&dk(r2||j@dJ||j@d|_3|jhDcgc],}t |tjjr||||.}}t]|dk(rt?d|DrJtm|}|d|_7|ddD]}jq|d|(tsfd}j D]}tu|dr7|jw|jnj jy|tu|dr7|jw|jbjzjy|tu|ds|jw|jfj|jy||jD]q}||r |jz|j|zD]3}|j@d}|jS|j[|5t|j ||sd}j D]&}|j&dk(s|j&dk(s"|dz }(tjtjd|jtj|}|S#1swYxYw#1swYRxYwcc}w) a Performs a set of optimization passes to optimize a model for the purposes of inference. Specifically, the passes that are run are: 1. Conv/BN fusion 2. Dropout removal 3. MKL layout optimizations The third optimization takes a function `use_mkl_heuristic` that's used to determine whether a subgraph should be explicitly run in MKL layout. Note: As FX does not currently handle aliasing, this pass currently assumes nothing aliases. If that isn't true, use at your own risk. T heuristic) conv_bn_fusermkldnn_layout_optimizeNrrrFz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configceZdZdZdZdZy)*optimize_for_inference..MklSupportrrN)rdrerfNOYESUNKNOWNrqr%r# MklSupportrbs r%rr*z)this pass is only for torch.float modulescpuz!this pass is only for CPU modules call_functionc3:K|]}|jdk(yw)rN)r).0args r# z)optimize_for_inference..sI3::3Isc*jd|fS)Nr) call_method)rrs r#rtz(optimize_for_inference..s)=)=kA4)Pr%rrrrct|drj|jSt|drj|jSy)Ncolor start_color)hasattrrrr)rufs r# get_colorz)optimize_for_inference..get_colors@ 1g 77177# # 1m $771==) )r%c3$K|]}|du ywr9rq)rrs r#rz)optimize_for_inference..s9199srctSr9)r)rsr#rtz(optimize_for_inference..s H@Ur%rr end_colorzmkldnn conversions: %s)Frupdaterrr.rK RuntimeErrortracerFrGr/rIrootrLrrr4rr1rr3mkldnn_supportedrnext parametersdtyperHrdevicemkldnn_supported_unknownranyr,inserting_beforemap_argrrgr'r inserting_after create_noderPrrrNrQr+r enumeraterrrall_input_nodesr0sortedrrrrrappendrrvaluesrlogging getLoggerrdinfory)r<rrdefault_pass_configr cur_tracerr(rr'supports_mkldnnrsample_parameter mkldnn_argsdense_xrprv_noderNuser num_nodesrcur_idxr cur_colors other_color mkldnn_graphsrMprvmkldnn_conversionsresultrrs @@r#rr6s&#."?  {+>*U +,u%34= )*BCT JHII-.FGGQRR+,DEkRJ e 45HNN:??H-$()<)<)>$?GTX^^$"'$-- 77m # -JJ#33",..#' (=(=(?#F #/+11U[[@C@,22ell56II;IWW '{{..",.. 88","4"4 jmm +*"4"44ItyyII**40  jjIIP   U277#3#34kBDI))$/ '"..}j4'R**73 $w  ' '?"'J$D$8'BK&H * 77m # z(Ayy|H$E .77m+ {0J..x8''- .4::!###D) *HNN#I 9 B$#8>>24  77m # {(B&D  KK WW %$++*CTYYq\*6 66&tyy|4DN--a)Q<+! J:!#9j99 99 +J#ADJ)!"~ 4  1 {3 4)4.-88U,VMJ 4 ! "''$**- . 4 4 ; ;D A 4 ' "''$"2"23 4 @ @ G G M 4 % "''$..1 2 < < C CD I J%%'= '))EOO; *iil**3/##D) * %++w < =$ ;;+ % )B ! # $ h$$%=?QR MMO ^^E8 ,F MG   ' 'ds$] .]1]"] ] )FF) r)LrFroperatorr collectionsrcollections.abcrenumrtypingrrrrHtorch.fxr/torch.nnr>torch.nn.functional functionalFtorch.utils.mkldnnutilsmkldnnr} torch.fx.noder r torch.fx.passes.shape_propr torch.nn.utils.fusionr r __all__r2rgr$r3r0rKrrrrrrrrArErBReLU MaxPool2d AvgPool2dAdaptiveAvgPool2drelu transposesigmoid avg_pool2dadaptive_avg_pool2draddmulr MkldnnConv2d MkldnnLinearrrrrrrrrTracerrrqr%r#r!s  #$&& &&*0H  --sCx- d^#%7759#s(^(4 ''4 cN48=4%/%/588??%/P0"))0 0&22 =2 M2"''] 2.IIIINNGGLLLL JJ OO MMFFLL&%LL(,,7IIy%%IIy%%NN= T"'']T#ryy.5I&L =L #ryy. !Lbii*+L"+++\ + $ %%:-1 iip 88??p$sCx.)p Op XX__ pr%