L i_9^ddlZddlZddlZddlmZddlmZddlmZddl m Z ddl m Z erddl mZdd fd efd Zej"ed d Zej"edd ZeGddZeGddZGddZGddZddZdddfdZdZddZy)N)deque) dataclass) TYPE_CHECKINGprofile) DeviceType) _KinetoEventc|jSN)childrenxs [/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/profiler/_utils.pyrs 1::Freversec#K|rtnd}t||}|r4||}||||D]}|j||r3yyw)Nc|Sr r s rrz_traverse..sqr)reversedrappend)treenext_fn children_fnrorder remaining curr_event child_events r _traversers\H[EeDk"I Y'  Z!89 *K   [ ) * s AAAc"|jSr )popr s rrrs aeegrT)rrc"|jSr )popleftr s rrrs rcVeZdZUdZeed<dZeed<dZeed<dZeed<e dZ y) EventMetricsrduration_time_ns self_time_ns idle_time_ns queue_depthcT|jdk(ry|j|jz S)Nrg)r&r(selfs rfraction_idle_timezEventMetrics.fraction_idle_time(s*  A %  4#8#888rN) __name__ __module__ __qualname__r&int__annotations__r'r(r)propertyr-rrrr%r%!s=cL#L#K 99rr%c0eZdZUeed<eed<dZeed<y)Intervalstartendrr)N)r.r/r0r1r2r)rrrr5r5/s J HKrr5c>eZdZd dZdZdZdefdZdee fdZ y) EventKeyreturnNc||_yr event)r,r=s r__init__zEventKey.__init__7s  rc@t|jjSr )hashr=idr+s r__hash__zEventKey.__hash__:sDJJMM""rc\|jj|jjk(Sr )r=rA)r,others r__eq__zEventKey.__eq__=szz}} ..rc0|jjSr )r=namer+s r__repr__zEventKey.__repr__@s**//"#r intervalscd}t|d}|rgt|jj|dj}t |jj |dj}||kr|||z z }d\}}|t|kr||}||}|dz }|j|jkDr2|j|jkDr|dz }U|j|_|}t|jj|j}t |jj |j}||kr|||z z }|t|kr|S)Nrc|jSr r6r s rrz,EventKey.intervals_overlap..Es AGGrkey)rrO) sortedmaxr= start_time_nsr6min end_time_nsr7len) r,rI overlap_time overlap_start overlap_endij prev_interval curr_intervals rintervals_overlapzEventKey.intervals_overlapCsJ 9*;<  8 8)A,:L:LMMdjj44il6F6FGK{* m ;; 1#i. %aLM%aLM FA  =#6#66 $$}'8'88FA*7*;*;M'A 8 8-:M:MNMdjj44m6G6GHK{* m ;; !#i. $rr:N) r.r/r0r>rBrEstrrHlistr5r]rrrr9r96s-#/$#$4>rr9cJeZdZdeddfdZd dZdZd dZdZd d e d e fd Z y)BasicEvaluationprofr:NcX||_i|_|jtd|jj Dd|_|j Dcgc]}|j c}|_g|_|j|_ |jycc}w)Nc3 K|]}|ywr r).0es r z+BasicEvaluation.__init__..js ,1Q ,s c.|jjSr )r=rRr s rrz*BasicEvaluation.__init__..jsAGGzBasicEvaluation.__init__es 57    , ))+ ,2Q )-81qww8 /1 $ 8 8 :  9sB'c6|jjJt|jjj}|r|j }|j }|j D]"}||j z}|j|$t||jvs!Jd|jd|jt||jt|<|j |jt|_|ryy)zM Computes event's self time(total time - time in child ops). NzDuplicate id: z, )r') rkineto_resultsrexperimental_event_treer!r&r rr9rjrArGr%)r,stackr self_timers rrkz!BasicEvaluation.compute_self_timeqs||**666dll11IIKLJ"33I)22 * [999  [) *J't||;  r*//1BC ;2>91UDLL*- .",!cudaLaunchKernel__cudaLaunchKernelcudaLaunchKernelExCcudaLaunchCooperativeKernel&cudaLaunchCooperativeKernelMultiDevicerGc3@K|]}j|ywr ) startswithrfpatternrGs rrhzUBasicEvaluation.compute_queue_depth..is_cuda_launch_kernel..sOGtw/Os)r_getattrany)rglaunch_patternsrGs @ris_cuda_launch_kernelzBBasicEvaluation.compute_queue_depth..is_cuda_launch_kernels0Owq&!,-DOOO Orc|jtjk7rytt |d|j hd}t fd|D S)z,Check if the event is a CUDA runtime kernel.FrG>cpymemfreeallocc3&K|]}|v ywr rrs rrhzNBasicEvaluation.compute_queue_depth..is_cuda_kernel..sKw7d?Ks) device_typerCUDAr_rlowerr)rgexclude_patternsrGs @ris_cuda_kernelz;BasicEvaluation.compute_queue_depth..is_cuda_kernelsS}}*//1wq&!,-335D ? K:JKKK Krc34K|]}|s |ywr r)rfrgrs rrhz6BasicEvaluation.compute_queue_depth..s D1+@+CQ D c"|jSr start_nsr s rrz5BasicEvaluation.compute_queue_depth.. !**,rrMc34K|]}|s |ywr r)rfrgrs rrhz6BasicEvaluation.compute_queue_depth..s =1>!+.rrc"|jSr rr s rrz5BasicEvaluation.compute_queue_depth..s 1::<rrcF|jjk(Sr )linked_correlation_id)rcuda_launch_events rrz5BasicEvaluation.compute_queue_depth..s!!113$::<=rrLct|dr|jdzSt|dr|jSt|dr |jSt d)Nstart_usrrRzUnknown Event Type)hasattrrrrR Exceptionr<s rnew_old_event_comparatorzEBasicEvaluation.compute_queue_depth..new_old_event_comparatorsWuj)~~'$..uj)~~''uo.***01 1rrrrrRrO)rrtrnrProindex_of_first_matchsortrr duration_usr duration_nsrRrTrUrQrr5rjr9r))r,cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index all_eventsrrqr= start_timeend_timecurrent_queue_depthrrrs @@@rrpz#BasicEvaluation.compute_queue_depths ||**666,,55<<> P L$ D D& $ = =&  " !3 39O 35!3 T ("=( E 16N, -*/*;AS  T !!'*<>+e.?.?.AAN*~e/D/P+9%+@(0"00  ,,%s+='>>'(<=FFHZW$)$%s+='>>'(<=FFHZW#79M"MPQ"Q "%&91"= uj)WUJ-G ''Z3FG0.-s .?)?rrL)r6r7)dtypeg333333?T)rNr)torchr`rrqr)rUrangerargmaxrr5r6rjrlr]tensorr'float32r-meanstdrPzipoperator itemgetter)r,lengthrrqrg qd_valuestop_threasholddecrease_intervalrYrZnext_minimum_idxpeak_idxr=rrw idle_timenormalized_gainnormalized_selfheuristic_score_list_rs @r rank_eventszBasicEvaluation.rank_eventss )>)> ?@,<=qQ]]= = #i. |//Q1q5#i.1 $8?q$ ")1:JK'Ih,?>,Q%,, ,X6<<>Nq>Q>W>W -=,H(aA! " FA+#i. 2**, &&'89    ?IJee$11Jmm%I EOPEe$77Pmm%I )5::i+@@EIIiDXXO(5::i+@@EIIiDXXO#2S?5J#J !',j9 ++A. !AuJ$GV,Js>: KQsII I I: I$r print_enablec|j|}|s|S|rdnd}|dj|Dcgc]@}dd|dt|jd|j|j dzd d d Bc}z }|r t ||Scc}w) NzOptimizable events: zNo events to optimize  zP--------------------------------------------------------------------------------z Event: z Source code location: z Percentage idle time: dz.2fz% )rjoinsource_code_locationr=rjr-print)r,rrroutputr=s rget_optimizable_eventsz&BasicEvaluation.get_optimizable_eventsZs%%f-  ,6(rkrprrrr1boolrrrrrbrbds@ !W ! !=,l \08GRSDrrbcz||t|k\r t|}t||D]}|||s|cSyr )rUr)seq predicater6r7rYs rrrosF {cSXo#h 5#  SV H rc|Sr rr s rrrxsarch|||}t|dk(ry|jt|||zS)NrrM)rUrrQ)rrNr6r7s rrrxs6 eC.C 3x1} 99S#& '% //rc~|;tjd|j}| |j}1|jSy)Nz \.py\(.*\)zNo source code location found)researchrGparent)r=matchs rrrs:   -4 =LLE zz *rcJddlm}|5 dddy#1swYyxYw)Nrr)torch.autograd.profilerrrs r_init_for_cuda_graphsrs"/      s")rNr^) functoolsrr collectionsr dataclassesrtypingrrrtorch.profilerrtorch.autogradr rrpartial traverse_dfs traverse_bfsr%r5r9rbrrrrrrrrs ! +%+*>u**!y  4EtT  y   ,e   9 9  9   ++\HHV qd0+ r