K i;`ddlZddlmZmZmZmZddlZddlmZ ddlm Z ddlm Z ddlmZddlmZddlm ZddlmZdd lmZdd lmZmZdd lmZd d lmZddlm Z m!Z!ddl"m#Z#d Z$GddZ%GddZ&dee'e#jPfde#jPfdZ)de'fdZ*GddeZ+y)N)DictOptionalUnionAny)ir)proton)amd)nvidia)passes)LazyDict) JITFunction)set_profile_allocator NullAllocator)backends)Hook)set_instrumentation_onset_instrumentation_off)modec,eZdZdZdededeefdZy) CudaAllocatorc||_yN)instrumentation_hook)selfrs k/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/triton/profiler/hooks/instrumentation.py__init__zCudaAllocator.__init__s $8!size alignmentstreamcF||jjk7r%td|d|jj||zdz |z|z}t||jj}ddl}|j |f|jd}||j_|S)NzAlignment mismatch: z != rrcudadtypedevice) rprofile_buffer_alignment RuntimeErrormaxprofile_buffer_sizetorchemptyuint8buffer)rr r!r" aligned_sizer,r/s r__call__zCudaAllocator.__call__s 11JJ J&ykd6O6O6h6h5ijl ly(1,:YF <)B)B)V)VW  l-U[[P+1!!( rN)__name__ __module__ __qualname__rintrr1rrrrs%9SS(3-rrc@eZdZdeeeffdZdefdZdefdZdZ y)Instrumentationir_mapc||_yr)manager)rr9s rrzInstrumentation.__init__1s  rrcZ||jvrtd|||j|<y)NzIR already registered: )r;r))rrfuncs rregisterzInstrumentation.register4s/  !8=> > Rrcn|j|||jvr|j||yyr) load_dialectsr;)rrpmcontexts rpatchzInstrumentation.patch9s4 7#   DLL R  rc.tj|yr) triton_protonr@)rctxs rr@zInstrumentation.load_dialects>s##C(rN) r2r3r4rstrrrr>rCr@r6rrr8r8/s3tCH~ 3 !! )rr8mode_objreturnc  t|tjr|S|sd}|jd}|d}i}|ddD]/}d|vr|jdd\}}|||<"t d|d|j dd |j d d |j d d t |j dd|j dd|j dd|j dd|j ddd fd}|dtj d<|d tj d <|d tj d <|dtj d<|dtj d<t ddkDr0 djdDcgc]}|jc}ng} | D]"}|tjvst d|| Dcgc]}tj|c} d<|dk(rtjdi S|dk(rtj di St d|cc}wcc}w)Ndefault:rr=z#Malformed instrumentation option: '' metric_typecycle buffer_typesharedbuffer_strategycircular buffer_size0 granularitywarpsampling_strategynonesampling_options optimizations)rOrQrSrUrWrYr[r]cN|}|r||vrtd|d||r||S|S)NzUnknown z: ) ValueError)opt_namemappingvalueoptionss rget_option_valuez)_interpret_mode..get_option_value[s@! U')xzE7;< >?HNNe 2 25 9NGOI||&g&& e xx"'"">(455A Os 3I:I!ctjjjj j }|dk(ry|dk(ryt d|)Nr$r hipr zUnsupported backend: )tritonruntimedriveractiveget_current_targetbackendr))rs r_get_backend_namerxsOnn##**==?GGG& E 27)<==rc eZdZUdZeed<dZeed<dZeed<dZ e e ed<dZ eed <d Z eed <d edeej"ffd ZdZdZde de dedeeefdeddf dZdefdZdeddfdZdeddfdZde ddfdZy)InstrumentationHookrpriority active_countFenable_host_bufferN host_bufferrr+r(rHc`t||_t||_d|_i|_yr)r{rr allocatorr/ metadata_path)rrHs rrzInstrumentationHook.__init__s).=h.G &t, 79rctjdkDr tdtxjdz c_ttj j jjtj j jjjdtfdfdtfdfdd tj_t!j"t$j&j(t+j,fd }|t$_y) NrzFOnly one instance of the instrumentation hook can be active at a time.rmax_shared_memcxtjjjjvrdnd}t j |jj jjjjjjjjjjjjjj| tj j#|tjj$jjvrt j&|t j(|tjj*jjvrdk(rt j,|yyy)NFTr )rOptimizeCLOCK32r]rEadd_convert_proton_to_protongpurOrYr[rWrSrQrUr+r( triton_passescommonadd_cse SCHED_STORESadd_schedule_buffer_store!add_allocate_proton_shared_memorySCHED_BARRIERSadd_sched_barriers)rA is_long_clk backend_namerrs rto_llvmir_passesz6InstrumentationHook.activate..to_llvmir_passess9#'==#8#8DIIS>SUYU^U^UpUp:>)):T:TVZV_V_VkVk:>)):S:SUYU^U^UjUj:>)):O:OQ_:>:R:RTXTqTq:E  G  ( ( ,}}))TYY-D-DD77;  ; ;B ?}}++tyy/F/FF<[`K`004LaFrcBtj|dk(rtj|ydk(ritjj j jjdjdd}tj||yy)Nr r archrLr) rE)add_allocate_proton_global_scratch_buffer%add_convert_proton_nvidia_gpu_to_llvmr~rrrutilsget_device_propertiesri"add_convert_proton_amd_gpu_to_llvm)rArrr's rto_llvm_passesz4InstrumentationHook.activate..to_llvm_passess  C CB Gx'CCBG&~~,,3399OOPVWX^_eefijklm@@TJ'rc|Srr6)rArs rz.InstrumentationHook.activate..s '+rc|Srr6)rArs rrz.InstrumentationHook.activate..s ~b)r)ttgpuir_to_llvmirllvmir_to_llvmc8t|d<|g|i|S)Ninstrumentation_mode)rG)rargskwargs original_mode original_runs rinstrumented_runz6InstrumentationHook.activate..instrumented_runs(-0-?F) *6t6v6 6r)rrr)rr~rrrget_current_devicerrrr8rcompilerinstrumentationrrr runr functoolswraps) rrrr'rrrrrs ` @@@@@@@ractivatezInstrumentationHook.activates  + +a /gh h((A-( &&--@@B..55;;QQRXYZjk(*  5$ K;J + ) K ; ''7 dnn-"   & 7 ' 7+ rctjdk(rytxjdzc_t}it|j_t ttjdr#tjjt_ tttjr dt_d|_y)Nrr __wrapped__)rrrrrrrhasattrr rrrrrrr/)rrs r deactivatezInstrumentationHook.deactivates  + +q 0 ((A-((* ;=''7 ! ;??M 2)oo99KO mo.  1 1.2  + rmodulefunctionnamemetadata_grouphashrIc|sytd|jDd}td|jDd}||j|<|rtj}tj |t } | dk(rtj |n| dk(rtj |tj |tj||}||_tj|} tj|} tj||| | |ytd|)Nc3JK|]\}}|jds|yw)ttgirNendswith.0rxpaths r z2InstrumentationHook.init_handle..s `d V]H_`##c3JK|]\}}|jds|yw)jsonNrrs rrz2InstrumentationHook.init_handle..s eysDcll\bNdderr r z+IR path not found in metadata for function )nextitemsr triton_irrBr@r triton_nvidia triton_amdrEparse_mlir_moduleget_scope_id_namesget_scope_id_parents libprotoninit_function_metadatar)) rrrrrrir_pathrrBrscope_id_namesscope_id_parentss r init_handlezInstrumentationHook.init_handles `n.B.B.D`bfgeN4H4H4Jegkl '48$ '')G  # #G ,,.Lx'++G4&((1  ' ' 000'BF$FN*==fEN,AA&I   , ,Xt^M]_l m!LXJWX XrcR|jdS|jjS)Nr)r/data_ptr)rs r _data_ptrzInstrumentationHook._data_ptrs#KK'qCT[[-A-A-CCrmetadatacz|jjd}|jjd}|jdn4|jj|jj z}t j |||j|tjr dt_ yyNrr"r) datarjr/ element_sizenumelrenter_instrumented_oprrrrrrr=r" alloc_sizes renterzInstrumentationHook.enter s}}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c ''dnn6F S  1 1.2  + 2rc|jjd}|jjd}|jdn4|jj|jj z}t j |||j|tjr|j|yyr) rrjr/rrrexit_instrumented_oprrr_populate_host_bufferrs rexitzInstrumentationHook.exits}}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c &&vtT^^5EzR  1 1  & &t , 2rc |r|j|rddl}ddl}ddl}dtt t fdtfd}|jdn4|jj|jjz}|jjjjd}i}t|j|d5} |j!| }ddd||d} |d} |d} |jj"t$j&j(k(r| n t+|} t|| z }|jjd k(xr1|jj,t$j.j0k(}|rt3| Dcgc]}|}}n|Dcgc] }t|}}d | d zz}d }|}|}t4||||| || | | g |}|j6d t+|zg|}|j9||z|j:d t<_t<j>d|}|jA|jCtE||j:t<j>|djG|j}|jA|jjIyyy#1swYxYwcc}wcc}w)NrtargetrIc(|ddk(ry|ddk(ryy)Nrr$rr}rrr6)rs r encode_targetz@InstrumentationHook._populate_host_buffer..encode_target s%)$.I&%/rrerprofile_scratch_size num_warpsr\(Icpur%)r&)%rr,structrrrGrr5r/rrrr[rqriopenloadrYrESAMPLING_STRATEGYNONErprW GRANULARITYWARPrangeVERSIONpackr-r.rrcopy_tensorlistview_asr)rrr,rrrr sampled_warpsrfile device_typescratch_mem_size total_unituid_num block_num is_all_warpsiuid_vec header_size header_offsetpayload_offset payload_size header_values header_bytesconfig_portion data_portions rrz)InstrumentationHook._populate_host_buffers **84    d38n  #kk1t{{7O7O7QTXT_T_TeTeTg7gJ II66<<>DDSIMDd((2C8 'Dyy '(X7K#$:; k*J$(II$?$?=CbCbCgCg$gjmpnGJ)99:IB 9955;w @U@UYfYrYrYwYw@wL&+J&78188+89a3q699w{*KM(N%L ^\S^`iku ',3M'6;;sS-?'?P-PL.3kk+ :RZ_ZeZensk.t  +0<<\kJN  d<.@ !T U.::;<HPPQUQ\Q\]L   t{{0 1[58  ' 'V99sK$ K4K#K)r2r3r4rr5__annotations__rrboolrrrr+r(rrGrrhrrrrrrr rrrr6rrrrsHcL#$$!%K#%  $'c':tS$2J2J'J!K:9+v8Y#YYCYQUVY[^V^Q_YgjYosY6D3D3h343-X-$-N2cN2dN2rr),rtypingrrrrr~triton._C.libtritonrrrrEr rr rr rtriton._C.libprotonrtriton.compilerr triton.runtime.jitr triton.runtime._allocationrrtriton.backendsrhookrflagsrrr\rrrr8rGrhr{rrr6rrr%s-- /71773$*K$C .))&36eC)A)A$AB36tG_G_36l>3>f2$f2r