K i"ddlZddlZddlZddlZddlmZddlmZddl m Z ddl m Z ddl Z ddlZ eGddZdZd Zd efd Zed k(reyy) N)ArgumentParser) dataclass)Path)ListceZdZUdZdZeed<dZeed<dZeed<dZ eed<dZ edzed<d Z e ed <d Z e ed <dZedzed <dZedzed<y) CompileArgsz@ A class to contain arguments from command-line parser. path kernel_name signaturegridNtarget num_warps num_stagesout_nameout_path)__name__ __module__ __qualname____doc__r str__annotations__r r r rrintrrrrZ/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/triton/tools/compile.pyrrsmD#NKIsD#NFC$JIsJHcDj HdTk rra Triton ahead-of-time compiler: This program compiles the kernel with name `kernel-name` in the file at the provided `path` into self-contained C source-code that embeds the `cubin` data along with utilities to load, unload and launch the kernel. signature is provided as a list of (optionally divisibility-hinted) types or constexpr values, e.g. `compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py` will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`. Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16, and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype. The resulting entry point will have signature CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2) Different such specialized entry points can be combined using the `linker.py` script. NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter used to run this `compile.py` script cJtt}|jdd|jddtddd |jd d tdd |jddtdd|jddtdd|jddtdd|jddt dd|jddtdd |jd!d"td#d |j }td$it|}t|y)%N) descriptionr zTPath to Python source containing desired kernel in its scope. File will be executed.)helpz --kernel-namez-nr zName of the kernel to compileT)typedefaultr!requiredz--targetz-tzThe target to compile towards, in format of '::'; e.g., 'cuda:80:32', 'hip:gfx942:64'. Default to None, which means using current machine's GPU target)r"r#r!z --num-warpsz-wrz$Number of warps to launch the kernelz --num-stagesz-nsrz/Number of stages (meta-parameter of the kernel)z --out-namez-onz Out name for the compiled kernelz --out-pathz-oz Out filenamez --signaturez-szSignature of the kernel)r"r!r$z--gridz-gzLaunch grid of the kernelr) rdesc add_argumentrrr parse_argsrvarscompile_kernel)parsercli_argsargss rmainr-9s4  -F su CJi!%' DsDop  t#qGmn CNP  e#tJlm  dt.Y  t#j@jC||||}|jDr?t?jFj@jH|jDj%d n1t>jJjLjNjQ}t>j@jS|}|j0|j2d}|jU|}t?jV|||jX}t!|jZdddkDr t]d|jZj^dkDr t]dg}g} g}!g}"t5|j8D]\} }#|#|vrK|ja|#| ja||#|!ja|#|"ja||#U|jc| fdd k(sl|ja|#| jadd}$t5|j=D]J\} }%|$t| z }$|jc| fdd k(r|$dz }$|jc| fddk(sF|$dz }$Ldj;|| |$g}&|jd|jf}'ttijj|'d d!}(t>jJjLjNjl})id"|&d#|jd$t'|'d%d&j;to|(ddd |(d dd D*+cgc] \}*}+d'|*|+c}+}*dd&j;to|!|"D,%cgc]\},}%|)|%d(|,c}%},d)d&j;to|| D,%cgc]\},}%|)|%d(|,c}%},d*d&j;|!D-cgc]}-d+|- c}-d,gzd-gzd.t'|!d zd/|d0|jZjpd1|j0d2dj;|| gd3|dd4|d d5|d d6d}.g}/|jr}0tttjd7z |0z }1|1jwd8D]}2|2jx}3|j{d9| d|$|3}4|4j}d:5}5|5j|2jjd;i|.ddd|/ja|4|&|/fScc}} wcc}}wcc}} wcc}}wcc}} wcc}wcc}}wcc}}wcc}+}*wcc}%},wcc}%},wcc}-w#1swYfxYw)z compile_kernel.._s1773<rr ctj}|jdj|j |j ddS)Nr1)hashlibsha256updatejoinencode hexdigest)r ms rhash_signaturez&compile_kernel..hash_signatureas? NN  )$++-.{{}Ra  rwarpsxstagescv t|}|S#t$rYnwxYw t|}|S#t$rYywxYw)N)r ValueErrorfloat)r3rets r constexprz!compile_kernel..constexprisO a&CJ    (CJ   s   , 88:rrEx=z num_warps=z num_stages=)rz#Only 1 and 16 are valid hints, got rIztt.divisibility)fn constexprsr attrs)rr)roptionsglobal_scratch_sizezMAOT compiling kernels with global scratch requirements is not yet implementedzNAOT compiling kernels with profile scratch requirements is not yet implementedi32r cd_r triton_kernel_namebin_sizebin_dataz, 0xr1full_signature arg_pointers&z&global_scratchz&profile_scratchnum_argskernel_docstringsharedr algo_infogridXgridYgridZ _placeholderextraz compile.*.wr)Brr rrr sysinsertrparent importlibutilspec_from_file_locationstemmodule_from_specloader exec_modulegetattrr splitlenlistmapr rrr enumerateitems arg_namesr:valuestritoncompiler ASTSourcerbackends GPUTargetruntimedriveractiveget_current_target make_backend parse_optionscompile__dict__metadata RuntimeErrorprofile_scratch_sizeappendgetasm binary_extbinasciihexlifymap_python_to_cpp_typezipr^backend__file__globsuffix with_suffixopenwrite read_textformat)6r,rrarg_pathspecmodkernelr r r>meta_sigsig_hashrEir3hintskv constantskeyvalue const_sig doc_stringhrLsrcrrkwargsrMccinforx arg_typesarg_names_not_1arg_types_not_1arg_namerty func_namerhex_ ty_to_cpprGynameargparams output_files backend_name template_dir template_pathext output_filefps6 rr)r)Ps $ t}}43C3CH $ t}}4>HDIIHHHOOAs8??+, >> 1 1(-- JD .. ) )$ /CKKC S$** +F 99??3 D t9>>S/1E1Ec1JKLI!$s)! t~~&gdoo->?Hi8*45H I)2):;AQCq*;J;Z/0K?P2QRRJ \\^GG|FB1#FF|G6;kkm OdaqBwQ#R() ) OE O // # #v)y`e # fC ;;__ % % / /1B1B31G H"NN1188KKM oo**62G>> IF##F+G ^^C8H8H IFv 5q9A=jkk ++a/kllIIOO !1!12$ 8 9 $   X &   Yx0 1  " "8 ,  " "9X#6 7 YYud #q (   X &   U #$F9++-.2#a& 99aUD !Q & cMF 99aUD !R ' cMF  (Hf56I **W'' (C x$ %a +D%%,,CCIyd.. CH DIIs4!9d14a4j7QRtq!A3qc{RS  TYY#o_nJophdB9R=/4& 9pq  $))sS\^gOh$i84 " av%>$ij   "HQse9"HL]K^"^btau"uv C(1, J &//(( T^^ SXXy(34 a a a !F$L>>L>((72\AL%**;7) ""**Qxj&#+GH   c " Ab HH5],,.55?? @ AK( ) l ""q ] =TEX>; P`Sp$i"H" A Asl. b);b)/ b/:b/ b5 b;b;,c2cc  cc c?c9c$( c* 0c//c8 __main__)rr7importlib.utilrjrgargparser dataclassesrpathlibrtypingrrztriton.backendsrr%r-r)rrrrrsk #!   ! !  !6.~#~#B zFr