K i]2ddlmZddlZddlZddlZddlmZmZmZm Z ddl Z ddl Z ddl Z ddlmZddlZddlmZddlmZddlmZddlmZdd lmZd d lmZd d lmZeGd dZ GddZ!GddZ"edGddZ#dZ$dZ%dZ&dZ'dZ(e jRe'e jTgZ+e jRe'e jXgZ-e jRe(e j\gZ/GddZ0Gdd Z1d!Z2d"Z3d#Z4Gd$d%Z5Gd&d'e5Z6Gd(d)e5Z7d*Z8d+Z9d,Z:d-Z;d.Z<e1Z=ee=Z>d/Z?d0Z@Gd1d2ZAGd3d4ejZCGd5d6ZDGd7d8ZEy)9) annotationsN)TupleListDictCallable) dataclass)TritonSemantic)TensorDescriptor)InterpreterError)partial) interpreter)ircneZdZUdZded<ded<ej eZded<d Z d Z d Z d Z y ) TensorHandlez data: numpy array dtype: triton type, either pointer_type or scalar_type. we don't store block_type here because the shape information is already available in the data field attr: a dictionary of attributes znp.arraydataztl.dtypedtype)default_factoryrattrcHt|jjSN)boolrallselfs `/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/triton/runtime/interpreter.py__bool__zTensorHandle.__bool__#sDIIMMO$$ch|j}t|dr|j}t|dr|S)N element_ty)rhasattrr!)rrs rget_element_tyzTensorHandle.get_element_ty&s1 e\*$$Ee\* rc^t|jj|jSr)rrcopyrrs rclonezTensorHandle.clone,sDIINN,djj99rc"||j|<yr)r)rkeyvalues rset_attrzTensorHandle.set_attr/s #rN) __name__ __module__ __qualname____doc____annotations__ dataclassesfielddictrrr#r&r*rrrrs> N O"""48D$8% :rrceZdZdZdZy)BlockPointerHandlecX||_||_||_||_||_||_yr)baseshapestridesoffsets block_shapeorder)rr7r8r9r:r;r<s r__init__zBlockPointerHandle.__init__5s-    & rcf|jj}|jdz}tj|jj |j }tj|j t}tt|j D]}dgt|j z}|j |||<|j|j tj|j |zj|}|||z|j|j zjtj z}||vs|||j"|j kz|dk\z}t%||jj&j(}||fS)Nrr r)r7r#primitive_bitwidthnp broadcast_torr;onesrrangelenr:arangereshaper9astypeuint64r8rrscalar) rboundary_checkdtype_ttn_bytesptrsmasksdim bcast_dimsoffs rmaterialize_pointersz'BlockPointerHandle.materialize_pointers=sb99++---2tyy~~t/?/?@((5T--./ JCs4#3#344J"..s3JsO<<$))BIId6F6Fs6K,LLUUV`aC7S=4<<+<+A+AAII"))TTDn$tzz#';';!;<qI  JD$))//"8"89U{rN)r+r,r-r=rTr3rrr5r53s  rr5c&eZdZ ddZdZddZy)TensorDescHandlecj||_t||_||_||_||_||_yr)r7rFndimr8r9r;padding)rr7r8r9r;rYs rr=zTensorDescHandle.__init__Os1 J   & rc|jjjdzdk(sJdt|j|j k(sJt|j |j k(sJ|j dk\sJd|jddD])}|jjdzdk(r$Jd|jdjjdk(sJdy) Nrzbase must be 16-byte alignedr z"descriptor cannot be 0 dimensionalzstride must be 16-byte alignedzlast dim must be contiguous)r7ritemrFr9rXr;)rstrides rvalidatezTensorDescHandle.validateXsyy~~""$r)Q.N0NN.4<< DII---4##$ 111yyA~CCC~ll3B' RF;;##%*a/ Q1Q Q/ R||B$$))+q0O2OO0rct||jk(sJ|jjj}|j dz}|dj |zdzdk(sJdtj|jj |j}tj|jt}tt|jD]}dgt|jz}|j|||<||j tj|j|zj|}|||z|j|j zj!tj"z}|d|kz||j$|j kz}|jtj"k(sJt'||jjj(}||fS)Nr?r\r[rz*block offset start must be 16-byte alignedr@r )rFrXr7rr!rArrBrCr;rDrrErGrHr9rIrJr8rrK) rr: scalar_tyitemsizerOrPrQrRrSs rrTz%TensorDescHandle.materialize_pointersbs7|tyy(((IIOO.. //14   8+r1Q6d8dd6tyy~~t/?/?@((5T--./ FCs4#3#344J"..s3JsO3<$$ryy1A1A#1F'GGPPQ[\C8c>DLL,=,B,BBJJ299UUDQ#X&# 30D0D*DEE  F zzRYY&&&D$))//"8"89U{rN)r7rr8List[TensorHandle]r9rcr; List[int])r:rc)r+r,r-r=r_rTr3rrrVrVMs'PrrVT)frozenceZdZUdZded<dZded<dZded<dZd ed <d Zd ed <dZ d ed<dZ d ed<dZ d ed<dZ ded<dZ d ed<y)InterpreterOptionsNr2 extern_libsFrdebugTsanitize_overflowstrarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15z Tuple[str]supported_fp8_dtypesr3!deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)rttf32x3ieeeallowed_dot_input_precisionsrintmax_num_imprecise_acc_defaultr backend_name)r+r,r-rhr/rirjrlrrrsrurxrzr{r3rrrgrguslKE4"t"D#'^*^46%z6'--/I *I)*!3*%L#%rrgc|tjk(rtjS|tjk(rtjS|tj k(rtj S|tjk(rtjS|Sr) rBuint8int8uint16int16uint32int32rJint64r@s r_get_signed_np_dtypers[ ww  xx  xx  xx Lrc`t|tjr#tjtj Sitj tjttjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtjtj tjtj tj"tjtj"tj tjtj tj$tjtjtj&tjtjtj(tjtjtj*tjtjtj,tjtjtj.tjtji}t|tj0rVt|j2tjr#tjtj S||j2S||Sr) isinstancetl pointer_typerBrrJint1rfloat16float32float64r~r}rrrrrbfloat16float8e5 float8e5b16 float8e4nv float8e4b8 float8e4b15 block_typer!)tt_dtypenp_typess r _get_np_dtypers~(BOO,xx "" $ BHHRZZ( BHHRZZ( BHHRZZ(  "''"  "((288$  "((288$ 288BII& "((288$ 288BII& "((288$ 288BII& RXXbii( RXXbhh'!" *#$ rxx)%& rxx)'( *)H,(BMM* h))2?? ;88BII& &++,, H rcttd|j}ttd|j}tj|j |}||jdz z dz}|j|j z dz }|j|j z dz } |d|j zdz z} |j } |j } ||j z d|zdz zjtj} | dk(}tj|rtj|tj}t|j D]}| |z dz}|j |z ||dk(<!| dk(}d||z | |<| | z | ||z<| |||zd|j zdz z| |<tjdtj| | z | zd| zdz }|j|}|j|}|j|jkDr| |j |j z z d|j zdz z}|tjj k(r*| d|j |j z dz zz}||dkDz}|j|}n>| j||j |j z zd|j zdz z}|dk(}tj|r||j z d|zdz zjtj} | dk7}||z}tj|tj}d| z | || z z ||<||||z d|j ||z zz||<||jdz z||j zz|z}|j#|j$S)Nuintr@r r)getattrrBrA frombuffertobytesfp_mantissa_width exponent_biasrIrany zeros_likerEmaximumminimum_ir ROUNDING_MODERTNErHr8)input input_dtype output_dtype rounding_modeinput_uint_dtypeoutput_unint_dtype input_binsigninput_exponent_widthoutput_exponent_width significand bias_input bias_outputexponentsubnormal_indexbit_posi bit_indexzero_significand_indexexponent_output sign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs r_convert_floatrs+rT+*H*H)I#JK tL,K,K+L%MN emmo5EFI +881< = ED&99K\>\\_``[%B%B BaGHK**J,,Kk;;;FZAZ^_@_`hhikiqiqrH!mO vvo -- :{445 HA%*d2I&1&C&Ca&GGIN # H"-!1$%(@$@!=G+=U'/9:(3O(DP_H`(` +// /1 4(6 O$jjBJJ:0E 0SWX\qWquvVv$wxO%,,-?@O++01K%% (G(GG)k.K.KlNlNl.lm ,00 0A 57 C--22 2!Q;+H+H>%++ &&rc,tj|Sr)matherfxs r_erfrs 88A;rc6t|t|zdz S)N@)ry)abs r _umulhi_64rs FSVO ""r)otypesceZdZedZy)ExtraFunctionscxtj|jj|j|||Sr)rtensorbuildercreate_fp_to_fphandle)rdst_tyfp_downcast_rounding _semantics r_convert_custom_typesz$ExtraFunctions._convert_custom_typess.yy**::5<<QefhnoorN)r+r,r- staticmethodrr3rrrrspprrcteZdZejj ejj ejjejjejjejjejjejjiZ ejjejjejjejjejjejjejj ejj ejj"ejj"ejj$ejj$ejj&ejj&ejj(ejj(ejj*ejj*ejj,ejj,i ZddZdZdZdZdZdZdZdZd Z d Z!d Z"d Z#d Z$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6d Z7d!Z8d"Z9d#Z:d$Z;d%Zd(Z?d)Z@d*ZAd+ZBd,ZCd-ZDd.ZEd/ZFd0ZGd1ZHd2ZId3ZJd4ZKd5ZLd6ZMd7ZNd8ZOd9ZPd:ZQd;ZRd<ZSd=ZTd>ZUd?ZVd@ZWdAZXdBZYdCZZdDZ[dEZ\dFZ]dGZ^dHZ_dIZ`dJZadKZbdLZcdMZddNZedOZfdPZgdQZhdRZidSZjdTZkdUZldVZmdWZndXZodYZpdZZqd[Zrd\Zsd]Ztd^Zud_Zvd`ZwdaZxdbZydcZzddZ{deZ|eKZ}eKZ~dfZdgZdhZdiZdjZdkZdlZdmZdnZdoZdpZdqZdrZdsZdtZduZdvZdwZdxZdyZdzZd{Zd|Zd}Zd~ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ d ddZddZddZddZ ddZdZy)InterpreterBuildercd|_t|_i|_tj |jd<d|jd<y)Nconvert_custom_typescy)N)r r r r3)lhsTyperhsTypes rz-InterpreterBuilder.__init__.. sr min_dot_size)rlrgoptions codegen_fnsrrrs rr=zInterpreterBuilder.__init__sB )+ 3A3W3W/0+M(rc||jdks td||jdks td||jdks td|||f|_y)Nrzx >= grid_dim[0]r zy >= grid_dim[1]rzz >= grid_dim[2])grid_dim ValueErrorgrid_idxrryzs r set_grid_idxzInterpreterBuilder.set_grid_idx"sf4==##/0 04==##/0 04==##/0 0Aq  rc|||f|_yr)r)rnxnynzs r set_grid_dimzInterpreterBuilder.set_grid_dim+sR  rc"tjSr)rrrs r get_half_tyzInterpreterBuilder.get_half_ty0 zzrc"tjSr)rrrs r get_bf16_tyzInterpreterBuilder.get_bf16_ty3 {{rc"tjSr)rrrs r get_float_tyzInterpreterBuilder.get_float_ty6rrc"tjSr)rrrs r get_double_tyz InterpreterBuilder.get_double_ty9rrc"tjSr)rrrs r get_int1_tyzInterpreterBuilder.get_int1_ty< wwrc"tjSr)rr~rs r get_int8_tyzInterpreterBuilder.get_int8_ty?rrc"tjSr)rr}rs r get_uint8_tyzInterpreterBuilder.get_uint8_tyB xxrc"tjSr)rrrs r get_int16_tyzInterpreterBuilder.get_int16_tyErrc"tjSr)rrrs r get_uint16_tyz InterpreterBuilder.get_uint16_tyH yyrc"tjSr)rrrs r get_int32_tyzInterpreterBuilder.get_int32_tyKrrc"tjSr)rrrs r get_uint32_tyz InterpreterBuilder.get_uint32_tyNrrc"tjSr)rrrs r get_int64_tyzInterpreterBuilder.get_int64_tyQrrc"tjSr)rrJrs r get_uint64_tyz InterpreterBuilder.get_uint64_tyTrrc"tjSr)rrrs rget_fp8e4nv_tyz!InterpreterBuilder.get_fp8e4nv_tyW }}rc"tjSr)rrrs rget_fp8e4b15_tyz"InterpreterBuilder.get_fp8e4b15_tyZ ~~rc"tjSr)rrrs rget_fp8e4b8_tyz!InterpreterBuilder.get_fp8e4b8_ty]rrc"tjSr)rrrs r get_fp8e5_tyzInterpreterBuilder.get_fp8e5_ty`rrc"tjSr)rrrs rget_fp8e5b16_tyz"InterpreterBuilder.get_fp8e5b16_tycrrc.tj||Sr)rr)relt_ty addr_spaces r get_ptr_tyzInterpreterBuilder.get_ptr_tyfsvz22rc.tj||Sr)rr)rrr8s r get_block_tyzInterpreterBuilder.get_block_tyis}}UE**rc~ttj|gtjtj SNr@)rrBarraybool_rrrr)s rget_int1zInterpreterBuilder.get_int1ls$BHHeWBHH=rwwGGrc~ttj|gtjtjSr")rrBr#r}rr%s r get_uint8zInterpreterBuilder.get_uint8o$BHHeWBHH=rxxHHrc~ttj|gtjtjSr")rrBr#r~rr%s rget_int8zInterpreterBuilder.get_int8rs$BHHeWBGG JJrc~ttj|gtjtjSr")rrBr#rrr%s r get_int16zInterpreterBuilder.get_int16xr)rc~ttj|gtjtjSr")rrBr#rrr%s r get_uint32zInterpreterBuilder.get_uint32{r.rc~ttj|gtjtjSr")rrBr#rrr%s r get_int32zInterpreterBuilder.get_int32~r)rc~ttj|gtjtjSr")rrBr#rJrr%s r get_uint64zInterpreterBuilder.get_uint64r.rc~ttj|gtjtjSr")rrBr#rrr%s r get_int64zInterpreterBuilder.get_int64r)rc~ttj|gtjtjSr")rrBr#rrr%s rget_fp16zInterpreterBuilder.get_fp16$BHHeWBJJ?LLrc~ttj|gtjtjSr")rrBr#rrr%s rget_fp32zInterpreterBuilder.get_fp32r;rc~ttj|gtjtjSr")rrBr#rrr%s rget_fp64zInterpreterBuilder.get_fp64r;rcXttjdgt||SNrr@)rrBr#r)rtypes rget_null_valuez!InterpreterBuilder.get_null_values!BHHaS d0CDdKKrc|j tdttj|j|gtj t j S)Nzgrid_idx is Noner@)rrrrBr#rrraxiss rcreate_get_program_idz(InterpreterBuilder.create_get_program_idsD == /0 0BHHdmmD&9%:"((KRXXVVrcttj|j|gtjt jSr")rrBr#rrrrEs rcreate_get_num_programsz*InterpreterBuilder.create_get_num_programss.BHHdmmD&9%:"((KRXXVVrcttj|jtt j }d}|j||||||Sr")rrB ones_likerrrrcreate_masked_load)rptr_0_1 is_volatilemaskothers r create_loadzInterpreterBuilder.create_loadsABLL>H&&sD%RMMrcttj|jtt j }|j|||ddSr")rrBrKrrrrcreate_masked_store)rrMvalrNrOrQs r create_storezInterpreterBuilder.create_stores:BLL>H''S$dCCrc|j}t|}|+ttj|j ||}t j|j |j |j |} t| |Sr")r#rrrBrr _interpreterload) rrOrQrRcache_modifiereviction_policyrPrMdtype_nprets rrLz%InterpreterBuilder.create_masked_loadsi&&( * = tyy!I8TE 499ejj(KC**rcltj|j|j|jSr)rYstorer)rrOr)rQr[r\s rrUz&InterpreterBuilder.create_masked_stores#!!$))UZZCCrc|jj}|j}|tjk(r|tjk(s&|tjk(rY|tjk(rFt |j ||djt|}t||jSt|j jt||jSr) rrKrrrrrviewrrrI)rsrcdst_typesrc_element_typedst_element_typers r cast_implzInterpreterBuilder.cast_impls99++#??  +0@BJJ0N  */?2;;/N!#((,<>NPTUZZ[hiq[rsDhoo6 6 h0G H(//Z Zrc&|j||Srrgrrcrds rrzInterpreterBuilder.$..h2Orc&|j||Srrirjs rrzInterpreterBuilder.rkrc&|j||Srrirjs rrzInterpreterBuilder.rkrc&|j||Srrirjs rrzInterpreterBuilder.rkrc&|j||Srrirjs rrzInterpreterBuilder.ssH0Mrc&|j||Srrirjs rrzInterpreterBuilder.rkrc&|j||Srri)rrcrd is_signeds rrzInterpreterBuilder.sT^^CQY=Zrc|jj}|j}t|j|||j t |}t ||jSr)rrKrrrbrr)rrcrdrrerfrs rrz"InterpreterBuilder.create_fp_to_fpsU99++#??chh(8:JMZ__`mnv`wxD(//22rcrt|jjt||jSr)rrrbrrKrjs rcreate_bitcastz!InterpreterBuilder.create_bitcasts%CHHMM-*ABHOOTTrcxt||j|j|jjSrrrrrK)rlhsrhsops r binary_opzInterpreterBuilder.binary_ops(Bsxx2CII4D4DEErcD|j||tjSrr{rBaddrrxrys rrzInterpreterBuilder.sS"&&)IrcD|j||tjSrr{rBmultiplyrs rrzInterpreterBuilder.S"++)NrcD|j||tjSrr{rBdividers rrzInterpreterBuilder.sS")))LrcD|j||tjSrr{rBfmodrs rrzInterpreterBuilder.S"'')JrcD|j||tjSrr{rBsubtractrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.sC(MrcD|j||tjSrrrs rrzInterpreterBuilder.sS"))1Trc&|j||Sr create_idivrs rrzInterpreterBuilder.)9)9#s)Crc&|j||Srrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrr}rs rrzInterpreterBuilder.ssC(HrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSr)r{rB left_shiftrs rrzInterpreterBuilder.ssC(OrcD|j||tjSr)r{rB right_shiftrs rrzInterpreterBuilder.sS"..)QrcD|j||tjSrr{rBrrs rrzInterpreterBuilder.$..c2::*NrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.T^^Cbjj-QrcD|j||tjSrrrs rrzInterpreterBuilder.DNN3RZZ,PrcD|j||tjSrr{rBrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrr{rB less_equalrs rrzInterpreterBuilder.DNN3R]],SrcD|j||tjSrr{rBlessrs rrzInterpreterBuilder.DNN3RWW,MrcD|j||tjSrr{rB greater_equalrs rrzInterpreterBuilder.DNN3REUEU,VrcD|j||tjSrr{rBgreaterrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrr{rBequalrs rrzInterpreterBuilder.s4>>#sBHH+MrcD|j||tjSrr{rB not_equalrs rrzInterpreterBuilder.s4>>#sBLL+QrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.DNN3RXX,NrcD|j||tjSrrrs rrzInterpreterBuilder.DNN3R\\,RrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSrrrs rrzInterpreterBuilder.rrcD|j||tjSr)r{rB bitwise_andrs rrzInterpreterBuilder.sC(PrcD|j||tjSr)r{rB bitwise_xorrs rrzInterpreterBuilder.rrcD|j||tjSr)r{rB bitwise_orrs rrzInterpreterBuilder.st~~c3 'Nrct|jtj|j|jz |jz|jj Sr)rrrBrrrKrs rrzInterpreterBuilder.create_idivsCSXX#(((CCPRUR[R[RbRbccrc@t|jj}t|jj}|jj||_|jj||_|j ||t j Sr)rrrrIr{rBr)rrxry lhs_dtype rhs_dtypes r create_ashrzInterpreterBuilder.create_ashr se(8 (8 88??9-88??9-~~c377rcR|jj}|tjk(s|tjk(r>t t |j|j|jjSttd|jdzdz}|jj|}|jj|}tj|||jdzz }t |j||jjS)Nrr?r) rrrBrrJr np_umulhi_u64rKrrbrIr)rrxryr compute_dtypelhs_datarhs_dataret_datas r create_umulhiz InterpreterBuilder.create_umulhis BHH  2 chh A399CSCST T#B$u~~/AA/E.F(GHMxx}5Hxx}5H{{8X65>>A;MNH 6 8H8HI Irct||j|j|j|jjSrrw)rrxryrRrzs r ternary_opzInterpreterBuilder.ternary_ops.Bsxx5::> @R@RSSrcF|j|||tjSr)rrBclip)rarglohipropagate_nanss rrzInterpreterBuilder."sdoocSUWY[][b[b>crcF|j|||tjSr)rrBwhere)rcondrxrys rrzInterpreterBuilder.#ssCQSQYQY1Zrct|j|jz|jz|jjSrrwrs r create_fmazInterpreterBuilder.create_fma%s,AFFQVVOaff4aggnnEErcbt||j|jjSrrw)rrrzs runary_opzInterpreterBuilder.unary_op)s!BsxxL#))*:*:;;rc2|j}|jdz }ttd|j}|jj |}d|zdz }||zj t |}t||jjS)Nr r) rrArrBrrbrrrK)rrrM mask_bitwidth np_uint_dtyperrQr^s r create_fabszInterpreterBuilder.create_fabs,s99 33a7 d8+F+F*G$HI xx}}]+]"a'd{  x!89C!1!122rcB|j|tjSr)rrBcosrrs rrzInterpreterBuilder.64==bff#=rcB|j|tjSr)rrBexprs rrzInterpreterBuilder.7rrcB|j|tjSr)rrBexp2rs rrzInterpreterBuilder.8DMM#rww$?rcB|j|tjSr)rrBabsrs rrzInterpreterBuilder.9sDMM#rvv$>rcB|j|tjSr)rrBfloorrs rrzInterpreterBuilder.:sT]]3%ArcB|j|tjSr)rrBceilrs rrzInterpreterBuilder.;rrcB|j|tjSr)rrBlogrs rrzInterpreterBuilder.<rrcB|j|tjSr)rrBlog2rs rrzInterpreterBuilder.=rrcB|j|tjSrrrBsqrtrs rrzInterpreterBuilder.>sDMM#rww,GrcB|j|tjSrr rs rrzInterpreterBuilder.?rrcB|j|tjSr)rrBsinrs rrzInterpreterBuilder.@rrc|jjtjk(rt |jnt |j}t ||jjSr)rrrBr np_erf_fp32 np_erf_fp64rrK)rrr^s r create_erfzInterpreterBuilder.create_erfBsH'*xx~~'Ck#((#UXU]U]I^C!1!122rctdtj|jz |jj SNr )rrBr rrrKrs r create_rsqrtzInterpreterBuilder.create_rsqrtFs+A 113993C3CDDrctt|jj||jjSr)rrrHrrK)rrr8 allow_reorders rrzInterpreterBuilder.Js*\#((JZJZ[`Jacfclclcscs=trc~ttj|j||jj Sr)rrB transposerrrK)rrperms r create_transzInterpreterBuilder.create_transLs(BLL48#)):J:JKKrc|j}|j}|jjdk(r|jjs3|jjdk(r|jjrt ||jt j djtj }t ||jt j djtj }ttj|||jj|jz|jjS)Nr?r@) rrrA is_floatingrrrrbrBrmatmulrK)rrrdinput_precisionmax_num_imprecise_acca_datab_datas r create_dotzInterpreterBuilder.create_dotOs GG & &! +0C0C0E GG & &! +0C0C0E#FAGGRZZFKKBJJWF#FAGGRZZFKKBJJWFBIIffAFFLLIAFFRTUT[T[TbTbccrc~ttj||tjtjSr")rrBrGrr)rret_tystartstops rcreate_make_rangez$InterpreterBuilder.create_make_rangeXs$BIIeTBBHHMMrc|=ttj|jtt j }tj|j|jtj|j}tj||d|fd}|dxxtj|jjzcc<t|t jS)Nr@r)binsrE) rrBrKrrrrrr histogram logical_notsumr)rrr-rQr.s rcreate_histogramz#InterpreterBuilder.create_histogram[s < TYYd CRWWMDxx 499bmmDII.FGLLDD B1E ! tyy15577 Irxx00rcttj|j|j||jj S)NrF)rrBtake_along_axisrrrK)rrcindicesrFs r create_gatherz InterpreterBuilder.create_gatheres3B..sxxDQSVS\S\ScScddrc|j}|j}td|dz}t|j||jj t jzz|jS)Nr r?) r#rAmaxrrrIrBrJr)rrMoffsetrMelement_bitwidthelement_bytewidths r create_addptrz InterpreterBuilder.create_addptrjse%%'#66#3q#89CHH'86;;;M;Mbii;X'XXZ]ZcZcddrc|j|\}}|j} t| } |d} n|tjj k(r,t tj|j| | } na|tjjk(r6t tj|jtd| | } ntd||j||| |||S)Nr@nanzunsupported padding option )rTr#rrPADDING_OPTIONPAD_ZEROrrBrrPAD_NAN full_likefloatrrL) rrMrLpadding_optionr[r\rPrOrPrMr]rRs rcreate_tensor_pointer_loadz-InterpreterBuilder.create_tensor_pointer_loadqs..~> e&&( *  !E s11:: : tyy!I8TE s1199 9 diiuX!VX`aE:>:JKL L&&tUE>?\ghhrcT|j|\}}|j|||||SrrTrU)rrMr)rLr[r\rOrPs rcreate_tensor_pointer_storez.InterpreterBuilder.create_tensor_pointer_stores/..~> e''eUNO\\rc~ttj|j||jj Sr)rrB expand_dimsrrrK)rrrFs rcreate_expand_dimsz%InterpreterBuilder.create_expand_dimss(BNN388T:CIIN>NOOrcttj|j|jg|jj Sr)rrB concatenaterrrKrs r create_catzInterpreterBuilder.create_cats/BNNCHHchh+?@#))BRBRSSrcttj|j|jgd|jj S)Nr\r3)rrBstackrrrKrs r create_joinzInterpreterBuilder.create_joins1BHHchh%9CSYYEUEUVVrct|jd|jjt|jd|jjfS)N).r).r rw)rrVs r create_splitzInterpreterBuilder.create_splitsESXXf-syy/?/?@,sxxX^O_adajajaqaqBrssrc |j}t|jtjrVt t j||jdt|j|jjSt t j||jt|j|jjSrA) r8rrrrrrBfullrrrK)rr(rr8s r create_splatzInterpreterBuilder.create_splats  cii /sxx{-PSPYPYBZ []`]f]f]m]mn nsxx}SYY?W XZ]ZcZcZjZjk krc ttjd|jdt |j |j j S)Nr rr@)rrBrWrrrrKrs rcreate_unsplatz!InterpreterBuilder.create_unsplats:BGGE388A;mCII>VWY\YbYbYiYijjrc||jvrtd||j|}ttj|j |j |j ||j jS)Nunsupported semantic )ir_sem_to_interpreter_semrrrY atomic_casrrrK)rrMcmprVsemscopes rcreate_atomic_casz$InterpreterBuilder.create_atomic_cassk d44 44SE:; ;,,S1L33CHHchhRUVX[XaXaXhXhiirc X||jvrtd|||jvrtd||j|}|j|}tt j ||j |j |j ||jjS)Nzunsupported rmwOp r]) ir_rmw_op_to_interpreter_rmw_oprr^rrY atomic_rmwrrrK)rrmwOprMrVrQrarbs rcreate_atomic_rmwz$InterpreterBuilder.create_atomic_rmws << <1%9: : d44 44SE:; ;44U;,,S1L33E388SXXtyyZ]^`c`i`i`p`pqqrctd)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)rlibNamelibPathsymbolargListretTypeisPures rcreate_extern_elementwisez,InterpreterBuilder.create_extern_elementwises!"XYYrctd)Nz,inline_asm not supported in interpreter moderj)r inlineAsm constraintsvaluesrBrqpacks rcreate_inline_asmz$InterpreterBuilder.create_inline_asms!"PQQrc*d|jdd|jdd|jdd}|r|d|z }|rtjdd i |D]}t|d|jz|rtjd yy) N(rz, r r) rcd|dS)N0x02xr3rs rrz1InterpreterBuilder.create_print..sb3Lr) formatter)rrBset_printoptionsprintr)rprefixhexrvisSignedmsgr)s r create_printzInterpreterBuilder.create_prints $--"#2dmmA&6%7r$--:J9K1M  Qvh< C    52H*I J *E #!EJJ<(( ) *    $ / rc|sJ|yrr3)r conditionmessages r create_assertz InterpreterBuilder.create_asserts&WI&yrc|sJdy)Nz Assume failedr3)rrs r create_assumez InterpreterBuilder.create_assumes)/)yrcyrr3rs rcreate_barrierz!InterpreterBuilder.create_barriers rcf|Dcgc]}|j}}t||||||Scc}wr)r&r5) rr7r8r9r:r;r<r9 new_offsetss rcreate_make_block_ptrz(InterpreterBuilder.create_make_block_ptrs64;<&v||~< <!$w [RWXX=s.ct|jt|k7r td|jDcgc]}|j}}t |j |j |j||j|j}tt|D]1}|j|xj||jz c_ 3|Scc}w)Nz len(ptr.offsets) != len(offsets)) rFr:rr&r5r7r8r9r;r<rEr)rrMr:r9rr^rs rcreate_advancez!InterpreterBuilder.create_advances s{{ s7| +?@ @47KK@&v||~@ @ 399ckk;PSP_P_adajajks7|$ 3A KKN  71:?? 2  3 AsC cDt|||||}|j|Sr)rVr_)rr7r8r9 tensor_shaperrrYdescs rcreate_make_tensor_descriptorz0InterpreterBuilder.create_make_tensor_descriptors"eWlGL  rct|tsJ|j|\}}|j}t |}|j } | t jjk(r,ttj|j||} na| t jjk(r6ttj|jtd||} nt!d| |j#||| ||dS)Nr@r>zunsupported padding F)r[r\rP)rrVrTr#rrYrr?r@rrBrrrArBrCrrL) rrr5r[r\rOrQrMr]rYrRs rcreate_descriptor_loadz)InterpreterBuilder.create_descriptor_loads$ 0111..w7 d&&( *,, c((11 1 tyy!I8TE **22 2 diiuX!VX`aE3G9=> >&&tT57FTY'[ [rcT|j|\}}|j|||ddSrrG)rrr)r5rOrQs rcreate_descriptor_storez*InterpreterBuilder.create_descriptor_stores/..w7 d''eT4FFrc|jjj}t|}t j |j jd|jdg|}d}d} t|j D]F\} } t| tj|g} |j|| || j || ddf<Ht||S)Nrr\r@)r7rr!rrBzerosrr8r; enumeraterrrr) rr x_offsetsy_offsetrBrnp_dtyperesultr[r\rx_offsetr5s rcreate_descriptor_gatherz+InterpreterBuilder.create_descriptor_gathers ** '9>>//2D4D4DR4HIQYZ$Y^^4 lKAx#Hbhh7BG66tWnVefkkF1a4L lFE**rct|jD]W\}}t|j||j}t|tj |g}|j |||Yyr)rrrrrrr) rrr)rrrrslicer5s rcreate_descriptor_scatterz,InterpreterBuilder.create_descriptor_scatters]$Y^^4 ?KAx A  ?rc&t|}d|jvr,ttjdd||j S|tj k(r,ttjdd||j Std|)Nryr r\r@Tzunsupported type )rnamerrBrWrKr$ TypeError)rrBnp_types rget_all_ones_valuez%InterpreterBuilder.get_all_ones_value ss% GLL 2W =t{{K K  4w ?M M/v67 7rNreturnNone)zero) r7rr8rcr9rcrrdrrrrYrk)rrVr5rc)rrVr)rr5rc)rrVrrrr)rrVr)rrrrr)r+r,r-r MEM_SEMANTICACQUIRErYRELEASERELAXEDACQUIRE_RELEASEr^ ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGrer=rrrrrrrrrrrrr r rrrrrrrr r&r(r+r-r0r2r4r6r8r:r=r?rCrGrIrSrWrLrUrgcreate_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_ui create_fp_extcreate_fp_trunccreate_int_castrrur{ create_fadd create_fmul create_fdiv create_frem create_fsub create_mulcreate_precise_divf create_sdiv create_udiv create_srem create_urem create_add create_sub create_shl create_lshr create_minsi create_minuicreate_minimumfcreate_minnumf create_maxsi create_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGT create_icmpEQ create_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE create_and create_xor create_orcreate_int_to_ptrcreate_ptr_to_intrrrr create_clampf create_selectrrr create_cos create_exp create_exp2 create_iabs create_floor create_ceil create_log create_log2create_precise_sqrt create_sqrt create_sinrrcreate_reshaperr&r+r1r6r<rErHrKrMrPrSrUrXr[rcrhrrrxrrrrrrrrrrrrr3rrrrs   ,";";"C"C   ,";";"C"C   ,";";"C"C ((,*C*C*S*S ! <..22 L//44 <..22 L//44 <..22 L//44 <..22 ,--00 <..22 L//44 '#N"% 3+HIGKIKIKIMMMLW WN D+D[POOOOOOOMMOOZO3 UFJKNKLKJKNKMJTCKCKJKJKHJMJOJQKNLNLQOPNNLNLQOPNSNMNVNPNSNMNVNPNMMQMMNPNSNVNNNRNMNPNSNVNNNRNPJPJNI&&d 8 JTdMZMF<3>J=J?K>KAL?K=J?KG?K=J3EuNLdN1e e i]NPTWtlkj rZR 0'* Y `f4=JNY\ [ G +?,8?8rrcFt||dfd }t|||y)N)memberc v||i|jDcic] \}}|dk7r||c}}diScc}}w)Nritems)r argskwargskvsemantics rrz_patch_attr..s[:kMS\\^AVEIQDEDTBCAAV:kbj:kAVs5)r setattr)objrr r new_memberrs @r _patch_attrrs&g&H&,lJ Cz"rctj|D]3\}}tjj |s&t ||||5yr)inspect getmembersrcore is_builtinr)pkgrrr s r_patch_builtinrs@**3/4 f 77  f % T67 34rcvdd}d|_fd|_d|_d|_t ||_y)Ncf|jj}|jdk(r t|SdS)Nr T)rrsizer)rrs r _get_boolz%_patch_lang_tensor.._get_bool$s,{{"YY!^tDz55rcttj|jj|jj }|j jsJt|j j}|d|dc|d<|d<tjj|j |}tjj||S)Nr\)rrBrrrrrBis_blocklistr8rrrr)rrr;res_tys r_get_transposez*_patch_lang_tensor.._get_transpose*sbll4;;+;+;O>OPyy!!###499??+ +6r?KO( BR##DJJ <ww~~ff--rc@t|jjSr)ryrrrs rrz$_patch_lang_tensor..2sC (8(8$9rc|Srr3)rr"s rrz$_patch_lang_tensor..3s 9T?rc@t|jjSr)reprrrrs rrz$_patch_lang_tensor..4s4 (8(8#9rc@t|jjSr)rkrrrs rrz$_patch_lang_tensor..5s#dkk&6&6"7r) __index__r__repr____str__propertyT)rr(r"s @r_patch_lang_tensorr3"s86 .:F2FO9FO7FN'FHrc*eZdZdZdZdZdZdZy)ReduceScanOpInterfacec ||_||_yr)rF combine_fn)rrFr7s rr=zReduceScanOpInterface.__init__;s $rcH| |t|k\rtd|d|yy)Nzaxis z out of bounds for shape )rFr)rr8rFs r check_axisz ReduceScanOpInterface.check_axis?s4  E 2uTF*CE7KL L!3 rc|D]c}t|tjjst dt ||j |j|jey)Nzinput must be a tensor, got ) rrrrrrBr9r8rF)rrrs r check_tensorz"ReduceScanOpInterface.check_tensorCsP 2Cc277>>2 #?S {!KLL OOCIItyy 1 2rcZt|}t|drG|jr;|j|}t j |t |j}ntj|g|}|}tjjt||j|S)Nr8r@) rr"r8rIrrr&rBr#rrrrK)rr^rrret_types r to_tensorzReduceScanOpInterface.to_tensorIsy ' 3 SYY**X&C}}UDOrBr3rrr5r59s%M2 IIrr5c>eZdZfdZdZdZddZdZdZxZ S) ReduceOpsc4t|||||_yr)superr= keep_dims)rrFr7rH __class__s rr=zReduceOps.__init__]s z*"rcg}|D]e}||j|d}|j|j|jjj |j gt ||fSr@)appendr>rrflattenrrA)rrrFr^rs runravelzReduceOps.unravelasn SD 4  4>>$++*:*:*B*B*DdjjQR  S Sz4rcj}jj\}g}g}djjj}|d|||dzdz}D]k}|j |jj|j t j||jjjmt|djD]Y} t j| |d||dzdztfdt|D} |dk(rGtt|D]/} | | jjj|| <1tfdt|D} j j"g| | } t%| ts| fn| } tt|D][} t%| | t&j(j*r'| | jjjn| | || <]\g}t|D]\} }j,rI|t j.||}nBtt|D]}t j.|d}n||j}|j j1|| j|S)Nrr r@c3hK|])\}}j||j+ywrr>r).0iir!r input_indexrs r z+ReduceOps.generic_reduce..zs/sTYTVXYq~uRy Os/2c3hK|])\}}j||j+ywrrP)rQoior output_indexrs rrTz+ReduceOps.generic_reduce..s/!wW\WY[\$..<%)//"R!wrU)rFrMrrr8rKrBrrrEr! unravel_indexrArrFr]r7fnrrrrrHrJr>)rr original_axisrF input_data output_data input_shape output_shaperr input_tuplej acc_tuplecombine_fn_retr^r_rSrYs`` @@rgeneric_reducezReduceOps.generic_reduceks ll5$))4 t  Ahoo**00 "1T*[-CC  TC   cjjoo .   rxx CJJOOq>3H3H3M3M3R3R3TKN<0U"!w`iju`v!ww !3!3!3!MY!M!M6@QV6W^.]k s;/0HAV`!! bggnnW69Q<3F3F3K3K3P3P3R;DQ< N<0H H" - =GAt~~ ,>>$5D"3{#347!~~dA67&yy{ JJt~~dE!HNN; < = rct|tr|dn|}d}d}|rM|j||jj|j |j |j}|rQ|j||jj|j |j tj}||||fS||S||Std)NrrFkeepdimsz-val_reduce_op and idx_reduce_op are both None) rrAr>rrrFrHrrrr)rr val_reduce_op idx_reduce_oprVidxs rmin_maxzReduceOps.min_maxs&ue4a% ..u||/@/@tyy[_[i[i!jlqlwlwxC ..u||/@/@tyy[_[i[i!jlnltltuC ?s8O _J _JLM Mrc|jtj|jj|j |j |jS)Nrh)r>rBr0rrrFrHrrrs rr0z ReduceOps.sums<~~bffU\\%6%6TYYQUQ_Q_`bgbmbmnnrc2|jtjjk(r3|j |dt j t jS|jtjjk(r3|j |dt jt jS|jtjjk(r%|j |dt jdS|jtjjk(r%|j |dt jdS|jtjjk(r|j!|dS|j#|S)Nr)rjrk)r7rstandard_argmin_combine_tie_break_leftrmrBminargmin_argmax_combine_tie_break_leftr8argmax_elementwise_maxnanmax_elementwise_minnanmin _sum_combiner0rfros rrCzReduceOps.apply_impls ??bkkHH H<<abii<X X __ J J J<<abii<X X __ < < <<<a QU<V V __ < < <<<a QU<V V __ 8 8 888E!H% %&&u- -rr) r+r,r-r=rMrfrmr0rC __classcell__rIs@rrErE[s$# )VN$o .rrEc6eZdZfdZdZdZdZdZxZS)ScanOpsc4t|||||_yr)rGr=reverse)rrFr7rrIs rr=zScanOps.__init__s z* rc|jtj|jj|j |j gSNr3r@)r>rBcumsumrrrFrros rrzScanOps.cumsums8ryy):):KSXS^S^_``rc|jtj|jj|j |j gSr)r>rBcumprodrrrFrros rrzScanOps.cumprods8rzz%,,*;*;$))LTYT_T_`aarc  g}g}djjj}D]k}|j|jj|jt j ||jjj mt|djD]|}t j|| t fdt|D} jdk(rGtt|D]/}||jjj|| <1t fdtt D t fdt|D} jj g| |} t#| ts| fn| } tt|D][}t#| |t$j&j(r'| |jjjn| ||| <]g} t|D]3\}}| jj+||j 5| S)Nrr@c3hK|])\}}j||j+ywrrP)rQrRr!indexrrs rrTz'ScanOps.generic_scan..s,fur1%%)//BfrUc3VK|] }|jk(r|dz n|"yw)r Nr3)rQrrrs rrTz'ScanOps.generic_scan..s-"kTU1 >58a.s/!uUZUWYZ$..:b "P!urU)rrr8rKrBrrrEr!rZrArrFrFr]r7r[rrrrr>)rrr]r^r8rrrrbrcrdr^rrs`` @@r generic_scanzScanOps.generic_scans!  a$$** MC   cjjoo .   rxxSZZ__5J5JK L Mz!}))* HA$$Q.EfPYZdPeffDTYY1$s;/0GA,0GNN,?,?,D,D,FKN5)G#"kY^_bch_iYj"kk !!u^ghs^t!uu !3!3!3!FY!F!F6@QV6W^.]k s;/0HAOY!! bggnnP6IaL,?,?,D,D,I,I,K;DQ< N5)H H" - =GAt JJt~~dE!HNN; < = rc g}|jrf|D]`}|j|jtj|j j |j|jbn|}|jtjjk(r|j|d}nM|jtjjk(r|j|d}n|j!|}|jrK|D]F}tj|j j |j|j _H|S)Nr3r)rrKr>rBfliprrrFrr7rrqr{r _prod_combinerr)rr new_inputrr^s rrCzScanOps.apply_impls << f   dii0XZ]ZcZc!de fI ??bkk66 6++il+C __ 9 9 9,,y|,C##I.C << K"$''#**// "J  K r) r+r,r-r=rrrrCr|r}s@rrrsab<rrcdd}dd}|t_|t_|tj_|tj_y)Nc:t|||j|Sr)rErB)rrFr7rHrs r _new_reducez'_patch_reduce_scan.._new_reducesz95;;EBBrc:t|||j|Sr)rrB)rrFr7rrs r _new_scanz%_patch_reduce_scan.._new_scanstZ177>>r)F)rreduceassociative_scanr)rrs r_patch_reduce_scanrs5C?BI#B BGGN(BGGrcd}d d}d d}d}||_||_||_t|_||j _t|d|_t|d|_ t|d|_ ty) Nc>|jdk(r|jS|jdk(r|jS|jdk(r|jS|jdk(r|j S|jdk(r|j S|jdk(r|j S|jdk(r|jS|jdk(r|jS|jd k(r|jS|jd k(r|jS|jd k(r|jS|jd k(r|jS|jd k(r|jS|jdk(r|jS|jdk(r|jS|jdk(r|j!S|jdk(r|j#St%d|d)Nvoidrr~r}rrrrrrJrmrorqfp16bf16fp32fp64zfail to convert z to ir type)r get_void_tyrrrrrrr r rrrrrrrrr)rrs r _new_to_irz$_patch_lang_core.._new_to_ir s 99 &&( ( YY& &&( ( YY& &&( ( YY' !'') ) YY' !'') ) YY( "((* * YY' !'') ) YY( "((* * YY' !'') ) YY( "((* * YY' !'') ) YY) #))+ + YY* $**, , YY& &&( ( YY& &&( ( YY& '') ) YY& ((* *+D6=>>rc:|d}|d|}}n||}}t|||S)Nr r)rE)arg1arg2steprr)ends r _new_rangez$_patch_lang_core.._new_range3s2 <D <D3Et3EUC&&rc|sJ|yrr3)rrs r_new_static_assertz,_patch_lang_core.._new_static_assert<sStrct|tjs|St|ttfs|gn|}|Dcgc]*}t|tj r |j n|,}}t|tdt|jk7rtd||jj|||Scc}w)Nr z$len(values) != len(input.shape) for ) rrrr&rA constexprr)rFr8r8rrr*)rrvrrs r _set_attrz#_patch_lang_core.._set_attr?s%+L!+FT5M!B&IOPAZ2<<8!''a?PP v;#aU[[!12 2CD6JK K dF+ Qs/Cztt.divisibility)rz tt.contiguityz tt.constancy)NN)) rE static_range static_assertr static_printrto_irr multiple_ofmax_contiguous max_constancyr)langrrrrs r_patch_lang_corer sx$?P' DJ"D+DD!DJJy/@AD!)/BD @Drc(|jjDcgc]6\}}tj|s|ttj fvs5|8}}}t |dk\sJd|D]o}t|tt|jt|tk(rt|jtt|jt|qttj jtycc}}w)Nr z:triton.language must be visible from within jit'd function) __globals__rrismodulerrrFrinterpreter_builderrrr3rtensor_descriptor_base)r[rer)langsrs r _patch_langrWs#%>>#7#7#9 pxq%W=M=Me=TY^cegigngnboYoU pE p u:?XXX?t01t{{$78 2: 499&9 :4;;' 277113FG qsDDDcXt|drt||St||S)N_fields)r"rB)rcontentss r _tuple_createrds. $+3 #:949h S S (@SSrc t|trtjtj j j|d}tj}d|cxkrdkrnntj}nkd|cxkrdkrnntj}nLd|cxkrdkrnntj}n-d|cxkrdkrnntj}ntd|ttj|g||}tj ||St#|d rtjtj j j|d}ttj|j%gtj|}tj ||St|t&rt)|t+t,|St|t.r|j0Dcgc] }t-|}}|j0d d k(sJtj2d |d <t5t7}|j9t-|j:|j<Dcgc] }t-|c}||j>Dcgc]}tj2|c}|j@ S|Scc}wcc}wcc}w) NilllllzUnsupported integer value r@data_ptrr\r )r7r8r9r;rD)!rryr str_to_tytritonruntimejit mangle_typerBrrrrJrrr#rr"rrArmap _implicit_cvtr r9rr rmake_tensor_descriptorr7r8r;rY)rtyrrsr9rrs rrrns3#s \\&..,,88=t D S 5 HHE c !E !IIE s "U "HHE c !E !IIE9#?@ @bhhuE:B?yy$$sJ \\&..,,88=t Dbhh '7ryyI2Nyy$$ C S#mS"9:: C) *-0[[9=#99{{2!###ll1o !"4"67..M#((4KPSPYPY5Z1mA6F5ZdkEH__>#5#5#C#CD~~!!//?3H3HII Hrc$eZdZdZdZdZdZy) GridExecutorc ddlm}||_||_||_|j j Dcic]\}}|||}}}|Dcgc]}|j|dk(s|c}|_ycc}}wcc}w)Nr ) _normalize_tyr) rrr[ arg_namesgridr/rget constexprs)rr[rrrrrr/s rr=zGridExecutor.__init__sx&" CECUCUC[C[C]^xtR4r!22^^,5bD9L9LT9RVa9a4b_bsA:B-Bc i  fd|Dcgc] }| }}i}|jD]\}}|||<||fScc}w)Nct|trt|t|St|trGt |j |j |j|j|jSt|ds|St|}|jjvr1|j}|j|j<|jj}|jdd}|j!||j#|j%|j't)||}|S)Nrrcpu)device)r)rrArrr r7r8r9r;rYr"runtyped_storagerr new_emptyset_storage_offsetr!r^r)r unwrapped_argstoragecpu_arg_to_cpustoragess rrz,GridExecutor._init_args_hst.._to_cpus-#u%$S#gs*;<<C!12'CHH%IIKKOOKK S*- *3/M,,.779I'779/6{{}))+,}<<>GGIJG#--a->G LL-">">"@-BTBTBVXeXlXlXn o$WcBGNrr ) rargs_devrrargs_hst kwargs_hstr(r)rrs @@r_init_args_hstzGridExecutor._init_args_hstsc 2-55SGCL55  ,,. -JC%enJsO -## 6sA c i fd t||D]\}} |||jD]\}}||}  ||  jD]\}}|j|y)Nct|drVt|t|}}|j|jf|jj<yt |t rt ||D]\}}||yt |tr|j|jyy)Nr) r"rrrrrAzipr r7)arg_devarg_hst _from_cpurs rrz1GridExecutor._restore_args_dev.._from_cpusw +#1'#:N7GU+*-gw*?0&Wggw/0G%56',, 57r)rrrvcopy_) rrrrrrrr( kwarg_dev kwarg_hstrrs @@r_restore_args_devzGridExecutor._restore_args_devs 6!$Hh 7 ( GW gw ' (%lln ,NC"3I i + ,#+//"3 # Wg MM' " #rcV|jddrytj|j}|j Dcic]\}}||j vs||}}}|j ||\}}t|jtj|jg|i|}|j D cic]!\} } | | |jvr| n t| #}} } t|jr|j|n |j} t| dksJd| ddt| z zz} tj|  t!| dD]Q} t!| dD]>} t!| dD]+}tj#| | ||jd i|-@S |j3||||ycc}}wcc} } w#t$$r?}t&j(j*j,rt/t1||d}~wwxYw) NwarmupFz#grid must have at most 3 dimensionsrZrr rr3)poprgetfullargspecr[rrrr getcallargsrrcallablerrFrrrEr Exceptionrknobs compilationfront_end_debuggingr r,r)rrrargspecrrrrrrrrrrres r__call__zGridExecutor.__call__s ::h & ((1#)<<>G41aQ',,5F!Q$GG#228VD*DGG""477DXDD^b^h^h^jkQZQUWZTT__4c-:LLkk"*499"5tyy4994yA~DDD~eq3t9}--(($/ 347^ (tAw(A"47^(+88AqA$(( ( x6:F3Hl 3||'';;"47+ 2 3s+GG&GA"G H():H##H(N)r+r,r-r=rrrr3rrrrsc"$H#2 GrrceZdZdZy)ASTTransformerc g}|jD]}||j|gz }t|dkDr tdt j t j t jdt jdt j|jt jdgg |_ |S) Nr z&Multiple assignments are not supportedinterpreter_semantic)idctxr>)r)rr F)r))funcrkeywords) targetsvisitrFrastCall AttributeNameLoadr)Constant)rnodenamestargets r visit_AssignzASTTransformer.visit_Assign sll *F djj() )E * u:>EF FXXSXX1GSXXZ%X_j#&88:/6:jj#,,UZB[5\gik  rN)r+r,r-rr3rrrr s rrcDeZdZeZdZdZdZdZdZ dZ dZ y) FunctionRewriterc <||_||_d|_d|_y)Nrr)r[rfilenamedef_file_lineno)rr[rs rr=zFunctionRewriter.__init__s  $%rcJ tj|j\}}|j \|_|_|j||_|j|}|j|}|j|S#t$r|jcYSwxYwr) rgetsourcelinesr[r_get_jit_fn_file_linerr _find_def def_lineno_prepare_source_transform_ast_compile_and_exec)rlinesrerctransformed_asts r rewrite_astzFunctionRewriter.rewrite_ast%s --dgg6HE1/3.H.H.J+ t+../""5)--c2%%o66 77N s"B B"!B"cBddlm}m}|||jS)Nr )get_jit_fn_file_line JITFunction)rr,r-r[)rr,r-s rr"z&FunctionRewriter._get_jit_fn_file_line9s:#K$899rczd}t|D]*\}}|jjds&|dz},|S)Nrzdef r )rstrip startswith)rr(r$rlines rr#zFunctionRewriter._find_def=sD  ' #GAtzz|&&v.U  #rcr||jdz d}dj|}tj|S)Nr r)r$jointextwrapdedent)rr(rcs rr%z FunctionRewriter._prepare_sourceEs4doo)*+ggens##rctj|}|jj|}tj||j dz }tj |||Sr)rparseast_transformerrfix_missing_locationsrincrement_lineno)rrc parsed_astr) inc_linenos rr&zFunctionRewriter._transform_astJs[YYs^ ..44Z@ !!/2))A-  _j9rct||jd}i|j}|jj}t j D]\}}||vs |||<t|||||jjS)Nexec)rmode) compilerrr[rglobalsrr>r+)rr) compiled_codelocal_namespace fn_globalsr(r)s rr'z"FunctionRewriter._compile_and_execUs$--fU )T[[/WW(( !)//+ (JC*$"' 3 ( ]J8tww//00rN) r+r,r-rr8r=r*r"r#r%r&r'r3rrrrs-$&O&7(:$ 1rrcFeZdZUiZded<d dZdZedZdZdZ y) InterpretedFunctionzDict[Callable, Callable] rewritten_fnc |_t|fi|_|_fd}|_t j |}|jjDcgc]}|jc}_ ycc}w)Nch|d}j}t|j||i|S)Nrrewriterr)rrrr[rs rrunz)InterpretedFunction.__init__..runis6&>DB9<DNND94J6J Jr) r[rrewriterrrLr signature parametersrvrr)rr[rrLrNrs` rr=zInterpretedFunction.__init__dsi(6v6   K %%b) *3*>*>*E*E*GHQ!&&HHsA:c|j|jvr1|jj|j|j<|j|jSr)r[rGrMr*rs rrKzInterpretedFunction.rewritersJ 77$++ +)-)B)B)DD  dgg &  ))rc.|jjSr)r[r+rs rr+zInterpretedFunction.__name__wswwrcP|j}t||j|SrrJ)rrr[s r __getitem__zInterpretedFunction.__getitem__{s \\^B55rct|j|j} ||i|S#t$r}t t ||d}~wwxYwr)rr[rKrr r,)rrrr[rs rrzInterpretedFunction.__call__sODGG \\^ 3t&v& & 3"47+ 2 3s/ AA  ANr) r+r,r-rGr/r=rKr1rSrr3rrrFrF`s6-/L*/ I*   63rrF)F __future__rrr4rtypingrrrrrnumpyrBrtriton.languagelanguagerr0rtriton.language.semanticr triton.tools.tensor_descriptorr errorsr functoolsr _C.libtritonrrYrrrr5rVrgrrrrr vectorizerrrrrJrrrrrr3r5rErrrrrrrr rrrNodeTransformerrrrFr3rrras" ..  !3;$6$  64%%P $ & & & @='@ # bll4 5 bll4 5  Z < ppJ8J8Z#4 (.IID].%].@;#;| ) K\ HTD)*%&9:  hGhGV S((  B1B1J&3&3r