K iWddlmZddlZddlmZmZmZmZmZm Z m Z ddl Z ddl m Z ddlmZddlmZed Zed ZGd d eZGd de eZy)) annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver)ir)coreTTensorTyceZdZfdZxZS)IncompatibleTypeErrorImplc||_||_d|jjzdz|jjz|_tt ||jy)Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr__init__)selfrr __class__s ^/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/triton/language/semantic.pyrz"IncompatibleTypeErrorImpl.__init__sX  2T[[5I5I5KKgUX\XcXcXlXlXnn  '7 E)__name__ __module__ __qualname__r __classcell__)rs@rrrsFFrrcfeZdZUejZded<eZded<dZd|dZd|dZ d}dZ d~d Z ddd Z dd Z d dd Zdd Z ddZ ddZ ddZddZddZddZddZddZddZddZddZddZddZddZddZddZddZ ddZ!dd Z"dd!Z#dd"Z$dd#Z%dd$Z&dd%Z'dd&Z(dd'Z)dd(Z*dd)Z+dd*Z,dd+Z-d,d-dd.Z.dd/Z/dd0Z0dd1Z1dd2Z2dd3Z3dd4Z4dd5Z5dd6Z6dd7Z7dd8Z8dd9Z9dd:Z:dd;Z;dd<ZZ>d?Z?d@Z@dAZAdBZBdCZCdDZDdEZEdFZFdGZG ddHZH ddIZIddJZJddKZKddLZLdMZMdNZNddOZOddPZPddQZQddRZRddSZSddTZTddUZUdVZVdWZW ddXZXddYZY ddZZZdd[Z[dd\Z\dd]Z]dd^Z^dd_Z_dd`Z`ddaZaddbZbdcZc dddZdddeZeddfZf ddgZgddhZhdiZiddjZj ddkZkddlZldmZm ddnZnddoZoddpZpddqZqddrZrddsZsddtZtdduZuddvZvdwZwddxZxddyZyddzZz d dd{Z{y,)TritonSemanticzType[TensorTy]tensorz ir.builderbuilderc||_yN)r')rr's rrzTritonSemantic.__init__s  rc|dvrtd||j|jj|tj S)Nrrr z+program_id axis must be 0, 1, or 2 but got ) ValueErrorr&r'create_get_program_idtlint32raxiss r program_idzTritonSemantic.program_id&sA y J4&QR R{{4<<==dCRXXNNrc|dvrtd||j|jj|tj S)Nr+z-num_programs axis must be 0, 1, or 2 but got )r,r&r'create_get_num_programsr.r/r0s r num_programszTritonSemantic.num_programs+sA y LTFST T{{4<<??ErxxPPrc`|j}|j}|j}|j}||k(r ||kDr|S|S|tjjj k(r ||k\r|S|S|tjjj k(r ||k\r|S|St d|d|)Nzunexpected signedness r) int_bitwidthint_signednessr.dtype SIGNEDNESSUNSIGNED TypeError)ra_tyb_tya_rankb_ranka_snb_sns rinteger_promote_implz#TritonSemantic.integer_promote_impl4s"""""""" 4<!F?4 4 4 RXX((11 1!V+4 5 5 RXX((11 1!V+4 5 50eD6BCCrc||k7rx|r||fn||f\}}|jj|jjkr6|r2|tjtjfvrtj S|S|j s|j rtjS|js|jrtj S|js|jr"|rtj StjS|jr2|jr"|rtj StjS|js|jrtj S|jr'|jr||k(r|StjS|jr|jstd|d||rL|j|jk7r3td|jzdz|jzdz|j!||S)Nunexpected type rzCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer.float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr<r8rrC)rr= a_is_scalarr> b_is_scalar div_or_mod scalar_ty tensor_tys rcomputation_type_implz$TritonSemantic.computation_type_implCs + %3>D$T\\^::  <<>T\\^::  <<>T\\^zz!zz! <<>dllnzz!{{" <<>T\\^::  ;;=T[[]4<4 7RZZ 7{{}DKKM.tfE$@A A $--1D1DD9DMMOKgUX\XeXeXggoop p((t44rct|tr9|j|jj |t j St|trd|cxkrdkrnnt j}nld|cxkrdkrnnt j}nMd|cxkrdkrnnt j}n.d|cxkrdkrnnt j}ntd|d|j|| St|trrd }d d d zz}td|}|tdk(s|dk(s||k7s||cxkr|krnnt j }nt j"}|j|| St|t j$r|j'|j(St||jr|S|rt+d|dt-|d|S)NllzNonrepresentable integer .r9g8g?r absinfgzcannot convert z of type z to tensor) isinstanceboolr&r'get_int1r.int1intr/uint32int64uint64r,scalar_constantfloat __builtins__rKrM constexpr to_tensorrHr<type)rx check_typer9 min_float32 max_float32abs_xs rrozTritonSemantic.to_tensorus a ;;t||44Q7A A 3 "U"!#e# 1$u$!#e#  #>!''* * 4;; 'H oaS $q'*MN Nrc|jrL|s t|||jr||k7r t|||jr t||yyr))is_ptrr is_floating)rrr allow_ptr_as rcheck_ptr_type_implz"TritonSemantic.check_ptr_type_impls[ ==?/??}}Ff$4/??!!#/??$ rc&t|tj}t|tj}|r|} |j|}|r|} |j|}|jj } |jj } |j | | ||j | | ||rF| js5| js$|j| || ||} |r dkr| js|r  dkr| jr td| jrx|r:| j cxkr| jksntd| d| |r:| j cxkr| jksntd| d| |r|j | n|j|| }|r|j | n|j|| }|j!||\}}||fS)Nrz{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type r_)rcnumbersNumberrorpscalarrzrwrXis_int_unsignedr,rRget_int_min_valueget_int_max_valuerkcastbroadcast_impl_value)rlhsrhs allow_lhs_ptr allow_rhs_ptrarithmetic_checkrU lhs_is_scalar rhs_is_scalar lhs_scalar rhs_scalar lhs_sca_ty rhs_sca_ty ret_sca_tys rbinary_op_type_checking_implz+TritonSemantic.binary_op_type_checking_impls#37 "37 J..%C J..%CXX__ XX__    ZG   ZG J$5$5$7 @Q@Q@S33J z[hjtuJ*q.Z5O5O5Q$aJ(>(@:; ;  ! ! #O,B,B,D %5E#jj//O#jj//O  ! ! # <(>(@..PFIIeV,EIIeV,E##%{{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*?*;<==rcz|jj}|jj}|jr|js td|j ||dddd\}}|j j |j|j}|j||jS)Nz4both operands of fdiv must have floating scalar typeFT) rpr~rxr<rr'rrr&)rrr ieee_roundingrrrs rfdivzTritonSemantic.fdivEs**++**++**,O4O4O4QRS S88ueUZ\`a ull&&u||U\\B{{3 ++rc\|j||dddd\}}|jj}|jj}|jrJ|j |j j |j|j|jS|jr|j|jk7r3td|jzdz|jzdz|jrJ|j |j j|j|j|jS|j |j j|j|j|jStd|)NFTz Cannot mod z by rFrE)rrpr~rxr&r' create_fremrrRr8r<rr create_srem create_urem)rrrrVrs rmodzTritonSemantic.modNs`88ueUY[_` uJJ%% **++  ";;t||77 ellSUZU_U_` `    ''?+I+II 0B0B0D Dv MP_PhPhPj jns!stt&&({{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*9+677rc|j||\}}|j}|jr|tjj k(rJ|j |jj|j|j|jS|tjjk(rJ|j |jj|j|j|jStd||jrJ|j |jj|j|j|jS|j!rJ|j |jj#|j|j|jSt%d|NzUnexpected propagate_nan Unexpected dtype )rr9rxr. PropagateNanALLr&r'create_minimumfrrpNONEcreate_minnumfr,r create_minsir create_minuir<rrqy propagate_nanr9s rminimumzTritonSemantic.minimumeM00A61     3 33{{4<<#?#?!((#SUVU[U[\\"//"6"66{{4<<#>#>qxx#RTUTZTZ[[ #<]O!LMM  ";;t||88188LaffU U  " " $;;t||88188LaffU U/w78 8rc|j||\}}|j}|jr|tjj k(rJ|j |jj|j|j|jS|tjjk(rJ|j |jj|j|j|jStd||jrJ|j |jj|j|j|jS|j!rJ|j |jj#|j|j|jSt%d|r)rr9rxr.rrr&r'create_maximumfrrprcreate_maxnumfr,r create_maxsir create_maxuir<rs rmaximumzTritonSemantic.maximumvrrc|j||\}}|j||\}}|j||\}}|j}|jrV|j|jj |j |j |j ||jStd|d)Nrz(. Only floating point clamp is supported) rr9rxr&r' create_clampfrrpr<)rrqminmaxrr9s rclampzTritonSemantic.clamps44S#>S221c:3221c:3    ;;t||99!((CJJPSPZPZ\ijlmlrlrs s/w6^_` `rcd|j||\}}|jj}|jj}|jr|js t |||j ||}||k7r|j ||}||k7r|j ||}||fSr))rrpr~rRrrCr)rrr input_sca_ty other_sca_tyrs rbitwise_op_type_checking_implz,TritonSemantic.bitwise_op_type_checking_impls88F uzz(( zz(( ""$L,?,?,A+L,G G..|\J  %IIeZ0E  %IIeZ0Ee|rc|j||\}}|j|jj|j|j|j Sr))rr&r' create_andrrprrrs rrzTritonSemantic.and_I99%G u{{4<<225<<NPUPZPZ[[rc|j||\}}|j|jj|j|j|j Sr))rr&r' create_orrrprs ror_zTritonSemantic.or_sF99%G u{{4<<11%,, MuzzZZrc|j||\}}|j|jj|j|j|j Sr))rr&r' create_xorrrprs rxor_zTritonSemantic.xor_rrc|jjs |j|tj}|jjs |j|tj}|j ||Sr))rpis_int1bitcastr.rfrrs r logical_andzTritonSemantic.logical_ands[zz!!#LL0Ezz!!#LL0Eyy&&rc|jjs |j|tj}|jjs |j|tj}|j ||Sr))rprrr.rfrrs r logical_orzTritonSemantic.logical_ors[zz!!#LL0Ezz!!#LL0Exxu%%rc|jjs |j|tj}|j |Sr))rprrr.rfinvertrrs rnot_zTritonSemantic.not_s5zz!!#LL0E{{5!!rc|j||\}}|j|jj|j|j|j Sr))rr&r' create_lshrrrprs rlshrzTritonSemantic.lshrI99%G u{{4<<33ELL%,,OQVQ[Q[\\rc|j||\}}|j|jj|j|j|j Sr))rr&r' create_ashrrrprs rashrzTritonSemantic.ashrrrc|j||\}}|j|jj|j|j|j Sr))rr&r' create_shlrrprs rshlzTritonSemantic.shlrrc|Sr)rs rpluszTritonSemantic.pluss rc:|jj}|jrtd|j zdz|j |j j|j|j |}|j||dS)Nz$wrong type argument to unary minus ()T) rpr~rwr,rr&r'get_null_valuerr)rrr_0s rrzTritonSemantic.minusszz((    ClF[F[F]]`ccd d [[44\5G5G 5UVXd exxE4((rcX|jj}|js|jrt d|j zdz|j |jj|j|j|}|j||S)Nz%wrong type argument to unary invert (r ) rpr~rwrxr,rr&r'get_all_ones_valuerr)rrr_1s rrzTritonSemantic.invertszz((    L$<$<$>D|G\G\G^^adde e [[889K9KDLL9YZ\h iyy##rcT|jjtjSr))rprr.rf)rvs r _bool_likezTritonSemantic._bool_likesvv%%bgg..rc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jr|jrO|j |j j|j|j|j|S|j |j j|j|j|j|Std|r)rrpr~rxr&r'create_fcmpOGTrrrRrcreate_icmpSGTcreate_icmpUGTr<rrrrVs r greater_thanzTritonSemantic.greater_than88F uJJ%%  ";;t||::5<<VX\XgXghmXno o    &&({{4<<#>#>u||U\\#Z\`\k\klq\rss{{4<<#>#>u||U\\#Z\`\k\klq\rss*9+677rc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jr|jrO|j |j j|j|j|j|S|j |j j|j|j|j|Std|r)rrpr~rxr&r'create_fcmpOGErrrRrcreate_icmpSGEcreate_icmpUGEr<rs rrzTritonSemantic.greater_equalrrc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jr|jrO|j |j j|j|j|j|S|j |j j|j|j|j|Std|r)rrpr~rxr&r'create_fcmpOLTrrrRrcreate_icmpSLTcreate_icmpULTr<rs r less_thanzTritonSemantic.less_thanrrc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jr|jrO|j |j j|j|j|j|S|j |j j|j|j|j|Std|r)rrpr~rxr&r'create_fcmpOLErrrRrcreate_icmpSLEcreate_icmpULEr<rs rrzTritonSemantic.less_equalrrc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jrO|j |j j|j|j|j|Std|r) rrpr~rxr&r'create_fcmpOEQrrrR create_icmpEQr<rs requalzTritonSemantic.equal"88F uJJ%%  ";;t||::5<<VX\XgXghmXno o    ;;t||99%,, UW[WfWfglWmn n*9+677rc|j||\}}|jj}|jrO|j |j j |j|j|j|S|jrO|j |j j|j|j|j|Std|r) rrpr~rxr&r'create_fcmpUNErrrR create_icmpNEr<rs r not_equalzTritonSemantic.not_equal-r.rN)rct|trt|ts tdt|dz }t|dz }|s|r td||kr td||z }||dz zdk7r td|g}|$t j tj |}|j|j}|j|jj||||S)Nz/arange's arguments must be of type tl.constexpr zarange must fit in int32z=arange's end argument must be greater than the start argumentrrz#arange's range must be a power of 2) rcrgr,rdr. block_typer/rr'r&create_make_range) rstartendris_start_int64 is_end_int64rangeshape ret_ty_irs rarangezTritonSemantic.arange<s%%ZS-ANO Oerk*C2I \78 8 %<\] ]e  UQY A %BC C >]]288U3FLL. {{4<<99)UCPRXYYrc | td|dk(r5|jj|j|j}n+t |jd|j }||}|j ||S)Nz2dtype must be specified when value is not a tensorrget_)r,r'rrgetattrnamer&)rrHr9 get_value_fns rrkzTritonSemantic.scalar_constantNsq =QR R A:LL// DLL0IJE"4<<4 |1DEL 'E{{5%((rct|tjr2|jjdk(sJd|j ||S|j ||S)Nrzonly accepts size-1 tensor)rcr.r&numelrHrrk)rrHr9s r make_scalarzTritonSemantic.make_scalarYsR eRYY ';;$$) G+G G)99UE* *##E511rcF|j|j|||Sr))splatrF)rr<rHr9s rfullzTritonSemantic.full`s zz$**5%8%@@rcB|jjrJdt|dk(r|Stj|j |}|j |jj|j|j|j|S)NzCannot splat a block tensorr) rpis_blocklenr.r5r9r&r' create_splatrr)rrHr<rs rrHzTritonSemantic.splatgsy::&&(G*GG( u:?Lu{{E2{{4<<44V\\$,,5OQVQ]Q]^`fggrc|j|jj|j|jSr))r&r'create_unsplatrr9)rrHs runsplatzTritonSemantic.unsplatns*{{4<<66u||DekkRRrc(d}|D]}||z} |jj|k7r tdtj|jj |}|j |jj|j|||S)Nrz:reshape() cannot change total number of elements in tensor) rprEr,r.r5r~r&r'create_reshaper)rr dst_shape can_reorderrEsrs rreshapezTritonSemantic.reshapeqs A QJE  ::  u $YZ Zuzz00)<{{4<<66u||YP[\^deerc|jDcgc]}tj|}}|j|d|jj s|j ||Stj|jj|}|j|jj|j||Scc}w)Nr)r<) r<r._unwrap_if_constexprinsertrprKrHr5r~r&r'create_expand_dimsr)rrr1rqrSrs r expand_dimszTritonSemantic.expand_dimszs9>EAR,,Q/E Eq!zz""$::e9:5 5uzz00)<{{4<<::5<<NPVWWFsC cX|sJdt|jdk(sJtj|jj |jd|jdzg}|j |jj|j|j|S)Nz;current implementation of `cat` always may reorder elementsrr) rLr<r.r5rpr~r&r' create_catr)rrrrTret_types rcatzTritonSemantic.catsYYY{399~"""==399Q<#))A,3N2OP{{4<<223::szzJHUUrc:|j||\}}|jgk(}|r$|j|d}|j|d}t|jdtj rt j d}nd}|j|gz}t j |jj|}|j|jj|j|j|}|r|j|dgd}|S)Nrr FrT)rr<r[rcr.rnr5rpr~r&r' create_joinrrV)rab was_rank_1two new_shaper^rs rjoinzTritonSemantic.joins((A.1WW]   A&A  A&A aggbk2<< 0,,q/CCGGseO == :kk$,,22188QXXFQ ,,sQCU,;C rct|jdkDsJtj|jddk(sJ|jdd}tj|j j |}|jj|j\}}|j|||j||fS)Nrrar ) rLr<r.rXr5rpr~r' create_splitrr&)rrdrhr^outLHSoutRHSs rsplitzTritonSemantic.splitsAGG q ! '' 49:9GGCRL == :22188< KK ) KK )  rct|jt|k7r tdtd|Dt t t|k7rtd|t j|jj|Dcgc]}|j|c}}|j|jj|j||Scc}w)Nz5permute dims must have the same length as input shapec3FK|]}tj|ywr))r.rX).0ds r z)TritonSemantic.permute..s;"))!,;s!z?permute dims must be a permutation of 0, 1, ..., n-1, but were )rLr<r,sortedlistr;r.r5rpr~r&r' create_transr)rrdimsrrr^s rpermutezTritonSemantic.permutes u{{ s4y (TU U ;d; ;tE#d)DT?U U^_c^def f==!2!2T4RU[[^4RS{{4<<44U\\4H(SS5SsC& c (|jjs|j||S|jj}t |t |k7rt d|d|||k(r|St |D]0\}}|||k7s|dk7st d||d|d|d|d| tj|jj|}|j|jj|j||S)Nz!Cannot broadcast, rank mismatch: z, rz3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rprKrHget_block_shapesrLr, enumerater.r5r~r&r'create_broadcastr)rrr< src_shapeiitemrs rbroadcast_impl_shapez#TritonSemantic.broadcast_impl_shapeszz""$::eU+ +JJ//1 y>SZ '@ 2eWUV V I L + @GAtQx4DAI #VW\]^W_V`aCCG&I%%&Cr)Bug"?@@ @ uzz00%8{{4<<88uMvVVrc x|j}|j}|jr~|jsn|j|j}|j |j j |j|j |j|}||fS|js~|jrn|j|j}|j |j j |j|j |j|}||fS|jr|jr|j}|j}t|t|krtt|t|D]}|j |j j|jdtj|jdg|jz}|j}|j}nt|t|krtt|t|D]}|j |j j|jdtj|jdg|jz}|j}|j}t|t|k(sJg}t!|D]q\} } || } | dk(r|j#| "| dk(s| | k(r|j#| >t%dt'| zdzt'| zdzt'| z||k7rVtj|j|} |j |j j)|j|| }||k7rVtj|j|} |j |j j)|j|| }||fS)Nrrz?Cannot make_shape_compatible: incompatible dimensions at index rzr)rprKrr~r&r'rMrrr{rLr;rZr.r5valuesr|appendr,strr}) rrrlhs_tyrhs_ty lhs_shape rhs_shape_ ret_shaperleftrightrs rrz#TritonSemantic.broadcast_impl_values ?? V__%6++FMM:F++dll77 T\\8RTWT^T^_aghCVCxS"v'8++FMM:F++dll77 T\\8RTWT^T^_aghCNCxK__ 6??#4//1I//1I9~I.s9~s9~>:A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I : Y#i.0s9~s9~>:A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I : y>S^3 33I$Y/ e4!! 19$$U+qjetm$$T*$&136q6&:<@&ACFt9&MOV&WY\]bYc&dee eI%v}}i@kk$,,"?"? I"VX^_I%v}}i@kk$,,"?"? I"VX^_Cxrc|y|dk(rtjjS|dk(rtjjSt d|d)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r ROUNDING_MODERTNERTZr,)r rounding_modes r_str_to_rounding_modez$TritonSemantic._str_to_rounding_modesU   F "##(( ( E !##'' '2=/Aqrssrc6|j}|jr|j|j}||k(r|S|j}|j}|j s|j r|j ||S|j }|j }||k7r&tdt|zdzt|z|j|jj|j|j|j|S)Nz!Cannot bitcast data-type of size z to data-type of size )rprKrr~rwrprimitive_bitwidthr,rr&r'create_bitcastrr)rrdst_tysrc_ty src_sca_ty dst_sca_tysrc_bitsdst_bitss rrzTritonSemantic.bitcasts ?? ++FMM:F V L]] ]]    *"3"3"599UF+ +0000 x @3x=PT2247MBC C{{4<<66u||V\\RVR^R^E_`bhiirc|j}|j}|j}||k(r|S|jr|j|}|j |}d}|j rf|j rV|j |j kr=|tjj}nH|tjjk7r+d}n(|&tdt|zdzt|z|js|jrP|jjjd Jd|jjd||||S|j!r|j s"|j r|j!s|rP|j#|jj%|j&|j)|j||S|j+r|j-r |j/r@|j-s0|j1|j1|t2j4|S|j xr+|j xr|j |j kD}|rO|j#|jj7|j&|j)|j|S|j xr+|j xr|j |j k} | rO|j#|jj9|j&|j)|j|S|j;r2|j;r!|j<|j<k7s|j>|j>k7r|jAxr|jC } |jCrl|jDj)|j} |j#|jjG| |jD} |jI|| S|j#|jjK|j&|j)|j| |S|jMr;|j;r*|jCrl|jDj)|j} |j#|jjG| |jD} |jI|| S|jArO|j#|jjO|j&|j)|j|S|j#|jjQ|j&|j)|j|S|j;r|jMr|jCs|jAsO|j#|jjS|j&|j)|j|S|j#|jjU|j&|j)|j|S|jWr|j;r|j<} | dk(rO|j#|jjY|j&|j)|j|S| d k(rg|jI|j1|t2jZ|j#|jj]d t2jZS|j;r_|jWrO|j#|jj_|j&|j)|j|S|jWr_|jWrO|j#|jja|j&|j)|j|SJd |d |) NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.) _semanticrrrz cannot cast z to )1rpr~rKrrrxrr rrr,r is_fp8e4b15r' codegen_fnsgetrQr&create_fp_to_fprrrOrNrPrr.rKcreate_fp_trunc create_fp_extrRr7r8ris_boolr9rr2ris_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprwcreate_ptr_to_intri get_int64create_int_to_ptrr)rrrfp_downcast_roundingrrruse_custom_rounding truncate_fpext_fp sign_extendtyrbitwidths rrzTritonSemantic.casts]] ]]  #L ?? ++J7F $99:NO#  ! ! # (>(>) ++j.K.KK#+BDTDTDYDY-A%)9)9)>)>>VZ@S#/ ":(>(@<<++//&(/34 h5g h4C4<<++,BCE6Sgswx x    J$:$:$<  " " $):):)< ;; ,,U\\6<< ;UWklntv v    ););)=    ););)=99TYYubjj9:F F !,,.J  " " $J  ) )J,I,I I  ;;t||;;ELL&,,W[WcWcJdegmn n '')J  " " $J  ) )J,I,I I  ;;t||99%,, UYUaUaHbcekl l    :#4#4#6  # #z'>'> >*B[B[_i_x_xBx$224QZ=O=O=Q9QK!!#[[&&t||4[[!2B.!QRR rcdtjj}|r|dk(rtjj}|S|dk(rtjj}|S|dk(rtjj }|S|dk(rtjj }|Std|d|S)Nz.wbrz.csz.wtrr)r rrWBrCSWTr,rs r_str_to_store_cache_modifierz+TritonSemantic._str_to_store_cache_modifiers!!&& &)),,  5()),,  5()),,   5()),, !?>2B.!QRR rctjj}|rQ|dk(rtjj}|S|dk(rtjj}|St d|d|S)N evict_last evict_firstzEviction policy r)r EVICTION_POLICYNORMAL EVICT_LAST EVICT_FIRSTr,)reviction_policyevictions r_str_to_eviction_policyz&TritonSemantic._str_to_eviction_policysu%%,, ,.--88  !M1--99!#3O3DN!STTrcd}|rQ|dk(rtjj}|S|dk(rtjj}|St d|d|S)NzeronanzPadding option r)r PADDING_OPTIONPAD_ZEROPAD_NANr,)rpadding_optionpaddings r_str_to_padding_optionz%TritonSemantic._str_to_padding_optionsh '++44   5(++33!?>2B.!QRRrcdtjj}|r|dk(rtjj}|S|dk(rtjj}|S|dk(rtjj}|S|dk(rtjj }|St d|d|S)Nacquirereleaseacq_relrelaxedMemory semantic r)r MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr,)r sem_optionsems r _str_to_semzTritonSemantic._str_to_semsoo-- Y&oo-- y(oo-- y(oo55  y(oo-- !#3J<~!NOO rc"tjj}|rr|dk(rtjj}|S|dk(rtjj}|S|dk(rtjj}|St d|d|S)Ngpuctasysrr)r MEM_SYNC_SCOPEGPUCTASYSTEMr,)r scope_optionscopes r _str_to_scopezTritonSemantic._str_to_scopes!!%% u$))-- &))--  &))00 !#3L>!PQQ rc~|rt|ds|g}|Dcgc]*}t|tjr |jn|,}}|D]+}t|t rd|cxkrt |kr(JJt |dkDsJt |t t|k(sJdt|Sycc}w)N__iter__rz'Duplicate dimension in `boundary_check`r ) hasattrrcr.rnrHrgrLsetrt)rboundary_check block_shapeelemdims r_canonicalize_boundary_checkz+TritonSemantic._canonicalize_boundary_checks >:6"0!1aopY]JtR\\,JdjjPTTpNp% L!#s+S0K3{;K0KKK0KKK L~&* **~&#c..A*BB mDm mB.) ) qs/B:c || td|jjj} | tjk7sJd| j r(|t jjk(r td|jj} |j|| j}|j|jj|j|||||| S)NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r,rp element_tyr.rfrRr rrrr{r&r'create_tensor_pointer_loadr) rptrmaskrrrrr is_volatileelt_tyrs r_load_block_pointerz"TritonSemantic._load_block_pointers  u0jk k$$// X"XX ==?w"*;*;*C*CC_` `$$::>6KbKbKde{{ LL 3 3CJJPWY^`hju v  rc  |jjjs'td|jj d| | td|s|r td|jj sN|r%|jj r td|r%|jj r td|jj r.||j ||\}}||j ||\}}|jj} | j} | tjk(} | rBtj} tj| | j} |j|| }||j|| }|jj r|jj| } n| } |9|j|j j#|j$|||| } nR|j|j j'|j$|j$|r |j$nd|||| } | r |j| tj} | S)NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rpr~rwr,rrKrrr.rfint8 pointer_type address_spacerrr&r' create_loadrcreate_masked_load)rrrrrrrrrptr_tyrrrrs r _load_legacyzTritonSemantic._load_legacys9xx%%'4SXX5F5F5H4IWX X V[[B->?rcB|tjtjtjtjtj tj hvsJd|tj tj hvr|jsJdyy)Nr)z-16-bit float types require native tma support)r.rhr/rjrirIrJr7)rr9s r$_descriptor_atomic_min_max_supportedz3TritonSemantic._descriptor_atomic_min_max_supportedfsmBHHbii2::r{{[[p]pp[ RZZ- -'') Z+Z Z) .rcX|j||||j|j|j|d}tj j }|j|jj||j|j|tjSr#) r!r9r9rr r*MINr&r'r,rr.r%r-s rdescriptor_atomic_minz$TritonSemantic.descriptor_atomic_mink   ug6 11$**=,,W%,H((,,{{4<<@@t{{TYT`T`bijlnlslsttrcX|j||||j|j|j|d}tj j }|j|jj||j|j|tjSr#) r!r9r9rr r*MAXr&r'r,rr.r%r-s rdescriptor_atomic_maxz$TritonSemantic.descriptor_atomic_maxrr=rc|j||||jtjtjtj tj hvsJd|j|d}tjj}|j|jj||j|j|tjSr()r!r9r.rhr/rjrirr r*ANDr&r'r,rr%r-s rdescriptor_atomic_andz$TritonSemantic.descriptor_atomic_andy   ug6zzbii299bhhGG\I\\G,,W%,H((,,{{4<<@@t{{TYT`T`bijlnlslsttrc|j||||jtjtjtj tj hvsJd|j|d}tjj}|j|jj||j|j|tjSr()r!r9r.rhr/rjrirr r*ORr&r'r,rr%r-s rdescriptor_atomic_orz#TritonSemantic.descriptor_atomic_ors   ug6zzbii299bhhGG\I\\G,,W%,H((++{{4<<@@t{{TYT`T`bijlnlslsttrc|j||||jtjtjtj tj hvsJd|j|d}tjj}|j|jj||j|j|tjSr()r!r9r.rhr/rjrirr r*XORr&r'r,rr%r-s rdescriptor_atomic_xorz$TritonSemantic.descriptor_atomic_xorrDrct|tjsJ|dk(sJd|dk(sJdt|jdk(sJd|j|jddk(sJd|jt|j dk(sJd |j |j dd k\sJd |j |j }d |jzd z}|jd|k\sJd |d|d|jdtj|j |j d|jdg}|j|fdd}|jj|j|j||j|j} |j| |S)Nz#cache modifier is not supported yetz$eviction policy is not supported yetr descriptor must be 2D, but got rr*descriptor block must have 1 row, but got x offsets must be 1D, but got z5descriptor gather must have at least 8 rows, but got r4zdescriptor gather of  must have at least  columns, but got Fr)rcr.rrLrr<r9rr5rr'create_descriptor_gatherrrr&) rr x_offsetsy_offsetrrr9min_colsrprqs rdescriptor_gatherz TritonSemantic.descriptor_gathers$ 9 9:::#J%JJ#"$L&LL$4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh'9??#q(\,J9??J[*\\(q!Q&q*_`i`o`o_p(qq& 111A5  A3E7:NxjXjkok{k{|}k~jA A}}TZZ)//!*N>Nq>Q)RS--xl-NqQ LL 1 1$++y?O?OQY[_[e[efjfrfr[s t{{1d##rct|tjsJt|jdk(sJd|j|jddk(sJd|jt|j dk(sJd|j |j ddk\sJd|j |j}d |jzdz}|jd|k\sJd |d |d |jd|j|fd d}|jj|j|j|j||jdtjS)Nr rMrrrNrOrPz6descriptor scatter must have at least 8 rows, but got r4zdescriptor scatter of rQrRFr)rcr.rrLrr<shapaer9rrr'create_descriptor_scatterrr&r%)rrrHrTrUr9rVs rdescriptor_scatterz!TritonSemantic.descriptor_scatters$ 9 9:::4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh'9??#q(],J9K[K[J\*]](q!Q&r*`ajapap`q(rr& 111A5  B4UG;OPXzYklpl|l|}~llAB B--xl-NqQ ..t{{ELL)JZJZ\de{{4))rc | td|jjj}|jj s|j ||}|jj sJd||jjk(s&Jd|d|jjd|jjj|jjk(s@Jd|jjjd|jjd|jjj}|t jk7sJd|j||}|j||}|j|jj|j|j|||t jS) Nrz-Value argument must be block type or a scalarz Block shape(z) and value shape(z ) mismatchzBlock element type(z) and value element type(r)r,rprr{rKrr.rfrrr&r'create_tensor_pointer_storerr%) rrvalrrrrrrs r_store_block_pointerz#TritonSemantic._store_block_pointers  jk khh))::< xx  "++C=Cxx  "S$SS"chh77   a +&89R9R9T8UU_ ` a xx""--1D1DD uH[\_\d\d\o\o\z\z[{|UVYV^V^ViViUjjtGu uD$$// X"XX ::>;WiiV${{ LL 4 4SZZ^]bdl moqovovx xrc (|jjjs'td|jj d|r td|jj sL|jj r td|r%|jj r td|jj rV|j ||jj}|*|j ||jj}|jj}|j}|tjk(rBtj}tj||j}|j||}|j||}|P|j|j j#|j$|j$||tj&S|jjj)s td|j|j j+|j$|j$|j$||tj&S)Nr z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr "Mask must have boolean scalar type)rpr~rwr,rrKrr{rr.rfr r r rr&r' create_storerr%rcreate_masked_store) rrr^rrrrrrs r _store_legacyzTritonSemantic._store_legacysxx%%'4SXX5F5F5H4IXY Y EF F xx  "xx  " !ijj **, !hii 88   ++C1J1J1LMC00sxx7P7P7RS"" RWW WWF__VV-A-ABF))C(CiiV$ <;;t||88SZZQVX`acecjcjk kyy'')AB B{{4<<;;CJJ TXT_T_afhpq77$ $rc|j|}|j|}|jjs$|jjjr t d|jj r:|jjjr|j||||||S|j||||||S)N"Cannot store to a constant pointer) rrrpis_constr~r,rwrrKr_rd) rrr^rrrrrrs rstorezTritonSemantic.stores11.A//@ 88   #((//":":"<AB B 88?? !4!4!=!=!?,,S#t^UT\] ]%%c3neXV Vrc f|j|}|j|}|jjj}|j dvr t d|j|jj|j|j|j|||jS)N)r4rz9atomic_cas only supports elements with width {16, 32, 64}) rrrpr~rrr,r&r'create_atomic_casr)rrcmpr^rrrs r atomic_caszTritonSemantic.atomic_casss#""5)XX__//  ( ( <XY Y{{4<<99#**cjjRUR\R\^achiknksksttrc`|jjjs&td|jj z|jj s$|jj j r td|jjj }|tjur|dk7rtd|zdz|tjur|dk7rtd|zdz|tjtjfvs|jdkrtd|zdzt|z|jjrX|*|j||jj!}|*|j||jj!}|j#||jjj }||j$j'd }tj(}|jjr^|jj+tj(}|j$j-|j/|j$|}|j1||}|||fS) Nz)Pointer argument of store instruction is rfratomic_z does not support fp16z does not support bf16rjz does not support T)rpr~rwr,rrgrr.rIrJint16uint16rrrKrr{rr'rerfrrMrr&)rrr^roprmask_irmask_tys ratom_red_typechecking_implz)TritonSemantic.atom_red_typechecking_implsxx%%'H388K\K\K^^_ _ 88   #(("5"5">">"@AB BXX__//  #e Y^.FFG G  $uY^.FFG G "((BII. .*2O2ORT2TY^.BBS_TU U 88   00sxx7P7P7RS//SXX5N5N5PQiiSXX__778 <ll++D1GggGxx  "((22277;,,33GMM$,,4OQXY;;w0DC~rc|jj}tj|d}|j ||}|j ||dz }|j |tjS)NF)rsignedr)r9rr. get_int_dtyperrrrf)rrqridtypeixsignbits r_signbitzTritonSemantic._signbit;s\77--!!8EB \\!V $))B1 -yy"''**rc <|j|||d\}}}|j|}|j|}|jj}|j r|j rp|j|jjtjj|j|j|j|||jS|j|jjtjj|j|j|j|||jS|tj tj"hvrt%d||tj k(rtj&ntj(}|j+||}|j+|tj,|d} |tj k(rtj.ntj0} |j+|| } |j+|tj,| d} |j3|} |j5| }|j|jjtjj| j|j|j7||j|||j}|j|jjtjj8| j| j|j7|| j||| j}|j;|||}|j+||S)Nrz#atomic_max not supported for dtype r)rurrrpr~rRrr&r'create_atomic_rmwr ATOMIC_OPr?rUMAXr.rKrMr<r/rirr rhrjr|rrUMINwhererrr^rrrsca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retrs r atomic_maxzTritonSemantic.atomic_maxB88c4OS$s#""5) ==?##%{{LL222<<3C3CSZZQTQ[Q[]a]h]hjmotuHH{{LL222<<3D3DcjjRUR\R\^b^i^iknpuvHH "**bjj1 1A&JK K#rzz1rxx S&) S"//&!"<=%3"))c7+c2??7A#>?mmC iin++ LL * *2<<+;+;U\\5<<+/99T3+?+F+FU TUZU_U_a++ LL * *2<<+<+*>*[*[[ y-dll.B.B._._-``fgvfw x y[)//1 h &&Or))?;;rc |jjr|jjsJ|jjr|jjrn2|jtj tj tjtjtjtjfvsJd|j|jtj tj tjtjtjtjfvsJd|j|j|jk(s!Jd|jd|j|jjs|jjrwd|jjjvrtj d|j#|tj}|j#|tj}|jj%xs|jj%}|jj'xs|jj'}|s|r|rdnd} | |jjjvr~|jjj(} tj | d | d | d |j#|tj}|j#|tj}| |jjj*}|j-|}t/|j0} t/|j0} | | cxk(rd k(s1n| | cxk(rd k(s$nJd|j0d|j0d|j0dj2|j0dj2k(sVJd|j0d|j0d|j0dj2d|j0dj2d |jj4j7d Jd|jj4d|j|j} |j0dj2| dk\r>|j0dj2| d k\r|j0dj2| dk\sJd| dd| dd| d |jj8j;r[|jj8tj k(sJd|jj=d}tj>}n'|jAr tCd|jj8jEs$|jj8jAr,|jjGd}tj}n|jj8jIr,|jjKd}tj}nH|jMr|jjOdn|jjGd}|}|jj0d}|jj0d}|jj0d}| d k(r|jj0dnd}t jP||r|||gn||g}|6|jjS|jU|j|}nJ|jV}|jj0|j0k(r|jjX|k(sJ|X|jjr;|jjr!|jjjZ}nNd}nK|jjr1|jjr||kDrtCd |d!|d|j]|jj_|jV|jV||||S)"NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got rfp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releasefp8e4b8fp8e5b16z- is AMD gfx942 specific and not supported on z^ so it's upcasted to fp16 and can cause significant slow down. Please use OCP fp8 variants on z for performancer +Both inputs must be either 2D or 3D; (lhs: vs rhs: r razFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape ( min_dot_sizez2target doesn't provide lower shape bounds for dot.rrzInput shapes should have M >= z, N >= z and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()0rprKr9rQr.r uint8rIrJrKrMrr'r!deprecated_fp8_dot_operand_dtypeswarningswarnr is_fp8e4b8 is_fp8e5b16r5default_dot_input_precisionrrLr<rHrrr~rR get_int32r/rPr,rNget_fp32rLget_fp64rOget_fp16r5rMrrrmax_num_imprecise_acc_defaultr& create_dot)rrraccrmax_num_imprecise_acc out_dtype uses_fp8e4b8 uses_fp8e5b16 type_namer5lhs_rankrhs_rankrr ret_scalar_tyMNKBr acc_handles rdotzTritonSemantic.dotsxx  "sxx'8'8'::: 99   #))"2"2"4 99"((BJJ RZZ!#!-- S0Fsyyk.R S-99"((BJJ RZZ!#!-- S0Fsyyk.R S-99 ) o-STWT]T]S^^cdgdmdmcn+o o) 99 "cii&;&;&=T\\11SSS m))C,C))C,Cyy++-G1E1E1G  --/J3993H3H3J =%1 zIDLL00RRR||++00  k!NtfU66:V;KMNiiRZZ0iiRZZ0  ""ll22NNO::?Ksyy>syy>8(q(H,EA,E VItuxu~u~t@IJMJSJSITTUHV VEyy}""cii ' u,SYYK7PQTQZQZP[\YZ]ZcZcdfZgZmZmYnn^_b_h_hik_l_r_r^sstu u||''++ #'( ^)] ^(?t||//?#((S yy}""l1o5#))B-:M:MQ]^_Q`:` " ##|A6 v0a0AVWHYYcdpqrdsctu v7 88?? ! ! #88??bgg- E/E E-''*BHHM    z XX__ $ $ &#((//*A*A*C&&q)BJJM XX__ $ $ &&&q)BJJM-6->->-@&&q)dllF[F[\]F^B%M HHNN2  HHNN2  HHNN2 !)QCHHNN1 D}1q!Qi1a&I ;226<< 3MrRJJ88>>V\\1chh6I6IY6V VV ! (yy!cii&6&6&8(, (<(<(Z(Z%()%yy!cii&6&6&8=RUV=V #:;P:QQabcadde!fgg{{ LL # #CJJ JYn oqwy yrczttj|jd}|t d|d|S)NzInvalid float format: r^)rAr ScaleDotElemTypeTYrr,)r float_formatty_enums r_str_to_fp_typezTritonSemantic._str_to_fp_types>"//1C1C1EtL ?5l^1EF Frc<tjtjtjtjdj |}|B|dk(s Jd||j tjk(sJd|j |S|j |k(r|Stjtjtjtjd|}|j |k(sJd|d|j |j||S)z If float_format is subbyte, make sure it's packed as uint8 and return it. Otherwise, return a tensor (perhaps bitcasting) of the specified float format. )e5m2e4m3bf16fp16e2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r) r.float8e5 float8e4nvrJrIrr9rrqr)rr^r triton_ty unsigned_tys r_bitcast_to_fp_typez"TritonSemantic._bitcast_to_fp_type#s  [["--ZZ!!$\!2   6) e-VWcVd+e e)99( a,UVYV_V_U`*a a(J 99 !J#%88RXXryyZ\ZcZcdeqrK99 + d/D\NRXY\YbYbXc-d d+<<Y/ /rc |jjr|jjsJt|j} t|j} | | cxk(rdk(s1n| | cxk(rdk(s$nJd|jd|jd|j}|j}|j |}|j |}hd}||vs Jd|||vs Jd||duxs*t |tjxr|jdu}|duxs*t |tjxr|jdu}|j||}|j||}| s |d k(sJd | s |d k(sJd |jjd d\}}|jjd d\}}|d k(rdnd }|d k(rdnd }| r||zn|}| r||zn|}||k(s"Jd |jd|jd| dk(r|jjdnd}| s||z}| s||z}tj| |r|||gn||g}|jjd}|6|jj|j|j|}nJ|j}|jj|jk(r|jj | k(sJ|rdn |j}|rdn |j} |j#|jj%|j| ||j|||| | | |S)Nr rrrr >rrrrrzNYI: lhs_format zNYI: rhs_format rzBonly mxfp4 inputs can be packed along a dimension different than KrrzCReduction dimension should pack the same number of elements; (lhs: r)rprKrLr<rHrrcr.rnrr5r'rrMrrrr&create_dot_scaled)!rr lhs_scale lhs_formatr rhs_scale rhs_formatr fast_math lhs_k_pack rhs_k_packrrrlhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_nonerK_LHSK_RHSrPACKED_APACKED_B PACKED_A_DIM PACKED_B_DIMrrrrrhs_scale_handlelhs_scale_handles! r dot_scaledzTritonSemantic.dot_scaled5sxx  "sxx'8'8':::syy>syy>8(q(H,EA,E VItuxu~u~t@IJMJSJSITTUHV VE$** $** ..z:..z:B_,M0@ .MM,_,M0@ .MM,%-r*Y 2U2qZcZiZimqZq%-r*Y 2U2qZcZiZimqZq&&sJ7&&sJ7Z61w3ww1Z61w3ww188>>"#&588>>"#&q"f,1!"f,1!+5x%'5 +5x%'5 |+ T/rsvs|s|r}~GHKHQHQGRRS.T T+!)QCHHNN1 DH AH Ayq1a)q!fE \\ " "1 % ;226<< 3MrRJJ88>>V\\1chh6I6IY6V VV#44):J:J#44):J:J{{ LL * *3::7GZ]ZdZdfv+:IzS]_i klrt trcP|jtjk7r"tjd|j|j |tj}|j ||dd\}}|jjr+|j||\}}|j||\}}n|j||\}}|j}|j|jj|j|j|j|S)Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r9r.rfrrrrrprKrr&r' create_selectr)r conditionrqrrrs rrzTritonSemantic.whereis ??bgg % MMy{D{J{JzKL IIi1 00AtTB1 >> " " $44YBLIq,,Q2DAq44YBLIq{{4<<55i6F6FRSRZRZ[]cddrc\|rtj||}n|}|j||Sr))r.r5r&)rrqrVrres_tys r wrap_tensorzTritonSemantic.wrap_tensor}s- ]]9i8FF{{1f%%rc& |tfdDd}djj t }||ks Jd|dt Dcgc] \}}||k7s |c}} t fdDsJdj jDcgc]}|jc}||jsJt fdttDScc}}wcc}w)Nc3pK|]-}j||jjgd/yw)TrbN)rVrErH)rqtrs rrsz+TritonSemantic.reduction..s+^RS4<<AGGMM?<M^s36rz&reduction axis must be < inputs rank (r c3PK|]}|jjk(ywr))rpr<)rqrr<s rrsz+TritonSemantic.reduction..s9Q166<<5(9s#&z-all reduction inputs must have the same shapec3K|]=}jj||jj?ywr)r get_resultrpr~)rqrinputs reduce_oprrs rrsz+TritonSemantic.reduction..s@u\]D  Y11!4fQinn6K6KY WuAA) tuplerpr<rLr|allr' create_reducerverifyr;) rrr1region_builder_fnrankrrUrr rr<s `` @@@r reductionzTritonSemantic.reductions <^W]^^FDq $$5zd{LDTF!LL{#,U#3A41aqDyQA 9&99j;jj9LL..&/IQ/I4P )$!!!uafgjkqgrasuu uB0Js D.D*Dcdjjt}| |cxkr|ksnJd|d|d|dkr||z }D]"}|jjk(rJdjj Dcgc]}|j c}|||j sJtfdttDScc}w)Nrz scan axis z must be < inputs rank (r z(all scan inputs must have the same shapec3K|]=}jj||jj?ywr)r)rqrrscan_oprr<s rrsz2TritonSemantic.associative_scan..s;w_`T%%g&8&8&;VAY^^=R=RTYZwr ) rpr<rLr' create_scanrrr r;) rrr1rreverserrrr<s `` @@rassociative_scanzTritonSemantic.associative_scansq $$5zu#t#Wz$7OPTvUV%WW# !8 DLD UA66<<5( T*T T( U,,**f+EAHH+EtWU'"~~wdijmntjudvwww ,FsC.c|jjsJdt|jj}t|jj|k(sJd| |cxkr|ksnJd|d|d|dkr||z }t |D]F}||k(r |jj||jj|k(r=Jd|d|j j|j|j|}|j||jj|jjS) Nzindex must be an integer tensorz0source and index tensors must have the same rankz gather axis z must be < source rank (r rz index dim z( must match the corresponding source dim) r9rRrLrpr<r;r' create_gatherrrr~)rsrcindexr1rrrgathers rrzTritonSemantic.gathers1{{!!#F%FF#388>>"5::##$,`.``,u#t#Y|D69QRVQWWX%YY# !8 DLDt yADy::##A&#((..*;; xz$Ow=x x; y ++CJJ dK9I9IJJrc|sy|^}}tt|D]}|j|||\}||<tt|D]}|j|||\}||<|g|S)Nr )r;rLr)rrheadtailrs rbroadcast_tensorsz TritonSemantic.broadcast_tensorss ts4y! EA 55dDGDMD$q' Es4y! EA 55dDGDMD$q' E}t}rc j|}t|dkDsJd|Dcgc]*}|djj|j,}}j j |Dcgc]}|jc}|Dcgc]}|jj c}||tfdt|DScc}wcc}wcc}w)Nrz1map_elementwise must have at least 1 input tensorc3fK|](\}}jj||*ywr))r&r)rqrrelementwise_oprs rrsz1TritonSemantic.map_elementwise..s+huq"T[[!:!:1!=rBhs.1) r rLrprr~r'create_map_elementwiserrr r|)rr result_typespackrrrr#s` @rmap_elementwisezTritonSemantic.map_elementwises'''06{QS SSLXYbq 66ryyAY Y<<% &!QXX &.: ;RXXdll # ;   .)hPYZfPghhhZ & ;s/C5C#"C( ct|jdk(sJd|jjsJd|W|j ||j}|j j js td|j}|j|jj|j||tjtj|gS)Nrz histogram only supports 1D inputz%histogram only supports integer inputra)rLr<r9rRrrpr~rr,rr&r'create_histogramr.r5r/)rrnum_binsrs r histogramzTritonSemantic.histograms5;;1$H&HH${{!!#L%LL#  ,,T5;;?D99##++- !EFF;;D{{4<<88xQUV==H:>@ @rctdt|jt|k7r td|jj dt j||jj|S)NrzAShape of input to multiple_of does not match the length of valuesztt.divisibility) rrLr<r,rset_attrr make_attr get_contextrrqrs r multiple_ofzTritonSemantic.multiple_ofs[ q#agg, 3v; .`a a +R\\&!((BVBVBX-YZrct|jt|k7r td|jj dt j ||jj|S)NzDShape of input to max_contiguous does not match the length of valuesz tt.contiguityrLr<r,rr-r r.r/r0s rmax_contiguouszTritonSemantic.max_contiguoussS qww<3v; &cd d /2<<@T@T@V+WXrct|jt|k7r td|jj dt j ||jj|S)NzCShape of input to max_constancy does not match the length of valuesz tt.constancyr3r0s r max_constancyzTritonSemantic.max_constancysS qww<3v; &bc c .",,vqxx?S?S?U*VWrcr|j|jjtjSr))r&r'create_barrierr.r%)rs r debug_barrierzTritonSemantic.debug_barriers${{4<<668"''BBrc|jds|r|dz }|jds |r|dddz}t|dkDr|jdsd|z}|Dcgc]}|j}}|Dcgc]}|jj }}|j |jj||||tjScc}wcc}w)N rzrar ) endswithrL startswithrr9rr&r' create_printr.r%)rprefixargshexargnew_args is_signeds r device_printzTritonSemantic.device_printss# cMFt$CR[4'F v;?6#4#4S#96\F*./3CJJ//:>?3SYY,,.? ?{{4<<44VS(IVXZX_X_``0?s C5!Cc|jjjsy|!|j||j |}|j |jj |j|tjSr)) r'rdebugrrr& create_assertrr.r%)rrrrs rrzTritonSemantic.device_asserts`||##))   88D$))D/2D{{4<<55dkk3GQQrc|j|jj|jtj Sr))r&r' create_assumerr.r%)rrs rassumezTritonSemantic.assumes*{{4<<55dkkBBGGLLrcrt|trtj|}t|tjrt|jt r%|j j|jS|rQd|jcxkrdksnJd|jd|j j|jSd|jcxkrdksnJd|jd|j j|jSt|tjr|jjdk(sJd |jjsJd |jtjk7rY|rW|j j|j |j j#|jj%S|jtjk(r |sJd |j SJd t'|) Nr\r]z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerZr[zFBlock pointers only support 32 bit `offsets/block_shape`, got a value rz*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rcrgr.rnrHrdr'rerrr&rEr9rRrirr get_int64_tyrrp)rrrs r_convert_elem_to_ir_valuez(TritonSemantic._convert_elem_to_ir_values dC <<%D dBLL )$**d+||,,TZZ883e3J8##'::,.H6JJ3||--djj993e3J8##'::,.H6JJ3||--djj99 bii (::##q( V*V V(::$$& b(b b&zzRXX%+||33DKKAZAZA\48JJ4L4L4NPPrxx' WWWu;; XKDQUJ<XXurct|dr |Dcgc]}|j||c}S|j||gScc}w)Nr)rrN)r list_likerrs rrz$TritonSemantic._convert_to_ir_values9sF 9j )R[\$D224E\ \..y+FGG]sAc |j|}|j|}|j|d}|jjr$|jjj r t d|jjt jk(rH|j|t jt j|jj}tdsgDcgc]*}t|t jr |jn|,c}t!dDsJdt|ds|g}|Dcgc]*}t|t jr |jn|,}}t#|t%t't)|k(sJdt!fd||||fDsJd |j*j-|j.||||}|j1|t jt j2|jjScc}wcc}w) NFrzMExpected `base` to be a pointer type (but not a block pointer type or others)rc3`K|]&}t|txrd|cxkxrdknc(yw)rZr[N)rcrg)rqrs rrsz0TritonSemantic.make_block_ptr..Qs)\:dC(CVt-Ce-CC\s,.zGExpected a list of constant integers (`int32_t` range) in `block_shape`z.[sh)3{#s9~5hs!$zBExpected shape/strides/offsets/block_shape to have the same length)rrprwrrKr,r.rfrr r r rrcrnrHr rtrur;rLr'create_make_block_ptrrr&r5) rbaser<stridesrrorderrrs ` rmake_block_ptrzTritonSemantic.make_block_ptr>s**51,,W5,,W%,Hyy!TYY%9%9%B%B%Dlm m 99  277 *99T2??277DII1!!!RXX.>>SZ[a4##B$;$;A$>I[[&&{3 $))R__555}}TYY11;?kk  ,,::< --n= 99   & & (W8I8I8Q8Q-QWX X;;K\aIbWX!((IbOVrhrgrh) r=rhrSrdr>rhrTrdrUrdrgrh)T)rrrd)rrhrrhryrdrgNone)FFTF)rTensorTy | numbers.NumberrrjrgTuple[TensorTy, TensorTy])rrrrrcallable)rrjrrjrrdrgr)rrjrrjrgr)rrjrrjrrdrgr)rqrrrrtl.PropagateNan)rqrrrrrrrm)rrrrrgrk)rrrrrgr)rr)rrrgr)rrrg tl.block_type)r7rgr8rgrrnrgr)r9rhrgr)r< List[int]r9rhrgr)rHrr<rorgr)rHrrgr)rrrSrorTrdrgr)rrr1rgrgr)rrrrrTrdrgr)rdrrerrgr)rdrrgrk)rrrw Tuple[int]rgr)rrr<rprgr)rrrrrgr)r Optional[str])rrrrhrgrr))rrrrhrrqrgr)rrrOptional[TensorTy]rrrrrrrrrrrrrdrgr)rtl.tensor_descriptor_baserrrrrgr)rrsrHrrgri)rrsrHrrgr)rrrrrgr) rrr^rrrrrrrrrgr) rrrlrr^rrrrrrgr) rrr^rrrrrrrgz#Tuple[TensorTy, TensorTy, TensorTy])rqrrgr) rrr^rrrrrrrrgr)rrrrrrrrqrrgrrhrgr)rr)r^rrr)rrrrrrrrrrrrrrzTensorTy | Nonerrdrrdrrdrrhrgr)rrrqrrrrgr)rSequence[TensorTy]r1rgrgTuple[TensorTy, ...])rrtr1rgrrdrgru)rrrrr1rgrgr)rzSequence[tl.tensor]r%zSequence[tl.dtype]r&rgrgzTuple[tl.tensor, ...])rrr*rgrrrrgr)rqrrrorgr)rgr)r?rr@List[TensorTy]rArdrgr)rrrrrrrrgr)rUrrgr)r) rUrr<rvrVrvrzList[tl.constexpr]rrrgztl.tensor_descriptor)|r r!r"r.r&__annotations__langrr2r5rCrXrorzrrrrrrrrrrrrrrrrrrrrrrr rrrrrr%rr-r2r>rkrFrIrHrPrVr[r_rirnrxrrrrrrrrrrrrrrrrr!r&r.r7r9r<r@rCrGrJrWr[r_rdrhrmrur|rrrrrrrrrrrrrrrrrr r'r+r1r4r6r9rErrKrNrrXr[rfr rrr%r%sYYFN& D O Q D05*.053;05d#R @ae05#:S#J,$>#>(0>>8#8(08" 8# 8(0 8]2 >,8.9"9" a \[\'&" ]]\)$/ 8 8 8 8 8 8GKZ$ )2AhSfXV 0  TW 2ptj$i9^       ,:x n  n25 nHK nZ^ nck n /), /19 // nu@[ uuuuu$0**x8*$XW"W'/W(u'*/R8+$)L$)L%yxy<[y#&[y3;[y@H[yz 0$.t0.t>A.tHW.tdh.t#.t15.tBJ.tOW.the(&u,x"&x+?x.K,i.Ci* @   C aRMY4H $fLYW](B,>(BPS(Bau(Brr%) __future__rrtypingrrrrrr r r|triton.runtimer _C.libtritonr rLrr.rr Exceptionrr%r rrr~sV"JJJ! CL : F FzBWX&zBr