~L i-vddlZddlmZddlmZddlmZddlmZm Z m Z m Z ddl m Z ddlZddlmZddlmZmZmZddlmZmZmZmZdd lmZdd lmZmZdd lmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)dd l*m+Z+m,Z,m-Z-m.Z.m/Z/dd l0m1Z1m2Z2ddl3m4Z5ddl6m7Z8e dZ9e dZ:ejvjxZed?e ed@e eudAe ejf dBZweDej&dqZd(edreufdsZd(edtedueufdvZ d)dwedreudxeYfdyZd*dtedreudzeufd{Zdted|ed}eYdreufd~Z d+deudedjdZ eDejvj:j<jBdZ"eDejvj:jFjeDejvj:jFjHdZ%eDejvj:jFjBeDejvj:jFjLdZ'eDejvj:jPjeDejvj:jRjdZ*ejzj}dddZ+eDejvjXjZ d6d Z.eDejvjXj^d Z0d Z1eDe<jdj d7d Z3dZ4eDe<jjjdZ6eDe<jne/ d7dZ8eDe<jre/dҫdZ:eDe<jvjdZ<eDe<jzjdZ>eDe<j~jdZ@eDe<je/dҫdZBdedzeufdZCeDe<je/dd$dZEeDe<je/dҫdZGeDe<je/dd$dZIeDe<je/dҫdZKeDe<jj(d&dZMeDe<jje<jjge/dZOeDe<jje<jjge/dddeXfd ZPeDejvjxjjejvjxjjge/d!ZQeDe<jj(e<jj(gd"ZTeDe<jjgd#ZVeDe<jje<jjge/dRȫdGdGdd$ZXeDe<jj(gd%ZZeDe<jje<jjgddd&d'Z]eDe<jjgddd&d(Z_eDe<jge/d)ZaeDe<jgd*ZceDe<jgd+ZeeDe<jgd,ZgeDe<jgd-ZieDe<jgd.Zjd/eXd0eXdeXfd1Zkd2ZleDe<jgd?e efd3ZneDe<jgd4ZpeDe<jgd5ZreDe<jjd6ZteDe<je/d7ZveDe<jj d8d8ZxeDe<jjd9Zzd)d:Z{eDe<jje<jjge/d9dd;d<Z}eDe<jje<jjgd=ZeDe<jje<jje<jje<jje<jje<jjge/d#d$d:d>ZeDe<j jd?ZeDe<jjd@ZeDe<jjdAZeDe<jje<jje<jj(e<jj(e<jje<jje<j jgdBZeDe<j$je<j&je<j$j(e<j&j(gd dCZeDe<j*je<j,jgd dDZeDe<j0je<j0j2gdEZdFZeDe<j8j(e<j8jgdGZeDe<j<j(e<j<jgdHZeDe<j@jdIZeDe<jDj(e<jDjgdJZeDe<jHj(e<jHjgdKZeDe<jLjdLZeDe<jPj(e/d defdMZeDe<jTge/ d;dNZeDe<jXg d;dOZeDe<j\g d;dPZeDe<j`je<jbjgd$dQZeDe<jfjdRZeDe<jjjdSZeDe<jndTZeDe<jre/dUZeDe<jvdVZeDe<jzjd$dWZd(dXZeDe<jjdYZeDe<jjdZZd[ZÐd\ZĐd]ZŐd^Z d$ddZeDe<jjdZeDe<jdZeDe<jje<jje<jje<jjge/d&dZeDe<jje<jje<jje<jjgd&dZeDe<jg d?dededededeYdeYde efdZdedeeXdffdZeDe<jg d?dededede edeYdedeYdeYde efdZeDe<jg d@dededede ededeYdeYde efdZeDe<jg d&dededededededededeXdeXdedeYdedede efdZeDe<j g dAdededededeYde ede efdZeDe<jg d(dedededededededeYde ede efdZeDe<jg dBdededede ededeYde ede edeeeffdZ eDe<jg dCdededede edeYdeYde efdZ eDe<jg dDdedededede ededededededeWeYdeYde efdZeDe<jg d&dedededededededededededeXdeXdedeYde ef dZeDe<j"g d<dededede ede edeXdeXdedeYdeYde ede eXde eXde ede efdZeDe<j&g d%dededededededededeXdeXdedeYdedede ede eXde eXf"d„ZeDe<j*g dEdededed?e ede ede ede eXde eXdedeXdeYde ede ede ede eXfd˄ZeDe<j.g d>dedededed?e ede ede edej0dej0dededededeXdeYde ede eXdeYf$dτZeDe<j4jg dFd(ej(dEej(dej(dej(d?e ej(de ej(dAe ejdeYfdԄZeDe<j8j:e<j8j<ge/d)dՄZeDe<j@j:d)dքZ!eDe<jDje<jDjge/d$dd/dׄZ#d؄Z$dلZ%eDe<jLje<jNjgd&dڄZ&eDe<jPje<jRjgd(dۄZ(eDe<jTje<jVjg d(dedee eXej0fdee eXej0fde ede ef dZ*eDe<jXje<jZjgd%dZ,eDe<j\je<j\j^e<j\je<j\j`gdGdZ1dZ2eDe<jfj d(dZ4eDe<jjjdZ5eDe<jljdZ6dZ7dZ8eDe<jrje<jtjgd9dZ;eDe<jxjdHdZ<eDe<jzjdIdZ>eDe<j~e/ dJdZ@eDe<jje<jjge/d#d$d:dZBejZDdZEeDe<jjdZFeDe<jjdZGeDe<jjdZIeDe<jjdZJeDe<jj(e<jjge/ddddZMeDe<jge/dKdZOeDe<jje<jjg d(dZReDe<jjg d(dZTeDe<jjdZUeDe<jje<jjge/d%dZVeDejvjxjdZWeDejvjxjdZXeDe<je/ddddddZZdZ[eDe<jdZ]eDe<j dLdZ_eDe<j dLdZaeDe<j dLdZceDe<je/ddddZeeDe<je/deXd(edefdZgeDe<jd(efdZieDe<je/dRȫd(edefdZjeDe<je/d(edefd Zkd Zl dMd ed ede ej(de ej(d e ed?e ede ej(dAe ejdeYfdZmeDe<je/ d%d ed ed e ed?e edAe ejdef dZoeDe<jg dMd ej(d ej(dej(dej(d e ej(d?e ej(de ej(dAe ejdeYfdZqeDe<je/ded)eXdeYdefdZseDe<je/ddZueDe<je/ dNd=ed$edeXdeYdeYdef dZveDe<jj dOd#ed^eWedeWeXdefdZxdZydZzeye<jeye<jeye<jeye<jeye<jeye<jeye<jeye<jeye<jeze<jeze<j eze<j eze<jeze<jeze<jeze<jeze<jeze<jeze<jeze<jeze<jdZeDe<j"e/dZeDe<j$e/dGd d!ZeDe<j&e/dGd d"Zee<j"Zee<j$Zee<j&Zddl0ZddlZddlZd#Zey(PN)Sequence)Enum)wraps)CallableOptionalTypeVarUnion) ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table meta_table) OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND) BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND FloatLikeIntLikemake_contiguous_strides_forNumbersuggest_memory_format TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_out out_wrapper)_broadcast_shapes_maybe_broadcast)_config)_pytree_T_PatenIMPLMetareturncfd}|S)NcVtfd}tj|S)Nc(tt|yN)rr)opfns _/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/_meta_registrations.pyregisterz0register_meta..wrapper..register:s  B 3)rpytree tree_map_)r5r7r4s` r6wrapperzregister_meta..wrapper7s)  $ 4 2& r8)r4r;s` r6 register_metar=6s Nr8type_promotionctj|d|i\}}|Dcgc]}t||}}t|}t |dt j iScc}w)Ntype_promotion_kindr>)utilsrr r&rrDEFAULT)r>args_ result_dtypexs r6elementwise_metarGCss .. *OA|?C C #A| 4 CD C T "D "  BJJ  DsActjtjtjtjtj tj i}|j||Sr3)torch complex32halfcfloatfloatcdoubledoubleget)dtype from_complexs r6toRealValueTyperSWsE  ekk u||L   E5 ))r8clttg|tjk(fdy)NcddS)Nzoutput with shape z# doesn't match the broadcast shape r<)broadcasted_shape self_shapesr6z)check_inplace_broadcast..ds$ZL0STeSfgr8)tupler%rI_check)rW args_shaperVs` @r6check_inplace_broadcastr\`s0/ HZHI LLZ'gr8Fc < ttjr(tjj dk(dttjr(tjj dk(dt dfDrZt jtj  nFtjt j fdnxstjttjsJtjttfdttsJtjdk\dtjf|d|| S) NrcyNz:linspace only supports 0-dimensional start and end tensorsr<r<r8r6rXz(meta_linspace_logspace..xr8cyr_r<r<r8r6rXz(meta_linspace_logspace..}r`r8c3<K|]}t|tywr3) isinstancecomplex).0args r6 z)meta_linspace_logspace..s C:c7 # CscddS)Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r<)default_complex_dtyperQsr6rXz(meta_linspace_logspace..s56K5LLrsxryzr8cdtjdtjdtjdS)Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepssr6rXz(meta_linspace_logspace..sDu+r$s),,-RU 0D0D/EQHr8cy)Nz$number of steps must be non-negativer<r<r8r6rXz(meta_linspace_logspace..r`r8metarQlayoutdevice pin_memory requires_grad)rcrIr rZdimanyrArget_default_dtypeis_complex_dtyperQ _check_typerempty) rprorqbaserQrvrurwrxris ``` ` @r6meta_linspace_logspacerhsL%& IIK1  P #u||$ GGIN P  CsE/B CC % A A  # # %!  =)E LL&&u-z  2002 eU[[ )) ) 5'" H eW %% % LL!KL ;; #  r8ctjjtjk(fdtj|j dk(xrj dk7 d|j jS)Nc"djS)Nz2take(): Expected a long tensor for index, but got rQindexsr6rXzmeta_take..sDU[[MRr8rcy)Nz*take(): tried to take from an empty tensorr<r<r8r6rXzmeta_take..r`r8)rIrZrQlong _check_indexnumel new_emptyshape)selfrs `r6 meta_takersm LL uzz!R   ZZ\Q  55;;=A#56< >>%++ &&r8rycTj}j}tj||k(dtjjdk(xrjdk(fdt j j }j |S)Ncy)Nz=linalg.cross: inputs must have the same number of dimensions.r<r<r8r6rXzlinalg_cross..r`r8r.cVddjdjS)Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)ryotherrsr6rXzlinalg_cross..s6-cU399S>"% 3'8 :r8)ndimrIrZrr%rr)rrryx_dy_d out_shapes``` r6 linalg_crossrs ))C **C LL s O LL #!4 31 4 "$**ekk:I >>) $$r8c|t|dt|dtj|tjS)Nzlinalg.matrix_exp memory_format)squareCheckInputscheckFloatingOrComplexrI empty_likecontiguous_formatrs r6linalg_matrix_exprs3d/04!45   D0G0G HHr8valuesindicescZtj|j|j|j}tj|j|jtj }|j dk7r%|jdk7rt||j||fS)NrvrQr) rIr~rrvrQint64rrmaybe_wrap_dim)rryrrs r6 cummaxminrsp [[DKKtzz JFkk$**T[[ LG zz|qTYY!^sDII& 7?r8cxt||jtj|tjSNr)rrrIrr)rrys r6 logcumsumexprs+3 "   D0G0G HHr8cD|j}t|}||z }tt|}t|D cgc]} d} } |D]} d| | < gg} } |D]*} | | s| j | | j | ,| | z}t| }|j |d|}|j fdd|||dz}|j|}dgt|j|dz}|j|}|jd}||d<t|}tt|D]}|||||dz<|j|tjt|D cgc]} d}} d}|dz }|dk\r0||j dz|||<||||z}|dz}|dk\r0t||D]}|j d||z z|||<!|j|||j|Scc} wcc} w) NFTc|Sr3r<)rF self_stridess r6rXz_exec_fft..s <?r8keyreverserrr)rlenlistrangeappendstridesortpermuterreshaperresize_rIr as_strided_storage_offset)outr out_sizesryforwardr signal_ndim batch_dims dim_permuterDis_transformed_dimdleftright batch_endtmpinput batched_sizes batch_sizebatched_out_sizesi out_strides batch_numelrs @r6 _exec_fftrsZ 99Dc(K #JuT{#K).t5A%55 % $1%b%D !!$ KKN LLO  ,KD I;;=L jy !CHH*DH9 IJ//K LL %ED4 JK 899M MM- (EAJ!M!]+ 3s8_5#,SV#4!a% 5KK!1H1HKI$Dk*1*K*KQA q&&1CJJqM&A KN#yQ00  Q q&:t $G&)jja*n1E&F KN#GOOI{C,>,>,@A JW6@+s H Hrry exclude_lastct|}|j|dt|t|z j fd|S)Nc|Sr3r<)rrs r6rXz_sort_dims.."s l1or8)r)rrrintr)rryr sorted_dimsrs @r6 _sort_dimsrsLs)K;;=L6#k"S%667<< %= r8c tj|jj|s|j St ||}|j |j}t|||j||S)Nr) rIrZrQ is_complexclonerrrr)rry normalizationrrrs r6 meta_fft_c2cr)sb LL&&' zz|T3'K .. %C S$ [' JJr8cft|tkDst|dk\r|ddk(r |ddk(ryy)NrrFT)rcufft_max_ndimrs r6use_optimized_cufft_pathr8s3 3x. SX]s1v{s1vQR{r8cztj|jjt |j }t |}|d}||dzdz}t |}|||<|r|||<t |dk(st |dk(rz|j|tj|j} |} t |dk(rt|rt| | ||dnt|dk(r|n|} t| | | |gdt|dkDr0|j|tj|j} |dd} | rx| | } } | j| jfd d tt t| } | t| | z d}t| | ||d| dt| | z } | rx|s:| j |||k7r#| j#|tj$ | } | S|j|tj|jS) NrrrcudaxpurTrc|Sr3r<)rstridess r6rXzmeta_fft_r2c..fs '!*r8rr)rIrZrQis_floating_pointrr device_hintrrArrrrrrminrrr)rryronesided input_sizesrlast_dimlast_dim_halfsizeonesided_sizesoutputworking_tensor target_sizesrmax_dims last_dimsrs @r6 meta_fft_r2cr?s> LL--.tyy{#K[!I2wH#H-2Q6+&N0N8/ (4F"k$&75&@ U>>tzzJ   t  &+CC+H fnid K),CA 9>L fnlXJPT U3x!|!%U%F%Ftzz%R"0" cr(K)7(//1  ,d!~s;/?@'K(88(C(EF NNIt**GC ,>tzzJ  r8) generatorcBt|tj|gSr3)r!rISize)nrrs r6 meta_randpermr|s S%**aS/ 22r8rQrurvrwc6tj|||||SNrrIr~)rrQrurvrws r6meta_randperm_defaultrs  ;; vf r8cxdtjkDfdtj|||||S)NrcddSNz:random_ expects 'from' to be less than 'to', but got from=z >= to=r<highlowsr6rXzmeta_randint..LSEQXY]X^_r8rrIrZr~)rrrQrurvrwrs` @r6 meta_randintr s> C LL s _ ;; E&J r8cttjkDfdtj|||||S)NcddSrr<rsr6rXz"meta_randint_low..rr8rr )rrrrQrurvrws`` r6meta_randint_lowr s9 LL s _ ;; E&J r8c6tj|||||Srr)rrQrurvrws r6meta_rand_defaultrs  ;; E&J r8rlastdimctj|jjt |dk(rt |j }|||d<|j|t|j}t|r.t||jtj||dSt|dkDrt||ddd|}n |jtj}t||||dgdS|}t|dkDr|dd}t|||d}|dd}t |j }|||d<|j|t|j} t| |||dS) NrrrrFrrr)rIrZrQrrrrrrSrrrrrr) rryrrrrtemprc2c_dimsrs r6 meta_fft_c2rrsk LL&&'4F"% $ #b' 1LM #C ( )@)@ A  3x!|#D#cr(Aw?zz0G0GzHVT9s2wiO O s8a<3BxH xNEbc(C& $ #b'nnYodjj.InJeYUCCr8cJddlm}||s#tj|dk(r t dt |t ra|j||}|j|jk7r.tjj||j|S)Nr)free_unbacked_symbolsrzQmore than one element of the written-to tensor refers to a single memory location) %torch.fx.experimental.symbolic_shapesrrI_debug_has_internal_overlap RuntimeErrorrcr torr+ expand_copydefault)rsrc non_blockingr intermediates r6 meta_copy_r s L "$ 'E,M,Md,SWX,X _  #vvvdL1 99;,++- -    $ $\499; ? Kr8ct|j}t|j}||jk\rdn ||||z}|j |d|j ||||fSNr)rrrryinsert)tensorry result_sizesresult_strides new_strides r6inferUnsqueezeGeometryr(sq &L&--/*NVZZ\)|C/@>RUCV/VJQ#z*  ''r8ct||jdz}t||\}}|j|||Sr")rryr(r)rryg_sizes g_stridess r6meta_unsqueeze_r, s> dhhj1n -C/c:GYWi( Kr8rweight_metabias_activation_opt out_dtypec8t|j}|*|jd|jdk(sJd|jd|jddz k(sJ|jd|d<t|jdk(sJdd|jdf}|7|jt j k(r|t jk(sJd|j|| |jn|j||}|S) Nrzoutput size mismatchrrrz*we can only handle the squashed input case9out_dtype is only supported for i8i8->i32 linear operatorr) rrrrrQrIint8int32r as_strided) rr-r.r/r0r1 output_sizestransposed_stridesrs r6meta_sparse_structured_linearr9s $L {{1~1-E/EE- ;;q>UZZ^a/ // /{{1~L u{{ q N"NN UZZ]+{{ejj(Y%++-E G E__&.ekkIj12 Mr8mat1 mat1_metamat2ct|jdk(sJt|jdk(sJt|jdk(sJ|jd|jddz k(sJ|jd|jdg}|7|jtj k(r|tj k(sJd|j|| |jn|}|S)Nrrrr3rrrrrQrIr4r5r)r:r;r<r1r7rs r6meta_sparse_structured_mmr?6s tzz?a   y 1 $$ $ tzz?a   99Q<499Q) rr:r;r<r@rAr1r7rs r6meta_sparse_structured_addmmrCOs/ u{{ q O  tzz?a   y 1 $$ $ tzz?a   ::a=DIIaL (O ( 99Q<499Q {out_dtype} matmul!r)rQrIfloat32float16bfloat16r4 float8_e4m3fnrr is_contiguousrrr5r)rDrEr/r@r1rFrGrHrIis_8bit_input_typecompression_factorkrm output_shapes r6meta__cslt_sparse_mmrXrs ==       E E E     .Q0QQ . w}}  "M$MM "%++ E?A DIIaL   !i MM NN KK    4 '  k   .Aq6Aq6L   \  ;;r8T) include_selfrsourcereducerYcLtj|tjSrrIrrrryrrZr[rYs r6meta_index_reducer_s   D0G0G HHr8c|Sr3r<r^s r6meta_index_reduce_ras  Kr8ct|j}|jdkDr|j||<|j |SNr)rrryrr)rryr result_sizes r6meta_index_selectres@tyy{#K xxzA~ ;;= C >>+ &&r8)lengthsroffsetsaxisunsafeinitialdatarfrgrhric| tdfd}|||jS|+|jdd|jddz fz} || Std)Nz?segment_reduce(): indices based reduction is not supported yet.ctj|jdzdzjdtjS)NrrsrQrvr)rIr~rrQr) lengths_shaperhrks r6segment_reduce_lengths_tensorz:meta_segment_reduce..segment_reduce_lengths_tensors>{{ DJJtaxz2 2**11   r8rrz>" r8ctj|j|f}t|||}|j ||j |t j fSNrrAreduction_dimsr_compute_reduction_shaperrIrrrykeepdimrWs r6 meta_max_dimrR   tzzC6 2C+D#w?L |$ |5::6 r8c$|jdSrtrurs r6meta_minrrwr8ctj|j|f}t|||}|j ||j |t j fSryrzr}s r6 meta_min_dimrrr8c|jrt|j}nt|tj \}}t j||SNr@r)rrrQrr INT_TO_FLOATrIr)rrErDs r6 meta_angler sI / ; ,  ? L L <   D 55r8ctj||j|j|j tj |Sr3)rI_resize_output_rrvcopy_angle)rrs r6meta_angle_outrs6 #tyy{DKK8 99U[[& ''r8cyr3r<)vals r6 assert_asyncr r8cyr3r<)r assert_msgs r6assert_async_metar"rr8cyr3r<)ss r6 print_metar'rr8rQrurvrwrc0tjddS)Nrrsrvrrs r6make_dep_tokenr,s ;;q ((r8chddlm}t|ttfr t d||||y)Nr)constrain_range'Constraining SymFloat or Symbool is nyirmax)rrrcr r ValueError)rrrrs r6sym_constrain_ranger8s/F$7+,BCCDcs+r8c6tj||||SNr)r+rrrr dep_tokens r6functional_sym_constrain_rangerBsTs4 r8c(ddlm}||tj|yt |t t fr tdt|tur5|tj||k\|tj||ky||||y)Nr)_constrain_range_for_sizerr) rrrI_check_is_sizercr r rrmrrZ)rrrrs r6sym_constrain_range_for_sizerHsP {s{ T"$7+,BCC DzS ? LL % ? LL %d5r8c6tj||||Sr)r+rrs r6'functional_sym_constrain_range_for_sizer\s%%d%= r8c|Sr3r<)rrrs r6functional_assert_async_metarbs r8f_namec|jdk\s J|d|jd|jdk(s.J|d|jdd|jddy)Nrz3: The input tensor must have at least 2 dimensions.rz5: A must be batches of square matrices, but they are  by matrices)ryr)rrs r6rrhs} 88:? (EF? 99R=DIIbM ) (G RT VZ[_[d[deg[hZiirs )r8Anamectjjjk(fdtjjjk(fdtjj dj dk(fdtjj dj dk(fdy)Nc>djdjdS)Nz:Expected b and A to be on the same device, but found b on z and A on instead.rrrsr6rXz(linearSolveCheckInputs..ws%H{{m:ahhZy :r8c>djdjdS)Nz=Expected b and A to have the same dtype, but found b of type z and A of type rrrsr6rXz(linearSolveCheckInputs..s%Kzzl/!'') =r8rrcRdjddjddS)Nz3A must be batches of square matrices, but they are rrrrrrsr6rXz(linearSolveCheckInputs..s0FF2J.sR,TF3 D $TYYr]O4 "  Hr8)rIrZrvrQr)rrrs```r6linearSolveCheckInputsrts LL qxx  LL agg  LL r affRj   LL r diim# r8tallow_low_precision_dtypescJ|jtj|jxs|j fd|sYtjtj tj tjtjfvfdyy)NcdS)Nz<: Expected a floating point or complex tensor as input. Got r<rQrsr6rXz(checkFloatingOrComplex..s6(VW\V]^r8cdS)Nz*: Low precision dtypes not supported. Got r<rsr6rXz(checkFloatingOrComplex..svhHPr8) rQrIrZrrrMrOrLrN)rrrrQs ` @r6rrsn GGE LL /^ & ekk5<<u}}M M P  &r8arg_namec^tj|jdk\fdy)NrcddS)Nz: The input tensor z! must have at least 2 dimensions.r<)rrsr6rXzcheckIsMatrix..s6(-hZ7XYr8)rIrZry)rrrs ``r6 checkIsMatrixrs LL 1 Yr8Brctttjr#j dj dk(n"j dj dk(fdy)Nrrc drdnddjddjddjddjdd S) Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (rrFrrrlr)rrrrsr6rXz#checkInputsSolver..s[hHxX. AaffRj\qvvbzl!AFF2J.s5i{;-/dm4 nU\\N Lr8)rIrZrv)rrrrs````r6checkSameDevicers&  LL % r8UPLOcj}tjtdk(xr |dk(xs|dk(fdy)NrULcdS)Nz1Expected UPLO argument to be 'L' or 'U', but got r<)rsr6rXzcheckUplo..sCD6Jr8)upperrIrZr)rUPLO_uppercases` r6 checkUplors<ZZ\N LL D QKNc1J^s5JJr8 eigenvalues eigenvectorsr compute_vcTt|dt|t|j}|r/|j |}|j |t |dn|j dg}|j|j |t|j}||fS)Nz linalg.eighF row_majorrr) rrrrrrrpoprSrQ)rrrrvecsvalss r6meta__linalg_eighrsa' dO ME{{5!  ;EU ST{{A3 IIK ;;uOAGG$<; =D :r8ct|dtj|jr |jntj|j}|j |j dd|S)Nzlinalg.eigvalsrrrrAr|rQrrr)r complex_dtypes r6meta__linalg_eigvalsrsce-.  ! !%++ .   . .u{{ ; ??5;;s+=? AAr8c0t|dtj|jr |jntj|j}|j |j dd|}|j |j |}||fS)Nz linalg.eigrrr)rrrvectorss r6meta_linalg_eigrse\*  ! !%++ .   . .u{{ ; __U[["-]_ CFooekko?G 7?r8rcv|jjtjj ddS)Nrrr)mTrrIr transpose)rs r6cloneBatchedColumnMajorrs* 66< H HR PPr8rct|Sr3)r)rrrs r6_cholesky_solve_helperrs #4 ((r8ctjjdk\fdtjjdk\fdtd\}}t |||S)Nrc$djdS)Nz-b should have at least 2 dimensions, but has  dimensions insteadrrsr6rXz cholesky_solve..s? {J]^r8c$djdS)Nz-u should have at least 2 dimensions, but has rrrsr6rXz cholesky_solve..s?xGZ[r8cholesky_solve)rIrZr!_linalg_broadcast_batch_dims_namer)rrrself_broadcasted A_broadcasteds`` r6rr sh LL Q^ LL ! ['H a!'#m ""2M5 IIr8c|jdk(r%tj|tjSt |dt |S)Nrrcholesky)rrIrlegacy_contiguous_formatrrrrs r6rrs@ zz|qE4R4RSSdJ' "4 ((r8c0t|dt|S)Ncholesky_inverse)rrrs r6rr&sd./ "4 ((r8 check_errorsct|dt|d|j}t|}t |d}|j |}|j |||j |d|dz tj}||fS)Nzlinalg.choleskyFrrr) rrrrrrrrIr5)rrrA_shaper L_stridesrinfoss r6linalg_cholesky_exr.sa*+1/0ggG w.Gr`r8rrcy)Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r<r<r8r6rXz,linalg_householder_product..Kr`r8cy)Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r<r<r8r6rXz,linalg_householder_product..Or`r8rc<djdjS)Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to rrr sr6rXz,linalg_householder_product..T')), 2Nuzzl \r8cdS)Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r<actual_batch_tau_shapesr6rXz,linalg_householder_product..^66L5MOr8c<djdjS)Nz,torch.linalg.householder_product: tau dtype z does not match input dtype rrsr6rXz,linalg_householder_product..fs#:399+*5;;- 9r8z torch.linalg.householder_productr FrrrrQrv) rIrZrrrrQr empty_stridedrrv)rr expected_batch_tau_shapers`` @r6linalg_householder_productr@sK  LL aZ LL 2%**R.(t LL 2#((2,&r  LL SXX"  zzA~#(;;s#3 !$3B "&> >   LL U[[  6UEJ    [[*5;;%Hkk||  r8c2t|dt|dd|j|j}|j |jt |jd|j|jddt j}||fS)Nz linalg.inv_exF)rrrrrrrrrrrIr5)rrrrs r6linalg_inv_ex_metarvsqa)1o%P AGGAMM!''6qww%PQ KK EKKK 8E e8Or8LDpivotsinfo) hermitianrr!ct|dt|dtj|jt |jd|j |j}|j|jddtj}|j|jddtj}|||fS)Nztorch.linalg.ldl_factor_exFrrrrr) rrrIrrrrQrvrr)rr!rrrr s r6linalg_ldl_factor_ex_metar#sd894!=>    ZZ*4::Gjj{{  B ^^DJJsO599^ =F >>$**Sb/> ;D vt r8)r!cdtdtdtdtjj dk\fdj dd}tj|j k(fdtjtjjfdtjjjk(fdt\}}tj|t|d jj S) Nztorch.linalg.ldl_solverc$djdS)NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has rr)rsr6rXz'linalg_ldl_solve_meta..&&!4 6r8rc$djdS)Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadrrsr6rXz'linalg_ldl_solve_meta..))/h @r8c"djS)Nz.sNv||n]r8c<djdjS)Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r)rrsr6rXz'linalg_ldl_solve_meta..s"3BHH:=UVWV]V]U^_r8Frr)rrrrIrZrrrAis_integer_dtyperQ_linalg_broadcast_batch_dimsrrrv)rrrr!expected_pivots_shapeB_broadcast_sizerDs``` r6linalg_ldl_solve_metar2sb232781b":; LL !  HHSbM LL-  LL v||,] LL AGG_7q"=a    *+;uMggxx  r8Pr)pivotr4chtjjdk\fdtj}|d}|d}t ||}||d<|rj |}nj dg}||d<j |}||d<||d<j |}|||fS)Nrc$djdS)Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r(r)rsr6rXz linalg_lu_meta..sRSTSZSZR[[cdr8rrr)rIrZrrrrr) rr4sizesrVrrUr3rrs ` r6linalg_lu_metar8s LL ! d ME b A b A Aq AE"I KK  KK E"I EAE"IE"I EA a7Nr8LU)r4rctjjdk\fdtj}|d}|d}tj |t |djj}|jt|||d<j|tj}|jj|tj}|||fS) Nrc$djdS)NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r(r)rsr6rXz*linalg_lu_factor_ex_meta..sXYZY`Y`Xaaijr8rrFrrr) rIrZrrrrrrQrvrrrr) rr4rr7rVrr9rr s ` r6linalg_lu_factor_ex_metar<s LL ! j ME b A b A    *5EBggxx  B IIKAq E"I [[eii[ 0F IIK ;;uEII; .D vt r8)radjointr=ctdtjjjk(fdtjjtjk(dt dt |dtjjdjdk(dtjjddjk(fdt\}}tj|t|| jj }|jd k7r"|s |jr|j}|S) Nztorch.linalg.lu_solvec>djdjdS)NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r(r)rr9sr6rXz&linalg_lu_solve_meta..s($$&HH:_QWWIX Or8cy)NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r<r<r8r6rXz&linalg_lu_solve_meta..r`r8zlinalg.lu_solvercy)NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr<r<r8r6rXz&linalg_lu_solve_meta.. r`r8c$djdS)Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r(r)r*sr6rXz&linalg_lu_solve_meta..&r+r8rrr)rrIrZrQrrrrrr/rrrvrrconj)r9rrrr=r1rDrs``` r6linalg_lu_solve_metarEs.267 LL AGG  LL  !W b12b!T#45 LL  v{{2&k  LL " % 7q"=a  *+;4xPggxx F||~4    [[]F Mr8 unpack_data unpack_pivotsctjjdk\fd|r2tj|jtjk(dt j }|d}|d}t||}||d<|rj|}njdg}|r2||d<j|} ||d<||d<j|} n$jdg} jdg} || | fS)Nrc$djdS)NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r(r))r9sr6rXz lu_unpack_meta..FsXY[YaYaXbbjkr8c y)Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype. Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr<r<r8r6rXz lu_unpack_meta..Ks pr8rrr) rIrZrrQr5rrrr) r9rrFrGr7rVrrUr3rrs ` r6lu_unpack_metarK<s LL 1 k LLEKK '   NE b A b A Aq AE"I LL  LL! b LL b b LL  LL!  LL!  a7Nr8modecdk(rd}d}||fSdk(rd}d}||fSdk(rd}d}||fStjdfdfS)NreducedTcompleteFrcddS)Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r<)rLsr6rXz _parse_qr_mode..ss1$8NOr8rIrZ)rL compute_qrNs` r6_parse_qr_moderTfs y  g     g    g      g r8QRct|dt|dt|\}}|jd}|jd}t ||}|rMt |j}|r|n||d<|j |}|j|t|dn|j dg}t |j} |s|s|n|| d<|j | } | j| t| d|| fS)Nz linalg.qrrrFrr) rrrTrrrrrr) rrLrS reduced_moderVrrUQ_shaperUR_shaperVs r6linalg_qr_metar[{s![!1k*,T2I|  A  A Aq Aqww-'aQ KK  g:7eTU KK 177mG#9!!GBK GAMM'6w%PQ a4Kr8sign logabsdetct|dt|dd|j}|j|dd}|j|ddt |j }t j|t|d|j |j}|j|ddt j}||||fS)Nzlinalg.slogdetFrrrr) rrrrrSrQrIrrrvr5)rrr\r]r9rs r6_linalg_slogdetr_sa)*1.6 GGE ;;uSbz "D E#2Joagg.F GI    *5%8ggxx  B [[s5;;[ 7F B &&r8 full_matrices compute_uvdrivercbt|dt|dt|jdd}|jd}|jd}t ||}|r|||r|n|gz}|j |} | j |t|d||r|n||gz} |j | } t|dk(} | j | t| | n$|j dg} |j dg} |j ||gzt|j} | | | fS) Nz linalg.svdrrFrrrr) rrrrrrrrrrSrQ)rr`rarbrrVrrUU_shaperV_shapeVis_cudaSs r6_linalg_svd_metaris#!\"1l+aggcrl#J  A  A Aq A 11== KK  g:7eTU]1== KK  a.F* g:7gVW KK  KK  J!$OAGG,D EA a7Nr8arg1arg2c|jdd}|jdd}t||}t|}||jd|jdgz }t|}||jd|jdgz }||fS)Nrr)rr%rr)rjrkarg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes r6r/r/s zz#2zz#2,-=?OP012 " 66012 " 66 - --r8c|r t|||t||\}}||jk(r|n|j|}||jk(r|n|j|}||fSr3)rr/rexpand)rjrkrrprqarg1_broadcastedarg2_broadcasteds r6rrsv  tT40)EdD)Q&&!DJJ.DKK@P4Q!DJJ.DKK@P4Q - --r8rc|jdd}|jdk(xs-|jdz |jk(xr|j|k(}|S)Nrr)rr)rrexpected_batched_rhs_shape vector_cases r6linalg_solve_is_vector_rhsrysS!&Sb!1**/ Q%**$R8R)R r8)rrrr9rr ctdtjjjk(fdt }|rj dn} t | |dt| \} } tj|xs| d|r| ddn| } tj| t| | jj} j}tj|t|djj}j|ddtj}j|ddtj}||||f}| |||f}td |Drbt||D]S\}}t!||j|j#|j|j%t'||d U|S) Nz linalg.solvec>djdjdS)NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type r@r(r)rrsr6rXz"_linalg_solve_ex.. s%Ywwiqwwix 9r8rc y)Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r<r<r8r6rXz"_linalg_solve_ex..s  Kr8rFrrc3$K|]}|du ywr3r<)rerFs r6rgz#_linalg_solve_ex../s &Q1D= &s) copy_fromcopy_to exact_dtype)rrIrZrQry unsqueezerr/rrrvrrr5allzipr!rrr#)rrrrrr9rr rxB_ B_broad_shaperD result_shaperesult_rLU_pivots_info_rresrPos`` r6_linalg_solve_exrs1n- LL 177 -Q2K'RQBaT>23B:M1 LL K *5="%-L!! *.^s))$7 9r8c$djdS)NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has rrrsr6rXz'triangular_solve_meta..er&r8triangular_solveFrrrcy)Nz+triangular_solve: Got an unexpected layout.r<r<r8r6rXz'triangular_solve_meta..r`r8)rIrZrrrustridedr/rrrQrv sparse_csr sparse_bsrrr) rrrrrself_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients `` r6triangular_solve_metarSsL LL Q  LL !  4$67xx5== 0LTST0U--&&$./BeT**;;   #00!./?5Q''88   ' '' U%% %U5E5E)E##D)!^^QC0 ' ''  UQR ' ''r8clt|dt|d|j|jdd}|j|j}|j |jt |jd|j|jddt j}|||fS)Nz linalg.detrFrrrr)rdetr9rs r6_linalg_det_metarsa&1l+ ++aggcrl #C QWW BNN17775QR [["U[[[ 9F F?r8c0tjjdk\dtjjdk\d|rdndtjjjdk\fdtjjjdk(fdtjjdjdkdtjjjz d k(fd tjjjk(fd jdkDrejdd}jddtj|k(fd jddtj|k(fd tjjjk(fdtjjjk(fdt ddt ddtj jtjdjjS)Nrcy)Nz3torch.ormqr: input must have at least 2 dimensions.r<r<r8r6rXzormqr..r`r8cy)Nz3torch.ormqr: other must have at least 2 dimensions.r<r<r8r6rXzormqr..r`r8rrcddS)Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r<left_size_conditionsr6rXzormqr..s+,?+@@pqr8cddS)Nrz"] must be equal to input.shape[-2]r<rsr6rXzormqr..s+,?+@@bcr8cy)NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r<r<r8r6rXzormqr..r`r8rc<djdjS)Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to rrrsr6rXzormqr..rr8c<djdjS)Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to rrrrsr6rXzormqr..s+++0::,6RSXS]S]R^ `r8cdS)NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r<rsr6rXzormqr..rr8cdS)NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r<)actual_batch_other_shapesr6rXzormqr..s66N5OQr8c<djdjS)NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype rrsr6rXzormqr..s'##(;;-/B399+ Or8c<djdjS)NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype rrsr6rXzormqr..s'##(;;-/DU[[M Sr8z torch.ormqrr rFrr) rIrZrrrQrrrrv) rr rrrexpected_batch_shaperrrs ``` @@@r6ormqrrs LL aV LL aV!%"" LL '(CIIbM9q LL '(EKKO;c  LL " R(Z  LL SXX"  LL ejj   zzA~${{3B/!$3B "&: :  $);;s#3  $(< <   LL U[[   LL u{{" M3u5M5%9    [[*5;;%Hkk||  r8cttjtdzk(fdj}|dzk(}|}| }|r*t d|D]}|xrj |dk7}n)t d|D]}|xrj |dk7}tj|xs|fdy)Nrc,ddzdtS)Nzpadding size is expected to be rz , but got: r)rypaddingsr6rXz,_padding_check_valid_input..s1!c'+c'l^Tr8rrc:ddzddzdjS)N Expected rzD or rzcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r))ryrsr6rXz,_padding_check_valid_input..s2aycAgY/AAF  Or8)rIrZrrrr)rrry input_dim is_batch_modevalid_batch_modevalid_non_batch_moders``` r6_padding_check_valid_inputrs LL G CT  I#'*M$,,q)$ GA/FEJJqMQ4F  Gq)$ OA#7#NEJJqMQ."2%%*G2eWOE7*UZU`U`Tacr8cddS)Nz input (W: z%) is too small. Calculated output W: r<)input_woutput_wsr6rXz_pad1d_common..*s*WI%J8*Ur8r)rrrrIrZr) rr is_reflection dim_planenbatchnplanerrrrrs ` @@@@@r6 _pad1d_commonr sI E F zzQA  Q ug15LE5 ZZ "FjjG&H GO /   LLA U  zzQ1229::r8ct||dSNTr)rrrs r6meta_reflection_pad1dr3 t <.> =ekk>Q>Q>S=TTUXr8Fr)rIrZrQboolrrs` r6meta_replication_pad1dr95 LL uzz!X u ==r8cd|s#tjt|dk(djdk(rdz |\j }|zz|r&tj|kxr|kfdtjj k(fdj j S)Nrrcy)Nz padding size is expected to be 2r<r<r8r6rXz(_pad1d_backward_common..Fr`r8r.c4ddddjSrr)rsr6rXz(_pad1d_backward_common..Srr8c2ddjSNz(grad_output width unexpected. Expected: , Got: rr grad_outputrsr6rXz(_pad1d_backward_common..[":8*GKL\L\]bLcKder8rIrZrrrrr) rrrrrrrrrs `` @@@@r6_pad1d_backward_commonrCs E  S\Q&(RS zzQ  LE5jjG&H GO /   LLK$$U++e ??5;; ''r8 grad_inputc t|||dSrrrrrs r6meta_reflection_pad1d_backwardras "+ugT RRr8c t|||dS)NFrrrs r6meta_replication_pad1d_backwardrgs "+ugU SSr8c ddd}d}t|dj}|dk(r jd}dz dz |dz }|\ j|}j j z z zz |rLtj kxr kfdtj kxr k fdtj dk\xs dk\ fdjd k(rj | fSj || fS) Nrrrrc4ddddjSrr)rsr6rXz_pad2d_common..rr8c4ddddjSNzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (rkrrr)dim_hrpad_bpad_tsr6rXz_pad2d_common..rr8c ddddS)Nz input (H:  W: z%) is too small. Calculated output H: r<)input_hroutput_hrsr6rXz_pad2d_common..s* gY/$$,:T( =r8r.rrrrIrZr)rrr dim_slicesrrrrrrrrrrrrrs` @@@@@@@@@@r6 _pad2d_commonrmsU E EJ Fug15 ::D qyA    a !(E5% ZZ #FjjGjjG&H&H GO /    GO /   LLA &Q  zzQ(;<<(CDDr8ct||dSr)rrs r6meta_reflection_pad2dr rr8ctjjtjk7fdt |dS)Nc@djjdS)Nz)"replication_pad2d" not implemented for 'rrrsr6rXz(meta_replication_pad2d..rr8Fr)rIrZrQrrrs` r6meta_replication_pad2dr rr8c d d d}|j}|jdk(r dz dz |dz }|\}}}}| } | } | |z|z | |z|ztjj k( fdtj j k( fd|j |jS)Nrrrrc2ddjSrrrsr6rXz%meta_pad2d_backward..rr8c2ddjSNz)grad_output height unexpected. Expected: rrrrrsr6rXz%meta_pad2d_backward..";H:W[M]M]^cMdLefr8)rryrIrZrr)rrrrrWrrrrrrrrrrs` @@@@r6meta_pad2d_backwardrs E EIJ xxzQ    Q !(E5%GG&H&H LLK$$U++e LLK$$U++f >>$** %%r8c $ d ddd}t|djdk(}|r%jd} dz dz dz |dz }|\j|}j j j  zz zz zz|rrtj kxr k fdtj kxr kfdtj kxr kfd tjdk\xs dk\xs dk\ fd |rj | fSj | fS) Nr.rrrrc4ddddjSrr)rsr6rXz_pad3d_common..rr8c4ddddjSrr)rsr6rXz_pad3d_common..rr8c4ddddjS)NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (rkrrr))dim_drpad_bkpad_fsr6rXz_pad3d_common..s2%%*G2fX_UG:V[VaVaUbdr8c ,dddddd S)Nz input (D:  H: rz%) is too small. Calculated output D: r<)input_drroutput_drrsr6rXz_pad3d_common.. s7 gYd7)<$$,:T(4z Kr8r)rrrr batch_moderrrrrrrrr rrrrrrrrs` @@@@@@@@@@@@@@@r6 _pad3d_commonr"s E E EIug15qJA      Q 07-E5%v ZZ "FjjGjjGjjG'H&H&H GO /    GO /    GO 0 0   LLA 7Q7(a-  (HMNN(HEFFr8ct||dSr)r"rs r6meta_reflection_pad3dr$rr8ctjjtjk7fdt |dS)Nc@djjdS)Nz)"replication_pad3d" not implemented for 'rrrsr6rXz(meta_replication_pad3d.."rr8Fr)rIrZrQrr"rs` r6meta_replication_pad3dr'rr8c tjt|dk(d|jdkDsJj|jk(sJdd d |jdk(rdz  dz dz |\}}}}}}|j } |j } |j } | |z|z| |z|z| |z|ztjj k(fdtjj k( fdtjj k( fd |j |j S) Ncy)Nz padding size is expected to be 6r<r<r8r6rXz%meta_pad3d_backward..1r`r8r.rrrc2ddjSrrrsr6rXz%meta_pad3d_backward..Irr8c2ddjSrrrsr6rXz%meta_pad3d_backward..Mrr8c2ddjS)Nz(grad_output depth unexpected. Expected: rr)rrr sr6rXz%meta_pad3d_backward..Qrr8r)rrrrrrrrrrrrrrrr rrs` @@@@@@r6meta_pad3d_backwardr.'s_ LLW"$NO ::>>   uzz )) ) E E E zzQ      07-E5%vjjGjjGjjG'H&H&H LLK$$U++e LLK$$U++f LLK$$U++e ??5;; ''r8pcJtj|jd|jd}|dkr0|j dgj tj S|j ||dz zdzfj tj S)Ncy)Nz(_pdist_forward requires contiguous inputr<r<r8r6rXz%meta__pdist_forward..[r`r8rrrr)rIrZrRrrrr)rr/rs r6meta__pdist_forwardr2Ws LL P ! AAv~~qc"%%E4R4R%SS~~qAE{a/1255886  r8gradpdistctj|jdtj|jdtj|tjS)Ncy)Nz._pdist_backward requires self to be contiguousr<r<r8r6rXz&meta__pdist_backward..jr`r8cy)Nz/_pdist_backward requires pdist to be contiguousr<r<r8r6rXz&meta__pdist_backward..mr`r8r)rIrZrRrr)r3rr/r4s r6meta__pdist_backwardr8fsW LL V LL X   D0N0N OOr8)rAr@c 0 ddlm}m}jd}jd}jd} |t j |j ||| frj||| ft jjdk(dt jjdk(dtjsGt jjjcxk(xrjk(ncfdj } j | d | d t j d k(xr d k( fd jjS) Nr) guard_or_truesym_eqrrr.cyNzbatch1 must be a 3D tensorr<r<r8r6rXzmeta_baddbmm..|r`r8cyNzbatch2 must be a 3D tensorr<r<r8r6rXzmeta_baddbmm..}r`r8cVdjdjdjS)Nz+Input dtypes must be the same, got: input: z , batch1: z , batch2: r)batch1batch2rsr6rXzmeta_baddbmm..s0A$**ZX^XdXdWeeopvp|p|o}~r8c .ddddddd SNz@Expected size for first two dimensions of batch2 tensor to be: [rkz ] but got: [rr].r< batch2_sizesbscontraction_sizesr6rXzmeta_baddbmm..s:t2&'|LO3DB|TUFWWY [r8)rr:r;rrIsym_notrrsrZry exp_config&skip_dtype_check_in_meta_registrationsrQr)rrArBrAr@r:r;dim1dim2dim3 batch1_sizesrGrHrIs``` @@@r6 meta_baddbmmrQrs7L ;;q>D ;;q>D ;;q>DU]]6$**tT46H#IJK{{D$-. LL"$HI LL"$HI  < < JJ&,, 6&,, 6 ~ <>$))+ &&r8cLtj|tjSrr]rrs r6meta_bernoullirTs   D0G0G HHr8c|Sr3r<rr/rs r6meta_bernoulli_rW Kr8cLtj|tjSrr]rVs r6meta_bernoulli_prZs   D0G0G HHr8c,tj|Sr3rIrrSs r6 meta_poissonr]   D !!r8c tj| |jkdtj|tj} tj|| fS)Ncy)NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r<r<r8r6rXz6meta__fused_moving_avg_obs_fq_helper..r`r8r)rIrZryrr)r observer_on fake_quant_on running_min running_maxscale zero_pointaveraging_const quant_min quant_maxch_axisper_row_fake_quantsymmetric_quantmasks r6$meta__fused_moving_avg_obs_fq_helperrnsO  LL$((*\   D 3D   T "D ))r8cHtj|jdk(dtj|jdk(d|j\|j\tjk(fd|j S)Nrcy)Nz a must be 2Dr<r<r8r6rXzmeta_mm..r`r8cy)Nz b must be 2Dr<r<r8r6rXzmeta_mm..r`r8c "ddddd S)Nz/a and b must have same reduction dim, but got [rkz] X [rEr<)M1M2Nr3sr6rXzmeta_mm..s(A!Brd%PRtSUVWUXXZ[r8)rIrZryrr)abrsrtrur3s @@@@r6meta_mmrxsz LLA56 LLA56 GGEAr GGEB LL b[ ;;q! r8c|r(tfdtjDStjj S)Nc3HK|]}|vrj|ndyw)rNr))rerdimsrs r6rgz+_compute_reduction_shape..s$UqatmTZZ]:Us")rYrrrAcompute_reduction_output_shaper)rr{r~s`` r6r|r|s7UE$))DTUUU  / / D AAr8ct|tjjr|jj St |drEt |jdr/|jj dk7r|jj Sy)Nrvrmrsr)rcrI _subclasses FakeTensor fake_devicermhasattrrv)r$s r6rrsg&%++667!!&&&! FMM6 * MM  & (}}!!!r8 input_tensorrrdilation is_transposedgroupsoutput_paddingc$dtdtdtdtdtdtf d}dtdtdtdtdtdtdtfd } |jd d} |jd d|r||jd z} n<|jd } |jd |z|jd k7r td |jd | gt|tr|gt z}n t |d k(r|d gt z}t|tr|gt z}n t |d k(r|d gt z}t|tr|gt z}n t |d k(r|d gt z}d} |rCt|tr|gt z} n#t |d k(r|d gt z} n|} t t D]]} | r/j| | || || | | || | | 4j|| || || | | || _d dlm }tj|d dDcgc]}|d kD c}fdScc}w)Nlnr/rrUrr/c6|d|zz||dz zz dz |zdzS)a Formula to apply to calculate the length of some dimension of the output See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html Args: ln: length of the dimension p: padding in that dim d: dilation in that dim k: kernel size in that dim s: stride in that dim Returns: The output length rrr<)rr/rrUrs r6_formulaz+calc_conv_nd_return_shape.._formulas.QU Q!a%[(1,2Q66r8r4c<|dz |zd|zz ||dz zz|zdzS)a Formula to apply to calculate the length of some dimension of the output if transposed convolution is used. See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html Args: ln: length of the dimension p: padding in that dim d: dilation in that dim k: kernel size in that dim s: stride in that dim op: output padding in that dim Returns: The output length rrr<)rr/rrUrr4s r6_formula_transposedz6calc_conv_nd_return_shape.._formula_transposed s2"Q!|a!e#a1q5k1B6::r8rrrzInvalid channel dimensions)sym_orc.dtddddS)NzGiven input size per channel: z&. Calculated output size per channel: rz. Output size is too small)r)r{ ret_shapesr6rXz+calc_conv_nd_return_shape..L s*0d =//8}o>#$r8) rrrrcrrrrrrrIrZ)rr-rrrrrrrr kernel_size out_channelsoutput_padding_listrrrFr{rs @@r6calc_conv_nd_return_shapers7S7S7S7S7S7S7";;;;;;;QT;&,,qr"K   ab !D Q/ ||A <<?V #|'9'9!'< <;< <##A& 5I&'"CI% V )s4y('7#)c$i' W 1:,T*(G$:D ) X! QK=3t9,/3 ng .#1"2SY">   A %#1!#4"5D "A "0  3t9     #GAJQKN1I'*     a'!*hqk;q>6RS9U "= LL !" .1Q./ $  /s2 J cbtjj|tjk(Sr3rI _prims_commonr channels_lasttens r6is_channels_lastrT s$    4 4S 9U=P=P PPr8 running_mean running_vartrainingexponential_average_factorepsiloncrj}| |jn |j} | |jn |j} fd} j|j| } |r#j| } j| }n"jd} jd}| | |fS)NctrtjSjtjrtjStjSr)rrIrrRr)rsr6pick_memory_formatz2meta_miopen_batch_norm..pick_memory_formatk sF L )&& &  % %E4K4K % L** *&&&r8rr)rrr)rr-r/rrrrrrsave_mean_shapesave_var_shaperr save_meansave_vars` r6meta_miopen_batch_normrX s""I-9,Dl((&,,O*5*A[&&v||N'   + . .=O=Q . RC **?; )).9 **40 ))$/  8 ##r8c fd} t||||||r|nd} d} d} j| dk(rd| | <j| } | j| } | S)Ncdtdk(r&ts tr+tjStrtjSj tj rtj Sj tj rtj SyNrr)rrrIrrRrpreserve_format)rr-sr6rz%meta_conv..pick_memory_format s | $ . -1A&1I*** -***  % %E4K4K % L** *  ' 'e6K6K ' L(( (Mr8rrr)rrrr)rr-r/rrrrrrr shape_outinput_channels_dimoutput_channels_dimrs`` r6 meta_convr~ s )*'T I+,1)* %&   +C &&13& 4C Jr8mkldnnc t|||||d|g} |j| } tj} |j dk(rtj } | j | } | S)NFrr)rrrIrrychannels_last_3dr) rr-r/rrrrattrscalars algorithmrrout_memory_formats r6meta_mkldnn_convolution_defaultr sp. &&'8UFB $$Y/!//     " % 6 6 ff#4f5 r8cb|jg|jdd|jdSNrrrr)rr-r/rrrs r6meta_linear_pointwise_defaultr s5%%&Q (:(:3B(?&Qa&QRRr8mklcb|jg|jdd|jdSrr)r packed_weight orig_weightr/rs r6meta_mkl_linearr s:))@,$$Sb)@;+<+. sP1#6GzQVW[V\\^_ .?zfll[cNdMe fgr8)rIrZryr)r$ryrrs````r6check_dim_sizer s6 LL > X 6$ > gr8cd}|d|\}} tjt|dvdtjjtjtj tj tjfvfdt|dk(r|| } } n%t|dk(r |d|d} } n |d|\} } |d |\} } tj|duxs|dk7d jd k(rjd nd}jd }jd}jd}t||| | d|}t|| | | d|}tj}t|| | | | | dd||||||jdk(r|||g}n||||g}tj|jj|S)Nctjt|dvfd|d}t|dk(r|n|d}||fS)NrrcddS)Nz avg_pool2d: 4 must either be a single int, or a tuple of two intsr<rsr6rXz1meta_avg_pool2d..unpack.. l4&(\]r8rrrIrZrrrHWs` r6unpackzmeta_avg_pool2d..unpack G H  ]  FSQACF!t r8rrrrcyNzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr<r<r8r6rXz!meta_avg_pool2d.. r`r8c@djjdS)Nz""avg_pool2d" not implemented for 'rrrsr6rXz!meta_avg_pool2d.. 6u{{7J7J7L6MQQr8rrrrcyNzdivisor must be not zeror<r<r8r6rXz!meta_avg_pool2d.. r`r8rrrrr.rn)rIrZrrQruint16uint32uint64ryrpooling_output_shaperArpool2d_shape_checkr~rv)rrrrrcount_include_paddivisor_overrider kHkWdHdWpadHpadWrr inputHeight inputWidthrrrrs` r6meta_avg_pool2dr" sM; /FB LL F y a LL EKKu||U\\RRQ 6{aRB V F1IB&)B 7+JD$ LLD 9$4$9* %yy{a/UZZ^QF**R.K**R.KBJ' Rr1iPL&z2tRINK//6M         $ yy{a\;7 \;? ;; kk||#  r8ct|||||||dd| | | | | ||j}| }t|||dz |t|||dz | t|||dz | y)Nrr.r)rryr)r gradOutputrrrrrrrrr r!rr mem_formatr nOutputPlanes r6avg_pool2d_backward_shape_checkr' s"         $ 99;DL:tTAX|<:tTAX|<:tTAX{;r8ctjt|dk(xst|dk(d|d}t|dk(r|n|d} tjt|dk(xst|dk(xst|dk(dt|dk(r|n|d} t|dk(r| nt|dk(r| n|d} tjt|dk(xst|dk(d|d} t|dk(r| n|d} tj|duxs|dk7d|j}|j dk(r|d nd}|d }|d }|d }t ||| | d|}t || | | d|}t j|}t||||| | | | | ||||||tj||j|j| S)Nrrcy)NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr<r<r8r6rXz*meta_avg_pool2d_backward..F r`r8rcyr r<r<r8r6rXz*meta_avg_pool2d_backward..L r`r8cy)NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr<r<r8r6rXz*meta_avg_pool2d_backward..R r`r8cyrr<r<r8r6rXz*meta_avg_pool2d_backward..Y r`r8rrrrrrn) rIrZrrryrrArr'r~rQrv) gradOutput_rrrrrrrrrrrrr input_sizerrr r!rrr%s r6meta_avg_pool2d_backwardr/8 s LL KA6[!1Q!6] QB;1$+a.B LL F q@CK1,@F q0@a6{aVAYB6{a3v;!+;RB LL G .S\Q.Y 1:Dw<1$4'!*D LLD 9$4$9* J$yy{a/Z^QFR.KR.KBJ' Rr1iPL&z2tRINK,,U3J#       $ ;;kk||  r8ctjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d} tj| xs t|dvdtjjtjtj tj tjfvfd|s|n|d} |s|nt|dk(r| n|d} |s| nt|dk(r| n|d} tjt|dvd|d} t|dk(r| n|d}t|dk(r| n|d}tjjd vd tj| xs|dk7d jd}jd }jd }jd}jd}t||| | d|}t|||| d|}t|| || d|}t|||| | | | | ||ddd||||||ddjdk(rj||||fSj|||||fS)Nrr.cyNzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr<r<r8r6rXz!meta_avg_pool3d.. r`r8rrrcyNzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr<r<r8r6rXz!meta_avg_pool3d.. r`r8c@djjdS)Nz""avg_pool3d" not implemented for 'rrrsr6rXz!meta_avg_pool3d.. rr8cyNzBavg_pool3d: padding must be a single int, or a tuple of three intsr<r<r8r6rXz!meta_avg_pool3d.. r`r8rrcyNz9non-empty 4D or 5D (batch mode) tensor expected for inputr<r<r8r6rXz!meta_avg_pool3d.. r`r8cyrr<r<r8r6rXz!meta_avg_pool3d.. r`r8rrrrz avg_pool3d()T)check_input_sizer) rIrZrrQrrrrrrrpool3d_shape_checkr)rrrrrrrkTrrdTrrpadTrrrnslicesitimeiheightiwidthotimeoheightowidths` r6meta_avg_pool3drI s LL KF"X QB;1$+a.B;1$+a.B LL  +c&kV+\ LL EKKu||U\\RRQvayBc&kQ&6F1IBc&kQ&6F1IB LL G T 1:Dw<1$4'!*Dw<1$4'!*D LL fK  LL 5 0A 5* ZZ]FjjnG JJrNEjjnG ZZ^F D"a CE"7Bb!YGG !&"dB9 EF               -2 zzQ@AAHIIr8ctjt|dvd|d}t|dk(r|n|d} t|dk(r|n|d} tj| xs t|dvd|s|n|d} |s| nt|dk(r| n|d} |s| nt|dk(r| n|d} tjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d}tj|jdvd tj| xs|dk7d |j d }|j d }|j d }|j d}t |||| d|}t || || d|}t || || d|}t ||||| | | | | |||||||||d|j|jS)Nr1cyr3r<r<r8r6rXz*meta_avg_pool3d_backward.. r`r8rrrcyr5r<r<r8r6rXz*meta_avg_pool3d_backward.. r`r8cyr8r<r<r8r6rXz*meta_avg_pool3d_backward.. r`r8r9cyr;r<r<r8r6rXz*meta_avg_pool3d_backward.. r`r8cyrr<r<r8r6rXz*meta_avg_pool3d_backward.. r`r8rrrrzavg_pool3d_backward()) rIrZrrrravg_pool3d_backward_shape_checkrr)rrrrrrrrr?rrr@rrrArrrBrCrDrEotime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks r6meta_avg_pool3d_backwardrT s LL KF"X QB;1$+a.B;1$+a.B LL  +c&kV+\vayBc&kQ&6F1IBc&kQ&6F1IB LL G T 1:Dw<1$4'!*Dw<1$4'!*D LL fK  LL 5 0A 5* jjnG JJrNEjjnG ZZ^F0D"aS27Bb!YW1&"dB9U#           ', ??5;; ''r8c,tjjdk(xsjdk(fdjddt |z}t j }tj|jj|S)Nr.rc"djS)Nz"Expected 3D or 4D tensor, but got r)rsr6rXz*meta_adaptive_avg_pool2d../ 4TZZLAr8rrn) rIrZrrrYrArr~rQrv)r output_sizerWrs` r6meta_adaptive_avg_pool2drY+ s| LL Q($))q.A::cr?U;%77L//5M ;;jj{{#  r8ctjjdk(xsjdk(fdjjddt |zS)Nrrc"djS)Nz"Expected 4D or 5D tensor, but got r)rsr6rXz*meta_adaptive_avg_pool3d..A rWr8r)rIrZrrrrY)rrXs` r6meta_adaptive_avg_pool3dr\= sO LL Q($))q.A >>$**Sb/E+,>> ??r8cj}td|D].tjj dkDfd0tj|dk(xs|dk(fdtjj j k(fdtj }trtj}jjj|S) Nrrc*djddS)Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero size for non-batch dimensions,  with dimension being emptyr))grad_outrsr6rXz4meta__adaptive_avg_pool2d_backward..L s&66>nn5EEUVWUXXdfr8r.rc"djS)NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r)rsr6rXz4meta__adaptive_avg_pool2d_backward..Q sTUYU_U_T`ar8c<djdjSNexpected dtype z! for `grad_output` but got dtype r)rarsr6rXz4meta__adaptive_avg_pool2d_backward..U s/$**-Nx~~N^_r8r) rrrIrZrrQrrrrrr)rarrrrs`` @r6"meta__adaptive_avg_pool2d_backwardrfF s ==D 1d^  MM! q  f   LL  TQYa LL hnn$_++M++ >>$** % ( (} ( EEr8cdt|dtj|tjS)Nadaptive_avg_pool3d_backwardr)!_adaptive_pool_empty_output_checkrIrrrrs r6"meta__adaptive_avg_pool3d_backwardrk] s(&k3QR   D0N0N OOr8rcj}td|D]/tjj dkDfd1y)Nrrc.djddS)Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r_r`r))rrrsr6rXz3_adaptive_pool_empty_output_check..i s/*--8->->,??OPQsR^`r8)rrrIrZr)rrrrs`` @r6ririd sG   D 1d^    Q ! #   r8c j}tj|dvfdtd|D]. tjj dkD fd0tjt |dk(dd}d}d}jdk(rj d}|dz }j |dz }|\}}jd k(r;|||f}j |} j |tj } | | fS||||f}tj} j |j| } j |tj j| } | | fS) Nr.rc"djS)Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r)rsr6rXz*meta_adaptive_max_pool2d..v LU[[MZr8rrc*djddS)Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r_r`r)rrsr6rXz*meta_adaptive_max_pool2d..{ %'',{{m3CA3lTr8rcy)NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r<r<r8r6rXz*meta_adaptive_max_pool2d.. r`r8rr.rr) rrIrZrrrrrrArr) rrXrdimHsizeBsizeDosizeHosizeWrrrrrs ` @r6meta_adaptive_max_pool2dr{p s| ::D LL Z1d^  JJqMA     LL KAU D E E zzQ 1    JJtax E NFF zzQFF+ ooi(//)5;;/?G|E662 33E: ooi(++-+H//)5;;/?BB'C G|r8cNj}tj|dvfdtdtjjjk(fdt j }jjj|S)Nroc"djS)NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r)rsr6rXz3meta_adaptive_max_pool2d_backward.. s]^i^o^o]pqr8adaptive_max_pool2d_backwardc<djdjSrdr)rrsr6rXz3meta_adaptive_max_pool2d_backward.. s!/%++.OP[PaPaObcr8r) rrIrZrirQrArrrr)rrrrrs`` r6!meta_adaptive_max_pool2d_backwardr s   D LL q &k3QR LL {(((c //6M ??5;; ' * * * GGr8c j}tj|dvfdtd|D]. tjj dkD fd0tjt |dk(dd}d}d}|dk(rj d}|dz }j |}|\}}}|d k(r||||f} n|||||f} j | } j | tj } | | fS) Nr9c"djS)Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r)rsr6rXz*meta_adaptive_max_pool3d.. rqr8rrc*djddS)Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r_r`r)rssr6rXz*meta_adaptive_max_pool3d.. rtr8r.cy)NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r<r<r8r6rXz*meta_adaptive_max_pool3d.. r`r8rrr)rrIrZrrrrr) rrXrdimDrwrxosizeTryrzrrrrs ` @r6meta_adaptive_max_pool3dr s ::D LL Z1d^  JJqMA     LL KAU D E E qy 1    JJt E(FFF qyFFF3 E666: //) $Cooiu{{o;G <r8cPt|d|j|jS)Nadaptive_max_pool3d_backward)rirr)rrrs r6!meta_adaptive_max_pool3d_backwardr s"&k3QR ??5;; ''r8c>| td|j|S)Nz:cannot repeat_interleave a meta tensor without output_size)rr)repeatsrXs r6meta_repeat_interleave_Tensorr s%WXX   [ ))r8c&|jjsJ|jjsJt|jt |j|jt |jt j }|SNr>)rQrrGrrrrB)realimagrs r6 meta_complexr sp :: ' '' ' :: ' '' '  +DJJ78 +DJJ786>>F Mr8) fill_valuercd|j||jftjSry)rryrIr)rrrs r6nonzero_staticr s& >>4,EJJ> ??r8c tjtjdtj|j |j fd|j ftj|jS)Ncy)NaYThe register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r<r<r8r6rXznonzero.. r`r8rrQrv) rI_check_not_implementedrKmeta_nonzero_assume_all_nonzerorrryrrvrs r6nonzeror sf   22 S    txxz" DJJLjj{{  r8c tjtdg}tD]\ftjjtj tj tjtjfvdjtjtjfvrȉj}t|tjjzjkfdtjD]`tjjjzk(fd|j|jdb]|jp|j|tjtjkfdddlm}t%|j&tjkr*jdtjkr*d}d}D]|dk(r d}|dk(rd }nd }|sg}g}tD]*\ |j|j,tD]*\ |j|j,j)||gggtD]\\} @rjj| )jj| Ht%j^fd } j+zz} dd lm} | j1dk(r| S| } t3j4| }t%|t%tt|k7r~t3j6| j|}t3j8|}t3j6|t3j:|}| j=| j?|} | S) Ncy)Nz#at least one index must be providedr<r<r8r6rXz#meta_index_Tensor.. r`r8cy)Nz?tensors used as indices must be long, int, byte or bool tensorsr<r<r8r6rXz#meta_index_Tensor.. r`r8c"djS)N)too many indices for tensor of dimension rrsr6rXz#meta_index_Tensor..% sG {Sr8c NdjddjdzS)NzThe shape of the mask at index z0 does not match the shape of the indexed tensor r))rrjrUrsr6rXz#meta_index_Tensor..* s<"8 ZPQsSJJN**U_`ade`e_f!hr8rc<djdtdS)Nrz (got rl)rr)rrsr6rXz#meta_index_Tensor..5 s!;DII;fSQX\NZ[\r8rFrTczz}t|j}dgtz|tt|jtz |j ||S)zI This follows restride_src in TensorAdvancedIndexing.cpp r)rrrrr6)rrr after_shape before_shapereplacement_shapes r6 _restride_srcz(meta_index_Tensor.._restride_srcv so00;>t{{}%KL#PS Q K L!C Oc+6F$FGug..r8guard_or_false) rIrZr enumeraterQrrr4rrrrrrrselect torch._refs_refsrr&rrrrrrA3compute_elementwise_output_logical_to_physical_perm apply_permr invert_permr6r)rrrrrefsstatehas_contiguous_subspacer{transposed_indicesryrrrrestrided_selfperm perm_shaper'rrrrrrUrs`` @@@@@@@r6meta_index_Tensorr s LLg MN&(Fg&!5   LL  EIIuzz5::NNY {{uzz5::66--/K"" Ndii/Suzz*8A&& A$**QU*;;h MM'..A"67 8 e$ MM% /!0G LL G  !\ (4(('23G g, "t g, " E# ' A:  aZ}  '#' #!'* 1HAu  A"))%0 1"'* 1HAu} A"))%0 1||D!$!LK#%(2 U = ""4::c?3##DJJsO4 $U[[ 1 2 / ..(99KG HCDdjjla'( #4(N  D D^ TD DzT%D *++%%cii6 66zB %%j%2C2CD2IJ nnSXXZ4 Jr8c d} d} d} | dr|j|j} | dr|j|j} | dr|j|} | | | fS)Nrrrrr) grad_output_input_weight_bias_sizes_optrrr transposedrr output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biass r6meta_convolution_backwardr sy 1~)33FKKMB1~*44W\\^D1~(22>B  35F GGr8cjd}jd}|j||f}tjj dk(dtjj dk(dtjjdjdk(fdtjjdjdk(fdtj|jd|k(xr|jd|k(d |j |jS) Nrrr.cyr=r<r<r8r6rXzmeta_addbmm.. r`r8cyr?r<r<r8r6rXzmeta_addbmm.. r`r8rcPdjddjdS)Nz8batch1 and batch2 must have same number of batches, got rrrrArBsr6rXzmeta_addbmm.. s.J6;;WX>JZZ_`f`k`klm`n_opr8c djddjddjddjdd S)Nz#Incompatible matrix sizes for bmm (rrFrrrlrrsr6rXzmeta_addbmm.. sQ1&++a.1A6;;q>BRS;;q>"!FKKN#31 6r8cy)Nz.self tensor does not match matmul output shaper<r<r8r6rXzmeta_addbmm.. r`r8)rrsrIrZryr)rrArBrAr@rMrNs `` r6 meta_addbmmr s ;;q>D ;;q>D ;;d| $D LL"$HI LL"$HI LL A&++a.(p LL A&++a.(  LL ! 51!5@ >>$))+ &&r8c @|j|jSr3r)rrkwargss r6meta_randint_liker s >>$))+ &&r8) grad_scale found_infc n||||||fD])tjttfd+y)Nc dtSNz'exponent must be a tensor list but got rmlsr6rXz#meta__fused_adam_.. =d1gYGr8rIrZrcr)rgradsexp_avgs exp_avg_sqsmax_exp_avg_sqs state_stepslrbeta1beta2 weight_decayepsamsgradmaximizerrrs @r6meta__fused_adam_r s:&E8[/; O  q$  G  r8c ||||||fD])tjttfd+d}||||||||||fS)Nc dtSrrrsr6rXz"meta__fused_adam..rr8cR|Dcgc]}tj|c}Scc}wr3r\) tensor_listrs r6empty_like_listz)meta__fused_adam..empty_like_lists!-89  #999s$r)rrrrrrrrrrrrrrrrrs @r6meta__fused_adamr ss&E8[/; O  q$  G  : ! $(  r8cjtjjdk(dtjjdk(dtjjtjufdtjjtjufdtjj dj dk(fdj j dj dftj S) Nrcy)Nza must be a 2D tensorr<r<r8r6rXzmeta__int_mm..r`r8cy)Nzb must be a 2D tensorr<r<r8r6rXzmeta__int_mm..r`r8c"djS)Nzexpected self to be int8, got r)rvsr6rXzmeta__int_mm..0 :r8c"djS)Nzexpected mat2 to be int8, got r)rwsr6rXzmeta__int_mm..rr8rrc djddjddjddjdd S)Nz'Incompatible matrix sizes for _int_mm (rrFrrrlrrvrwsr6rXzmeta__int_mm.."sM5affQi[!&&)M66!9+Qqvvayk ,r8r)rIrZryrQr4rrr5rs``r6 meta__int_mmrs LLA>? LLA>? LL 5::: LL 5::: LL q QVVAY  ;;q 166!9-U[[; AAr8cftjjdk(dtjjtjufdj d}j ddz}j |dz||dzzd|dzftj S) NrcyNzw must be a 2D tensorr<r<r8r6rXz2meta__convert_weight_to_int4pack..,r`r8c"djSNrrrsr6rXz2meta__convert_weight_to_int4pack../.qwwi8r8rrrM r)rIrZryrQrrrr5r inner_k_tilesrrUs` r6 meta__convert_weight_to_int4packr*s LLA>? LL 5;;8 q A q A A ;; F -"$ %  Q   kk  r8cJtjjdk(dtjjtjufdj d}j d}j ||dzftjS)Nrcyrr<r<r8r6rXz:meta__convert_weight_to_int4pack_for_cpu..@r`r8c"djSNzexpected w to be int32, got rrsr6rXz:meta__convert_weight_to_int4pack_for_cpu..Crr8rrr)rIrZryrQr5rrrrs` r6(meta__convert_weight_to_int4pack_for_cpur >s LLA>? LL 5;;8 q A q A ;; AF kk  r8c.tjjdk(dtjjdk(dtjjtjtj tj fvfdtjjtjufdjjdjddzj S) NrcyNzx must be a 2D tensorr<r<r8r6rXz*meta__weight_int4pack_mm..Or`r8rcy)Nzw must be a 4D tensorr<r<r8r6rXz*meta__weight_int4pack_mm..Pr`r8c"djSNrrrFsr6rXz*meta__weight_int4pack_mm..S5aggY?r8c"djSr rrsr6rXz*meta__weight_int4pack_mm..Wrr8rrr rIrZryrQrNrOrPr5rrrs`` r6meta__weight_int4pack_mmrMs LLA>? LLA>? LL EMM5==%..AA? LL 5;;8 ;;qvvay!&&)a-qww; ??r8c(tjjdk(dtjjdk(dtjjtjtj tj fvfdtjjtjufdjjdjdjS)Nrcyr r<r<r8r6rXz2meta__weight_int4pack_mm_for_cpu..^r`r8cyrr<r<r8r6rXz2meta__weight_int4pack_mm_for_cpu.._r`r8c"djSrrrsr6rXz2meta__weight_int4pack_mm_for_cpu..brr8c"djSrrrsr6rXz2meta__weight_int4pack_mm_for_cpu..frr8rr) rIrZryrQrNrOrPrrrrs`` r6 meta__weight_int4pack_mm_for_cpur\ LLA>? LLA>? LL EMM5==%..AA? LL 5;;8 ;;qvvay!&&)177; ;;r8c(tjjdk(dtjjdk(dtjjtjtj tj fvfdtjjtjufdjjdjdjS)Nrcyr r<r<r8r6rXz;_weight_int4pack_mm_with_scales_and_zeros..mr`r8cyrr<r<r8r6rXz;_weight_int4pack_mm_with_scales_and_zeros..nr`r8c"djSrrrsr6rXz;_weight_int4pack_mm_with_scales_and_zeros..qrr8c"djSr rrsr6rXz;_weight_int4pack_mm_with_scales_and_zeros..urr8rrr)rFrrqScaleqZeross`` r6)_weight_int4pack_mm_with_scales_and_zerosr$krr8rvrwc||zdz |z|zSr"r<rs r6 kai_roundupr&zs UQY1  !!r8c |dk(ry||k(r(d}d}d}dddd fd fd}||||||S|dzd k(rC||zd k(r:d}d}d}dddd fd } fd  d fd  fd|||||||Syyy)NrrrMrc8t||zd}t||S)Nrr&)rUkrsrkr_sr_roundedup4s r6kai_k_roundedupz3get_kai_packed_weight_size..kai_k_roundedups#$/rBw#: "1&677r8cX|||}|dzdk(sJd||dzzzzzS)Nrrzk_internal must be evenr<) rUnrr*r+ k_internalr-kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhss r69kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z]get_kai_packed_weight_size..kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0sY-QB7 "Q1,G.GG,1_23+,))r8c>t|||z}|||||zSr3r))rrUr/r*r+num_rowsr4s r67kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z[get_kai_packed_weight_size..kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0s6'q"-3O2r2r8rrc|||zdk(sJ| zdk(sJ|zdk(sJt|||z}||||||zSrcr)) rrUr/r*r+blr6kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofs r69kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0z]get_kai_packed_weight_size..kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0spRA~%~//A555//A555&q"-3Q2r2rr8c||zdk(sJ| zdk(sJ|zdk(sJ } ||} ||}|||zz zzSrcr<)rUr/r*r+r9num_bytes_multiplier_rhsnum_blocks_per_rownum_bytes_per_blockr:#kai_get_bf16_datatype_size_in_bytesr<kai_num_blocks_per_rowr1kai_num_bytes_per_blockr3s r6r;z_get_kai_packed_weight_size..kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0sRA~%~//A555//A555,O+P(%;Ar%B"&=0'#(+==+,()r8cy)Nrr<r<r8r6rBzGget_kai_packed_weight_size..kai_get_bf16_datatype_size_in_bytessr8c6|zdk(sJt|||zSrcr))rUr9r:s r6rCz:get_kai_packed_weight_size..kai_num_blocks_per_rows)//A555"1b)R//r8c(|zdk(sJ|dz|zS)Nrrr<)r9r?r:s r6rDz;get_kai_packed_weight_size..kai_num_bytes_per_blocks'//A555a#;;;r8r<)n_bitsruK groupsizekai_nrkai_krkai_srr7r=r:rBr;r4r-r<rCr1r2rDr3s @@@@@@@@@@@r6get_kai_packed_weight_sizerN~s { >FFF$% !+, (!"  8  K1fff ^q Q]a%7FFF$% !!" !" !#     ,  0 <M1fffi u&8 [r8cVtjjtjufdtjj j r||k(r|jtjk(s2||kre|dzdk(r]||zdk(rU|jtjk(r8td|||}jt|tjSj|jz}j|tjS)Nc"djSrr)weightssr6rXz2meta__dyn_quant_pack_4bit_weight..s.w}}o>r8rrrr) rIrZrQrbackendskleidiai is_availablerMrPrNrrr)rQ scales_zerosr/ block_size in_features out_featurespacked_weight_sizes` r6 meta__dyn_quant_pack_4bit_weightrZs LL $> ~~++- { "|'9'9U[['H  $R1$j(A-""enn48 |[*   %7!8  LL <+=+=+??   /u{{  CCr8ctjjdk(dtjjtjfvfdj d}j ||jS)Nrcy)Nzinput must be a 2D tensorr<r<r8r6rXz-meta__dyn_quant_matmul_4bit.. r`r8c"djS)Nzexpected input to be f32, got r)inpsr6rXz-meta__dyn_quant_matmul_4bit..s0 <r8rr)rIrZryrQrNrr)r^packed_weightsrVrWrXrs` r6meta__dyn_quant_matmul_4bitr`sg LLa!DE LL emm_$<  A ==L = ::r8c(tjjdk(dtjjtjtj tj fvfdtjjdk(dtjjtjufdjjdjdjS)Nrcyr r<r<r8r6rXz*meta__weight_int8pack_mm..r`r8c"djSrrrsr6rXz*meta__weight_int8pack_mm..rr8cyrr<r<r8r6rXz*meta__weight_int8pack_mm..r`r8c"djS)Nzexpected w to be int8, got rrsr6rXz*meta__weight_int8pack_mm..s-aggY7r8rr) rIrZryrQrNrOrPr4rr)rFrq_scaless`` r6meta__weight_int8pack_mmrgs LLA>? LL EMM5==%..AA? LLA>? LL 5::7 ;;qvvay!&&)177; ;;r8cftjjdk\fdtjjdk\fdtjjdjdk(fdtjt j j dtjt j j dtj|dk\d tjd vfd jd }jd }jdd }jdd }ttj||}|j||gj|S) Nrc,djdS)Nz1cdist only supports at least 2D tensors, X1 got: rr)x1sr6rXz$meta_cdist_forward..(CBFFH:QOr8c,djdS)Nz1cdist only supports at least 2D tensors, X2 got: rr)x2sr6rXz$meta_cdist_forward..,rkr8rcPdjddjdS)Nz4X1 and X2 must have the same number of columns. X1: rz X2: r)rjrmsr6rXz$meta_cdist_forward..0s,Frwwr{mSXY[Y`Y`acYdXefr8cy)Nz=cdist only supports floating-point dtypes, X1 got: {x1.dtype}r<r<r8r6rXz$meta_cdist_forward..4r`r8cy)Nz=cdist only supports floating-point dtypes, X2 got: {x2.dtype}r<r<r8r6rXz$meta_cdist_forward..8r`r8rcy)Nz)cdist only supports non-negative p valuesr<r<r8r6rXz$meta_cdist_forward..:r`r8NrrcdS)Nz%possible modes: None, 1, 2, but was: r<) compute_modesr6rXz$meta_cdist_forward..=s7 ~Fr8r) rIrZryrrAis_float_dtyperQrrbroadcast_shapesextendr) rjrmr/rtr1r2 batch_tensor1 batch_tensor2rWs `` ` r6meta_cdist_forwardr|$sJ LL A O LL A O LL  rwwr{"f LL RXX&O LL RXX&O LLaLM LL $F B BHHSbMMHHSbMM..}mLMLR! << %%r8c4|jd}|jd}|jd}|jdd}|jdd} ttj|| } | j } | j ||gt j| } |dk(s|dk(s |dk(s| dk(rtj|S| t|jk7r|j| }tj|tjS)Nrrrr) rrrIrvcopyrwmathprod zeros_likersrr) r3rjrmr/cdistc1rxryrzr{rotensor1_expand_size batch_products r6meta_cdist_backwardrHs "B "B "BHHSbMMHHSbMM 6 6}m TU.335Bx(II23M Qw"'R1W (:##d288n, YY* +   Be.E.E FFr8c tjjtjtjfvfdtjjtjtjfvfdtjt j jfdjd} |rtj| dk\d| dz} j| jd} }tj|tk(dtjjdk(fdtjjjk(fd fd d fd } td k7r|jjd} jj} |tk(r"j| jd}njd}n| | |}|ttfvs|s!jjd} njd} j| } jd}|tk(rA|rtj|dk\d|dz}j|jd}nj| j}| | | |fS)Nc"djS)Nz(expected indices to be long or int, got r)rsr6rXz$meta_embedding_bag..m:7==/Jr8c"djS)Nz(expected offsets to be long or int, got r)rgsr6rXz$meta_embedding_bag..qrr8c"djS)Nz/expected weight to be floating point type, got r)r-sr6rXz$meta_embedding_bag..usA&,,Pr8rrcyNz1include_last_offset: numBags should be at least 1r<r<r8r6rXz$meta_embedding_bag..|r`r8cy)Nz@embedding_bag: per_sample_weights only supported with mode='sum'r<r<r8r6rXz$meta_embedding_bag..r`r8c$djdS)Nz1expected per_sample_weights to be 1D tensor, got rr)per_sample_weightssr6rXz$meta_embedding_bag..sGHZH_H_G``abr8cNdjdjdS)Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (rlr)rrsr6rXz$meta_embedding_bag..s478J8P8P8R7ST66=mmo5FaIr8cD|||xr|jddk(SNrrr)rrer padding_idxis_fast_path_index_selects r6is_fast_path_index_select_scalez;meta_embedding_bag..is_fast_path_index_select_scales( %c6; ? XELLQROWXDX r8c|jtjk(xs|jtjk(xr1|j ddk(xr|j ddk(xr|dkSNrr)rQrIrMrKr)rrrs r6rz5meta_embedding_bag..is_fast_path_index_selectsb YY%++ % @ejj)@  1 "  a A% a  r8c2| ||||S|||Sr3r<)rrerrrrs r6 is_fast_pathz(meta_embedding_bag..is_fast_paths)  23v{S S,S&+F Fr8cpucyrr<r<r8r6rXz$meta_embedding_bag..r`r8)rIrZrQrrrArurrMODE_SUMrrrMODE_MAX MODE_MEANr)r-rrgscale_grad_by_freqrLsparserinclude_last_offsetrnum_bagsrr offset2bagbag_size max_indices fast_path_sumnumBagsrrs``` ` @@r6meta_embedding_bagr_s} LL %**eii00J LL %**eii00J LL V\\*P ||AH M G  A    h A 7F% H  V    # #q ( b    $ $ &'--/ 9    G 7u$&&w||A7 $$W\\^4 8 !++Hfkk!nEK!++A.K$V-?U Ix( (  **7<<?;J **1-J$$X.--" 8 " qLO1 !++GV\\!_EK!++HMMO(>{{ zz ;;r8rct||d}tj|j|}t |||}|j ||S)NT)rr)rrAr{rr|r)rr{r~rQrrWs r6 meta_nansumrsI(u$OL    T 2D+E4AL ??<|? <. r`r8rcddS)Nz"Repeats cannot be negative, found rr<)rrepsr6rXzmeta_repeat..s8ZsKr8r) rIrZrryrrYrrr)rrnum_new_dimensions padded_sizer target_sizers ` @r6 meta_repeatrs LL G  "lG$ 3 1H K  W 2++eDJJ.??K8=c'l8KL1;q>GAJ.LKL >>+ &&Ms1Cc|Sr3r<rs r6 meta_zero_rrXr8czt|tjr t|j|j|Sr3)rcrIr r\rrrs r6meta_binop_inplacers)%& EKK8 Kr8cd}d}d}||r||r td||r||s tdt|tjr t |j |j |S)a* Some checks for inplace ops. Checks for promotion rules for some dtypes. int.add/sub_(float) and bool.add/sub_(others) are rejected. Promoting in these in-place operations would require reallocating and copying over elements, hence not allowed. Checks for alpha param. ct|trtj|jSt|t Sr3)rcrrAr.rQrrfs r6 is_integericz.meta_binop_inplace_alpha..is_integericBs. c: &))#))4 4c7+ +r8ct|trtj|jSt|t Sr3)rcrrArurQrrs r6 is_floaticz,meta_binop_inplace_alpha..is_floaticHs. c: &'' 2 2c9- -r8ct|trtj|jSt|t Sr3)rcrrAis_boolean_dtyperQrrs r6 is_booleanicz.meta_binop_inplace_alpha..is_booleanicNs. c: &))#))4 4c8, ,r8z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)rrcrIr r\r)rrr@rrrs r6meta_binop_inplace_alphar0sz$, . -Dj/ k  D,u"5 m  %& EKK8 Kr8c:t||tjSrrGrrBrrr@s r6meta_binop_alphares  e$C$K$K r8c 8t|tjSrr)rrs r6 meta_roundrqs  <DD r8cltjtjjfdt tj r8tjtjjfdytjt tfdy)Nc&djS)Nz7: Expected input tensor to have an integral dtype. Got r)rrsr6rXz#shift_dtype_check..{s7)RSWS]S]R^_r8c&djS)Nz6: Expected shift value to have an integral dtype. Got rrrsr6rXz#shift_dtype_check..swiUVYV_V_U`ar8cdS)Nz): Expected shift value to be an int. Got r<rsr6rXz#shift_dtype_check..swiHNr8)rIrZrAr.rQrcr r)rrrs```r6shift_dtype_checkrxsp LL tzz*_#u||$  " "399 - a  sG $ N r8cTtd||t||tjS)NrshiftrrrGrrBrs r6 meta_rshiftsr)he,  e$C$K$K r8cTtd||t||tjS)Nlshiftrrrs r6 meta_lshiftsrrr8c8|j|jSr3rrs r6 meta_zerors >>$** %%r8c|Sr3r<rrs r6 meta_fill_rrXr8c,tj|Sr3r\rs r6 meta_fillr   D !!r8c|Sr3r<rs r6 meta_relu_rrXr8c:t||tjSrrrs r6meta__add_relurs  e$C$K$K r8c,tj|Sr3r\rnoiselowerrrrs r6meta_rrelu_with_noisers   D !!r8cVtj|tj|fSr3r\rs r6 meta_rrelu_with_noise_functionalrs%   D !5#3#3E#: ::r8c|Sr3r<)rrrrrs r6meta_rrelu_with_noise_rs  Kr8c,tj|Sr3r\rrr accumulates r6meta_index_putrrr8cFt|j|j|Sr3r\r)rrmvalues r6meta_masked_fill_rsDJJ 3 Kr8c|j|jjtj|}|Sr)rrrrAr)rrmre masked_scales r6meta__masked_scalers<>>$))+.1111$72L r8ctj|jtjtjfvdtjjjk(fdS)Ncy)NzMask must be bool or uint8r<r<r8r6rXz&meta_masked_scatter_..r`r8c<djdjS)NzEmasked_scatter: expected self and source to have same dtypes but got rr)rrZsr6rXz&meta_masked_scatter_..s"**U6<<.:r8)rIrZrQrr)rrmrZs` `r6meta_masked_scatter_rsU LL uzz5;;//1U LL fll" : Kr8ct||\}}tj|tj}t |||Sr)r&rIrrr)rrmrZrs r6meta_masked_scatterr s;"$-JD$   d%2I2I JF f 55r8c$|j|Sr3ru)rrmr7s r6meta_masked_scatter_backwardr s >>%  r8c|Sr3r<rs r6meta_index_put_rrXr8c tj|jdk(dtj|jdk(d|j}|j |d |d |d} d} ||f tj d k(xr d k( fd|r|jtj k(xs|jtj k(xr|tjk(}tj||jk(xs|d|j j|} n|j } |sUStjjdk(d tjj k( fd | S) Nr.cyr=r<r<r8r6rXz)common_meta_baddbmm_bmm..r`r8cyr?r<r<r8r6rXz)common_meta_baddbmm_bmm..r`r8rrrc .ddddddd SrDr<rFsr6rXz)common_meta_baddbmm_bmm.. s5RSURV  l<?*;2l1o=NbRr8cy)Nzfout_dtype only supported for torch.float32 output with float16/bfloat16 inputs or same as input dtypesr<r<r8r6rXz)common_meta_baddbmm_bmm..r`r8cy)Nzself must be a 3D tensorr<r<r8r6rXz)common_meta_baddbmm_bmm..r`r8c0ddjS)Nz*Expected an input tensor shape with shape z but got shape: r)rX self_baddbmmsr6rXz)common_meta_baddbmm_bmm.. s!@ M]^j^o^o^q]rsr8) rIrZryrrQrOrPrNrr)rArBis_bmmrr1rPres_rowsres_colssupported_out_dtyperrGrHrIrXs ` @@@@r6common_meta_baddbmm_bmmrs LL"$HI LL"$HI;;=L;;=L aB#AAHAHx*K LLQ2E,q/5E"E R  LLEMM ) KV\\U^^-K)5==(    % <)< | !!+.11)<!!+. l. \%%'1,.PQ    ; . s Mr8ct||dS)NTr)rr<s r6meta_bmmr&s "4t 44r8c t||d|S)NT)r1r)rr<r1s r6meta_bmm_dtyper +s "4ty IIr8ch||z}||z}|dk7r"t|dkt|dkk7r|dz}|Sr)r)rFyqrPs r6div_rtnr$0sB QA AA Av4A;$q1u+- Q Hr8ct||z|z||dz zz dz |r|dz ndz|dz}|r|dz |z||zk\r|dz}|Sr)r$) inputSize kernelSizerrrrr outputSizes r6pooling_output_shape_pad_lrr):s    *q.) *   'vzA  /     Nf $ E(9 9 !OJ r8c tj|dk7dtjdk\fdtjdz zdzdzkfdt|||S)Nrcy)Nzstride should not be zeror<r<r8r6rXz&pooling_output_shape..Vr`r8cdS)Nz'pad must be non-negative, but got pad: r<padsr6rXz&pooling_output_shape..Ws%LSE#Rr8rrcdddS)NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r<)rr'r.sr6rXz&pooling_output_shape..Zs'OPSuU%,nXJ @r8)rIrZr))r&r'r.rrrs `` ` r6rrUss LL1AB LLRS LL a8+a/A55  ':sC9 r8c > j} tjdkDxrdkDdtj|dkDxr|dkDdtj|dkDxr|dkDdjddk7xrjddk7}|tjk(r5tj|dk(xr|xrjddk7d nWtj|dk(xrjddk7xr|xs|dk(xr|xrjddk7fd tjdzk\xrdzk\fd tj dk\xr dk\  fd y) Nrcy)NzCkernel size should be greater than zero, but got kH: {kH}, kW: {kW}r<r<r8r6rXz$pool2d_shape_check..zr`r8cy)Nz>stride should be greater than zero, but got dH: {dH}, dW: {dW}r<r<r8r6rXz$pool2d_shape_check..~r`r8cy)Nz\dilation should be greater than zero, but got dilationH: {dilationH}, dilationW: {dilationW}r<r<r8r6rXz$pool2d_shape_check..r`r8rrrr.cy)NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: {input.size()}r<r<r8r6rXz$pool2d_shape_check..r`r8c*djS)NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: rrsr6rXz$pool2d_shape_check..sopupzpzp|o}~r8c ddddS)NzKpad should be smaller than or equal to half of kernel size, but got padW = z , padH = z, kW = z, kH = r<)rrrrsr6rXz$pool2d_shape_check..s&ygbT>r8c .ddddddd SNzGiven input size: (rFz). Calculated output size: (z). Output size is too smallr<)r r!rr&rrsr6rXz$pool2d_shape_check..s:%k]!K=*N$$0><.+O##r8)ryrIrZrr)rrrrrrr dilationH dilationWrr r!rrrr valid_dimsr&s``` `` ````` @r6rrds" 99;DL LL Q26U LL Q26P LLA ')a-n A!#: 1 (:J+++ AI ;* ;A!); Q  QY <5::a=A- <* A ?j?UZZ]a-? ~  LL a4+B!GtO >  LLq.\Q. # #r8rBr?rrr@rrpTpHpW dilationTr9r:rCrDrErFrGrHr=c J j}tjdkDxr dkDxrdkDfdtjdkDxr dkDxrdkDfdtj dkDxr  dkDxr dkD fdtj|dvfdt|D]:|dk(rdk(rtjj dkDfd<|r/tjk\xr k\xrk\fd tjd z k\xrd z k\xrd z k\  fd tjd k\xr d k\xrd k\fd y)NrcdddS)Nz5kernel size should be greater than zero, but got kT: z, kH: z, kW: r<)rr?rsr6rXz$pool3d_shape_check..s#$fRDrd ,r8cdddS)Nz0stride should be greater than zero, but got dT: z, dH: z, dW: r<)rr@rsr6rXz$pool3d_shape_check..s >rd&FSURV Wr8cdddS)Nz9dilation should be greater than zero, but got dilationT: z , dilationH: z , dilationW: r<)r9r?r:sr6rXz$pool3d_shape_check..s$#M)M) Vr8r9c&djS)Nz/: Expected 4D or 5D tensor for input, but got: r))rrsr6rXz$pool3d_shape_check..s7)J5;;-Xr8rcLdjdjdS)NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)rr)rrrsr6rXz$pool3d_shape_check..s3)--2[[M+EJJqM?:KMr8c .ddddddd S)Nzinput image (T: rrz ) smaller than kernel size (kT:  kH:  kW: rlr<)rDrCrErr?rsr6rXz$pool3d_shape_check..s9"5'gYd6(C$$&4uRDbT<r8rc ,dddddd S)NzHpad should be smaller than or equal to half of kernel size, but got kT: rHrGz padT: z padW: z padH: r<)rr?rr=r<r>sr6rXz$pool3d_shape_check..s6$eB4uRDt72$gbT Kr8rc:dddddddddSr8r<)rDrCrErBrGrFrHsr6rXz$pool3d_shape_check..sI!'!E7!G9AfXF((/y%'!F8L' (r8)rrIrZrr)rrBr?rrr@rrr<r=r>r?r9r:rCrDrErFrGrHrr=rrs````````````````````` @r6r>r>s0 ::D LL Q$26$b1f  LL Q$26$b1f   LLA 9)a-9IM  LL X 4[   19a  JJqMA     RK :GrM :fl    LL Q" 6a26"q&B,   LL  3v{3w!|  r8cj|j}t|||||||| | | | | |||||||||t|||dz |t|||dz |t|||dz |t|||dz |t|||dz |t|||dz |t|||dz |t|||dz |y)Nrr.rrrr>r)rrrrBr?rrr@rrr<r=r>r?r9r:rCrDrErFrGrHrrs r6max_pool3d_backward_shape_checkrMs2 ::D            +0;dQh8;dQh6;dQh8;dQh77D$(G47D$(E27D$(G47D$(F3r8c|j}t||||||||| | | ddd| | |||||dt|||dz |t|||dz |t|||dz |t|||dz |y)NrTrr.rrL)rrrBr?rrr@rrr<r=r>rCrDrErFrGrHrrs r6rPrP<s* ::D                -2;dQh8;dQh6;dQh8;dQh7r8cd}|d|\}}tjt|dvdt|dk(r||} } n |d|\} } |d|\} } |d|\} }|jd }|jd }|jd }t j |}|tj k(r)tj|jd k(d nR|tjk(r(tj|jdvdntjddt||| | | |}t||| | ||}t|||| | | | | ||||||||||fS)Nctjt|dvfd|d}t|dk(r|n|d}||fS)NrcddS)Nz max_pool2d: rr<rsr6rXzEmax_pool2d_checks_and_compute_shape..unpack..~rr8rrrrs` r6r z3max_pool2d_checks_and_compute_shape..unpack{r r8rr cy)NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr<r<r8r6rXz5max_pool2d_checks_and_compute_shape..r`r8rrrrrrrrcy)NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr<r<r8r6rXz5max_pool2d_checks_and_compute_shape..r`r8rocy)Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr<r<r8r6rXz5max_pool2d_checks_and_compute_shape..r`r8Fcy)NzAUnsupported memory format. Supports only ChannelsLast, Contiguousr<r<r8r6rXz5max_pool2d_checks_and_compute_shape..r`r8) rIrZrrrArrryrrr)rrrrrrr rrrrrrr9r:rr r!rrrs r6rrrsM; /FB LL F y a 6{aRB&)B 7+JD$!*h7Iy**R.K**R.KBJ//6M+++ IIK1  c  %11 1 IIK6 ! O   W ( Rr9iXL&z2tRIVK       $  k 11r8c| t|||||\} tjjjk(fd| j fd} | | |t j } tjjjj| S)Nc<djdjS)NzExpected dtype z for `gradOutput` but got dtype rrjsr6rXz7meta_max_pool2d_with_indices_backward..s /$**-MkN_N_M`ar8clt|dz t|dz t|dz y)Nr.rr)r)rr&rrrs r6_check_dim_sizez>meta_max_pool2d_with_indices_backward.._check_dim_sizes9q$q,7q$q,7q$q+6r8rn) rrIrZrQrrArr~rrv)rrrrrrrrrrYrr&rrrs`` @@@@r6%meta_max_pool2d_with_indices_backwardrZs , k67Hi    LL k'''a L 99D7 K G//5M ;; jj{{#  r8ct||||||\}}}|jdk(r|jdnd} tj|} |jdk(r|||g} n| |||g} t j | |j|j| t j | t j|j| fSr) rryrrArrIr~rQrvrrs r6meta_max_pool2d_with_indicesr\s , {FGXy   %yy{a/UZZ^QF//6M yy{a\;7 \;? ++<<'   ++<<'    r8c  tjjdvfdj}t|dz |D]? tjj dkDdj d dAtjt dk(d tjt |dk(d j d }j d  j d  |dk(rj d}nd}tjj j k(dtjjdk(fdj d}j d}j d tj||k\dtj||k(dtj dk( fdtj|ddzdz k fdtj|ddzdz k fdjdk(r |||d|dg} n ||d|dg} tj| j jtj| tjjfS)Nroc"djS)Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: rrsr6rXz,meta_fractional_max_pool2d..sLTYYKXr8r.rz_fractional_max_pool2d: Expected input to have non-zero size for non-batch dimensions, but got r_z emptyrcy)NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr<r<r8r6rXz,meta_fractional_max_pool2d..#r`r8cy)NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr<r<r8r6rXz,meta_fractional_max_pool2d..(r`r8rrrrrcy)Nz6Expect _random_samples to have the same dtype as inputr<r<r8r6rXz,meta_fractional_max_pool2d..6r`r8c"djS)Nz1Expect _random samples to have 3 dimensions got, r)random_samplessr6rXz,meta_fractional_max_pool2d..:sCNDWDWCXYr8z=Expect _random_samples.size(0) no less then input batch size.cy)Nz.Fr`r8cddS)Nz/Expect _random_samples.size(2) equals to 2 got .r<)rsr6rXz,meta_fractional_max_pool2d..Hs#RSTRUUV!Wr8cdddS)Nz%fractional_max_pool2d: kernel height rz' is too large relative to input height r<) input_heightrsr6rXz,meta_fractional_max_pool2d..Ls7 A7GGno{n|}r8cdddS)Nz$fractional_max_pool2d: kernel width rz& is too large relative to input width r<) input_widthrsr6rXz,meta_fractional_max_pool2d..Ps6{1~6FFlmxlyzr8r) rIrZrrrrrQryr~rvr) rrrXrcrinput_channels input_batchrcrrrhrjs `` ` @@@r6meta_fractional_max_pool2drns LL VX 99D 4!8T "  IIaL1 77;yy{mCSTUSVV\ ^   LL KA 2  LL KA 2 YYr]N99R=L))B-K qyiil   LL n***H LLq Y AAAAAA LL [G LL ^N LLaWX LLAQ'!+|;} LLAQ'!+{:z  xxzQ^[^[^L A A?  **;;  ++;;  r8c|tjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d}tj| xs t|dvd|s|n|d} |s|nt|dk(r| n|d} |s|nt|dk(r| n|d} tjt|dvd|d} t|dk(r| n|d} t|dk(r| n|d}tjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d}tj|jd vd |jd k(r|j d nd}|j d }|j d}|j d}|j d}t ||| | ||}t ||| | ||}t |||| ||}t |||||| | | | | ||||||||||d|jd k(xr&tj|tjk(}|jdk(rK|jd}|j xr |jtj}||||f}n|||||f}|j|}|j|tj}|r@|jtj}|jtj}||fS)Nr1cyNzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr<r<r8r6rXz.meta_max_pool3d_with_indices..rr`r8rrrcyNzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr<r<r8r6rXz.meta_max_pool3d_with_indices..zr`r8cyNzImax_pool3d: padding must either be a single int, or a tuple of three intsr<r<r8r6rXz.meta_max_pool3d_with_indices..r`r8cyNzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr<r<r8r6rXz.meta_max_pool3d_with_indices..r`r8r9cyr;r<r<r8r6rXz.meta_max_pool3d_with_indices..r`r8rrrrrzmax_pool3d_with_indices()rrr)rIrZrrrrr>rArrrrRrrr)rrrrrrr?rrr@rrr<r=r>r?r9r:rrBrCrDrErFrGrHrinput_channels_last_checkrrrs r6meta_max_pool3d_with_indicesr{fsJ LL KF"_ QB;1$+a.B;1$+a.B LL  +c&kV+cvayBc&kQ&6F1IBc&kQ&6F1IB LL G [ B7|q gajB7|q gajB LL H \ I ]a/ Xa[I ]a/ Xa[I LL fK %zzQUZZ^AFjjnG JJrNEjjnG ZZ^F BIy IE"7BB 9MG !&"b"i KF            #+2  aXE77>%BXBXX zzQ$)OOA$6!)779 9 '55006   eWf5 WeWf= //) $Cooiu{{o;Gff5#9#9f:**5+A+A*B <r8ctjt|dvd|d}t|dk(r|n|d} t|dk(r|n|d} tj| xs t|dvd|s|n|d} |s| nt|dk(r| n|d} |s| nt|dk(r| n|d} tjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d}tjt|dvd|d}t|dk(r|n|d}t|dk(r|n|d}tj|jd vd |j d }|j d }|j d }|j d}|j d }|j d }|j d}t |||||| | | | | ||||||||||||d|jdk(xr&t j|tjk(}|jdk(rD|jd}|j xr |jtj}|j|j}|r |jtj}|S)Nr1cyrqr<r<r8r6rXz7meta_max_pool3d_with_indices_backward..r`r8rrrcyrsr<r<r8r6rXz7meta_max_pool3d_with_indices_backward..r`r8cyrur<r<r8r6rXz7meta_max_pool3d_with_indices_backward..r`r8cyrwr<r<r8r6rXz7meta_max_pool3d_with_indices_backward..r`r8r9cyr;r<r<r8r6rXz7meta_max_pool3d_with_indices_backward..r`r8rrrrz"max_pool3d_with_indices_backward()rrr)rIrZrrrrMrArrrrRrrr)rrrrrrrrr?rrr@rrr<r=r>r?r9r:rBrCrDrErFrGrHrrzrs r6%meta_max_pool3d_with_indices_backwardrs LL KF"_ QB;1$+a.B;1$+a.B LL  +c&kV+cvayBc&kQ&6F1IBc&kQ&6F1IB LL G [ B7|q gajB7|q gajB LL H \ I ]a/ Xa[I ]a/ Xa[I LL fK jjnG JJrNEjjnG ZZ^F   R Er"G   b !F#            ,/6  aXE77>%BXBXX zzQ$)OOA$6!)779 9 '55006   -J]]1G1G]H r8gridcztjjjk(fdtjjtjk(xrjtjk(fdtjj dj dk(fdtjj dj dz k(fdtdj D],tjj dkDfd.y) Nc<djdjS)NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on rrrsr6rXz+check_grid_sampler_common..9s'\\N"24;;- Ar8c<djdjS)NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )rursr6rXz+check_grid_sampler_common..@s&nT[[M Cr8rc<djdjS)NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r)rsr6rXz+check_grid_sampler_common..Gs' % },A$** Or8rrcBdjdz djS)Nz+grid_sampler(): expected grid to have size rz, in last dimension, but got grid with sizes )rrrsr6rXz+check_grid_sampler_common..Ns,9%**q.9IJ226** ?r8c*djddS)NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r_r`r)rssr6rXz+check_grid_sampler_common..Wrtr8)rIrZrvrurrrr)rrrs``@r6check_grid_sampler_commonr6s LL  #  LL  %F$++*F  LL A$**Q-'  LL 2%**q.( 1ejj !  KKNQ    r8ceZdZdZdZdZy)GridSamplerInterpolationrrrN)rn __module__ __qualname__BILINEARNEARESTBICUBICr<r8r6rr^sHGGr8rinterpolation_modectjjdk(xrjjk(fdtjjdk(xr|tjj k( dy)Nrc<djdjS)Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes rr)rsr6rXz'check_grid_sampler_3d..gs&449KK=#DJJ< 1r8cy)Nz.rr`r8)rIrZrrrr)rrrs`` r6check_grid_sampler_3drdsp LL a3EJJ$))3  LL JJ!O M"&>&F&F&L&LL  O r8c|d}|r&tj|tj}nd}tj|tj} || fSNrr)rIrrr rrrr padding_mode align_cornersrinput_requires_gradr grad_grids r6grid_sampler_2d_backward_metarvsQ&a.%%e5;R;RS    U5L5LMI  ""r8ct||t||||jd}|jd}|jd}|jd}|jd} |j||||| fS)Nrrrr.)rrrr) rrrrrruCout_Dout_Hout_Ws r6grid_sampler_3drsveT*%'9: AA AA JJqME JJqME JJqME ??Aq%6 77r8rct||t||||d}|r&tj|tj}nd}tj |tj} || fSr)rrrIrrrrs r6grid_sampler_3d_backwardrsmeT*%'9:%a.%% !?!?    U5S5STI y  r8c|jdd}|stj|}||d<tj|g|i|S)NrQ)rPrA get_dtyperIr~)rrrCrrQs r6fullrsE JJw %E  +F7O ;;t -d -f --r8cN|tjk(rtj|dudtjd| |jn||| |j n||}|j r>|j|j|j|jn/|j|j|jd|jd|Stjj||||||}|j!d|S)Ncy)Nz9memory format option is only supported by strided tensorsr<r<r8r6rXzzeros_like..r`r8rrTr)rI sparse_coorZr~rQrv is_sparsesparse_resize_and_clear_r sparse_dim dense_dimry _coalesced_r+rrfill_)rrQrurvrwrrs r6rrs!!! T ! O kk % $**5"(.4;;f!   >>  ( ( T__.0@   ( (dhhj! D  // ! ! # " CIIaL Jr8rtc|tj}|tj}|tj}tj|||||SrrIr{get_default_devicerr~rrQrurvrwrxs r6 meta_onesrT }'') ~))+ ~ ;; E&J r8c|tj}|tj}|tj}tj|||||Srrrs r6 meta_zerosrrr8c,tj|Sr3rAclone_preserve_strides)rrryrs r6meta_select_scatterr  ' ' --r8c,tj|Sr3r)rrryrprosteps r6meta_slice_scatterrrr8 dim_post_expr wrap_scalarcv|dkr|sJd}| }|dz }||ks||kDrJd|d|d|d|dkr||z }|S)Nrrzdim z out of bounds (rkrlr<)ryrrrrs r6rr"sm{ .C ! Cc S3YR4u4DSEC5PQ)RR ' Qw } Jr8cJ|jdk(rdS|j|Srr)rrys r6ensure_nonempty_sizer.s!1 1.!''#,.r8c:tjd}tjd}tj||k(dt |D];k7s tjt t kfd=y)Nrcy)NzDIndex tensor must have the same number of dimensions as input tensorr<r<r8r6rXz$gather_shape_check..8r`r8cNddjdjdzS)Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r))ryrrrsr6rXz$gather_shape_check..>s7;A3>Nu{{m\/ |;QRUQVWXr8)rryrIrZrr)rryr self_dims index_dimsrs``` @r6gather_shape_checkr3sDHHJ"IUYY[!$J LLZV9  8 LL$UA.2FtQ2OOX r8cpddlm}t||j}|j dk(}|s`t j jt jk(xsjt jk(fdt|||jjS)Nrrc"djS)Nz8gather(): Expected dtype int32/int64 for index, but got rrsr6rXzmeta_gather..LsNu{{m\r8) rrrryrrIrZrQrrrrr)rryr sparse_gradr wrapped_dimis_index_emptys ` r6 meta_gatherrCsD dhhj1K#EKKMQ$67N  KK5:: % A )A \  4e4 >>%++ &&r8c|r6|dk(ry|dk(ry|dk(ry|dk(ry|d k(ry tjd d y|d k(ry|dk(rytjd dy)Nr REDUCE_ADDrREDUCE_MULTIPLYmean REDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFcy)Nz=reduce argument must be either sum, prod, mean, amax or amin.r<r<r8r6rXz#get_operator_enum..ar`r8addmultiplycy)Nz/reduce argument must be either add or multiply.r<r<r8r6rXz#get_operator_enum..ir`r8rR)reduce_use_new_optionss r6get_operator_enumrSs{ e   $    #  #  S   e   "$ UUVr8cPddlm}||jdk7rStj|j tj k(xs|j tjk(fd|1tj|j |j k(fdyy)Nr)r:cdS)Nz((): Expected dtype int32/int64 for indexr< method_namesr6rXz,scatter_gather_dtype_check..ts{m#KLr8cdS)Nz0(): Expected self.dtype to be equal to src.dtyper<rsr6rXz,scatter_gather_dtype_check..zs{m#STr8)rr:rrIrZrQrr)rrrsrc_optr:s` r6scatter_gather_dtype_checkrnsvCU[[]a'( KK5:: % A )A L  JJ'-- ' T r8ct|dSr")rrs r6ensure_nonempty_dimr~s sA;r8cddlm}|jdk(rytjt j t j k(dd}t j }t|D]'}t|}|k(r|t|kDs%d}n|s1/t|D]!}t|}|t|kDsd}nftjt j t j k(dtj| fdytj| fdy) NrrcyNzCIndex tensor must have the same number of dimensions as self tensorr<r<r8r6rXz%scatter_shape_check..r`r8FTcyrr<r<r8r6rXz%scatter_shape_check..r`r8cbdjdjddjzS)NExpected index rrz and to be no larger than src r))ryrrrsr6rXz%scatter_shape_check..s8oekk]2Mdjj\Z&se+I'--YZr8cHdjdjdzS)Nrrrr))ryrrsr6rXz%scatter_shape_check..s,oekk]2Mdjj\Z&se,-r8) rrrrIrZrryrr) rryrrris_wrong_shaperr index_d_sizes ```` r6scatter_shape_checkrsPDekkmq() LLDHHJ'+>uyy{+KKU N#DHHJ/I9 +E15 8  .tQ7 7!N   g1y! A/q9L27A>>!%     +/B599;/O O Y     Z    - r8ct||j}td|||t||||| t ||yy)Nscatter)rryrrr)rryrrrrrs r6scatter_meta_implrsE dhhj1Ky$s;k5#6'?3r8cVt||||d|j|jSNrrrrrryrrs r6meta_scatter_addrs%dCU3 >>$** %%r8c$t||||d|Srrrs r6meta_scatter_add_rsdCU3 Kr8ct|tjr|nd}t||||||j |j Sr3)rcrIr rrrrryr src_or_valuer[rs r6 meta_scatterr s;%\5<<@,dCdCV4 >>$** %%r8c`t|tjr|nd}t||||||Sr3)rcrIr rrs r6 meta_scatter_r s-%\5<<@,dCdCV4 Kr8queryrr dropout_p is_causalreturn_debug_maskrec V|jd}|jd}|jd} |jd} |jd} |jdd} tj| jdd} tj||| ftj |j }|ra| dkDrdnd}tj| |z }| dkrd}n| dkrd}tj||| |f|j|j }n,tjd|j|j }tjjrktjjrMtjd tjd }tjd tjd }nLtjdtjd }tjd tjd }| |dd| | |||f S) Nrrrr.r@r<rs)rrrIrr~rMrvrceilrQversionhiprrTrr)r rrr rrrer num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kquery_t attention logsumexp blocksize_c max_seqlen_k debug_maskseedoffsets r6(meta__scaled_dot_product_flash_attentionr#sAJ 1 IAzz!}H!ooa#G  )33Aq9I Y 23kk||I %]c yy!3k!AB  $L 3 &L[[ $6 E++<< [[%++ellK  }}UZZ446{{2UZZ?Ruzz&A{{Aell6BRu||FC      r8 res_shape.ctj|k(r9jdd}tj|jdd}|St gdfdd}|Dcgc]}|| }}t t|Dcgc]}|j|}}tj|jjj|}|Scc}wcc}w)Nrr)rrrr.c*j|Sr3r)idxr s r6rXz,alloc_with_matching_layout..0s%,,.*=r8Trr) rYrrrIrsortedrrrr~rQrvr) r r$rr dim_orderr'permuted_shaper final_permutes ` r6alloc_with_matching_layoutr,'s U[[Y&//!Q'w'11!Q7 J =t 5>>S)C.>>5:3y>5JK+K Kkk %++ell '-   J ?Ks * C%C* attn_biascompute_log_sumexpc |jd} |jd} |jd} |jd} |jd} | | | | f}t||}tj| | | dftj|j }tjdtj d}tjdtj d}||dd| | ||df SNrrrrrr<rsrr,rIr~rMrvr)r rrr-r.r rrrerrS_QS_KVD_Vr$r logsum_expr!r"s r6(meta__scaled_dot_product_cudnn_attentionr6;s  1 A 1 A **Q-C 88A;D **R.CAsC I $UI 6C AsAkk||J ;;rF ;D [[5::f =F         r8c |jd}|jd} |jd} |jd} |jd} || | | f} t|| }tj|| | ftj|j }tjdtj d}tjdtj d}||dd| | ||df Sr0r1)r rrr-r rrrerH_Qr2r3r4r$rr5r!r"s r65meta__scaled_dot_product_fused_attention_overrideabler9gs  1 A **Q-C **Q-C 88A;D **R.CCc"I $UI 6C C kk||J ;;rF ;D [[5::f =F         r8rar cum_seq_q cum_seq_kmax_qmax_k philox_seed philox_offsetcJtj|jddjdd}tj|jddjdd}tj|jddjdd}|||fSrr)rIrr)rar rrrrr:r;r<r=r rr>r?regrad_qgrad_kgrad_vs r6'meta__scaled_dot_product_flash_backwardrDs,  eooa3 4 > >q! DF   cmmAq1 2 < >q! DF 66 !!r8 attn_maskc |jd}|jd}|jd} tj|} tj|| |ftj|j j dd} | | fS)Nrrrr)rrIrr~rMrvr) r rrr rrErerrrrrs r60meta__scaled_dot_product_flash_attention_for_cpurGsAJ 1 IA  'I    kk||i1o  r8c ntj|jd|j|j} tj|jd|j|j} tj|jd|j|j} | | | fS)Nrrrr.r)rIempty_permutedrrQrv) rar rrrrr rrErerArBrCs r69meta__scaled_dot_product_flash_attention_for_cpu_backwardrKs& ! ! kk|| F  ! !  iizz F  ! ! kk|| F 66 !!r8 dropout_maskcd}|\||\} } ||\} } j\| j\} } } fd} fd}dk\s | kr dk\r|S| S)Ncl|jdk(r|jddfS|jdkDrxd}t|jdz D]}||j|z}|j ||j d|j d|j ddfS|d fS) Nr.rTrrrrrF)ryrrrviewr)rFrrs r6 ensure_4dzBmeta__scaled_dot_product_attention_math_for_mps..ensure_4ds 557a<;;q>4' ' UUWq[J1557Q;' )aggaj(  )66*affRj!&&*affRjI4O Oe8Or8cXjj} r|j}jf} rajdk(r|j d}||fSt jdd|jddz}|j |}||fS)Nr.rrrr)rrview_asrysqueezerrO) rattnrrmax_seq_lengthnum_headq_q_sizer  unsqueezeds r6sdpa_vector_fast_mpszMmeta__scaled_dot_product_attention_math_for_mps..sdpa_vector_fast_mpssll288$ ++e$C||Z6>JK yy{a||ADyU[["-.Aa@yy'Dyr8crd}jj}j|f}||fS)Nrr)blocksrrr head_sizerVrWrXs r6sdpa_vector_2pass_mpszNmeta__scaled_dot_product_attention_math_for_mps..sdpa_vector_2pass_mps$s>ll288$||Z669$UV L  r8iir))r rrrEr rrLrerPk_rDv_k_sizerZr^rr]rUrVrWrXrYs` @@@@@@@r6/meta__scaled_dot_product_attention_math_for_mpsrbs u%NB cNEB e EB.0hh+J&)#%88 Av~q  !! $FVO$8N$&&#%%r8c@|jdd}|jdd}|jdd}|jd}|jd} |jd} |jd} tj|| | | |j|j } tj jr&tjjr |r| nd} n|rtj| dz dznd} tj|| | ftj|j }| jdd} tjdtjd }tjdtjd }| |||fS) Nrrrrrrrr<rs)rrrIr~rQrvrrrrTrrrMr)r rrr-r.r rrerrrKvr logsumexp_dimr5r!r"s r6,meta__scaled_dot_product_efficient_attentionrf0sD OOAq !E --1 C OOAq !E 1 A 1 A 2I BB ++aIrU\\ RC }}UZZ446 0Q 2D !b&)B.!  I}%kk||J --1 C ;;rF ;D [[5::f =F  D& ((r8grad_input_maskc |jd} |jd}|jd}|jd}|jd}|jd}tj| |||fd|j|j}tj| |||fd|j|j}tj| |||fd|j|j}d}|~| dry|jd}|dzdk(r|n |dz|dzz }t |j}||d<tj ||j|j}|d d|f}||||fS) Nrrrr.rIrrrM.)rrIrJrQrvrr~)rar rrr-rrr>r?r rgrrerrr<r head_dim_vr=rArBrC grad_biaslastDimlastDimAligned new_sizess r6+meta__scaled_dot_product_efficient_backwardrn]s{(AJ 1 I JJqMEzz!}HAJ HHQKE  ! ! Yx0kk|| F  ! ! Yx0iizz F  ! ! Yz2kk|| F I!3..$$+bLA$57R<'TV,;V)* & " KK Y__Y5E5E c8G8m, 669 ,,r8ctj|}tj|}tj|}|||fSr3r\)rar rrrrr>r?r-r:r;r<r=r rrerArBrCs r6'meta__scaled_dot_product_cudnn_backwardrpsA.  e $F   c "F   e $F 66 !!r8window_size_leftwindow_size_right seqused_k alibi_slopesc||jdn|jdz }||jdn|}||jdn|}|jd}|jd}tj|}|4tj|||ftj |j }nC|jd}tj||ftj |j }| ra|dkDrdnd}tj||z }|dkrd}n|dkrd}tj||||f|j|j }n,tjd|j|j }d \}}tjjrktjjrMtjd tjd }tjd tjd }nLtjd tjd }tjd tjd }|||||fS) NrrrrrrrrNNr<rsr)rrrIrr~rMrvrrrQrrrrTrr)r rrr:r;r<r=r rrrerqrrrsrtrrrrrrrtotal_qrrr r!r"s r6meta__flash_attention_forwardrxs4#,"3A9JQ9NJ*3*;A(1(9!u 2Izz"~H  'IKK $6 7++<< **Q-KK   ELL %]c yy!3k!AB  $L 3 &L[[ $6 E++<< [[%++ellK LD& }}UZZ446{{2UZZ?Ruzz&A{{Aell6BRu||FC   r8ctj|}tj|}tj|}|||fSr3r\)rar rrrrr:r;r<r=r rr>r?rerqrr grad_querygrad_key grad_values r6meta__flash_attention_backwardr}sA0!!%(J$H!!%(J x ++r8 cu_seqlens_q cu_seqlens_k max_seqlen_qrcustom_mask_typecausal_diagonalseqlen_k window_sizec|jd}|jd}|jd}|jd}|jd}tj|||||j|j}||jddz n|}|}||J|}||n|}| rt j |dz dznd}tj|||ftj|j}tjdtjd}tjdtjd}||||||fS) Nrrrrrrr<rs) rrIr~rQrvrrrMr)r rrr/r~rrrr rr.rerrrrrrurrdrlogsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_krer5r!r"s r6!meta__efficient_attention_forwardr%s9,  1 A 1 A  A 2I BB ++aIrU\\ RC7C7O,++A.2VW'''**6*B,4F %*+b0A i7kk||J ;;rF ;D [[5::f =F  D&*=?R RRr8bias_requires_gradnum_splits_keyshared_storage_dqdkdvc|rtj|jd|jdk(dtj|jd|jdk(dtjg|jddd|jd|jd|j|j }|j d d}|j d d}|j d d }n?tj|}tj|}tj|}|z|jd}|d zdk(r|n |d z|d zz }t|j}||d<tj||j|j }|d d|f}n!tjd |j }||||fS)Nrcy)Nz,seqlen must match for `shared_storage_dqdkdvr<r<r8r6rXz4meta__efficient_attention_backward..ur`r8r.cy)Nz3embedding dim must match for `shared_storage_dqdkdvr<r<r8r6rXz4meta__efficient_attention_backward..yr`r8rrrrrrrM.r<r) rIrZrr~rQrvrrrr)rar rrr/r~rrrrr r>r?rrrerrchunkrzr{r|rkrlrmrjs r6"meta__efficient_attention_backwardrYs2 KKNciil * B   KKNciil * I  Eekk!B E EEKKO EU[[_ E++<<  \\"a( <<A&\\"a( %%e, ##C(%%e,  ))B-$+bLA$57R<'TV,;V% & " KK DKKP c8G8m, KK5<<8 xY 66r8scale_ascale_b scale_resultuse_fast_accumc d}tjjdk(xrjdk(fdtj|jxr|jfdt dk(r%d} d} d} tj| j xs| fd tj| j xs| fd tjj d d zd k(fdtjj d d zd k(xrj d d zd k(fdj\} j d jtjk(xrjtjk(xs<jtjk(xrjtjk(} jd k(rfjd k(rStjjtjk(xrjtjk(dn&| rjtjk(rd }| dz} nd}d}d}|| |}||ddz}|||z|z|||z|zjk(r_jk(rLtjjdtjjdnStjdfdn5tjjtjk(xrjtjk(dtjjdk(xrjdk(fdj d k(rtj d d k(r`j d d k(rLj d k(r8tjjxrjdntjdfd||n j}tjj d j d |jS)Nc|tjtjtjtjtj fvSr3)rIrQ float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzfloat4_e2m1fn_x2rs r6is_fp8_or_fp4_typez*meta_scaled_mm..is_fp8_or_fp4_typesA        ! !  ! !  " "    r8rcLdjdjS)Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=rr<rsr6rXz meta_scaled_mm..s'7 |CSTXT\T\T^S_`r8c<djdjS)Nz?Expected both inputs to be fp8 or fp4 types but got self.dtype=z and mat2.dtype=rrsr6rXz meta_scaled_mm..s&QRVR\R\Q]]mnrnxnxmyzr8rc,|d|dkDxr|ddk(Srr<rs r6 is_row_majorz$meta_scaled_mm..is_row_majors"!9vay(;VAY!^ ;r8c&|ddk(xr|ddkDSrr<rs r6 is_col_majorz$meta_scaled_mm..is_col_majors!9>3fQi!m 3r8cV|jddk(xs|jddk(Srr) tensor_2ds r6 has_zero_dimz$meta_scaled_mm..has_zero_dims)>>!$)CY^^A->!-C Cr8c*djS)Nz#self must be row_major, got stride rrsr6rXz meta_scaled_mm..9$++-Ir8c*djS)Nz#mat2 must be col_major, got stride rr<sr6rXz meta_scaled_mm..rr8rrMrc,djdS)NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=rrrsr6rXz meta_scaled_mm..sXY]YbYbcdYeXfgr8c"djS)Nz?Expected both dimensions of mat2 to be divisible by 16 but got r)rsr6rXz meta_scaled_mm..sUVZV`V`Uabr8cy)NzNFor tensorwise scaling, both scale_a and scale_b must be float (fp32) tensors.r<r<r8r6rXz meta_scaled_mm..r`r8rrc||zdz |zSr"r<rs r6ceil_divz meta_scaled_mm..ceil_divsA a''r8rcy)Nzscale_a must be contiguousr<r<r8r6rXz meta_scaled_mm..r`r8cy)Nzscale_b must be contiguousr<r<r8r6rXz meta_scaled_mm..r`r8Fc Zddjddjd S)NzTInvalid blockwise scaling configuration. For blockwise scaling, scale_a should have  elements, got z, scale_b should have rfr)expected_a_sizeexpected_b_sizerrsr6rXz meta_scaled_mm.. sHFFUEVVefmfsfsfuevw//>.?w}}N__`br8cy)NzKFor rowwise scaling, both scale_a and scale_b must be float (fp32) tensors.r<r<r8r6rXz meta_scaled_mm..r`r8cLdjdjS)NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=rrrsr6rXz meta_scaled_mm..s,gY`YdYdYfXhh|nunynyn{m}~r8cy)Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r<r<r8r6rXz meta_scaled_mm..%r`r8cdddjddjddjddjdd S) Nz}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (z, 1), scale_b should be (1, z). Got scale_a.size()=(rrkrz) and scale_b.size()=(rlr)rVrrrsr6rXz meta_scaled_mm..+skCCD#Eabcade//6||A.?r',,q/ARS//6||A.?r',,q/ARRS Ur8r)rIrZryrQrrrrfloat8_e8m0fnurQrrNrRr~rv)rr<rrr/rr1rrrrr_kis_blockwise_scaling block_size_k block_size_mnr num_k_blockspadded_num_k_blocks _out_dtyperrrVrs```` @@@@r6meta_scaled_mmrs  LL  a+DHHJ!O` LL4::&I+=djj+Iz 4F" < 4 D   ' =<+= I    ' =<+= I   IIaL2  " g   IIaL2  " =tyy|b'8A'= b  2 IIaL MMU11 1 6 !5!55  MMU00 0 5 !4!44  ==?a GMMOq$8 LL .Q7==EMM3Qh "}} 3 33 " !V! M ($B 5L"*<";a"? M ::=PP M ::=PP   ?2MMO6 ))+8 ))+8   LL .Q7==EMM3Qe  LL "9w{{}'9~   Q1$LLOq(LLOq(LLOq( ))+G0E0E0G^   (3J ;;tyy|TYYq\DKK XXr8cZt|||||d|j|jSNT)rrrryrrr[rYs r6meta_scatter_reduce_twor8s)dCVTJ >>$** %%r8c(t|||||d|Srrrs r6meta_scatter_reduce__twor?sdCVTJ Kr8cttjdjcxkxrdkncfdjdk(r0tj|tjj Stjj d|tjj S)Nrrc*djS)NzAThe probability distributions dimensions must be 1 or 2, but got rrsr6rXz"meta_multinomial..JsSTYT]T]T_S`ar8rr)rIrZryr~rrvr)r num_samples replacementrs` r6meta_multinomialrEs| LL EIIK1a yy{a{{;ejjNN ;; 1 {%**U\\ r8c"d}|D]}||z} |Sr"r<)vsrPvs r6multiply_integersrSs$ A  Q Hr8cLtjtk(fddztjtk(fdtjtdddDxrtdDfddd\}}||gS)Nc&ddtS)Nz%It is expected output_size equals to , but got size r)num_spatial_dimsrXsr6rXz'upsample_common_check..]s78H7IY\]hYiXjkr8rc&ddtS)Nz$It is expected input_size equals to rr)expected_input_dimsr.sr6rXz'upsample_common_check..bs67J6K?[^_i[jZklr8c3&K|] }|dkD ywrNr<rers r6rgz(upsample_common_check..fs *aAE *c3&K|] }|dkD ywrr<rs r6rgz(upsample_common_check..fs2NQ1q52NrcddS)NzDInput and output sizes should be greater than 0, but got input size z and output size r<)r.rXsr6rXz'upsample_common_check..gs \!2;-Ar8)rIrZrr)r.rXrrchannelsrs``` @r6upsample_common_checkrZs LL K,,k+Q. LL J..l  LL *:ab> **Ns2N+2N/N A ""1~FH H +{ ++r8c4tjjdk7xstj ddfdt j |d}j |jtjS)Nrrc*djS)Nz>Non-empty 3D data tensor expected but got a tensor with sizes rrsr6rXz$upsample_nearest1d..uPQVQ[Q[Q]P^_r8rr rIrZrrrrrrrAr)rrXscalesfull_output_sizes` r6upsample_nearest1dro LL A/ QR0@A_- kA ??+ , / /11%8 0 r8ctjjdk7xstj ddfdt j |d}j |}tj}j\}}}}jjdk(r|dkrtj}|j|}|S) Nrrc*djSNz>Non-empty 4D data tensor expected but got a tensor with sizes rrsr6rXz$upsample_nearest2d..rr8rrrrr)rIrZrrrrrrArrrvrmr contiguous) rrXscales_hscales_wrrrrD n_channelss ` r6upsample_nearest2drs LL A/ QR0@A_- kA__- .F//6M ++Az1a ||F"zA~//   ]  ;F Mr8rXr.rrcXt||dtjjdk(fdt dD]2tjj k(fd4j |jtjS)Nrrrc"djSNzFExpected grad_output to be a tensor of dimension 4 but got: dimension rr~sr6rXz-upsample_nearest2d_backward..XYdYiYiXjkr8c DddddjS)NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(rrrrsr6rXz-upsample_nearest2d_backward..s> !s$'7':&;,QCtK4D4DQ4G3HJr8r) rrIrZrrrrrrAr)rrXr.rrrrs` @@r6upsample_nearest2d_backwardrs-K! LLAk1X    Q #3A#6 6      , / /11+> 0 r8c4tjjdk7xstj ddfdt j |d}j |jtjS)Nrrc*djS)Nz>Non-empty 5D data tensor expected but got a tensor with sizes rrsr6rXz$upsample_nearest3d..rr8r.rrr)rrXscales_drrrs` r6upsample_nearest3drrr8ctj|tj|tj}}||t|tsJt|tsJ|j }|j } t||}t||}|j|| |j|| t||t||||fS||fS)Nr)r~r) rIrrrcrrrr!rr#) rstablery descendingrrrrr out_strides r6 meta_sortrs   D !5#3#3D #LqA g1&*---':...GG XXZ "695#GY79j1Iz2F3G4w a4Kr8c tjjdk(fdtjjjk(fdj dtjjdk(fdtjj k(fdtjjjk(fdtjjdk(fdj d zztjj k(fd tjt fd fDd y) Nrc"jdSNz != 2r) input_gatessr6rXz%rnn_cell_checkSizes..;3C3C2DE0Jr8c:jdjSN != r)) hidden_gatesrsr6rXz%rnn_cell_checkSizes..s ;$$%T,*<*<)=>r8rc"jdS)Nz != 1r) input_biassr6rXz%rnn_cell_checkSizes..sjoo5Fe3Lr8c.jdSr r) gates_sizer sr6rXz%rnn_cell_checkSizes..sz'')*$zl;r8c:jdjSr r)) hidden_biasr sr6rXz%rnn_cell_checkSizes..s z''([->->,?@r8c"jdSrr) prev_hiddensr6rXz%rnn_cell_checkSizes..rr8rc `jdjddddd S)Nr rz * z // z (aka rl)rr)expected_prev_hidden_numelfactorrrrsr6rXz%rnn_cell_checkSizes..sB;$$&'tK,<,.s(  HH ** * s#&cy)Nz%expected all inputs to be same devicer<r<r8r6rXz%rnn_cell_checkSizes.. r`r8)rIrZrrrrr)rr r rrrrrs``````@@r6rnn_cell_checkSizesrs@ LL!!Q&(JK LL\///>!!!$J Z__)+LM    * , ;      1 1 1 @  LL!!Q&(JK!,!1!1!!4z!AV!K LL99 LL  "J [I   8 r8c t||||d|tj|tj}tj|tj}tj|tj}|||fS)Nrr)rrIrr)rr cxr r workspacehycys r6_thnn_fused_lstm_cell_metarsk \:{ArR  E ?j(^*CD  __Y 'F~-z;GJ z [[5<< 0 \\* % zN2JI JBAAMoom5;;o?G 2r7J ..r8c(|r|jdn|jd}|r|jdn|jd}| }|r|||gn|||g}|j|}|"tjd|j}n|j|j}|"tjd|j}n|j|j}tjd|jtj }||||fS)Nrrrr)rrrIr~rvr)rw0w1w2w3hx_cx_rr+rLr$r& has_biasesr*r'r)r.r/output_chanelsrrrrrs r6mkldnn_rnn_layerr@Us&$/QEKKNJ#.QEKKNJ N  Z0*n 5 __Y 'F { [[5<< 0 ]]399 % { [[5<< 0 ]]399 % Aell%++FI 2r9 $$r8c|jdk(r%tjdk(xsdk(fdytj|jdk7fdy)NrrcdS)Nz4: Expected reduction dim -1 or 0 for scalar but got r<ryrsr6rXz'zero_numel_check_dims..swiSTWSXYr8cddS)Nz: Expected reduction dim z to have non-zero size.r<rCsr6rXz'zero_numel_check_dims..swi8=TUr8)rrIrr)rryrs ``r6zero_numel_check_dimsrE}sR yyA~  1H !r Y  IIcNa  U r8c|(t||j}t||ytj|j dk7fdy)NrcdS)Nz@: Expected reduction dim to be specified for input.numel() == 0.r<rsr6rXz%check_argmax_argmin..stf\]r8)rryrErIrZr)rrrys` r6check_argmax_argminrHsC S$((*-dC. JJLA  ] r8ctd||tj|j||fnd}t |||}|j |t jS)Nargmaxr)rHrAr{rr|rrIr)rryr~r{rs r6argmax_argmin_metarKsQ$,    coSF4 PD $T4 9E >>%u{{> 33r8c||tjk(rtj}tjd||||S)Nr<r)rIjaggedrr~)rrQrurvrws r6 scalar_tensorrNs5  ;; %v* r8ct||jd}|jdk(rdn|j|}tj|tj ||kdt |j}t|dkDr|||<|j||j|tjfS)NTrrrcy)Nzk not in range for dimensionr<r<r8r6rXztopk_meta..r`r8r) rryrrIrrZrrrrr)rrUrylargestr( sliceSizetopKSizes r6 topk_metarUs dhhjd ;CXXZ1_$))C.I  LLi!GHDJJH 8}q >>( #T^^HEKK^%P PPr8c| |Jd|j}|j} tj|| j| j| j S)Nz;segment_reduce(): Either lengths or offsets must be defined)rQrvru)rrIrrQrvru) r3rrkr[rfrgrhrj data_contig grad_contigs r6meta__segment_reduce_backwardrYsj  '"5E 5//#K//#K   !!!!  r8cddlm}t|jd|jdkDr|j nd}t j ||dk\||kfdt|jd|jdzdz}|r%|jdkDr|jd|j||j|t jfS)Nr)sym_andTrPrcdS)Nz9kthvalue(): selected number k out of range for dimension r<rsr6rXzkthvalue_meta..sKC5Qr8r) rr[rryrrIrZrrr#rr)rrUryr~r[dimSizers ` r6 kthvalue_metar^s> dhhjd ;C $ QdiinAG LLQW %Q DS!DJJsQwy$99 :E488:> S! >>% $..ekk."J JJr8c||n|}tj|jdk(d|j}|(tj|j|k(d|(tj|j|k(dtj|j|k(dtj|j|k(dtj|jdk(dtj|j |d|d zd zk(d y) NrcyNr<r<r8r6rXz(checkLSTMBackwardSizes..r`r8cyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8cyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8cyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8cyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8cyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8rrrcyrar<r<r8r6rXz(checkLSTMBackwardSizes..r`r8)rIrZryrr)grad_hygrad_cyrrr defined_gradexp_sizes r6checkLSTMBackwardSizesrms%17wL LL!!#q(*5  "H W\\^x/< W\\^x/< LLh& 3 LLh& 3 LLA%z2 LL"hqkHQK&?!&CCZPr8c||yt|||||tj|t}tj|t}|r|j ddnd}|||fS)NNNNrrF)r~)rmrIrlegacy_contiguous_memory_formatr) rirjrrrhas_bias grad_gatesgrad_cxrjs r6#_thnn_fused_lstm_cell_backward_implrtsl7?7GRY?!!!@Jr1PQG4< q%0$I w ))r8cd}d}d}|dr|j|j}|ds|drQ|j|jd|jdf}|j|jd}|||fS)Nrrrrr)rrrrr grad_weightrjs r6linear_backwardrwsJKI1~!++FKKM: 1~Q",,l.?.?.CV[[QS_-UV  **<+<+.is_channels_lasts$""88=ATATTTr8c2r.tdk(rtjStjSj tjrtjSj tj rtj Syr)rrIrrrRr)rrsr6rz.meta_pixel_shuffle..pick_memory_formats| D !4 F*...***   e.E.E  F** *   e.C.C  D(( (Er8rrr)rrrr) rupscale_factorrrHrWrrrrs ` @r6meta_pixel_shuffler~ s DJJ! 2.>2Q RVW W 2$**=TUcTdeW U ) 2>N:;A B. (B B. (B-$**Sb/-1-b-"-I .. #C &&13& 4C Jr8cX|j|j}|j|j}|j|j}|j|j}|j|j}|j|j}|||||||fSr3r)rweight0weight1weight2weight3r<cx_tmprhy_cy_grad_output_r_opt grad_hy_r_opt grad_cy_r_optrrLr$r&r>r)r*r+r'rdiff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs r6mkldnn_rnn_layer_backwardr*s4__U[[ )FmmCII&Gv||,G .G .G   w}} -F 7GVVWg EEr8) out_int32rctj||rtjntjtjS)NrQr)rIrr5rr)r boundariesrrs r6meta_bucketizerMs2    &ekkEKK-- r8cdtdk(r'tjjfdtdk(r%jrt j dtjt tfdtjdkDfdtjt tfd tjt tfd tjk\d tjjj S) Nzhistc()rc$djdS)Nz%"histogram_cpu" not implemented for 'rrrsr6rXzmeta_histc..^s=ekk]!Lr8rz%_histc_cuda with floating point inputc$dtS)Nz#: argument 'bins' must be int, not rbinsrsr6rXzmeta_histc..ds7)>tDzlKr8rcdS)Nz: bins must be > 0, but got r<rsr6rXzmeta_histc..fsgY.J4&#Qr8c$dtS)Nz%: argument 'min' must be Number, not r)rrsr6rXzmeta_histc..i7)@c Lr8c$dtS)Nz%: argument 'max' must be Number, not r)rrsr6rXzmeta_histc..mrr8cy)Nz&{fn_name}: max must be larger than minr<r<r8r6rXzmeta_histc..or`r8r) rrIrZrrArrcrrr~rvrQ)rrrrrs````@r6 meta_histcrWsG5U"  # # % L 5V#(?(?(A %%&MN LL4!K LLQR LL3L LL3L LLMN ;;tELL DDr8cBtj|d}tjj dk7xs#t djddDfdj |jtjS)Nrrrc3&K|] }|dkD ywrr<)rers r6rgz,meta_upsample_bimode2d_aa..s!Ht$(!Hrrc*djSrrrsr6rXz+meta_upsample_bimode2d_aa..rr8r) rrrIrZrrrrrAr)rrXrrrrs` r6meta_upsample_bimode2d_aarss- kA LL Hc!Huzz|AB7G!HH_ ??+ , / /11%8 0 r8cTt||dtjjdk(fdt dD]0tjj k(fd2j |jtjS)Nrrrc"djSrrr~sr6rXz4meta_upsample_bimode2d_aa_backward..rr8c DddddjS)NzD Expected grad_output to have the same shape as output; output.size(rz but got grad_output_size(rrsr6rXz4meta_upsample_bimode2d_aa_backward..s@DDE3dK[\]K^J_`D!1!1!!4 59r8r) rrIrZrrrrrrAr)rrXr.rrrrrs` @@r6"meta_upsample_bimode2d_aa_backwardrs-K! LLAk1X    a $4Q$7 7 9     , / /11+> 0 r8cPtj|jdk(dtj|jdk(dtj|jjdtj|jjdy)Nrcy)Nz%found_inf must be a 1-element tensor.r<r<r8r6rXz<_amp_foreach_non_finite_check_and_unscale_..r`r8cy)Nz%inv_scale must be a 1-element tensor.r<r<r8r6rXz<_amp_foreach_non_finite_check_and_unscale_..r`r8cy)Nz!found_inf must be a float tensor.r<r<r8r6rXz<_amp_foreach_non_finite_check_and_unscale_..r`r8cy)Nz!inv_scale must be a float tensor.r<r<r8r6rXz<_amp_foreach_non_finite_check_and_unscale_..r`r8)rIrZrrQr)rr inv_scales r6*_amp_foreach_non_finite_check_and_unscale_rs| LLQ O LLQ O LL))3 LL))3r8c,tj|Sr3r\)rnanposinfneginfs r6 nan_to_numrr^r8c|jtjtjtjtj hvsJd|jd|j }t||}t||}||k(r|St|j}t|j}||||c||<||<||||c||<||<|j|||S)Nz>torch.transpose_: in-place transposition is not supported for z layout) rurIr sparse_cscr sparse_bscrrrrrr)rdim0rMndimsrrs r6rrs ;;       I U\]   IIE $ &D $ &D t|  D $++- F!'vd|F4L&,!$ZdDJT T6" Kr8c|j}|jr8|j}|j}|dkr|dk(s,Jd|d|d|j dks Jd|dt |d|dkrdSdS) NrrzEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is rr)rrrrryr)rrrrs r6t_rs IIE ~~__& NN$ Q9> !l,yk9J L 1 xxzQ DUG1 M  dAEAIq 551 55r8)rrsidesorterc\tjtjdkxsjddjddk(fdtjduxsjjk(fdtj|dk7xs| d|rtjntj }t tjr&tj|tjStjd|j S) Nrrc`dtjdtjS)Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor rr)rsorted_sequencesr6rXz#meta_searchsorted..s8 3378M8M3N2OP""&tzz"2!3 5r8cldtjdtjSgS)Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r)rrsr6rXz#meta_searchsorted.. sO ##'(=(=#>"??V%+%7tFLL!@ B>@@ Br8rzetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truerr<r) rIrZrrr5rrcr rrr~rv)rrrrrrrQs`` ` r6meta_searchsortedrs LL O ! !"a' 9  " %CR 8   LL$?///6<<?  LL #e) $ %EKK%++E$ % U-D-D  {{2U?3I3IJJr8ctjtjtjtjfvfdy)NcdS)Nz/Unsupported input type encountered for isin(): r<rsr6rXz3_check_for_unsupported_isin_dtype..$sA%Ir8)rIrZr complex128 complex64rs`r6!_check_for_unsupported_isin_dtyper!s/ LL ejj%"2"2EOODDIr8cJ|j||jdf}|S)Nrr)rr num_weightsrrrvs r6meta_embedding_dense_backwardr(s*''k6F6Fr6J(KLK r8c j| rtj||||||||| | St||||||||| | Sr3)r+_embedding_bag_sparse_backward!meta_embedding_bag_dense_backward) r3rrgrrmaximum_indicesrrrLrrrs r6meta_embedding_bag_backwardr4se22            1            r8c Ntjjtjtjtj tj fvfd|tk(rtj|duj|jdf} | S)Nc"djS)Nz$Unsupported input type encountered: r)r3sr6rXz3meta_embedding_bag_dense_backward..ns6tzzlCr8r) rIrZrQrOrPrNfloat64rrr) r3rrrrrrrLrrindex_grad_weights ` r6rr_sv LL u}}ennemmU]]SSC x _D01 TYYq\'BC r8c|jd}tj|tk(dtj|j dk(tj|j dk(|jd}tj|j dk(tj|jd|k(|j |f} | S)NrzHembedding_bag_backward: per_sample_weights only supported for mode='sum'rr)rrIrZrryr) r3r-rrgrrLrembedding_featuresrrs r6.meta_embedding_bag_per_sample_weights_backwardrvs1 LL R LLq! LL!#$,,q/K LL"# LLQ#556 ^^[N +F Mr8) assume_uniqueinvertctjt|txst|tdt|ts!tj||j }t|ts!tj||j }t |jt |jtj|tjS)Ncy)Nz.r`r8rr) rIrZrcr r$rvrrQrr)elements test_elementsrrs r6 meta_isinrs LL8V$I =&(IN h '<<1E1EF mV , ]8??K %hnn5%m&9&9:   HEJJ 77r8rctj|dk\dt|tj\}}tj ||S)Nrcy)Nz,polygamma(n, x) does not support negative n.r<r<r8r6rXz meta_polygamma..r`r8rr)rIrZrrrr)rrrDrEs r6meta_polygammarsF LLaOP( ;HHOA|   D 55r8ctd)Nz.Tensor.item() cannot be called on meta tensors)rrs r6meta_local_scalar_densers G HHr8c,tj|Sr3r\rs r6silurr^r8clt|tj\}}tj||Sr)rrrrIr)rrDrEs r6sigmoidrs3) ;HHOA|   D 55r8cR|jdk(}|jdk(}|r|r4|jd|jd|jdg}n"tj|jd|jdk(d|jd|jdg}n|r[tj|jd|jdk(d|jd|jdg}njtj|jd|jdk(d|jd|jd|jdg}|xs |j}tj j rZd|jz}|d|zdz |z|z}||k(r |d|z|dg} n|dg} tj|| ||j } | Stj|||j } | S) NrrrcyNz matrix batch sizes have to matchr<r<r8r6rXz2_create_grouped_mm_output_tensor..r`r8rcyrr<r<r8r6rXz2_create_grouped_mm_output_tensor..r`r8cy)Nzbatched dimension has to matchr<r<r8r6rXz2_create_grouped_mm_output_tensor..r`r8rMr) ryrrIrZrQrritemsizerrvr~) r:r<offsr1 mat1_is_2d mat2_is_2dr2 alignment size_paddedrrs r6 _create_grouped_mm_output_tensorrsqJqJ  ! diilDIIaLAH LL !  ! ,.X  ! diim4H  LL !  ! ,.X  ! diil3H LL !  ! ,.V  ! diilDIIbMBH'TZZI }}),,, |i/!3 AIM  #"1+ 3[!DJ%q)J!! j $++  Jkk()DKKH Jr8mat_amat_brc tjduduk(dduxrdu} | rstjjrtjntj } tjj | k(xrj | k(fdnTtjj tjk(xrj tjk(fdtjjdvxrjdvfdjdk(} jdk(} | r| s7tjjdjdk(d | rDd } d }tj| fd tj|fd d}|d|d# tjj tjk(xrj tjk(xs<j tjk(xrj tjk(fdj tjk(xrj tjk(ddfd }| r| rjdnd}|dd||dd|tj|dud| s| r}tjdufdxtjjdk(fdtjj tjk(fdntjdudtj|dudtj|duxs|tjk(dt|S) Ncy)Nz,Either both scale factors are given, or noner<r<r8r6rXz)_meta_grouped_mm_common..r`r8c>djdjdS)Nz5Expected inputs of E4M3 FP8 type but got mat_a.dtype= and mat_b.dtype=rfrrrsr6rXz)_meta_grouped_mm_common..s%KEKK=Xijojujuivvwxr8c>djdjdS)Nz1Expected inputs of BF16 type but got mat_a.dtype=rrfrrsr6rXz)_meta_grouped_mm_common..s%G }Tefkfqfqerrstr8)rr.cLdjdjS)Nz3Multiplicands must be 2D or 3D but got mat_a.dim()=z and mat_b.dim()=rrsr6rXz)_meta_grouped_mm_common.. s'Eeiik]Rcdidmdmdocpqr8rrrz3contraction dimension of mat_a and mat_b must matchcF|j}|ddkDxr|ddk(SNrrrrmat mat_strides r6rz-_meta_grouped_mm_common..is_row_majors*Jb>A%=*R.A*= =r8cF|j}|ddk(xr|ddkDSrrrs r6rz-_meta_grouped_mm_common..is_col_majors*Jb>Q&=:b>A+= =r8c0djddS)NzNExpected mat_a tensor to be row major in the last two dimensions, got strides rr)rsr6rXz)_meta_grouped_mm_common..%s#dejeqeqestvtwexdyzr8c0djddS)NzQExpected mat_b tensor to be column major in the last two dimensions, got strides rr)rsr6rXz)_meta_grouped_mm_common..)s#ghmhththvwywzh{g|}r8cjdz djz}jdz dk(rGtdjdz k\r%t j |zdk(fdydk(rJdz tdjk\r(t j dz |zdk(fdyt j dfdy)NrrMrc"ddddS)Nr stride along % dim to be multiple of 16 bytes, got rfr<end_dimmat_namer sr6rXzF_meta_grouped_mm_common..check_valid_strides..5s))H:^G9Dijtu|j}i~~Ar8c.dddz ddz dS)Nrr rr rfr<r sr6rXzF_meta_grouped_mm_common..check_valid_strides..<sK)H:^GaK=HmnxzADEzEoFnGGHIr8Fc*ddjdS)NzInvalid strides/sizes, got z for strides and z for sizes.r)rsr6rXzF_meta_grouped_mm_common..check_valid_strides..As!5j\ARSVS\S\R]]hir8)ry element_sizerrrrIrZ)r r rr r s`` @@r6check_valid_stridesz4_meta_grouped_mm_common..check_valid_strides,s'')a-#**,, ZZ\ gk "a 'Jw,?3 syy1%D -  LL7#i/14A  A %*Wq[*AS syy!F +  LL7Q;')3q8I  LLi r8rrc>djdjdS)NzhFor FP8 scales must both be float32, or for MXFP8 both scales must be float8_e8m0fnu. Got scale_a.dtype=z and scale_b.dtype=rfrrsr6rXz)_meta_grouped_mm_common..NsT~@G@M@MNNabiboboappqrr8c||zdz |z|zS)z$Rounds up x to nearest multiple of yrr<rFr"s r6round_upz)_meta_grouped_mm_common..round_upUsUQY1$) )r8rc jdk(rtjjfd r;tjjjk(fdytjjdk(fdtjjdjzk(fdytjj ddk(fd tjjdjdk(fd  rtjj j k(fd j\}}d } ||z d   |d tjjd k(xrjd k( fdytjjdk(fdtjjdjdzk(fdy)NrcddS)Nrz to be contiguous.r< scale_namesr6rXz>_meta_grouped_mm_common..check_scale..]si |3EFr8cBddjdjS)NzKFor MXFP8, scale must have same number of dimensions as target tensor, but  has mat.ndim= and scale.ndim=rr rer sr6rXz>_meta_grouped_mm_common..check_scale..fsZ"mnxmyzHILIQIQHRRbchcmcmbn!or8rc2ddjdS)Nrz to be 1D tensor, but got D tensor.rrer sr6rXz>_meta_grouped_mm_common..check_scale..k#)J<7QRWR[R[R]Q^^g hr8rcVddjzdjddS)Nrz to have rrz elements.r))r rescale_multiplierr scaled_dimsr6rXz>_meta_grouped_mm_common..check_scale..osW)J_meta_grouped_mm_common..check_scale..tsi |3[\r8cPddjddjddS)Nrz batch dimension to be r, got rfr)r sr6rXz>_meta_grouped_mm_common..check_scale..xs6i |3J399UV<.X^_d_j_jkl_m^nnopr8cBddjdjS)NzMFor MXFP8, scale should have same number of dimensions as target tensor, but r r rr sr6rXz>_meta_grouped_mm_common..check_scale..sZ"opzo{|JKNKSKSJTTdejeoeodp!qr8rrrrc Ndjddddj S)NzFor MXFP8, expected mat.shape=z to have scale shape of (,z ), but got r))G blocked_K blocked_Nr resr6rXz>_meta_grouped_mm_common..check_scale..sT"@ Kdefdgghirhsstu~t@KLQLWLWKX!Yr8c2ddjdS)Nrz to be 2D tensor, but got r rr sr6rXz>_meta_grouped_mm_common..check_scale..r r8cVddjdzdjddS)Nrz non-batch dimension to be rr% rfr))r rer r" sr6rXz>_meta_grouped_mm_common..check_scale..sT)J<7RSVS\S\]^ak]kSlRmmstyttABuCtDDE!Fr8)ryrIrZrRrrr) r rer r" r! rIrurVr) r* r+ is_mxfp8r s ````` @@@r6 check_scalez,_meta_grouped_mm_common..check_scaleYswwyA~ '')FLL swwy0o LL q(hLL A#))J*?BR*RRU  LL$)\ KKNciil2p LLEJJ.q $kkGAq!!#J (Z ;I (C 0ILL " 2Qsyy} 7QY LL q(hLL A#))A N*CCFr8rrrcy)Nz:Scale result tensor provided, but it is not supported yet.r<r<r8r6rXz)_meta_grouped_mm_common..r`r8cNdjdjdS)Nz/Offsets tensor not provided, but is needed for zD/zD multiplicand layouts.rrsr6rXz)_meta_grouped_mm_common..s*Eeiik]RTUZU^U^U`Taaxyr8c,djdS)Nz.Offsets tensor must be 1D, but got offs.dim()=rfrrsr6rXz)_meta_grouped_mm_common..sH TUVr8c$djdS)Nz7Offsets tensor must be integer (int32) tensor, but got rfrr3 sr6rXz)_meta_grouped_mm_common..sQRVR\R\Q]]^_r8cy)NzJOffsets tensor provided, but is not needed for 3D/3D multiplicand layouts.r<r<r8r6rXz)_meta_grouped_mm_common..r`r8cy)Nz2Bias tensor provided, but it is not supported yet.r<r<r8r6rXz)_meta_grouped_mm_common..r`r8cy)Nz4If output dtype provided, it must be torch.bfloat16.r<r<r8r6rXz)_meta_grouped_mm_common..r`r8r)rIrZrrrrQrQrPryrrNrrr5r)rrrrrr/rr1rscaled fp8_dtype mat_a_is_2d mat_b_is_2drrr r/ r! r. r s````` @@r6_meta_grouped_mm_commonr< sa LL Dgo.>D 8WD%8F -2]]->->E))EDWDW  KK9 $ A )A x  KK5>> ) KekkU^^.K t  LL v7%))+"7q ))+"K))+"K k JJrNejjn , A  > >    z     } 0''w2 ]]emm + N 0N  !5!55:MMU%9%99 r   MMU11 1 6 !5!55   *: z"-++DJJqMST  Iwq2BCIwq2BC D P k   y    LL aV  LL ekk)_   DL `  LL  D  LLT8Y%..8F ,E5$ JJr8c (t||dd||d|S)N)rrrr/rr1r< )rrrr/r1s r6meta_grouped_mmr? s) #      r8c *t||||||||| S)N)rrrr/rr1rr> ) rrrrrr/rr1rs r6meta_scaled_grouped_mmrA s, #    !%  r8rF half_to_floatc|r|jtjk(sJtj|tj j \}}|s|n|}tj||tj}|S)Nrr) rQrIrKrArrrBrr)rFryrB computation_dtyperErs r6softmaxrE soww%**$$$&+&>&> uDDLL'#|(5<:KL   1L@W@W XC Jr8c  tjtdzdk(fd|jt tdz} |z tj |k\ fdt dDr|}t D]td z dz z  dkr*|j   |j z} dzdksL|j d|j dzz}v|jStd }t |D]^tdzdzz  z z dzz}tj|dk\  fd|j|`tj||j|j|jt|S) Nrrc dtS)Nz1Length of pad must be even but instead it equals rr-sr6rXz'_constant_pad_nd_meta..sCCH:Nr8c(dtddS)Nz`Length of pad should be no more than twice the number of dimensions of the input. Pad length is z while the input has z dimensions.r)l_inpr.sr6rXz'_constant_pad_nd_meta.. s"225c(;P 'r8c3^K|]%}t|tjxr|dk'ywr)rcrAIntWithoutSymInt)rer/s r6rgz(_constant_pad_nd_meta..s) I:a// 0 ;Q!V ; Is+-rc Fdzdddzdzd S)NzThe input size z, plus negative padding rrzG resulted in a negative output size, which is invalid. Check dimension z of your input.r<)rrl_diffr.pad_idxsr6rXz'_constant_pad_nd_meta..&sGok&1*&=%>>V7|nE#gk"2!34117! OMr8)rQrvrxr)rIrZrrrrnarrowrrrr~rQrvrxr) rr.rl_padc_input new_shapenew_dimrrrM rI rN s ` @@@@@r6_constant_pad_nd_metarT s LL C1 N ++K  E HME U]F LL   IS IIvu% TA519q=)G7|a!..G }gmmA&6W&E7Q;!#!..Aw}}Q/?#gPQkBR/RS T}}[&)*I 5\ "c(q1uk*fqj)CL83w{;KK qL M ! " ;;kk||))+E2  r8rrrc|jdk(sJd|j}|j}|jdk(r|df}n$|jdk(r |d|df}n g||d}|j}|j ||S)Nrz'weight' must be 2-Drrr)ryrrrQr) r-rrrr weight_shape indices_shaperr1s r6 embeddingrX 5s ::<1 444 <._f_s =JJ  r8r=r$funcr` s r6_create_unary_float_meta_funcrd ^*4]  Ir8cBt|td}|S)Nc:t||tjSrr_ r s r6r` z*_create_binary_float_meta_func.._fjs q!@!M!M  r8ra rb s r6_create_binary_float_meta_funcrh ire r8ctfd}jd}||_ttt||}|S)Nc`|g|i|}t|j|j|Sr3r)rrCrrr5s r6_fnz#_register_inplace_meta.._fns.''' CII6 r8rD)rrnr=getattrr+)r5rk inplace_names` r6_register_inplace_metarn sO 2Y kk]!$LCL 4-l3 4S 9C Jr8cftjjjk(fdg}ttrQj dk7r1tjjjk(fd|j t|dtjiS)Nc<djdjS)Nrez for `end`, but got dtype r)rorpsr6rXzlerp..s/%++.H Tr8rc<djdjS)Nrez for `weight`, but got dtype r)rpr-sr6rXzlerp..s!/%++6STZT`T`Sabr8r>) rIrZrQrcrrrrGrrB)rpror-rCs``` r6lerprr s LL syy T 3.r`r8r)rIrZrAr.rQrGrrBrt s r6addcdivrz sb LL  " "7== 1 6&&w}}5    w0O0W0W r8ci}dD]}t|}|D]}||vs||||<!|jD]\}}t|tjj r,t|t sJ|jtjjj|tjj|jdr|tdvst|d|jr|jdvrd|jvrtj!|| d|jvrt"j!||3d|jvrt$j!||]d |jvrt&j!||t(j!||y) N)rs post_autograd pre_autogradCompositeImplicitAutogradrsz is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.> aten::clone aten::copy_ aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::z quantized::)ritemsrcrI_opsHigherOrderOperatorrpy_impl_C DispatchKeyr-%_dispatch_has_kernel_for_dispatch_keyrris_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tablermregistryopo op_overloadr5s r6 activate_metar s:9-d3 9C--+3C=#C( 99/4466N R k5::#A#A B +z2226 EHH00556r: 88 9 9     ; 8@@""m$;;         [--//BGG UWXK,,..?DD[RTU{//11BGG UWX+"2"2"44EJJ8<<["Mm6Nr8)Fror3)NNNFrrrrv)Tr)r)rT)FF)TT)rN)FTN)TFF)TF)r)g?N)r/str)r<rrF)r<rFTN)FrFNFr)NF)rF)g?gUUUUUU?FN)NNNNN)rNNr)NNF)FFN)Nr FFN)r FNN)Nr FNN)r FN)FN)FNNNN)NNNF)NrFNN)NNNN)rTT)NNrN)drr)r)NNNNF)rFF)r (rcollections.abcrenumr functoolsrtypingrrrr typing_extensionsr rItorch._prims_commonrrAr r r torch._decomprrrr torch._opsr torch._primsrrrrrrrrrrrrrtorch._prims_common.wrappersr r!r"r#r$rr%r&torch.fx.experimentalr'rK torch.utilsr(r9r)r*opsr+libraryLibraryr rrrrr=rGrSr\linspacelogspacerrtakerrrrrcummaxcumminrrrrrrr_fft_c2crrr_fft_r2crrandperm generator_outrrrrandintr rlow_outr randr_fft_c2rrrr r( unsqueeze_r,_sparse_semi_structured_linearr rQr9_sparse_semi_structured_mmr?_sparse_semi_structured_addmmrC_cslt_sparse_mmrX index_reducer_ index_reduce_ra index_selectresegment_reducerrr unary_outrvryrrrrrrr _assert_asyncrmsgr_printr_make_dep_tokenrr_functional_sym_constrain_rangerr(_functional_sym_constrain_range_for_sizer_functional_assert_asyncrrrrrrrr _linalg_eighrr_linalg_eigvalslinalg_eigvalsr linalg_eigrrrrrrrr linalg_inv_exrlinalg_ldl_factor_exrYr#linalg_ldl_solver2 linalg_lur8linalg_lu_factor_exr<linalg_lu_solverE lu_unpackrKrT linalg_qrr[r_r\ _linalg_svdrir/rryrlinalg_solve_triangularrrr _linalg_detrrrrreflection_pad1drreplication_pad1drrreflection_pad1d_backwardrreplication_pad1d_backwardrrreflection_pad2dr replication_pad2dr reflection_pad2d_backwardrreplication_pad2d_backwardrr"reflection_pad3dr$replication_pad3dr'reflection_pad3d_backwardreplication_pad3d_backwardr._pdist_forwardrMr2_pdist_backwardr8baddbmmrQ bernoullirT bernoulli_rWr/rZpoissonr]_fused_moving_avg_obs_fq_helperrnmmrxr|rrrmiopen_batch_normr convolutionrr _has_mkldnnr r_convolution_pointwiser_linear_pointwiserhas_mklr r _mkl_linearrr rqconv2d_pointwiseqconv_pointwiserbinaryrqlinear_pointwiser$r binary_tensorrlinear_dynamic_fp16linear_relu_dynamic_fp16rr r max_pool2drint4mm_packed_weight_cpurr avg_pool2dr"r'avg_pool2d_backwardr/ avg_pool3drIavg_pool3d_backwardrT_adaptive_avg_pool2drY_adaptive_avg_pool3dr\_adaptive_avg_pool2d_backwardrf_adaptive_avg_pool3d_backwardrkriadaptive_max_pool2dr{rradaptive_max_pool3drrrrepeat_interleaverrdrrrr _unsafe_indexrconvolution_backwardraddbmmr randint_liker _fused_adam_ _fused_adamw_r _fused_adamr_int_mmr_convert_weight_to_int4packr#_convert_weight_to_int4pack_for_cpur _weight_int4pack_mmr_weight_int4pack_mm_for_cpurr$r&rN_dyn_quant_pack_4bit_weightrZ_dyn_quant_matmul_4bitr`_weight_int8pack_mmrg_cdist_forwardr|_cdist_backwardr_embedding_bagr_embedding_bag_forward_onlyrrnansumrmedian nanmedianr dim_valuesrLrr logical_not_rrepeatrzero_rmul_Scalardiv_ logical_and_ logical_or_ logical_xor_radd_sub_rrsubrrounddecimalsrr __rshift__r __lshift__rzerorrrfillrrelu_r _add_relurrrelu_with_noiserrrelu_with_noise_functionalrrrelu_with_noise_r index_put_unsafe_index_putr masked_fill_r _masked_scalermasked_scatter_rmasked_scatterr masked_scatter_backwardr  index_put_rrbmmrr r$r)rrr>rMrPr max_pool2d_with_indices_backwardrZmax_pool2d_with_indicesr\fractional_max_pool2drnmax_pool3d_with_indicesr{ max_pool3d_with_indices_backwardrrrrgrid_sampler_2d_backwardrrrrronesrzerosrselect_scatterr slice_scatterrrrrgatherrrrrrr scatter_addr scatter_add_rrrrr[ value_reducer scatter_r #_scaled_dot_product_flash_attentionr#r,#_scaled_dot_product_cudnn_attentionr60_scaled_dot_product_fused_attention_overrideabler9,_scaled_dot_product_flash_attention_backwardrD+_scaled_dot_product_flash_attention_for_cpurG4_scaled_dot_product_flash_attention_for_cpu_backwardrK*_scaled_dot_product_attention_math_for_mpsrb'_scaled_dot_product_efficient_attentionrf0_scaled_dot_product_efficient_attention_backwardrn,_scaled_dot_product_cudnn_attention_backwardrp_flash_attention_forwardrx_flash_attention_backwardr}_efficient_attention_forwardr_efficient_attention_backwardSymIntr _scaled_mmrscatter_reducetwotwo_outrscatter_reduce_r multinomialrrrr_upsample_nearest_exact1dr_upsample_nearest_exact2dr"_upsample_nearest_exact2d_backwardr_upsample_nearest_exact3drr values_stablerr_thnn_fused_lstm_cellrr6r@rErHrJargminrKrNtopkrU_segment_reduce_backwardrYkthvaluer^rrprmrtrw pixel_shuffler~r bucketize Tensor_outrhistcr_upsample_bilinear2d_aa_upsample_bicubic2d_aar _upsample_bilinear2d_aa_backwardrrrrr searchsortedrrembedding_dense_backwardr_embedding_bag_backwardr_embedding_bag_dense_backwardr*_embedding_bag_per_sample_weights_backwardrisinr polygammar_local_scalar_denserrrrr< _grouped_mmr? _scaled_grouped_mmrA _softmaxrE constant_pad_ndrT rX _jagged_to_padded_dense_forwardr\ rd rh special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_prn rr rw rz lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialr r<r8r6r sH $55' #++ "U    <7) T]t_ yy~~*/--*?*?PV*W' %a)X 8BF#3"4hr2v6F"FG 3(* t}}-.     ==5/5p !!499==12  '3 '!!))4+<+<+@+@AB %'%C%$t%%& I'I  [[$++//4;;+>+> P Xy!"!!))4+<+<+@+@AB ICI3lV$s)4 %%t}}'8'89: K;K$s) %%t}}'8'89: 8 ;8 vt}}**+"&3,3t}}$$% **   &  $$dll&6&678  **  9&   $,,"6"678  **  9& !!499==12 %)$tPT3  %%t}}'8'89: $Dv$DDI$Dc$DC$D;$DNtzz!!"#0(t&&'(t223 "%)'+     6   c]   $ 4Bt../ (,    $ 00t112 '+     $3Dt##$""'+"-<,,-< \\-< 6 -< F  -<  $ -<  -< -<-<-<%-<`t  (() I  I  I  I LL I  I I  I* It!!))*        LL      +  t  (()'*'t""**+ !% $ $  W  W  Wf  W f  W f  W  W  W  W, WF  $(("4"456 7txx||  $(("4"456 7txx||tzz!!"6#6tzz~~(( t!!))* + t!!%%& ' t{{""# $ t##++,    )-)t''//0,1,t33;;<= t008896:6&t<<DDEF t,,0012 FC  F # N(,    !% $VSC  F $  "        C!!))4+<+<+H+HIJ ]N+sT,K"$$,,d.A.A.E.EFG BB6BHB ! ]N+ 6 ," QQFQt**+ ))F)4)F),)t""#  J JF J4 JF J$ Jt}} )6)$)6))t$$% )6)$)6)&) t&&../&T0" $$,,d.M.M.Q.QR .f.6.f..dt!!))*&+))1143L3L3P3PQR T8V$     666 !" %S&%%--t/D/D/H/HIJ   '' ''  '  'K'T&&(:(:;< S#s/3ffff>T8U=4((00$2J2J2N2NOP T8V$      666 !" %QD$$,,d.B.B.F.FGH   44 44  4  4 4I4nt~~ S#s $$ $$ $  666 !" $$PtTz!2*&&(:(:;< S#fCffn8M=4$$,,d.B.B.G.GHI V[$1 'v '%(F"G '2J '$t''(  " """ SM ")"J. . . 49d3i  .". . . 3-. 66> .(fVt$$% ##!7 7 7  7  7 V  7 7 V 7 6 7 6666 )*7&7t,,44d6R6R6V6VWX           &  Y2t$$% S#4( +( +( +( +( +(  +(  66> +()&+(^t''( ) tzz   W W W W  W  W  WWt>#;Lt$$% =&=t%%& >'>(<t--. \S/St../ \T0T2Ejt$$% =&=t%%& >'> &&.. &&11 ''// ''22  \&&:<G~t$$% =&=t%%& >'> &&.. &&11 ''// ''22  \$($(Nt""#   f    v  $  t##$ PvPVPPfPQWP%P $$dll&6&678 /0'9':&&(:(:;< &*I=I t$$%&t~~ I!I  $$dll&6&678 "9"t33;;<*=*.tww   B *7;j,,j LLj $s)S. !j49c> " j DIsN# j  j jU49c>23jZQt%%--."$,,"$ LL"$ 5<< "$5<<( "$ %,,' "$  "$!&"$"$/"$Jt''(),,) LL) ,,) I ) #Y ) 3i ))I) )))X 889>9N9N&&:6599##::BBCD,599##55==>S?S  xx:?--:O:O 66; 7 uyy}}00 1  2  :?9N9N&&:6599##55==>599##33;;<0=?0d599##55<<=>6599##55==>599##55<<=>?8599##55<<=599##55CCD!E>!F599##77??@599##<<DDE FA =BMM?D))1123H4H: ##T[[__56 ./q'7'0!!(()*'+'!!))4+=+=+E+EFG ! H 2  (()* !!+!H ~ BB*00123&889: ; (()* @+ @0012 <3 <>>?@ <A <"3"3"3"jZ0012D!)&!1D3D0++,- ;. ; (()* <+ <t""**+ &, &Ft##$ G%G*t""**+  `5,`5Ft//778595  ##T[[__56 =$=7= ##T^^%;%;<=)>)    !!     Xy!" t  (()*t{{""#'$'&tzz!!"#     !!    !!        **Z       ""DJJ$7$789:  "&&(>(>?@A&&(>(>?@Atyy  !&"& !!4::#4#4567   $))"2"234"5"tzz!!"#t~~$$% F& %%&' RV"(" 0012RV;3; &&'(KO) &&(>(>(F(FGH"I"t  ''() t!!))*+t##$ % t""# 6$6 t++,!-!t&&'(&Rtxx 5!5txx~~JJ 6 ;h#-Y Y Y Y Y Y Y Y Y Y Y YYYY Y !Y" #Y$ %Y&'Y( )Y*+Y,-Yx;4|38 383838 38 38 38 38 38 38 38 38 38 3838 38 !38"#38$ %38&'38lI2Xt44<<=(>(Vt++334    #5#Lt))112Q3Qht++, UI    d-dNt445 \b6bJ% V% 6% Pt v3$t,,445#6#$t##$ 8%8"t,,- \;'!(.!, !!"#.$.t&&'    )()X !!499==12    3( ""DJJNN34    5(t""**+.,.t!!))*.+.  C d /  t{{""# '$ '6   - b4t''(&)& t  !"     !!  &&     ""  889: #!= = = = =  =  = E?=;=@ S#X(889:#!( ( ( ( (  (  ((( E?(;(VEEFG #'#!' ' ' ' '  '  '' E?'H'T 99(""" " "  "  "  """ " """"" E?"  ". 88"&!         E?  : AA#'!!"!" !" !"  !"  !"  !"!"!"!" E?!"  !"H??@A #'%)!3& 3& 3& 3& 3&  3&  3&6"3& E?3& 66>3&B3&l<<=>!)) )) )) )) ))  )))) E?))?))X =="!4-4- 4- 4-  4-  4-  4-4-4-4-4-$Z4-4- E?4-  4-n 99*"!"" " "  "  "  """""" " """ E?!"  "0 %% "&*'+"&%)H H H H H  H  H HHHH E?HsmH }HH6"H  HV &&("&*'+#,, , ,  ,  ,  ,,, , ,,,,, E?, sm!," }#,  ,4 )) %!(,!%!%,S ,S ,S ,S 6  ,S 6" ,S 6" ,S3-,S3-,S,S,S,S E?,Sf%,Sv,S#,S  ,S^ ***"$("'%4747 47 47  47 6  47 6" 476"47,,47,,47474747474747 E?!47"SM#47$ %47  47n''() $(+/'+ _Y ,,_Y ,,_Y\\_Y\\ _Y 5<< _Y 5<<( _Y $_Y_Y*_YD##'')<)<)D)DEF &G& t##''()   (($*:*:*>*>?@   A  ,* $$d&D&D&L&LM   $$d&D&D&L&LM. ((00 //77!% $ %U\\ 123sELL012uo  uo   : $$d&D&D&L&LM       &$Nt))112  3 t&&'4/(4/nt$$,,-$%.$%N    ##T[[%8%89:4;4t!!))*+tyy  ! Q" Qt,,- LP .   %%t}}';';<= Xy! K"> K #("9"9 Qt77??@ *A *t##++, 0- 0t!!))*+>t--556F7FD%%t~~'@'@AB 27uC | EE4 !!))4+F+F+N+NO  &55==>?   @8t>>FFGH$'')<)<=> "?"uyy~~(()*8uyy~~  !6"6$t  !    -K"-K`t,,-.t++,' -' Tt1123,t>>?@.tyy 8=e 8 8 t~~ 6c66F66t''(I&I)Ityy "v"&""t|| 6&6V66'^"!+/'+ TK TK TKell #TKell # TK 6  TK 6  TK5<<(TK $TKTKnt  "!'+    6  6    $   !&''() $(#'+/'+  << <<\\\\  5<<  5<< 5<<( $*0t}}  v C    t##$ 3%3lt~~ $ 8 8 88 8  8  88,t33;;<  *  * &\ *c * *= * d223d445d445d==>d==>d==>d==>dDDEdDDEtBBCtBBCtBBCtBBCtJJKtJJKtJJKtJJKt@@AtAABtAABtAAB tyy $t|| ./ t|| ./, tyy) !$,, / !$,, /  BNJr8