~L i"!tUddlZddlZddlZddlZddlZddlmZddlmZddlm Z m Z ddlm Z m Z ddl mZmZmZmZmZddlZddlZddlmZddlmZddlmcmZddlmZm Z m!Z!ddl"m#Z#dd l$m%Z%dd lm&Z&m'Z'm(Z(m)Z)m*Z*dd l+m,Z,m-Z-m.Z.m/Z/dd l0m1Z2dd l3m4Z4ejjjlZ6gZ7e8e9e:d<ejvjxjzZ=GddeZ> ddedej~de@de@fdZAe eAej~jdZCe eAej~jZDe eAej~jdZEe eAej~jZGde!deHde!fdZIe#e=je/deDd e!d!e!fd"ZJe#e=je/deDd e!d!e!fd#ZKe#e=je/deDd e!de!d$eMd%eMfd&ZLe#e=je/deDd'e!d(eMd)eMd*eMd+e@d,e!f d-ZNe#e=jjgd.ZQe#e=jjBgd/e!fd0ZRe#e=je/eDd1e!de!fd2ZSe#e=je/deDd'e!d1e!fd3ZTe#e=je/dd'e!d1e!d4eMd5eMfd6ZUe#e=je/eDd1e!de!fd7ZVe#e=je/eDd'e!d1e!de!fd8ZWe#e=je/dd'e!d1e!d%eMfd9ZXe#e=je/deDd'e!d1e!d:eMd;e@fd<ZYe#e=je/deDdd=e!d1e!d>e9fd?ZZe#e=jeDd'e!d@e!fdAZ[e#e=je/eDd1e!de!fdBZ\e#e=je/deDd'e!d1e!de!fdCZ]e#e=jd1e!dDe!de!fdEZ^e#e=jd'e!d1e!dDe!de`e!e!ffdFZ_e#e=je/eDd'e!d1e!dGe!dHeMdIeMdJe@d;e@de!fdKZae#e=je/deDd'e!d1e!dLe!de!fdMZbdNe!dOeHfdPZcdQejfdRZee#e=je/eDe>jjfd1e!dSe!dOeHde!fdTZfe#e=je/deDd'e!d@e!dSe!dOeHfdUZie#e=jddVZke#e=je/eDe>jjdWfd1e!dSe!dOeHd$eMfdXZle#e=jjeDd'e!d1e!dSe!dOeHd$eMf dYZme#e=jjeDd'e!d1e!dSe!dOeHd$eMde!f dZZpe#e=jjeDd'e!d1e!dSe!dOeHd[eMf d\Zqe#e=jjeDd'e!d1e!dSe!dOeHd[eMde!f d]Zsd'e!d1e!dSe!dDee!dOeHd^eHd_e!de!fd`Zte#e=je/deDd'e!d1e!deHde!fdaZue#e=je/dd'e!d1e!dSe!dDee!dOeHd^eHd_e!de!fdbZve#e=je/dd'e!d1e!dSe!dDee!dOeHd^eHd_e!de!fdcZwe#e=je/eDde>jjfd1e!dSe!dDee!dOeHde!f ddZxe#e=je/deDde>jjfd'e!d1e!dSe!dDee!dOeHde!f deZye#e=je/eDe>jjfd@e!dSe!dOeHde!fdfZze#e=je/deDe>jjfd'e!d1e!dSe!dOeHde!f dgZ{e#e=je/dd@e!dhe!dieMfdjZ|e#e=je/dke!dle!de!fdmZ}e#e=je/d'e!dne8eHdeHdoeHdpeHdqeHf drZ~e#e=jjB dd1e!deHdoeeHdpeeHdqeHf dtZde!deHdoeeHdpeeHde`eHeHff duZe#e=je/ dd@e!dve!deHdoeeHdpeeHdqeHf dwZe#e=je/d'e!dne8eHdeHdxeHfdyZe#e=je/d'e!dne8eHdzeHd{eHd|eHf d}Zd'e!de!d~ejfdZe#e=j e/deCd'e!de!deHd~ejfdZe#e=je/eCd'e!de!deHd~ejfdZdZe#e=je/d@e!de8eHde8eHde8eHde8eHde!f dZe#e=je/eDd@e!de8eHde8eHde8eHde8eHde8eHde!fdZe#e=je/d'e!de!d)eMfdZe#e=je/d=e!de8eHdeHdeHdqeHde!f dZe#e=jjeD dd'e!d1e!deeMde!fdZe#e=je=jjje6j e=jjje6j"d@e!dieMdee@fdZe#e=j$e/ddd@e!dieMdee@fdZe#e=j&e/de!deHde@fdZe#e=j(e/dde!deHde@fdZe#e=j*e/ ddDe!de!deHde@de@de!f dZe#e=j,e/d'e!de!deHdeHde@f dZde8eHfdZde8e!deHdeHde8e!fdZde8e!fdZde8e!deHfdZde8e!deHdeHfdZe#e=j8je=j8jg dde8e!deHdeHdee!de!f dZe#e=j:je=j:jg dd1e!de8eHdeHdee8e!dee8e!f dZe#e=j<jBdd@e!deHdeHde`e!dffdZe#e=j>j dd@e!de8eHdeHde`e!dffdZe#e=j@jBdd1e!deHdeHde`e!dffdZe=jBjDje6j  dd1e!de!deHde`e!dffdZe#e=jHe/deDdd1e!de!de!d$eHd(eHf dZe#e=jJe/eD dd1e!de!de!d$eHd(eHde@f dZe#e=jLe/deDdd1e!de!de!d$eHd(eHf dZe#e=jNjeDd'e!d@e!de!de!dee!deHdeHdeHdeHde8e@de`ee!ee!ee!ffdĄZe#e=jNjd'e!d@e!de!de!dee!deHdeHdeHdeHde8e@dejBdejBdejBde`ee!ee!ee!ffdƄZdee!dee!fdDŽZe#e=jTjde!d@e!de8eHde!de!dDee!dee!de8e@de`ee!ee!ee!ffd˄Ze#e=jTjde!d@e!de8eHde!de!dDee!dee!de8e@dejBdejBdejBde`ee!ee!ee!ffd̄Ze#e=jXjde!d@e!de8eHde!dDee!de8e@de`ee!ee!ffd̈́Zd@e!dDee!dee!dee!dee!dJe@deMdeMde@de`e!e!e!ee!ee!ffd҄Ze#e=j\e/dddԫd@e!dDee!dee!dee!dee!dJe@deMdeMde`e!e!e!ffdՄZe=j\jje6j"e=j\jje6j d@e!dDee!dee!dee!dee!dJe@deMdeMde`e!e!e!ffdքZe=j`jje6j dde8e!fdׄZe#e=jdjd@e!dDee!dee!de!de!deMdeMde`e!e!e!ffd؄Ze#e=jfjd@e!dDee!dee!de!de!dJe@deMdeMde`e!e!e!ffdلZe#e=jfjhd@e!dDee!dee!dJe@deMdeMde`e!e!e!ffdڄZe#e=jljd@e!dDee!dee!de!de!dJe@deMdeMde`e!e!e!e!e!ffdۄZd@e!dDee!dee!de!de!deMdJe@de!fd܄Ze#e=jpjd@e!dDee!dee!de!de!deMdeMde`e!e!e!e!ffd݄Ze#e=jrjd@e!dDee!dee!de!de!deMdeMde`e!e!e!e!e!e!ffdބZe#e=jtjd@e!dDee!dee!de!de!deMdeMde`e!e!e!e!ffd߄Ze#e=jve/ddeDddZe#e=jze/ddddddddee!e'fdQeejdeej|de@de@deej~f dZe#e=je=je=jge/dZe=jjje6j"e#e=je/ddddd@e!dDe!dee!dee!dee!dJe@deMdeMfdZdZe#e=jjde!d@e!dDee!dee!dee!dee!dee!de@deMde8e@de!de`e!ee!ee!ffdZe#e=jjde!d@e!dDee!dee!dee!dee!dee!de@deMde8e@de`e!ee!ee!ffdZe#e=jjde!d@e!dDee!dee!dee!dee!dee!de@deMde8e@dejBdejBdejBde`e!ee!ee!ffdZe#e=je/dddūd@e!d'e!dDe!dee!dee!dee!dee!deMfdZe#e=je/dddūd@e!d'e!dDe!dee!dee!dee!dee!deMde!fdZe#e=je/eDd@e!de`eHeHffdZd1e)de)de8eHdeHfdZe#e=je/d1e)de)de8eHfdZe#e=je/d@e)de)de8eHde8eHde8eHf dZe#e=jdsdde)deHdxe)de)d(e'f dZe#e=je/dsdde)deHdxe)de)d(e'f dZdsdde)deHdxe)de)de@d(e'f dZe#e=jje=jjje6j ddZe#e=jde)deHdxe)de)fdZe#e=je/de)deHdxe)de)fdZde)deHdxe)de)de@f dZe#e=je/ddLeDd1e!de`e!e!ffdZe#e=je/ dde!dee@eHeMfdee@eHeMfdeejfdZe#e=jddZڐd Zېd Ze#e=jje#e=jje#e=jje=jjje6j e=jjje6j"e=jjje6j e=jjje6j"e=jjje6j e=jjje6j"d@e!dee8eHd ee8eMde!fd Ze#e=jje#e=jje#e=jje=jjje6j e=jjje6j"e=jjje6j e=jjje6j"e=jjje6j e=jjje6j"d@e!dee8eHd ee8eMde!fd ZddZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMde!fdZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMde!fdZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMdeeMde!f dZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMdeeMde!f dZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMdeeMdeeMde!f dZe#e=jje=jjge=jjje6j e=jjje6j"e/dd dd@e!de8eHdeeMdeeMdeeMde!f dZeD dd@e!de8eHde8eeMde@de!f dZdZdZdZdZ dd Zd!Zd"Zdd#Zdd$Zd%Ze#e=jje=jjje6j e=jjje6j"d&Ze#e=jje=jjje6j e=jjje6j"d'Ze#e=jje=jjje6j e=jjje6j"d(Ze#e=jje=jjje6j e=jjje6j"d)Zd*Zdd+Zdd,Zd-Ze#e=jje=jjje6j e=jjje6j"d.Ze#e=jje=jjje6j e=jjje6j"d/Zd0Zd1Ze#e=jje=jjje6j e=jjje6j"d2Ze#e=jje=jjje6j e=jjje6j"d3Ze#e=jje=jjje6j e=jjje6j"d4Ze#e=jje=jjje6j e=jjje6j"d5Z e#e=jje#e=jje=jjje6j e=jjje6j"e=jjje6j e=jjje6j"e=jjje6j e=jjje6j"d6Ze#e=jje=jjge/ dd@e!de8eHd7e@deeMde!f d8Z e#e=jje=jjge=jjje6j"e/ dd@e!de8eHd7e@deeMdeeMde!f d9Z e#e=jje=jjge/ dd@e!de8eHd7e@deeMdeeMdeeMde!fd:Z dd;Zd<Zdvee!d=ee!d>e!de!fd?Zd=e*de!fd@ZeDd@e!de8eHd7e@de8eeMde!f dAZe#e=j(jdBe!dCe!de@fdDZe#e=j*e=j,ge/dEZe#e=j.gdFZe#e=j0gddGZe#e=j2gdHZe#e=j4gdIZd1e!dSe!dDee!dOeHd^eHde`e!e!ff dJZe#e=j8e/dd_d1e!dSe!dDee!dOeHd^eHde`e!e!ff dKZe#e=j:e/dd_d1e!dSe!dDee!dOeHd^eHde`e!e!ff dLZde!dMeMde!fdNZde!dMeMde!fdOZdPe!de*fdQZ dRe*dSe!de!fdTZ!dSee!de!fdUZ"dVeHd7e@dQejdej|fdWZ#dXe!dYeHdZeHd7e@fd[Z$dXe!d\eHdYeHdZeHd7e@f d]Z%dXe!de8eHd7e@fd^Z&dXe!de8eHd7e@fd_Z'e#e=jPe/eDdXe!de8eHd7e@fd`Z( ddBe!dae!dbeHdceHd7e@dde@de!fdeZ)e#e=jTe/eD ddBe!dae!dbeHdceHd7e@de!f dfZ*e#e=jVe/deDdgZ+e#e=jXe/dde>jjfdhZ,diejBdjejBdke@de@fdlZ-e=j\jje6j e=j\jje6j e/dmddndoZ.e#e=j^je=j^jge=j^jje6j"e/eD dd@e!de`eHeHfd7e@dpeeMdqeeMde!f drZ0e#e=j^je=j^jje6j e=j^jje6j"e/eD ddBe!dee`eHeHfd7e@d ee`eMeMfde!f dsZ1e#e=jde#e=jfe#e=jheDe/dBe!de`eHdfde!fdtZ5e#e=jle#e=jne#e=jpeDe/dBe!de`eHdfde!fduZ9dBe!de`eHdfdveeHeHeHge!fde!fdwZ:e#e=jve#e=jxe#e=jze/ddxZ>e#e=j~e/dydzddd{d|Z?e#e=je/ddd}d~Z@e#e=jje=jjge/dejddddpe'dQeejdejdeej|de@f dZDe#e=jjgdejddddoe'dpe'dQeejdejdeej|de@f dZFe#e%dZGe#e=je=jjje6j"e/dsdsde>jjfd@e!dSe!die'de'dDee!dOeHde!fdZHe#e=je=jjje6j"e/ddd@e!dSe!dOeHde`e!e!ffdZIe#e=jj ddddde!de!d/e!deMde@dee!d)eeMde`e!e!ffdZKdZLe#e=jge/deDddZMe#e=je/dZNe#e=jdZOe#e=jje=jjgdddd1e!dQeejdee!de!fdZQe#e=jje=jjgdd1e!deeHfdZTe#ejxjzjddZUe#e=je/ddddZVe#e=jjddd1ejBdeejdejBfdZWdddZXddddZYe#e=je/dZZe#e=jddZ[eLe=je=jeLe=je=jHeLe=je=jLeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=jeLe=je=j eLe=j e=jy(N)Iterable)Enum)partialreduce)chainproduct)AnyCallablecastOptionalUnion) sym_floatsym_intTensorregister_decomposition) out_dtype)IntLike NumberTypesuggest_memory_format TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_out out_wrapper)_pytree)tree_map__all__ceZdZdZdZdZy) ReductionrN)__name__ __module__ __qualname__NONEMEANSUMb/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/_decomp/decompositions.pyr!r!0s D D Cr+r!Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argscNtjfd}|S)Nc\ r ttjjfntf}t j |i|Dcgc]}t ||r|}}tj|d i\ fd} fd} t||it||} r|St||Scc}w)Ntype_promotion_kindcJt|tr|jS|SN isinstancerto)xcomputation_dtypes r, increase_precz0type_casts..inner..increase_precNs"!V$tt-..r+cJt|tr|jS|Sr5r6)r9 result_dtypes r, decrease_precz0type_casts..inner..decrease_precTs!!V$ttL))r+) rtorchtypes_Numberpytreearg_tree_leavesr7utilselementwise_dtypesr)argskwargs allowed_typesr9 flat_argsr;r>rr:r=r/r-r0r.s @@r,innerztype_casts..inner?s.EVU[[(( )6)  ++TJ;V1W W ' !  r+c.tj||Sr5)r? full_likeselfvalues r, fill_scalarr}s ??4 ''r+r|ctjjdk(fdtj |S)Nrc,djdS)Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrO)r|sr,zfill_tensor..sRSXS\S\S^R__jkr+)r?_checkrOatencopyrzs `r, fill_tensorrs3 LL qk 99T5 !!r+r{cftjtj|dzdddz SNrminmaxr?clampr{s r, hardsigmoidrs) ;;u{{4!83 ;a ??r+cHtj|dkD|dkz|dzdS)Ngg@gUUUUUU?r?rirlr{s r,hardsigmoid_backwardrs0 ;; $y!  r+min_valmax_valcBtj||k||k\zd|S)Nrr)rlr{rrs r,hardtanh_backwardrs$ ;;DGO L r+c6tj||kd|Srsr)rlr{rds r,threshold_backwardrs ;;ty(![ 99r+negative_slopeself_is_resultc<tj|dkD|||zSrsr)rlr{rrs r,leaky_relu_backwardrs ;;taxkN.J KKr+grad approximatecd}d}d}|dk(ri||zdz}d}||z}||z} |||| zzz} tj| } d|z} d| z} d| z}d| | zz }|dd|z|zzz}| |z|z}|||zzS|}||zdz}ddtj||zzz}|tj||zd zz}||||zzzS) Ng;f?g;f?gmBP ?tanhrgHm?r"rg)r?rerfrh)rr{rM_SQRT2 M_SQRT1_2 M_2_SQRTPIkBetakKappax_sqx_cuberK tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfs r, gelu_backwardrs'%G&I'Jf*$s*d{/0ZZ& TzJ+j:55 AF T(9$9:/14DD)99::Y&,Q4&=112eiit d 233sTCZ'((r+inputctjtj|}tj|}||zd||zz z}|||zzSr\)r?rFsoftplussigmoid)rlrinput_tanh_softplus input_sigmoidouts r, mish_backwardrsV **QZZ%67MM%(M - 1':=P'P#P QC -3 44r+c2|tj|zSr5)r?rrs r,silur!s %--% %%r+c\ddtj| zz }||zd|d|z zzzSr\)r?rh)rlr{rs r, silu_backwardr(s<1uyy$''(G  AG (<$< ==r+weightc<tj|dkD|||zSrsr)r{rs r, _prelu_kernelr0s ;;taxv} 55r+c~tj|dkD|||z}tj|dkDd||z}||fS)Nrrr)rlr{r input_grad weight_grads r,_prelu_kernel_backwardr5sE TAX{F[4HIJ++dQhTK-?@K  $$r+noiseloweruppertrainingcx|r||z dkDr|j|S||zdz }tj||||S)Ngư>r#)mulrr)rlr{rrrrrrs r,rrelu_with_noise_backwardr@sKEEMD(u%%%-1,'' ~~  r+bufferc|dk}tj|dd}tj|dd}tjtj| }||||d|zz zz zS)Nrr"rS)r?rirhabs)rlr{r in_negative max_derivsignrjs r,log_sigmoid_backwardrUsg(K KA.I ;;{Ar *D 599T?"#A )da1q5k&:: ;;r+loss reductionc|tjjk(rtj|S|tj jk(rtj |S|Sr5)r!r(r|r?meanr)sum)rrs r,apply_loss_reductionrbsHINN(((zz$ imm)) )yy r+dtypec|tjk(rtjS|tjk(rtjS|tj k(rtj Syr5)r? complex32float16 complex64float32 complex128float64rs r, to_real_dtyperksK }} %// !}} %"" "}} #r+targetc*||z dz}t||S)Nr#)r)r{rrrs r,mse_lossrzs 6Ma D i 00r+c||tjjk(rd|jz nd}|||z z|zS)N@)r!r(r|numel)rlrrrnorms r,mse_loss_backwardrs; #,y~~/C/C"C3 D 56> "[ 00r+ctj|||}|jtd}tj||d}tj |}tj |||S)N)rOrz-infTrOkeepdim)r?softmaxeqfloatall zeros_likeri)r{rOrrmasked masked_rowszeross r, safe_softmaxrs[ --#U 3C WWU6] #F))FT:K   S !E ;;{E3 //r+rgc||z j}tj||kd|dzz|z |d|zz }t||S)Nrr#)rr?rir)r{rrrcrs r,smooth_l1_lossrsO 6M   D ;;td{C$'MD$8$t:K LD i 00r+c|tjjk(rd|jz nd}||z }t j |}||z}t j ||k||z|z |t j|zSrf)r!r(r|rr?rrir) rlr{rrrcrr9abs_x norm_grads r,smooth_l1_loss_backwardr s{ "+inn.B.B!B3 D v A IIaLE{"I ;;  A EJJqM! r+cht|||||}t||jt||dSNT copy_fromcopy_to exact_dtype)r rshaper)rlr{rrrcrXresults r,smooth_l1_loss_backward_outrs3%[$ 4 PFj&,,/ FJD QQr+deltac |tjjk(rd|jz nd}||z }t j || k| |z|zt j ||kD||z|z||z|zSrf)r!r(r|rr?ri)rlr{rrrrr9s r,huber_loss_backwardrs "+inn.B.B!B3 D v A ;; UF  e# AItk1E94!8k;QR r+cht|||||}t||jt||dSr )rrrr)rlr{rrrrXrs r,huber_loss_backward_outrs3!dFIu MFj&,,/ FJD QQr+ ignore_index total_weightc|jdkrdnd}|tjjk(r||z }|j |}t j ||k7|d}t j|} t j| ||d} | j|jcxkDrdkDrnn|j |}|Nt|jD cgc]} d} } |jd| |<|j| }||z}t j ||k7|d}| |zScc} w)Nr#rr"g) rOr!r(r|rUr?rirscatterrTrreshape) rlr{rrrrr channel_dim safe_targetrXrV new_shapes r,_nll_loss_backwardr sxxzA~!1KINN(((!L0   k *F++f 4fa@K!!$'Jz; TJJ~~+//+/a/!++K8   %dhhj 121Q2 2!'a + *!F* ++f 4k1EK  ##3s* D<c|jdkDsJdtj|j|}|j|}|dzdk(s Jd|d||dz}|j |d|}|j |||}t j |}d|z |z|z|z} ||z}t j|| g|S)Nrz*glu does not support 0-dimensional tensorsr#z.Halving dimension must be even, but dimension z is size rgr)rOrDcanonicalize_dimsizenarrowr?rcat) rlr{rOwrap_dimnIn inputSize firstHalf secondHalfgradInputFirstHalfgradInputSecondHalfs r, glu_backwardr-s 88:>GGG>%%dhhj#6H ))H C 7a< 8 )C5Q<qI Ha3IXy)H MMr+c d|jcxkr dksJdJd|jdksJd|jdk(xr|jdk(}|sA|jd|jdk(s"Jd|jd|jd|jdk(s'Jd |jd |jd f|'|j|jd k(sJd |tjj k(r}|jdk(rj|jdk(r|jd|jdk(srJd|jdd|jd|jd|jdkr|jdk(sJd|jt |||||||S)Nrr#input tensor should be 1D or 2Dr";0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: , target: ):expected total_weight to be a single element tensor, got: z (z elements)rSzV\\^tzz"~=F =INN(((TXXZ1_ A%+*;*;A*>$**Q-*O EdjjQRm_U)oo/00G HYHYZ[H\G] _ O  A%+*;*;*=*B EkFWFWEX Y B T669lL r+c >|jdk(sJd|j|jdk(sJd|j|jd|jdk(r>|jd|jdk(r|jd|jdk(s!Jd|jd |j|jdk(s&Jd |jd |jd t|||||||S) NzSonly batches of spatial inputs supported (4D tensors), but got input of dimension: rzUonly batches of spatial targets supported (3D tensors) but got targets of dimension: rr#r"r1r2r4z ( z , elements))rOrrr )rlr{rrrrrs r,nll_loss2d_backwardr9Cs@ 88:? ]^b^f^f^h]ij? ::<1  _`f`j`j`l_mn  1 a( JJqMV\\!_ , JJqMV\\!_ ,I $DJJ>r+cn|j|dtffd }||dd}|||}||fS)zn Normalize start and end such that both are in the range [0, x.get_size()[dim]] and start <= end. rPcL||S|dkr|z}tt|||Srsrr)valrrdefaultdim_sizes r, clamp_wrapz(_normalize_start_end..clamp_wraps0 ;N 7.C3sE?E**r+r)rint)r9rOr`rarrs @r,_normalize_start_endrsJwws|H+#+ ua1 -E S%8 4C #:r+srcc |tj|j|}|j|}t ||||\}}t |j}||z |dz z|z||<|j |}|dk(r||k(r|dk(r|jSdg|jz}tj||j} | |z |z||<tj||jtj} |dk7rtj| | |k\} ||k7rtj| | |k} |dk7rtj| | |z |zdk(} dg|jz} d| |<| j| } t j#| t j%|| |d|S)Nr"rdevicerrrS)rDr"rtrrrlexpandclonerOr?arangeronesbool logical_andviewrri_unsafe_masked_index) rrrOr`rarbrsrc_sizeindicesidxmask mask_shapes r,reres  S 1C{{3H%eS%=JE3EKK H5[D1H-$6HSM **X C zcXo$!)yy{'+fuyy{&:G ,,x 5C%KD(GCL ::hu||5:: FD z  se|4 h  sSy1 qy  e t';q'@Auyy{"JJsO 99Z D ::dD55c4!Le TTr+indexcT|j|}tj||||Sr5)rdr?select_scatter)rlr_rOrrXs r,select_backwardrEs+&&{3J    Ke DDr+offsetdim1dim2cV|j|}tj|||||Sr5)rdr?diagonal_scatter)rlr_rrrrXs r,diagonal_backwardrLs- &&{3J  ! !*k64 NNr+ input_dtypecF|j|k7r|j|}|Sr5)rr8)rlrXrs r,_cast_grad_to_input_dtyperUs&K']];/ r+outputc~||z}||tj||dzz }t|||jSNTr)r?rr contiguous)rlrrOrnew_grad_outputrXs r,_softmax_backward_datar]sK "F*O 6EIIS$-$J %[*k J U U WWr+c~|tj|tj||dzz }t|||Sr)r?rhrr)rlrrOrrXs r,_log_softmax_backward_datarosA uyy0599d4 J %[*k JJr+c||dzz||dz zz }ttjtj|}|d||j d}|d||z|j d} || zS)z/Utility function to implement im2col and col2imr#r"rrrrS)rr?rint64rU) input_dkernel_d dilation_d padding_dstride_drblocks_d arange_kwblocks_d_indices kernel_grids r, _im2col_col2im_indices_along_dimr{sQ&x!|)DDH EKKGI!Hh7AA!DAx*4jAKKBOK k ))r+ kernel_sizedilationpaddingrmc tjtdk(dtjtdk(dtjtdk(dtjtdk(ddd}|d|d|d d |d |jt}tj|d vxrt dddDfdt dt ddDtjt dDfd|dk(}|s|jd}|j\}} } } \} } \}}\}}\}}t| |||| |j}t| |||| |j}tj|||||f}|jdjd}|dddd||f}|jdddddd}|jd}|jd}|j|| |z|z||z}|s|jd}|S)Nr#cy)Nz"im2col(): only 2D kernel supportedr*r*r+r,rzim2col..r+cy)Nz$im2col(): only 2D dilation supportedr*r*r+r,rzim2col..rr+cy)Nz#im2col(): only 2D padding supportedr*r*r+r,rzim2col..rr+cy)Nz"im2col(): only 2D stride supportedr*r*r+r,rzim2col..rr+c~|rtd|Dntd|D}tj|dy)Nc3&K|] }|dkD ywrNr*.0rHs r, z1im2col..check_positive..(Q1q5(c3&K|] }|dk\ ywrr*rs r,rz1im2col..check_positive..;RqAF;Rrcy)Nz<{param_name} should be greater {'than' zero, but got {param}r*r*r+r,rz0im2col..check_positive..rr+rr?rparam param_namestrictconds r,check_positivezim2col..check_positives3,2s(%((;RE;R8R X r+rrrFrrmrr8c3&K|] }|dk7 ywrr*rds r,rzim2col..:!qAv:rrc dtS)NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tuplersr,rzim2col..--25\N<r+c3\K|]$\}}}}}d|d|zz||dz zz dz |zz&yw)r"r#Nr*)rrpaddilkersts r,rzim2col..sF "Cc3 S1s7]SC!G_ ,q 0R 77s*,rPc3&K|] }|dkD ywrr*)rcs r,rzim2col..s 'aAE 'rc Fdtdddddddd S) Nz!Given an input with spatial size rP, kernel_size= , dilation= , padding= , stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r)rr output_sizerrrmsr,rzim2col..sL3E%*4E3FG"m;xj9)9VH-]D Fr+r8rrSr"rT)r?rryrrrziprUrrrrpermuter#rsqueeze)rrrrrmrrt batched_input batch_dimrinput_hinput_wstride_hstride_w padding_h padding_w dilation_h dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indices padded_inputrnum_blocks_rownum_blocks_colrrs ```` @@r,im2colrs~ LL[!Q&(TU LLX!#%ST LLW"$QR LLV!#OP ; .8Z(8Yu568$ KKE u:D LL :3:uRSz:: < &) "#J;' K  LL '; '' F FAIM "/4{{,I{GWHh"Iy%J $Hh9:y(ELL::y(ELL 55Iy) LML+55b9CCBG !Q 24FF GF ^^Aq!Q1 -F',,Q/N',,Q/N ^^;)H4n~6UF " Mr+rc !"tjtdk(dtjtdk(dtjtdk(dtjtdk(dtjtdk(ddd}|d |d |d d |d|d|j"t"}tj|dvxrt d"ddD"fdddz}tj"d|zdk("fdt D cgc]"\} } } } } d| d| zz| | dz zz dz | zz$}} } } } } |d|dz!tj"d!k(!"fdtj!dkD!"fd|dk(}|s|j d}|j"\}}\}}\}}\}}\}}|j"d"d|zgz|z}|jdddddd}t||||||j}t|d}t||||||j}t Dcgc] \}}|d|zz}}}|j"d"dtzg|z}dd||f} tj|| |d}t!j"|| | | | f}|s|j%d}|Scc} } } } } wcc}}w)Nr#cy)Nzonly 2D output_size supportedr*r*r+r,rzcol2im..rr+cy)Nzonly 2D kernel supportedr*r*r+r,rzcol2im..rr+cy)Nzonly 2D dilation supportedr*r*r+r,rzcol2im..rr+cy)Nzonly 2D padding supportedr*r*r+r,rzcol2im..rr+cy)Nzonly 2D stride supportedr*r*r+r,rzcol2im..rr+Tc~|rtd|Dntd|D}tj|dy)Nc3&K|] }|dkD ywrr*rs r,rz1col2im..check_positive..rrc3&K|] }|dk\ ywrr*rs r,rz1col2im..check_positive..rrcy)Nz9{param_name} should be greater than zero, but got {param}r*r*r+r,rz0col2im..check_positive..rr+rrs r,rzcol2im..check_positives3,2s(%((;RE;R8R U r+rrrFrrmr)r#rc3&K|] }|dk7 ywrr*rs r,rzcol2im..rrrPc dtS)NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: rrsr,rzcol2im..rr+rr"cdddS)Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = rPz and kernel_size=r*)rrsr,rzcol2im..s#==B2YKH"m%r+rSc:ddddddddd S NzGiven output_size=rrrrz , expected input.size(-1) to be but got rS.r*Lrrrrrrmsr,rzcol2im..F$[M }M:Zy &B))*9U2YKqBr+c:ddddddddd Srr*rsr,rzcol2im..rr+rr8r accumulater)r?rryrrrrUrrrrrWrdprodr_unsafe_index_putrrr)#rrrrrrmrrtprod_kernel_sizerrrrrcolrout_hout_wrrrrrrrr indices_row indices_colorHoutput_padded_sizerrrrs# ````` @@r,col2imr#s LL[!Q&(OP LL[!Q&(JK LLX!#%IJ LLW"$GH LLV!#EF ; .8Z(7Ie468$; . KKE u:D LL :3:uRSz:: < #1~ A6 LL b $$) %'* (K'    "Cc3 S1s7]SC!G_ ,q 0R 77 C  AQA LL b Q B B  LL A B B AIM " KKELE5Hh"Iy%J $Hh MM58U1X1A%AB[PSVV WE MM!Q1a +E2 xY%,,K$K3K2 xY%,,K14K0IJ1!a!e)JJ __ q58tK0014FFF {K 0C  # #FC4 # HF UU6YJ YJ K LF " Mk VKs 6'M 9Mrcz||j||zzjtj|}|SNrN)type_asrrDr)rlrrnrJs r,native_dropout_backwardr'CsB  [1E9 :AA11+> B A Hr+ input_size dimensionr#ct|dk(rtj|dStjt||}tj |||j tj}|jd||j}|jd|dzj||dz}|j|}d|z|fz}tj|||djS)NrrrSr"r5Tr)ryr? squeeze_copyrDr"rrint32unfoldflattenmovedimrdrrr) rr(r)r#rbrOrrXrs r,unfold_backwardr0Rs  :!!!$**  Z) &> uDDLL'#|  AwwyA~yy|  1c40yyU+ EIIlCF FF <( Mr+)rc|j}|r|jtjk(sJt j |tj j\}}|j|}|jdk(r|}ntj||d}||z }tjtjtj||d}||z }|s|j|}|SrD)rrr?rFrDrErGrHr8rrIr>rrh) r9rOrBr:r=shiftedrKshifted_logsumexprs r, _log_softmaxrPs Aww%**$$$&+&>&> uDDLL'#|  AwwyA~ 1c40e) %))EIIg,>T"RS ( (F <( Mr+r padding_idxscale_grad_by_freqsparsec|jdk(sJd|jdkr4|jd|}|jdk(r|jd}|S||S)Nr#z'weight' must be 2-Dr"r)rOrt index_selectr)rrrQrRrSrs r, embeddingrVsd ::<1 444 ||q!!!W- <<1 ++a.C gr+ num_weightscttj|tjj\}}|j |}t |t j}|rZ|j|f}t j|}tj||g|d}||} || jdz }t||k(|j} |j| d} |j|f|j |jdz} tj| |g| dj |S)NrETrrSr)rDrErGrHr8rr?longrdrRrrrUrWrt masked_fillr) rlrrWrQrRr:r=countsrgrad_weights_scalerr grad_weights r,embedding_dense_backwardr^s,',&>&>)N)N)V)V'#|..!23K%guzz:G""K>2w''' 4D'Q#G_!$6$@$@$DD W 3[5E5E FD  " "4 +D'' **7<<>::K  ! !+y$4 ! P S S r+c"d}|D]}||z} |Sr\r*)r9rJis r,rrs$ A  Q Hr+tensors num_chunkscZg}|D]}|j}|||zdz |z|z}|||k7r;dgdz|j|z dz zd|||z gz}tj||d}|d|t j |dgz}|j |j||S)Nr"rr#rS)r#rtrconstant_pad_ndr?Sizeappendr) rarOrbpadded_tensorstensor tensor_size pad_along_dimr view_sizes r, _pad_chunkrls N 9kkm $S)J6:zIJV K, ,#'V[[3.23 C 007C))&#q9F% J3C(DD fnnY78 9 r+cR|dj}|D]}|j|k7syy)NrFTrt)rartrhs r,have_same_ndimsros2 1:??D ;;$  r+c|djd|}|D]-}tj|jd||k(d/y)Nrcy)NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr*r*r+r,rz+leading_dimension_matches..!rr+)r#r?r)rarOleading_dim_sizesrhs r,leading_dimension_matchesrssN )$3/  KKM$3 #4 4 ]  r+ctj|dk\dtjt|dkDd|dj}|dj}|D]r}tj|j dkDdtj|j|k(dtj|j|k(dtt |r(tj|dj|}nEtj|dk\d|D]&}tj||jkd (t|||S) Nr"cy)Nz&_chunk_cat expects positive num_chunksr*r*r+r,rz._preprocess_chunk_cat_inputs..*rr+rcy)Nz0_chunk_cat expects a non-empty input tensor listr*r*r+r,rz._preprocess_chunk_cat_inputs..,rr+cy)Nz#_chunk_cat expects non-empty tensorr*r*r+r,rz._preprocess_chunk_cat_inputs..1rr+cy)Nz8_chunk_cat expects all input tensors with the same dtyper*r*r+r,rz._preprocess_chunk_cat_inputs..4rr+cy)Nz8_chunk_cat expects all inputs tensors on the same devicer*r*r+r,rz._preprocess_chunk_cat_inputs..8rr+cy)NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr*r*r+r,rz._preprocess_chunk_cat_inputs..?rr+cy)Nz3_chunk_cat expects dim < ndim for all input tensorsr*r*r+r,rz._preprocess_chunk_cat_inputs..Drr+) r?rryrrrrorDr"rOrtrs)rarOrbexpected_dtypeexpected_devicerhs r,_preprocess_chunk_cat_inputsr~%s-  LLq"RS LL G qTQZ%%Naj''O   V\\^a')VW LLN * N   MM_ , N  w$$WQZ^^%5s; 1H a  F LLfkk!M   gs+ Jr+rct|||}t|||}|tj||dzStj||dz||S)Nr")r)r~rlr?r%)rarOrbrrgs r, _chunk_catrJsS 'wZ @Cj9N {yyq11 .#'s3 r+ split_sizesctj|||}|.|Dcgc]"}|jtj$c}St ||D])\}}t ||jt||d+ycc}w)NrrNTr ) rsplit_with_sizesrr?rSrrrr)r{rrOrsplitssrsplits r,split_with_sizes_copyr[s " "4# " >F {HNO1e&=&=>OO f- NMFE fekk 2 UF M N Ps'B split_size.cDtjj|||Sr5)rrr)rrrOs r, unsafe_splitrns ::  UJ 44r+cDtjj|||Sr5)rrr)rrrOs r,unsafe_split_with_sizesrss  ( ( S AAr+c|j}||}|dk(r|dk(sJ|jfS||zdz |z}ddlm}||}t |Dcgc]}|}}|||z|z z |d<t j |||Scc}w)Nrr") guard_intrS)rdetachrjrrTr?r) r{rrOr_rchunksrr`rs r,rrzs**K3HQ1}} #a'J 6F@ v F',V}5!:5K5 J$7($BCKO ;;t[# ..6s Btensor_indices_or_sectionsc|jjdk(sJ|jtjk(sJ|j tj dk(xsdk(fddk(r4|j}t|tsJ|j||S|Dcgc]}|j}}|j||Scc}w)Ncpur"rcddS)Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr*) split_dimsr,rzAtensor_split_tensor_indices_or_sections_py_impl..s< is_complexrr?mm)r{rrrcrmrs r,addmmrs]  ! ! #DOO,=4yE  %((4& &C qy   r+use_geluct|||||}|r8|jrtj|dStj|Stj |S)Nr)r)ris_cudargelurelu)r{rrrcrmrrs r,_addmm_activationrsO dD$ .C <<99Sf95 599S> ! 99S>r+vecc|js&|jst|}t|}|tj||z}|dk(r|S|j dk(r||zS|||zzSrs)r>rrr?mvr)r{rrrcrmrs r,addmvrss  ! ! #DOO,=4yE  %((4% %C qy  yy{ad{  r+rrstdgammaNCHxWgroup output_maskc Rtj|||dtj||dtj|dtj|j zzk(fdtjj fk(fdtjduxsj k(fdt\} } tj| dk(fdtj||jjdg } |jjdg } d}d}d}| dr*d | zz }tj| jdj| jd}tj| jdj| jd}tj|jd jd | }n| j| jd}| j| jd}tj|jd tjd | f|j }|z|z |z|z|z|z}| z||z|zz }|jd }t|d}t|d}tj|j| |tj|j| |z|z}|j|j j!|j"}| d rk| j| | j| jd zz |jd zjdg j}| dr| jdg }|||fS)NF)allow_cpu_scalar_tensorscdzzdS)NzExpect input to have z elementsr*)rrrsr,rz,native_group_norm_backward..s'A }I>r+c.dddjS)NzExpect mean to have shape (, z , but got r)rrrsr,rz,native_group_norm_backward..s-aS5'DJJ<Pr+c<ddjSdS)NzExpect gamma to have z elements but got rS)r)rrsr,rz,native_group_norm_backward..s-'s*. s,QC/[\a[bcr+r#rrgrSr"rr8)rDcheck_same_devicecheck_same_shaper?rrrdivmodrrrrUrrrrWr8r)rlrrrrrrrrrcpg_remdsdbd_inputd_gammad_biasrds_valdb_valc1c2c3s ` ````` r,native_group_norm_backwardrs UD$ 5+N 4F LL Q$> LL q%j P LL  +!+i q% IC LL  c ; & + +Aq# 6 : :s : CB   !Q $ ( (aS ( 1B $G $G#F1~ 39   YYr5??1#56>>q%MQQRSTFYYr5??1#56>>q%MQQRSTFr" a,B ZZ5#.2215FZZ5#.2215Fr" Auc?4;;?Btmf$ ,t 3d :Q >S4Z&4-!+ + \\"  r1 % r1 % IIk))!UC=r Bii aS92> ?   //%++.11%++>1~E3'"''!UC*@4>>RTCU*UU..$%SaSS\ WQZ 1~QC Wf %%r+out2c  t|||||||||| } | | | f}t| D]2\}}| t|||jt |||d4|Sr )r enumeraterrr)rlrrrrrrrrrr;r<rrrXr`rJs r,native_group_norm_backward_outr?sz"(UD$q!S%Fd#J&!Q1 = jmQWW 5 Q 1 4 PQ r+c,||j|S|Sr5r8)r9rs r, _maybe_castr\s}ttE{ Hr+grad_outnormalized_shapebiasc@"|j}|j} tj|j""fd||||fD\} } } } | J| t |z }||d}|d|}g}g}t | D]*}||k\r|j||j|,t|}t|}ddl m }||dk(s ||dk(rN|dr|j|nd|dr|j||dnd|dr|j||dfSdfSt|| j}t|| j}| J| |z |z}| | | z}n| }||z}tj||d}tj||}tj||d}tj||}||z |z }d}d} d}!|dr||z |z}|dr0| .t |dkDrtj| |z|d} n| |z} |dr8| 6t |dkDrtj| |d}!n| j!}!t#||jt#| |jt#|!|jfS)Nc3hK|])}|!|jtjn|+ywr%)r8r?rSrr9r:s r,rz-native_layer_norm_backward..qs>9  =  e.E.EF  9s/2r)rir"r#TF)rrOrDget_computation_dtyperryrTrfrrjrirdrWr?rrrr)#rrrrrrrr input_shape input_ndim grad_out_cast input_cast weight_cast bias_castaxis inner_dims outer_dimsinner_dim_indicesouter_dim_indicesr`rMrix_hat grad_x_hatabrrrrKrd_weightrr:s# @r,native_layer_norm_backwardrcs++KJ33EKK@9E640 95M:{I  $$ $ ,- -DTU#JUd#J#%#% : ( 9  $ $Q '  $ $Q ' ( ZA ZAKQ!V$(=a1f(E,7NEOOK (3>q>EOOK. /t3>q>EOOK. /  HL  T:>>#3 4D T:>>#3 4D  !! ! $ $ &E"[0 " QA */6A :u %B 2($ /B 5" B EBJE $G!%H#F1~!8u$1~+1  !A %yy!68I5QH$u,H1~)/  !A %YY}.?GF"((*F GU[[)Hekk*FEKK( r+c t||||||||} || | f} t| D]2\} }| t| | |jt || | d4| Sr )rrrrr)rrrrrrrrr;r<rrrXr`rJs r,native_layer_norm_backward_outrsw(%)4vt[Fd#J&!Q1 = jmQWW 5 Q 1 4 PQ r+c:|j}|j}tj|j}|j |t j} |j |t j} |!|j |t jnd} | J|t|z } || d} |d| }g}g}t|D]*}|| k\r|j||j|,t| }t|}ddl m }||dk(s ||dk(r4|dr|j|nd|dr|j|| dfSdfSt|| j}| | | z}n| }d}d}| |z}|dr)t j ||z|d}|||z |zz |z}|dr0| .| |z}t|dkDrt j ||d}n|}t#||jt#||jfS)NrNrguard_or_falser"TrF)rrOrDrrr8r?rSryrTrfrrjrrdrWrr)rrrrrrrrr:rrrrrrrrr`rrrrrrrsum_vald_weight_full_shapes r,_fused_rms_norm_backwardrs[++KJ33EKK@KK)@)@ M+5;R;RSJ    #53J3J K   $$ $ ,- -DTU#JUd#J#%#% : ( 9  $ $Q '  $ $Q ' ( ZA ZADa1fQ!7,7NEOOK (3>q>EOOK. /  GK  T:>>#3 4D"[0 " $G!%H  E1~))EJ.4EtTg 55=1~+1+e3  !A %yy#):EH+H GU[[)Hekk* r+ running_mean running_varmomentum functionalc 2dgttd|jz} tj|j } |} |} |r"tj|j } |j | } tj| | dd\}}tj||z}||z |z}tj|| }tj|| }|!||zd|z |zz} |s|j| |2|j|jdz }tj|| }|||dz z z}||zd|z |zz} |s|j| n||J|j | d}|} |j | d}|} |}dtj||zz }|jj dk7r|}|}n"|j#d }|j#d }t%||jdz }t%||jdz }||z |z}|2|j'}t%||jdz }||z}|2|j'}t%||jdz }||z}|jj dk(r8|j |j }|j |j }|j |j ||| | fS) Nrr#rT)rO correctionrr")rrrr)rlrTrOrDrrr8r?var_meanrsqrtrcopy_rrrWrrrdrWr.)rrrrrrrr1rreduction_dimsr:new_running_meannew_running_var input_acc biased_varrrr save_mean save_rstdn squeezed_var unbiased_varinvstds r,native_batch_norm_helperrsS4a 566N33EKK@#!O!77 DHH#4H5  >> >a  D{{:+,$,$&MM$7 MM$7  #')3q8||6SS ""#34  "  A.A!==^DL'1A;7L&5X8TTO!!/2'K,CCC#->TJ '!nn+<4nH %ejjs!234 <<   %$II-I-I uyy{Q7"6599;?;$,&( !"6599;?;& ||~ uyy{Q7$ ||E!LLu{{L3 LLu{{L3   $  r+r save_invstdc >t||||||||d \}} } } } || | fSNFr rrrrrrrr1rrrrVs r,native_batch_normr os=*B vt\;(CQV*&FIy!Q 9i ''r+c ||tj||||||S| td| td|rtj||||||||Stj|||||||S)Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r_native_batch_norm_legitrk$_native_batch_norm_legit_no_training)rrrrrrrr1s r,native_batch_norm_decompositionrs 3,, 648S   <   <  ,, 64{HhPS  88 64{Hc  r+cl|j|}||zdz |z}|dk(rZ|dk(rU|Dcgc]}|}}|||z|z z ||dz <tjjjj |||Stjjj j|||Scc}wNr"r)r#r?opsrrrrr)rhrrOrrrVrs r,unsafe_chunk_py_implrs{{3HV#a'F2JQ8q=+12az2 2", V0Ch0N"O FQJyy~~55==fkSVWW 99>> & & - -fj# FF3s B1c Ntjj|||||d||Sr )rrr)rrrrrrr1s r,rrs5  ( ( 0 0      r+c >t||||||||d \}} } } } || | fSr r r s r,rrs=*B vt\;(CQV*&FIy!Q 9i ''r+c >t|||dd|||d \}}}} } |||fSr r ) rrrrrr1rrrrVs r,!_native_batch_norm_legit_no_statsrs<*B vtT48S%*&FIy!Q 9i ''r+c ft||||||||d \}} } } } | Jd| Jd|| | | | fS)NT#new_running_mean should not be None"new_running_var should not be Noner ) rrrrrrrr1rrrrrs r,#_native_batch_norm_legit_functionalrsl" ! vt\;(CQU    'N)NN '  &L(LL & 9i)9? JJr+c Ttjj|||||d|}d}|tjjjk(r tjj ||}tj |tj|j|jS)a Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`, which support a variety of backends including cudnn. We create this tensor here to get the correct shape in the traced graph if we detect that will call the cudnn kernel, and rely on DCE to avoid materializing this tensor. Tr)rlayoutr) r?_C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8rr) rrrrrr1rbackend reserve_sizes r,_get_batch_norm_reserve_tensorr( s hh11 vt\;cGL%((,,222xxHH 8  ;;EKK U\\ r+c dt|||||d||d \}}} } } t||||||d} ||| | fS)NTFrrr( rrrrrrr1rrrrVreserves r,_batch_norm_with_updater.(sa*B      *&FIy!Q- vt\;dG 9i 00r+c t|||||d||d \}}} } } t||||||d} | Jd| Jd||| | | | fS)NTr*rrr+) rrrrrrr1rrrnew_rmnew_rvr-s r,"_batch_norm_with_update_functionalr2Cs ! vt\;hT  - vt\;dG  DDD   CCC  Iy'66 BBr+c dt|||||d||d \}}} } } t||||||d} ||| | fS)NFr*r+r,s r,_batch_norm_no_updater4^sa*B      *&FIy!Q- vt\;eG 9i 00r+c|Jtj||kjtj}|j ||zd|z z}||fS)Nrrg)r?r?r8r%r&)rrH generatorrrAs r,_fused_dropout_decompositionr7ys[    OOE "Q & * * * =D ,,u  %q 1C ;r+)rrr pin_memory non_blockingrOrr8r9rOc|r|tjk(sJd|rJdt|tjtt t tfsJ|0|.|,t|tjr|jS|Sd}t|tjr|}ntj|}|c||jk7rT|1|jdk(r"tjj||}d}tjj|||}|$|s"tjj||}d}|tj||S|S)NTODOFrTrN)r?stridedr7rrrrcomplexr scalar_tensorrr_primsconvert_element_type device_put) r9rrrr8r9rOdtype_convertedx_tensors r,_to_copyrDs25==08&8 0!6!> a%,,UD'B CC C ~%-M,A a &779 HO!U\\"&&q) f7  !5||885IH"O<<**8V\J <<44XuE {{8=AA Or+c,tj|Sr5)ralias)r9s r,nop_decompositionrGs ::a=r+out3exponential_average_factorepsilonc tj||||||||\}} } |r%|| | |jdtjfS||jd|jd|jdtjfS)Nrr)rr rdr?r%) rrrrrrrIrJrrrs r,cudnn_batch_normrLs$$  " GAq!1aU[[ABB  EKK0  r+ct|D]>\}}|dk(s ||jkr|j||k(r.|j|}@|Sr\)rrtrrU)r9broadcast_maskrrs r,_broadcast_batch_norm_backwardrOsO/" d 19dQVVm 0E D!A" Hr+r-c *t|||||||||| Sr5)native_batch_norm_backward) rrrrrrrr7r1rr-s r,batch_norm_backwardrRs/ &     r+c &|j} | |j} n| } tj|j&&fd|||||||fD\} } }}}}}|j}|j }|dk\sJdd}t t |||z }|}|}|r||"J||J|}tj||z}dg|z}||||<g}t|D]}||k7s |j|t||}d|z }tj| |}tj| | |z z|}t||z|}ttj||z||z|} |t||dz}!nt||z|}!|r| |z | z}"| |"z |z |!z}#n| |!z}#| dr||z}$nd}$| dr|}%nd}%|#j| t|$| t|%| fS)Nc3HK|]}||jn|ywr5rrs r,rz-native_batch_norm_backward.. s,  $%= a7 s"r#z$rank of the input must be at least 2r"rg)rrDrrrOrrlr?rrTrfrOrrr8r)'rrrrrrrr7r1rr weight_dtyperrrrunning_mean_castrunning_var_castsave_mean_castsave_invstd_castr input_rankr num_featuresrrrNreduction_axesr`rgrad_output_sumdot_p grad_mean proj_scale grad_scaleprojrXr] grad_biasr:s' @r,rQrQsx++K || " 33EKK@          ++KJ ?BBB? D[)*[->>L D F F$666!,1A1MMM -34!"j 0N&t,N4 "N : % 9  ! !! $% *$ ?D  Dii ~>O IImzD'89> JE./E~VI/ %$,0J 3FNKcQ 3 [ .  T!Z/$t+y8JF "Z/ 1~fn  1~#    k"K.I|, r+c  t|||||||||| } | | | f}t| D]2\}}| t|||jt |||d4|Sr )rQrrrr)rrrrrrrr7r1rr;r<rrrXr`rJs r,native_batch_norm_backward_outreg s"(    Fd#J&!Q1 = jmQWW 5 Q 1 4 PQ r+save_varc Btj|||||||d|gd SNT)TTTrrQ)rrlrrrrrfrJs r,miopen_batch_norm_backwardrj s5  * *    r+ reserveSpacec Btj|||||||d|gd Srhri) rrlrrrrrfrJrks r,cudnn_batch_norm_backwardrm s5  * *    r+c|j|jttjdvfd|jddD]}tj|dk7fd d|dzdk(rxd|dzdk(rjt dt dd|D}t dt dd||D}tjjj|||Sd d fd }|d|d\}}}} |d|d\} } } } |d t|d | f}| s| stj|dSd}||||| d\}}||| | | d\}} d}tt|jdt|jdD]!\}}| |d |dd|f}||d |dd|fz}#||| zz S)NrcdS)Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r*rnsr,rz%adaptive_avg_pool2d.. sKD6Rr+rPrc"dtdS)Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape rrrsr,rz%adaptive_avg_pool2d.. s99>uaIr+rSc3,K|] \}}||zywr5r*)rr`r!s r,rz&adaptive_avg_pool2d.. sG$!QqAvGc3:K|]\}}}||dz |zz ywr"Nr*)rr`r!rs r,rz&adaptive_avg_pool2d.. s' '1aAQ! O sc8tj||z|dS)Ntrunc rounding_moder?divrrrs r, start_indexz(adaptive_avg_pool2d..start_index syyQ99r+cJtj|dz|z|zdz |dS)Nr"rvrwryr{s r, end_indexz&adaptive_avg_pool2d..end_index s&yy!a%1q1,awGGr+ctj| tj}|||}||zdz}||z}|dk(xs||zdk( }|r|dz }n |dk(r|dz}tj| tj}|jd|z}|rUtj|dz |j |j } tj|| } |||} | |z } n|} || ||fS)Nrr"rrSr)r?rrrUr>rrminimum)in_sizeout_sizeorangei0 maxlength in_size_modadaptive range_maxrmaxvali1lengthrr~r|s r, compute_idxz(adaptive_avg_pool2d..compute_idx s hvU[[I 7 3x'!+ ( #q(GH{,Ba,GH  NI A  NILL6M ll2* ((! 399SZZF--V,C68W5B"WFFFIx//r+.r8)rrSrct|tr||fS|dksJ||jdk\}|dk(r t|d}t j ||d}t|| }||fS)NrrSrPr8r)r7rrUrWr?rZ)valsrrrrOrs r, maybe_maskz'adaptive_avg_pool2d..maybe_mask sw fg &< 7N7 0 0 44Dby(q1$$T45D&vt4F< r+)rrOr)rrryr?rrrnnr avg_pool2drWrrrT)rrrrmkernelridxhlength_h range_max_h adaptive_hidxwlength_w range_max_w adaptive_wrrretr`jrr~rtrr|s @@@@@r,adaptive_avg_pool2dr s. \\F KKE u:D LL R[[   F I   Ry;r?"a'E"I B,G1,LG#eBCj+*FGG +.uRSz;+O  xx""--eVVDD:H0@/:%)[QS_.U+D(K.9%)[QS_.U+D(K 'a0$6 7D jzz$H--    h jbND(  h jbND( Cdjjn-uTZZ^/DE+1 ;sAq!|$CS!Q\**C + (X% &&r+ctjd|dttj|j d| }ttj|}dg|j z}|j d| |d| |tj||jj||zzjd}|jt|j d| t|z}tj|jd|g|jddj|j S)N max_unpooling d_forward_outr"rrSFr)rDalert_not_deterministicroperatorrrrtrrrrrrdrlr) r{rrrOnchwindices_nc_shape indices_flatrs r, _max_unpoolndr+ s !!M#m"DE  djj3$/ 0B  k *BsTYY"jj3$/Usd$++b+5::;KLrQQ gbk^^DEcT!23d;6GG HF  ! !r\NDLL,< "  d6<<r+ctjjtjk(fdtjt dk(fdtjj dvfdtjj j k(fdtdj D].tjjdkDfd 0tdS) Nc"djS)Nz2elements in indices should be type int64 but got: r)rsr,rzmax_unpool2d..L sDW]]OTr+r#c"dtdS)NzMThere should be exactly two elements (height, width) in output_size, but got elements.ryrsr,rzmax_unpool2d..P ;'(  4r+rc$djdS)NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with dimensions.rnrsr,rzmax_unpool2d..X s%%)YYK| =r+c<djdjSNzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r)rr{sr,rzmax_unpool2d.._ s,PQUQ[Q[P\]229-- Br+r"rc*djddS)NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension being empty.r)r`r{sr,rzmax_unpool2d..h s%::,&6qcHr+) r?rrrryrtrrTr#r)r{rrr`s```@r, max_unpool2drC s LL $T LL KA  LL V  LL gmm# 1dii   IIaL1     w Q 77r+c tjjtjk(dtjjdvfdtjt dk(fdtjt dk(fdtjt dk(fdtjj j k(fdtd jD].tjjd kDfd 0tjd d kDxrd d kDxrd d kDfd tdS)Ncy)Nz(elements in indices should be type int64r*r*r+r,rzmax_unpool3d..| rr+r8rc$djdS)NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with rrnrsr,rzmax_unpool3d.. s^_d_i_i^jjvwr+rc"dtdS)NzVThere should be exactly three elements (depth, height, width) in output_size, but got rrrsr,rzmax_unpool3d.. rr+c"dtdS)NzRThere should be exactly three elements (depth, height, width) in stride, but got: rrrmsr,rzmax_unpool3d.. sdehioepdqq{|r+c"dtdS)NzSThere should be exactly three elements (depth, height, width) in padding, but got: rr)rsr,rzmax_unpool3d.. sefijqfress}~r+c<djdjSrr)rrsr,rzmax_unpool3d.. s,PQVQ\Q\P]^229-- Br+r"rc*djddS)NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got rrr)r`rsr,rzmax_unpool3d.. s% ;;-'7s-Ir+r#cdS)Nz5strides should be greater than zero, but got stride: r*rsr,rzmax_unpool3d.. sGxPr+) r?rrrrtryrrTr#r)rrrrmrr`s`````@r, max_unpool3drr sD LL $&X LL fw LL KA  LL F q| LL G ~ LL w}}$ 1ejj !  JJqMA     LLq A 9&)a-9F1IMP a 88r+)rmrhc$t||||d|S)NTinplacerm _index_addr9rOrrhrms r, index_add_r s aeVT GGr+c$t||||d|S)NFrrrs r, index_addr s aeVU% HHr+rc8 tj|jtjjdkfdjdk(rj dnd |jdkDr|j nd tj k( fddk7rftj |j tj tk(xstjt  fd|z}|jdk(}|r|jdn|}dzfz}|rtjntj} | |||d} |r|S|r| jdS| j!S) Nr"c$djdSNz(Index should have dimension 1 or 0 (got r3rnrsr,rz_index_add.. :5::,aHr+rcdddS)NzNumber of indices (z') should be equal to tensor.size(dim) (z ), for dim=r*)rO index_sizerisr,rz_index_add.. s!%j\1XYdXeeqmplrsr+c(dtddS)Nzalpha argument of type z cannot be safely cast to type !)r)rm python_typesr,rz_index_add.. s-d5k]:YZeYffghr+r5Tr)rDcanonicalize_dimsrtr?rr# dtype_to_typerris_weakly_lesser_typerrUr index_put_ index_putrr)r9rOrrhrrmzero_dimrKrrrrrris `` ` @@@r,rr sM  ! !!&&# .C LL aH#(**/AqJ&,kkAo&++c"1K LLz!s z))!''2  4  E**4; D h %vv{H#QB C-5( "C#*I BV 5C!)s{{1~?s~~/??r+c  tjt|dkDdt|}|dj}|dd}t d|D}|r||f}n||f}||z}|dj ||}dt|z} t |D]j} || } tj| | d|| jdz fz|} |rtj|| d| }Rtj|| d| }l|S)Nrcy)Nz#received an empty list of sequencesr*r*r+r,rzpad_sequence.. rr+r"c3>K|]}|jdywrr#)rr9s r,rzpad_sequence.. s/!&&)/s)rrrOr) r?rryr#rr=rTrrdr) sequences batch_first padding_valuesequences_sizemax_size trailing_dimsmax_lenout_dimsr dim_paddingsr`currseqrows r, pad_sequencer s# LLY!#%RS^N|  "HQRLM/Y//G"G,^,-'H A,  - 8CC ..L > "@A,"" \Q',,q/(A$BBM  %%c3AQ%?C%%c3AQ%?C@ Jr+c"t||||dS)NTr _index_copyr9rOrrhs r, index_copy_r s q#ufd ;;r+c"t||||dS)NFrrrs r, index_copyr s q#ufe <. rr+rr5) rDrrtr?rrUrrrrr) r9rOrrhrrrKrrrs ` r,rr s  ! !!&&# .C LL aH vv{H#QB"'**/EOOA uE C-5( "C#*I BV $C!)s{{1~?s~~/??r+c*tj|jd|}tjtj| }|j s |j r|jd}n|}|tj|z |fS)Nr*r)r?rrdrhrris_xpur<)r{rrjrs r,log_sigmoid_forwardr( sn --r*D 1C 599T?"#A ||t{{% Q  ''r+lowhighr6ctj|jt|t||j|j |S)N)rrrrr6)prims_uniform_helperrrrr)r9rrr6s r,uniformr5 s=    cN t_ggxx  r+c<|jt||||Sr5)rr)r{rrr6s r,uniform_rG s ::gdCy9 ::r+c"t|dz }|>tj|dudtjt||k(d|S|tj|dudtjt||k(dg}t|D]Z\}}t ||k(r$|j ||dzt |z8|j t ||dz|z\|Stjddy)Nr#cyNz9Must specify exactly one of output_size and scale_factorsr*r*r+r,rz.upsample_compute_output_size..R rr+cyNr*r*r+r,rz.upsample_compute_output_size..T rr+cyrr*r*r+r,rz.upsample_compute_output_size..Z rr+cyrr*r*r+r,rz.upsample_compute_output_size..\ rr+Fcyrr*r*r+r,rz.upsample_compute_output_size..e rr+)ryr?rrrrfr)r(r scale_factorsspatial_dimensionsr`rs r,upsample_compute_output_sizerM s Z1, T ! O   S%);;ZH  4  O   S'+==zJ m, CDAq1v{"":a!e#4s1v#=>""7:a!e+>r+cg}t|}|rdnd}t|D]}||}|j| |z} || | | ||zz n| |z } tj|tj |j } | |z| zjtj} t|dz |z D]} | jd} |j| |S)Nrrrr"rS) ryrTrr?rrrr8rrUrf)rrrrrnum_spatial_dimsrrrisizernoutput_indices input_indicesrVs r,!_compute_upsample_nearest_indicesr" sG;'SsF # $&A --12/5ay/D*+%RW-e5==V(61U:>>u{{K '!+a/0 8A)33B7M 8}%+&, Nr+)preserve_memory_formatrrct|||gSr5rrrrs r,upsample_nearest1dr' s UK& ::r+c"t|||gdSrr%r&s r,upsample_nearest_exact1dr) s UK& FFr+scales_hscales_wc t||||gSr5r%rrr*r+s r,upsample_nearest2dr. s UK(H1E FFr+c$t||||gdSrr%r-s r,_upsample_nearest_exact2dr0 s UK(H1ET RRr+scales_dc"t|||||gSr5r%rrr1r*r+s r,upsample_nearest3dr4 s UK(Hh1O PPr+c&t|||||gdSrr%r3s r,_upsample_nearest_exact3dr6 s!  {Xx:$ r+rcDt||||}ddg|z}tj||}|jdk(rdt j |}|j d}|jjdk(r|dkrtj}|j|}|S)Nrr8r"cudarN) r"r _unsafe_indexrtrDrrrrr?rSr) rrrrspatial_indicesrrrO n_channelss r,rr s8 {F%OTl_,G   w /F {{a33E: [[^ <<   &:>!33M"""? Mr+c |r|rd}n |rd}n|rd}nd}t||zdk(sJt|tdt||Dcgc]}t||||zc}Scc}w)Nrr8rr#r)ryrTr)params has_biaseshas_projections group_sizer`s r, gather_paramsrA* s{o    v; #q (5#f+5 (38CK3T ./fQZ()  s A(c~|r'|d|z|d|z}}|d|zdz|d|zdz}}n||||}}d\}}||||fS)Nr#r"NNr*)r=hiddensr` bidirectional cur_params cur_hidden bidir_params bidir_hiddens r,params_hiddensrJ: sk!'AAJ %+AEAI%6A 8Jl !'GAJJ %/" l z< ==r+c||kDsJ|j|jd|||z |jdd|Srs)rfr$)rGlast_batch_size batch_sizerDs r,update_hidden_for_packedrNE sE Z '' ' NN:$$Q Oj4PQR   Q: ..r+c t||k(r|S||ksJtj||jd|||z fSrs)r?concatr$)rGrLrM inp_hiddens r, update_hidden_for_packed_reverserRK sP*$ Z '' ' <<    a*2N O  r+c X|d}|d}|r|dnd} |r|dnd} g} g} |r|dn|d} |jdd| }tj|t|}|r|ddd}|D]V}|jd}| |k(rn|rt || ||}nt || || }||||| || }|} | j|X|r| jn!| j|| jtj| d}|stj| dn|}||fS)Nrr"r#rrS) r$r?rrlrrRrNrfreverser%)inphiddenr=r> hidden_fn batch_sizesrT ih_weight hh_weightih_biashh_bias step_outputrDrLrG split_inpr`r hidden_outs r,one_layer_rnn_datar`Y sRq Iq I%fQi4G%fQi4GK"$G)0k"ok!nOq!_5J Ck!23IddO ' IIaL a   9OQJ2OQJsJ 7IwW :&#'&z" ))K #C.57A&:J  ?r+cfd}|S)NcDtj||||zSr5rlinearr`rGrYr[rZr\ nonlinearitys r,rKzrnn_cell..inner s AHHZGDqHIIr+r*rfrKs` r,rnn_cellrh sJ Lr+cfd}|S)Ncrtj|||}tj||||zSr5rcres r,rKzrnn_cell_data..inner s2 HHQ 7 +AHHZGDqHIIr+r*rgs` r, rnn_cell_datark sJ Lr+c x|d}|d}|r|dnd}|r|dnd} tj|||} |r| jdn| } |jd} g} | D] } || | |||| } | j | "|r| j t j| d}|| jdfS)Nrr"r#r) rrdfliprUrfrTr?r%r)rUrVr=r>rWrTrYrZr[r\precomputed_inputrGr]r`rs r, one_layer_rnnro sq Iq I%fQi4G%fQi4Gi95<)..q1BS!!!$JK 'q*i)WU :&' ))K #C  ""1% %%r+c|d}|d}|r |d}|d}nFtj|j}tj|j}|djd} |djd} g} d} | jd} d}d}d}d}|j }| j } | j } tj j jj|||||| | || | | |||||}|d|d|d}}}||jd|jdffS)Nrr"r#rF) r?rr#rUrrrmkldnn_rnn_layerrr)rUrVr=r>rTw0w1w2w3hxcxrXmode hidden_size num_layersrErr7outputsrZhycys r,mkldnn_one_layer_lstmr~ sS B B AY AY [[ # [[ #   Q B   Q BK D''!*KJMK E .. C B Biinn--55         !G$ GAJ 2rA rzz!}bjjm, ,,r+c |r|jddn|}g} t|D]} t||| |\} } }}|r || dz kr|nd}| || | |\}}| j||r!| ||||d\}}| j||r*t j |g|j dz }n|}|dk7s|s| |dz kst j||d}|r|jddn|}|| fS)Nrr"rT)rT)r7) transposerTrJrfr?r%rOr:)rrVr=r>rzr:r7rErlayer_fn final_hiddensr`rFrGrHrIfwd_inp fwd_hiddenbwd_inp bwd_hiddens r, _rnn_helperr s"&1EOOAq !eEM : >=K FA}> : J l$ QU(:'&uj*jQZ( "*|\:t# GZ   , IIw0'++-!2CDEE a,&1EOOAq !eE - r+c |jd} t||d}t|| |||||||ttt t j \} } | t j| dfSNrFrW) unbindrArrrorhr?rstack rrvr=r>rzr:r7rErrVrrs r,rnn_tanh_inputr tYYq\F 6:u 5F$   %**)=> C  M1- --r+c |jd} t||d}t|| |||||||ttt t j \} } | t j| dfSr) rrArrrorhr?rrrs r,rnn_relu_inputr& rr+c |jd} t||d}t|| ||||||dtt|t t j \} } | t j| dfSNrFrXrW) rrArrr`rkr?rr datarXrvr=r>rzr:r7rErVrrs r, rnn_relu_datarE {YYq\F 6:u 5F$    ##EJJ/ C  M1- --r+c |jd} t||d}t|| ||||||dtt|t t j \} } | t j| dfSr) rrArrr`rkr?rrrs r, rnn_tanh_datarh rr+cltj||||z}|jd|}|dj} |dj} |dj } |dj} | |z| | zz} | | j z}||ntj||d}|| fS)Nr8rr"r#rrrdchunkrr)rUrvrwrZr\ hr_weight chunk_dimgates chunked_gatesin_gate forget_gate cell_gateout_gater}r|s r, lstm_cellr s HHRG ,s 2EKK9-MA&&(G"**,Ka %%'IQ'')H r Wy0 1B BGGI B ahhr9d&CB r6Mr+c (|d}|d}|r|dnd}|r|dnd}t|dk(r|dnt|dk(r|dnd} |djd} |djd} tj|||} |r| j dn| } g} | D](}t || | ||| d\} } | j | *|r| jtj| d}|| jd| jdffS)Nrr"r#rrr8r) ryrUrrdrmrrfrTr?r%r)rUrVr=r>rTrYrZr[r\rrvrwrnr]rs r,one_layer_lstmr s*q Iq I%fQi4G%fQi4G[A%q F q8H6!9d   Q B   Q Bi95<)..q1BSK 3B 7IQRSB2 ))K #C A 1 . ..r+c |d}|d}|r|dnd}|r|dnd} t|dk(r|dnt|dk(r|dnd} g} g} |r|dn|d} tj|t|}|r|ddd}|d}|d}|j dd| |j dd| }}|D]}|j d}t j|||}|| kra| j|j d|| |z |j d|| |z f|j dd||j dd|}}|| kDrXtj||j d| || z fd}tj||j d| || z fd}t||||| | d\}}|} | j||r| j||f}nZ| j||f| jt| \}}tj|dtj|df}tj| d}||fS) Nrr"r#rrr8rSr)ryr?rrlr$rrrdrfrPrrTrr%)rUrVr=r>rXrTrYrZr[r\rr]rDrLr^orig_hxorig_cxrvrwr`r_hidden0hidden1rs r,one_layer_lstm_datar syq Iq I%fQi4G%fQi4G[A%q F q8H6!9dKG)0k"ok!nO Ck!23IddO QiGQiGq!_-q!_- B  IIaLhhsIw/   NNIIaOa$78IIaOa$78  YYq!Q'1a);B  W^^AO8KLMqBW^^AO8KLMqB3B 7IQRSB236"X Bx =YYw*EIIgq,AA ))K #C  ?r+c4d}||||rtStS)a*Check whether we could use decompose lstm with mkldnn_rnn_layer. All the below conditions need to be met: * ``torch._C._get_mkldnn_enabled()`` returns ``True``. * All the input args are on CPU. * The dtypes of args are either torch.float or torch.bfloat16. * Inference. * ``has_projections`` returns ``False``. Args: * input: the input sequence to LSTM * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM * params: the weight and bias tensors of LSTM cNtjjsy|gt|ztt j |z}|Dchc]}|j }}t|dk7ry|j}|tj dk7ry|Dchc]}|j}}|D]&}|tjtjfvs&y|jry|djd|djdk7} | ryycc}wcc}w)NFr"rrr#T)r?r_get_mkldnn_enabledrlr from_iterablerrypoprrbfloat16 requires_gradr#) rrvr=ratdevicesrdtypesrr?s r, use_mkldnnz2select_one_layer_lstm_function..use_mkldnnsxx++-'DH$tE,?,?,G'HH%,-188-- w<1  U\\%( (#*+a!''++ EU[[%..99    Q%**Q-2a5::a=8 ).,s D#D")r~r)rrvr=rs r,select_one_layer_lstm_functionr s!:%V$$$r+c t|dk(sJdt|||djd|djdk7}tt |d|d} t |||} t || |||||||| \} } tt | } | tj| ddtj| ddfS)Nr#lstm expects two hidden statesrr") ryrAr#rlrrrr?r) rrvr=r>rzr:r7rErrVrrrs r, lstm_implr&s r7a<999< 6:r!uzz!}1 1 /M NF #beRU# $F-eR@H$   Cm,-M  M!,a0%++mA>NPQ2R RRr+c t|dk(sJdt|||djd|djdk7}tt |d|d} t || ||||||dt t| \} } tt | } | tj| ddtj| ddfS)Nr#rrr"F)rX) ryrAr#rlrrrrr?rrs r,lstm_data_implrHs r7a<999< 6:r!uzz!}1 1 /M NF #beRU# $F$   #= Cm,-M  M!,a0%++mA>NPQ2R RRr+c&|jdd}tj|||jdd}|d|dzj}|d|dzj} |d|d|zzj } || z | z| zS)Nrr"r#r)rrrdrr rUrGrYr[rZr\chunked_igateschunked_hgates reset_gate input_gatenew_gates r,gru_cellrisYYq!_NXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH  !Z /( ::r+cPtj|||jdd}tj|||jdd}|d|dzj}|d|dzj} |d|d|zzj } || z | z| zS)Nrr"rr#rrs r, gru_cell_datarrsXXc9g6<rzr:r7rErrs r, gru_impl_datar{si6:u 5F$  !   " }U C  M1- --r+c t||d}t||jd|||||||ttt  \} } | t j| dfS)NFrr)rArrrrorr?r) rrvr=r>rzr:r7rErrrs r,gru_implrsf6:u 5F$  !   2 C  M1- --r+ct|j||}t|d}t|d}tjj j |||||SNrr")rr#rr?rr_upsample_bilinear2d_aarr align_cornersr rscale_hscale_ws r,upsample_bilinear2d_aa_vecrsV ){M REmQ/GmQ/G 99>> 1 1 umWg r+ct|j||}t|d}t|d}tjj j |||||Sr)rr#rr?rr_upsample_bicubic2d_aars r,upsample_bicubic2d_aa_vecrsV ){M REmQ/GmQ/G 99>> 0 0 umWg r+czt|j||}|r|ndgt|z}t||||Sr5)rr#ry_upsample_linear)rrrr rrs r,_upsample_linear_vecrs= ){M RE+]$#e*1DF E5- @@r+rc t||||gSr5r)rrrr+s r,upsample_linear1drs E; z JJr+c"t|||||gSr5r)rrrr*r+s r,upsample_bilinear2drs E; (?S TTr+c$t||||||gSr5r)rrrr1r*r+s r,upsample_trilinear3drs!  {MHh+I r+cL|r|dkDr |dz |dz z SdS| |dkDrd|z S||z S)Nr"rgrr*)rrrrns r,_compute_scalersB5=\# (S.1HqH#/EAIsU{U7XCUUr+c&|r||zS||dzzdz SNrr*)rn dst_indexrs r,_compute_source_indexrs$y   C(3..r+weightsweights_precisionctdt||Dd|dz zz}||z }tj|ddj tj S)Nc3K|]F\}}|jtj|jtjzHywr5)r8r?r,)rrrs r,rz%_sum_tensors_uint8..s826!QU[[ADD--sA Ar"r) _sum_tensorsrr?rr8r%)rrrrs r,_sum_tensors_uint8rsd:=c7:K  1$ %'F( (F ;;vq# & ) )%++ 66r+ctj|j}d}tj||j}d|d|dzzzz}|dk\}||j z S)Nrrr"i)r?rrrrr)r max_weightmax_weight_precision precisionsvaluesrs r,_compute_weight_precisionr siW%))+J2:;L;LMJ :zA~!67 7F g D $((* ,,r+c jd}jdd}t|}tjtjj \}fd}t t|||D cgc]\} \} } }|| | ||dz | z } } } } }tt| \} }}g}tddgg|zD]c}ddgt|Dcgc]}||dk(r||n||c}z}tj|}t|}|j|ett|D]p} | | || z j!ddj#}t|ddd|dddDcgc]!\}}|t%j&||z |z#}}}rt|dk(sJ|d}tj(}j*j,dk(r|d krt$j.}t1|t$j2sJ|j5| }j7s|j9}|Scc}} } } wcc}wcc}}w) Nr"r#rEc|t|| |}tj| jj }t || j d}|j|jdgdg|z}|j tj}|dzj |dz }|||fS)Nrrrrrr"r) rr?rrr8rrrrr) inp_sizerrnsqueeze scale_factorr`x_f32r9xp1rrrs r, get_valuesz$_upsample_linear..get_values:s%h-P  LL%,, 7 : : : G%lA}EKKPSKT ekk!n@sh/?@ HHU[[ !1umm1 m-a}r+rrrgr8rN)rryrDrErG INT_TO_FLOATrrrlrrTrr9rrfreversedrr8r?rrrrrSr7rrr>round)rrrrr; inp_sizesn_dimsrVrr`rrrxs_f32xsxp1svsrkrvxscalev1v2rrOrs` ` @r,rr)s|QJ ABI ^F'' !AANNHAu 09  ; /0  +A+(F 8XvvzA~>F CL)FB B 1vh' (TluV}U!qtqybed1g=UU   uc * #Au - !  eFm $ )be#**3477>b1gr!$Q$x0 B 27F+ +    r7a<< UF//6M ||F"zB// fell ++ +   ]  ;F  " " $ MQV sI I' &I,rrc4|j|jk(Sr5r)rrs r, is_same_sizerss 77agg r+c.tj||Sr5)rr)r9rrFs r,_reshape_aliasrxs 99Q r+c.tj||Sr5)rr)r9rs r,r9r9~s ::a !!r+c2tj||||Sr5)rr)r9rr|rs r,rrs >>!WeZ 88r+c|D]F}|tj|jtjtjfvdHtj|jtj k(dddlm}||jdk(r.rr+cyNz*tensors used as masks must be bool tensorsr*r*r+r,rz&_unsafe_masked_index..rr+rrr"r})r?rrrYrrrjrr_meta_registrationsmeta_index_Tensorr=rrTryrr#rr9rZ)r9rrfillrr meta_resultr`s r,rrs   LL  EII66M  LL ejj < Eaggi1n%//AA!WM zz+++T22 3w< ?   q A >GAJ?   a ) 5 5teT BBr+cL|D]F}|tj|jtjtjfvdHtj|jtj k(d|j dk(r|jStt|D]B}||}| |j|j| |j|dz ||<D|j|d}tj|||dS)Ncyrr*r*r+r,rz5_unsafe_masked_index_put_accumulate..rr+cyrr*r*r+r,rz5_unsafe_masked_index_put_accumulate..rr+rr"r}Tr)r?rrrYrrrrrTryrr#rZrr)r9rrrrr` masked_values r,#_unsafe_masked_index_put_accumulater"s   LL  EII66M  LL ejj <  wwyA~wwy 3w< H   !&&)QGGAJH %%teQ/L  ! !!Wlt ! LLr+c|j}d}|dkrd}|6|dkDr*dg|z}|jd||<|j|}n|}||z}tj||k7|d} | j |} tj ||| j| } tj||k7| d} |tjjk(r|dkDr|jdd} | | fS|lj|j}tj ||| j|} tj||k7| d} | j} n"||k7jj|} |tjjk(r| j} | | fS|tj jk(r| j| z } | | fS)Nr"r#rr*r)rOrrr?rirUgatherrr!r'r|r=rrr8r)r()r{rrrrrrrwr safe_target_rrwsums r,_nll_loss_forwardr(sXXZFK z   A:E"(aE+  E"AAax++f 4fa@K((5Lll4l;CCKP PF [[</ ;FINN(((VaZ}}R- |##  HHTZZ ||A{L9AA+N{{6\14;xxz ,.33588> IMM''' <  inn** * , < r+c |jdkDr|jdksJd|jdksJd|jdk(xr|jdk(}|sA|jd|jdk(s"Jd|jd|jd|jd }|=|jdk(r|j|k(sJd |d |jt|||||S) Nrr#r/r"r0r1r2r3rSz/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rOrrr()r{rrrrr5 n_classess r,nll_loss_forwardr+s 88:>dhhjAoP/PP - ::<1 E 88:?8vzz|q'8L DJJqMV\\!_< $TZZL 6<<.J = 2I >fjjla/FLLNi4O 9)E++1<<. : P T669l KKr+c t|||||Sr5)r()r{rrrrs r,nll_loss2d_forwardr-s T669l KKr+Ac0|dz|z|dzz |z|zdzS)Nr#rr"r*r9r.s r,_upsample_cubic_convolution1r1s( UaK1q5 !Q & *Q ..r+c<||zd|zz |zd|zz|zd|zz S)Nrr8r*r0s r,_upsample_cubic_convolution2r4"s0 UQU]a !a% '1 ,q1u 44r+rcd}|jtjdk(rtj|d|z gd}tj|dzd|z gd}t||}t ||}tj |d\}}tj |d\}} ||| |fSt|dz|t ||t d|z |td|z |fS)Ngrrgrrr)rr?rr4r1r) rr.tt1tt2w03w12rrrursrts r, _upsample_get_cubic_coefficientsr:&s Axx5<<&&kk1cAg,A.kk1s7C!G,!4*32*32cq)Bcq)B2r2~ )S! 4 (A . (q! 4 (q! 4   r+coeffstscPt|}tdt||DS)Nc3,K|] \}}||zywr5r*rrrs r,rz+_upsample_cubic_interp1d..<sEHRRErr)r:rr)r;r<coeffs2s r,_upsample_cubic_interp1drA:s$.r2G EFG0DE EEr+c6ttj|Sr5)rr?add)r<s r,rr@s %))R  r+ num_stepsc|dkrtjd||S|s|dz |z nd}tj| ||||S)Nr"rr)stepsrr)r?rhlinspace)rDrrrrs r,_linspace_from_neg_onerHDsIA~||AfE::-:)a-9 $A >>1"ayu MMr+thetahr%c|j}|j}t||||jd|d}t||||j|dd}t j d||}tj jj|ddd}tj jj|ddd}tj jj|d dd}||z|zS) Nr")r"r"r"r)rr#constantrrrxr|r"r")r#r rrrHrr?rrrr) rIrJr%rrrgrid_xgrid_ygrid_ones r,_make_base_grid_4drSNs KKE \\F$A}eV D I I!QPQ RF #A}eV D I I!QPQ RFzz)5@HXX $ $VjPQ $ RF XX $ $VjPQ $ RFxx""&&xV*TU&VH F?X %%r+rc|j}|j}t||||jdd|d}t||||jd|dd}t||||j|ddd} t j d||} tj jj|ddd}tj jj|ddd}tj jj| d dd} tj jj| d dd} ||z| z| zS) Nr")r"r"r"r"r)rrrLrrM)r"r#)r#r")rrrO) rIrrJr%rrrrPrQgrid_zrRs r,_make_base_grid_5drV_s5 KKE \\F #A}eV D I I!QPQST UF #A}eV D I I!QPQST UF #A}eV D I I!QPQST UFzz,eFCHXX $ $VjPQ $ RF XX $ $VjPQ $ RF XX $ $VjPQ $ RFxx""&&xV*TU&VH F?V #h ..r+c|\}}}}t||||}|jddd|jjdzj d}|j|||dS)NrrSrr"rPr#)rSrrUrUr) rIr#rrrVrJr% base_gridgrids r,_affine_grid_generator_4dr[psgJAq!Q"5!QmLI NN2q! $uxx'9'9!'< < A A" ED 99Q1a  r+c|\}}}}}t|||||}|jddd|jjdzj d} | j||||dS)NrXrSr8r"rPr)rVrrUrUr) rIr#rrrVrrJr%rYrZs r,_affine_grid_generator_5dr]zsmMAq!Q"5!QOI NN2q! $uxx'9'9!'< < A A" ED 99Q1a ##r+ctjt|dvdt|dk(rt|||St |||S)Nrcy)NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r*r*r+r,rz'affine_grid_generator..rr+r8rX)r?rryr[r])rIr#rs r,affine_grid_generatorr`sJ LL D VU 4yA~(MRR(MRRr+rZinterpolation_mode padding_mode _expand_gridc  !"#$%&'()*+,-tjdvfdtjdvfddtdtdtffd -dtdtd tdtfd +dtdtdtf+fd dtdtdtf -fd }j\$%|j\})*}|d k(sJr(|j d)*|j )*d }dtdtdtf$%fd &tjjj dddtjjj ddddtdtdtdtf&)*fd dtdtdtffd "|d} |d} dk(r|| %} || $} | j| jc'('dz(}} '(dz}}| |}}|| z || z z}| |z || z z}| | z | |z z}| 'z | (z z}t"fd'(|f| ||f|||f|||ffDSdk(r<|| %} || $} | j}| j}"||dS-| %} -| $} | j'| j(| 'z ,| (z }s",jd,|jd}dtdtdtf "$%fd #dtdtf#'(,fd !t!fdtd D}t!||S)!N)rr"r#cdS)NzInvalid interpolation mode r*)rasr,rz"_grid_sampler_2d..s-.@-ABr+cdS)NzInvalid padding mode r*)rbsr,rz"_grid_sampler_2d..s-B<.+Qr+coordsr#rPcBr|dzdz n|dz}|dzdz }||z|zSrr*)rgr#rofsrs r, unnormalizez%_grid_sampler_2d..unnormalizes8%2tczCs Sj3|c!!r+ twice_low twice_highcP||k(rtj|S|dz }||z dz }||z j}tj||}||z j j tj }tj|dzdk(||z||z|z S)Nr#rr"r)r?rrfmodfloorr8int8ri)rgrkrl coords_min coords_spancoords2extraflipss r,reflect_coordinatesz-_grid_sampler_2d..reflect_coordinatess  "##F+ +] !I-2 J&++- 7K0;&--/222D{{ AINEJ. j0H50P  r+cdk(r|Sdk(rtj|d|dz Sr|dd|dz z}n|dd|zdz }tj|d|dz S)Nrr"r#rSr)rgr#coords_reflectedrrbrvs r,compute_coordinatesz-_grid_sampler_2d..compute_coordinatessx 1 M Q ;;vq$(3 3#6vq!tax.#Q #6vr1t8a<#P ;;/D1H= =r+c(||}||Sr5r*)rgr# coords_unryrjs r,compute_source_indexz._grid_sampler_2d..compute_source_indexs- "9d33r+r#r"rysc tjd|ktj|ktjd|k|kSrsr?r)rr}iHiWs r,in_bounds_condz(_grid_sampler_2d..in_bounds_condsF  GU&&rBw0A0A!r'2PR70ST  r+rwsc||rndt fd|jtj|jtj|fDS)Nr"c3pK|]-}tj|dj/ywr)r?rir)rrrrroHoWs r,rz1_grid_sampler_2d..clip..s7  KKa # ( (Ar2 6 s36r)rr8r?r) rr}rrrrrrcrrrs @@r,clipz_grid_sampler_2d..clipsYb"% A1 ee%++e.EKK0H"M   r+ixiyc8 |||\}}}||f|zSr5r*) rrr%idx_xidx_yw_C_idxN_idxrrs r, get_summandz%_grid_sampler_2d..get_summands0B?ubue+,r11r+).r).r"rc3:K|]\}}}|||ywr5r*)rrrr%rs r,rz#_grid_sampler_2d..s( R B " sc<|}|}||dSr\r*)rrr9rZryrrrs r,get_value_boundedz+_grid_sampler_2d..get_value_bounded%s*#B+A#B+Aq!Q' 'r+ric|dz z}dz ||dz|dz|f}t|S)Nr"r#)rA)riiy_ofscsrix_nwiy_nwtxs r, get_coeffz#_grid_sampler_2d..get_coeff*s[cAg&F!%!)V4!%0!%!)V4!%!)V4 B ,B3 3r+c3.K|] }|ywr5r*)rrirs r,rz#_grid_sampler_2d..4s:#y~:r8)r?rrrrrrrrrrorrrUrrTrA).rrZrarbrrcr|rVtwor9rZrrix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se ix_nearest iy_nearesttyr;rrrrrryrrrrrrrrrrrvrrjs.` ```` @@@@@@@@@@@@@@@@@@@r,_grid_sampler_2drs LLi'B LL !#Q"F"#"&"  F  s    PV   >F ># >& >4V4346477LAq"bZZNAr2s !8O8 yyAr2s+221aRC 6 v & LL188 , 1 1!Q1 =E LL188 , 1 1!Q1 =E    V    4F   22F2&22 V A V AQ !!R ( !!R (xxz288: uqy%ueaiueu urz*U urz* rEz*U rEz* t$t$t$t$      q !!R ( !!R (XXZ XXZ :z155 B  B    %Z %ZaBaB (& (f ( ( (  43 46 4 4:q::'33r+c"t|||||S)N)rZrarbr)r)rrZrarbrs r,grid_sampler_2dr8s   -!#  r+c&tjjdk(xrjdk(fdtjjdjdk(fdzj dS)Nr#r"cLdjdjS)Nzmatrix @ vector expected, got rrr{rsr,rzmv..Qs!0 BswwykJr+rcvdjddjddjddS)Nzsize mismatch, got input (rr9r"z), vec (r3rrsr,rzmv..Us<,TYYq\N!DIIaL>RURZRZ[\R]Q^^_`r+r)r?rrOr#rrs``r,rrKss LL  a*CGGINJ LL !  #` 3J    ""r+c|-|dz |zdz}d|z |z|tj|zz }nd|z |ztj|z }|||z}t||Sr\)r logsigmoidr)r{rr pos_weightr log_weightrs r, binary_cross_entropy_with_logitsrZsv  1n.2 F d"j1<<3E&EFF d"Q\\$%77 f} i 00r+tensor1tensor2is_outc  |j|jk\r||fn||f\}}ddlm |jdk\r|jdksy|jr|sy|jdk(ry |j dk(ry|j }|j }dg}t|ddD]}|j||dzt fd t|tt||DS) Nrrrr#FTr"rSc3VK|] \}}}|dk(xs ||k("ywrtr*)rrrr#rs r,rzshould_fold..s9 D% tqy!B^DEM%BBs&)) rtrjrrrrrmrrfrrrl) rrrt1t2t1_shape t1_strideexpected_strider#rs @r, should_foldrks$+<<7<<#?gw gwEWFBD GGqLRWW\ ||qbhhjAo&xxH IcO!"&;tob&99:; !$ tH_56"  r+) pass_is_out)rc: |j}|j}|dk7r|dk7sJ|dk(r|dk(rtj||S|dk(r|dk(rtj||S|dk(rC|dk(r>tjtj tj |d|dS|dk(r|dk(rtj ||St|||r>||kD}|r |jn|}|s|n|dk(r|jn|}|j}t|dd} ttj| } |jdk(} | r| j|jd|j!| |d} | rWtj"j$j'| j || } |r| jj)S| Stj"j$j'| j|| S|dk\r^|dk\rX|dkDr|j+dnd}|j+d}|jdd}|dkDr|j+dn|j+d}|dkDr|j+dnd}g}t-|dz D]"}|j|j+|$|dk(rn|dk(ri|d|dk7r^|ddk(r'|j.rt1|j d|S|ddk(r'|j.rt1||j dSttj2||}|||gz}t5|}|j7|j!|||}|dk(}|r7||gz}|j7|j!||j d}n)|||gz}|j7|j!|||}|} |dkDr| j||dkDr| j||r/|j9|j dj;| S|j9|j;| Stj<ddy) Nrr"r#rSrPrFcy)Nz/both arguments to matmul need to be at least 1Dr*r*r+r,rzmatmul..rr+)rOr?dotrrrrUrrUrrrlrrrrfrrr _unsafe_viewrr#rTrrTbroadcast_shapesrrbmmrr)rrr dim_tensor1 dim_tensor2rrrsizes_1 output_shape folded_dim1 t2_is_matrix t1_foldedrrm1 batch_tensor1m2rH batch_tensor2r`expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded vector_rhstensor2_expand_sizetensor2_expandeds r,rTrTs[++-K++-K !  q 00 0aK1,yy'**  kQ.xx))  kQ.}}UXXeoogq&A7KQOO  kQ.xx)) Wgv . +- $WZZ'$G+:J799;PW ((GCRL) X\\<8 vvx1}     ,JJ{GBK8 YY^^00b1A<PF-6699'') BF B99>>..y||B/?N N  kQ.!,aGLL Q \\"  cr* !,qW\\" gll26F +aGLL Q#% {Q' 2A  a 1 2 1 q a M!$44Q1$)>)>gooa0'::Q1$)>)>gwq'9:: $  " "=- @ 3aW<#$89#>>*=>FF !R !A% "6""= 23-r21  #7"a"@ &~~.ABJJ$b!  , ?    " ?    " #''(89AA"EJJ<X X#''(89>>|L L UUVr+rrc\j\}}t|d||}t|d||}tjtjj \}}t j|djj|} t j|djj|} t|| |} t|| |} | jd} | j} | j}| |z jdd}| | z jdd}| jt j} |jt j}|dz ||dz|d zf}| dz | | dz| d zft|t|}d \}j t j"k(rt%t%|}Dcgc]@}|dzzt j&|d zzjt j(Bc}|Dcgc]@}|d|zzt j&|d zzjt j(B}}fd fd t+fd|D}j t j"k(r|Jt-|||}nt/dt1||D}tj2}|j5|}|Scc}wcc}w)Nrr"rErrrSrrgr#rCrctj|ddz }tj|ddz }tjdd||g}|Sr)r?rrr9)r}ry_idxx_idxr in_hin_wrs r, load_boundedz0upsample_bicubic2d_default..load_boundedBsO B4!8, B4!8,   utT5%&@ Ar+ctfdD}jtjk(rJt |St dt |DS)Nc30K|] }|ywr5r*)rx_ofsrrZs r,rzCupsample_bicubic2d_default..get_x_interp..IsBl1e,Bsc3,K|] \}}||zywr5r*r?s r,rzCupsample_bicubic2d_default..get_x_interp..MsJRBGJrr)rrr?r%rrr)rZsrc_xrixs_ofsrweights_precision_x weights_xs` r, get_x_interpz0upsample_bicubic2d_default..get_x_interpHsWB'BB ;;%++ %&2 22%eY8KL LJCy4IJJJr+c3.K|] }|ywr5r*)ry_ofsrs r,rz-upsample_bicubic2d_default..Os;%,u%;rc3,K|] \}}||zywr5r*r?s r,rz-upsample_bicubic2d_default..TsL(2rb2gLrrrN)rrrDrErGrr?rrr8rrUrorrr:rr%rrint16rrrrrr)rrrrrrVh_scale_factorw_scale_factorrr`rx_floaty_floatr9rZyscaler iys_ofs weights_yweights_precision_yr%src_yrrOrrrrrrrs` @@@@@@@r,upsample_bicubic2d_defaultrs{{Aq$$D+a.-QN#D+a.-QN'' 5#H#H#U#UHAu  [^ELL9<<5<IA [^ELL9<<5<IA#NA}EG#NA}EG#G A Ak c *Fk c *F U[[A U[[A1uaQA&G1uaQA&G08I08I/9,, {{ekk!7 B7 B !** +ejjmc.A A E Eekk R  !** +ejjmc.A A E Eekk R   KK ;7; ;E {{ekk!"...#E96IJLc%6KLL//6M   ]  ;F MA  s9AL$AL)c $tjt|t|zdk(d|H|Jttt t ft dt |jdd|D}|r|nd\}}t|||||S)Nr"cy)Nz:Must specify exactly one of output_size and scale_factors.r*r*r+r,rz(upsample_bicubic2d_vec..irr+c3PK|]\}}tt||z ywr5)rr)rr%rns r,rz)upsample_bicubic2d_vec..os*Au ! u,-s$&r#rC) r?rrr rrrrr)rrrr rrs r,upsample_bicubic2d_vecr\s LL [D//14L((( #s(O  #AGGABK ?  )6}<GW %amWg VVr+c(fd}t||S)Nctj| ||zj}|dz |dz |jz jz S)Nrr")r?rrrrmiddlerdim_idxrs r,rz_reflection_pad..idx~sF,,ufunQXXFzVaZ'++-7<<>>>r+_reflection_or_replication_padrrrs` r,_reflection_padrxs ? *   r+c(fd}t||S)Nctj| ||zj}tj|d|dz S)Nrrr")r?rrrrs r,rz_replication_pad..idxs6,,ufunQXXF{{7Avz22r+rrs` r,_replication_padr s 3 *   r+idx_fncn t|dz tj|j dz dzfv fd|j d}|j z }t Dcgc]}|d dz |z z}}t Dcgc]}|d dz |z zdz}}|}t D]E}dg|jz} |||||||| ||z<t j|| }Gtj|} |j| }|Scc}wcc}w)Nr#r"c(dddzddzdS)Nreflection_padz d requires r"zD or r#zD inputr*rsr,rz0_reflection_or_replication_pad..s$.[q sQwiwOr+rN) ryr?rrOrrTrr9rDrr) rrr  inp_shapenc_dimr` padding_left padding_rightrrrOrOs @r,rrsH g,! C LL C!GS1W%%OI UUWs]F8=c C1GAq1-.CLC=B3ZHWQ#'A+.23HMH F 3Z1&**,. a)A, a@PQAJ##FC01 //7M   ]  ;F MDHs 1D-D2c t|dz|j dDcgc]}|dz  }}tDcgc]}|ddz |z z}}tDcgc]}|ddz |z zdz}}g}t|jD]c}dg|jz} d| |<|j t j |j||jj| e|d | d} dtDcgc] }| |||z} }tDcgc] }||| |z } }tDcgc]}d||z||z| |z } }tDcgc]}| |d||||z||zf}}tjtjtDcgc] }||c}}tj|| zd}fd}tjtDcgc]}gd c}D]}|t!dgzk(rg}g}tD]t}||dk(r | |}||}n=||dk(r| |}| |d||f}n$||dk(r| |}| |||||z ||dz f}|j |j v||||}|Scc}wcc}wcc}wcc}wcc}wcc}wcc}wcc}wcc}w) Nr#r"rSrcF|\}}}tj||k\||kSr5r) index_ranger`lbubs r,index_range_conditionz7_reflection_pad_backward..index_range_conditions( 2r  b!r'22r+rrc .t D]*}||d||dk}t|ts%|s(|cStjt j |Dcgc] } | c}}t j ||zd}||zScc}w)Nr#r"r)rTr7rrLrrrr) rr index_rangesr`upper_less_than_lowerrrgrrOrlrs r,rz,_reflection_pad_backward..accumulatess A$0OA$6a9K$K !/6;P      CO PK "; / P   % %k4S# Fax QsB )rSrr")ryrrTrtrfr?rrrrLrrrr itertoolsrr)rlr9rrJdhwr`rrr view_shapexyzcenter left_reflect right_reflectrange_crrrrVareaoutsrrrrrOrs` @@@r,_reflection_pad_backwardr&s` g,! C''3$%. )Q1q5 )C )8=c C1GAq1-.CLC=B3ZHWQ#'A+.23HMHG 166]SS166\  1 u||AGGAJqxx@EEjQRS #A 3$%.C316c ;1c!f|A& ;F ;6;CjALOc!f,ALADI#JOqQQZ,q/1CF:OMO NSSVZHIAs1v Q/-2BBCG    eCjQ0<Q D  $ $[$F C HD !!c #C1J#CD4 5!s# #  s -AAw!|Qi%aj aB"1o"1vq,q/: aA#A&"1vs1v a0@'@#a&1*M KK     , -$l3-40 Kc *CH2<AOR&$Ds5 KK$$K)K.8K3K8K=L & Lrrrcjtj|||}tj|||}||fS)Nr)r?aminrI)r{rOrr(rIs r,aminmaxr)s2 ::dW 5D ::dW 5D :r+rctjtjtj|d||||S)Nrr)rrr?riisnan)r{rOrrs r,nansumr,s2 88EKK D 11d;S'QV8 WWr+rrrr8rc Ntjjd|d||||S)Nrr"r-rr start_step)rarrrr8s r,arange_defaultr1s/ ;; ! ! 3vf " r+c Ntjj||d||||S)Nr"r-r/)r`rarrrr8s r, arange_startr3.s/ ;; ! ! sAU6&Z " r+cddlm}||i|S)Nr)out_dtype_dense)!torch._higher_order_ops.out_dtyper5)rFrGr5s r,out_dtype_decompr7=sA D +F ++r+marginct tjtjjd jd tj|dk(xs|dk(dtjj dk(xr dk7fdtjj dk(xrj k( fdQtjtjj dk(xrj k( fdjdtjd}||z z}|jd}|dk(r|n||z}|z}tj j }tj|k7|d}|tjjk(r|j!S|tj"jk(r |j%|jdz S|j!d S) Nrr"r#cy)Nz only p == 1 and p == 2 supportedr*r*r+r,rz#multi_margin_loss..Srr+c"djSNzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: rrsr,rz#multi_margin_loss..Vs_`e`k`k_lmr+c(ddjS)Nz#inconsistent target size, expected rr)nframersr,rz#multi_margin_loss..Zs5fXYv||nUr+c(ddjS)Nz#inconsistent weight size, expected rr)rOrsr,rz#multi_margin_loss..`s9#i ~Vr+rrr)r? atleast_2d atleast_1drrrtrrUr$rVrrrir!r(r|rr)r) rrrHr8rrurjrrOr>s `` ` @@r,multi_margin_lossrCDs   U #E   f %F [[^F ++a.C LLa!16#MN LL a$C1Hm LL q5V\\^v5U!!&) KK1  63!6 V   a F U0A UA AA !VQA  v  ,,s5<< 0C C6M1a(AINN(((vvx imm)) )uuw##vv!v}r+ is_targetc |j |jtj|}tj|}|jd}tjt dkxr|dk7 fdtjt dkxr k( fdtj ||j }|dk(}tjtj|||dd }||k}tj||d}tj|d| } tj||d} tj|| jd k(d } d | jjd z |z} | jd} | |z } tj| d| } |tjj k(r!| j#d j%} n@|tj&j k(r| j#} n| j#d } | j)|j*j-} | | fS)Nr"r#rcdSr<r*)orig_input_shapesr,rz0multilabel_margin_loss_forward..s_`p_qrr+cddS)Nzinconsistent target size: z for input of size: r*)rGorig_target_shapesr,rz0multilabel_margin_loss_forward..s,->,??STdSefr+rrSTrrrrg)rrS)rr?r@rryrrr(rir$anyrUTrVr!r(r|rrr)r8rr)rrrrOris_endend_idx target_masktidx0rBtidx1rDrjrGrIs @@r,multilabel_margin_loss_forwardrQss{{    U #E   f %F ++a.C LL "/saxr LL !#M(9=M(Mf ,,s6== 1C r\FjjVS#6BMG-K KK VQ /E U%0A KK VR 0E #R!88a@I accmmm##e+A AA CA Iq!$AINN((( EEgE  # # % imm)) ) EEG EEgE  U[[)112CDI i<r+) attn_maskrnquerykey dropout_p is_causalrRctjtjfdtjjdk(xr(jdk(xrjdk(fdtjdk(fdtjjdjdk(xrjdjdk(dt j j||d|jdjdk7 \}}|jd d ddjtj jdd d d}||fS) Nc"djS)Nz-query must be FP32, FP64, BF16, FP16 but got r)rSsr,rz.s? }Mr+r8cndjdjdjS)Nz,q, k, v must be a 4 dimensional tensor, got rr)rTrSr|sr,rz.s3>uyy{m2cggi[XZ[`[d[d[fZghr+rcdS)Nz&dropout probability must be zero, got r*)rUsr,rz.s$J9+"Vr+rcy)Nz&q, k, v should have the same head sizer*r*r+r,rz.rr+r")rRrUrV dropout_maskrn enable_gqar#rrN) r?rr>rOrr"_scaled_dot_product_attention_mathrr#rrrS) rSrTr|rUrVrRrnrattns ```` r,*scaled_dot_product_flash_attention_for_cpur`sV LL &M LL q@SWWY!^@ q0@h LLSV LL A%++a.(KSYYq\U[[^-K8 ::BB   ::a=CHHQK/C LFDF q!Q" %"9"9 : Aq!  4<r+c.t|fd}|S)Nc<|i|}|dj|Srs)r)rFrGr outplace_ops r, inplace_opz$register_inplace..inplace_ops%4*6*Aw}}S!!r+r)aten_oprcrds ` r,register_inplacerfs"G$"%" r+cB|js&|jst|}t|}tj||}t |t jr|dk7r||z}|dk(r|St |t jr|dk7r||z}||zSr)r>rrr?rr7numbersNumber)r{batch1batch2rcrmrs r,baddbmmrls  ! ! #DOO,=4yE  YYvv &F eW^^ , % qy dGNN +tqyd{ &=r+c2tj||dS)Nrorwry)r{rGs r, floor_dividerns 99T5 88r+c`tjtj|jdSr\)rLrrrr)rs r, sym_numelrps   HLL!''1 55r+rrc|"tjj|g|Stjj|g||S)Nrrq)rr dim_IntList IntList_out)r{rrs r, sum_defaultrusC {xx##D"E#::xx##D"Es#CCr+c t|tjs|S|@tjj |t t|jStjj ||gSr5) r7r?rrrdimsrlrTrO)r{rOs r,squeeze_defaultrx)s\ dELL )  {||  tE$((*,='>??||  u--r+c2tfdtt|jD}|jt j k(rt jnd}|jd|d|}|||j|jz z|fS)Nc3.K|] }|k7s |ywr5r*)rr`rOs r,rz)_weight_norm_interface..8s@1qCxQ@s r#T)rr) rrTryrrr?rrrr8)r rrOkeep_dim norm_dtypers ` r,_weight_norm_interfacer}5st@c!''l 3@@H !5>> 9tJ 66!Xt:6 >D DGGAGG$$ %t ++r+ assume_uniqueinvertct|tjs!tj||j}t|tjs.|rtj ||Stj ||S|jdt|jdzkrt|||St||||S)Nrg$@g(\?rr~) r7r?rrhrnerrrQ isin_default isin_sorting)elements test_elementsrrs r,isinr?s h -<<1E1EF mU\\ 2 88Hm4 488Hm4 4tc(..*:E&BBBHmFCC m=  r+)r6cB|?tj|jtj|j}n?tj|j|tj|j}||kj |j }|S)Nr)r6rr)r?randr#rrr8r)r{r6raw_prHs r, bernoullirSsq  499;emmDKKP IIK--;;   $**%A Hr+rcL|jdk(r%tj|tjS|jd|j zz}|j |}ttd|j dz d}||k(j|}|r|S|S)Nrr)r"rSr"r) rr? empty_likerrrtrrrTrJ)rrrexpanded_elem_shaper9rOrAs r,rrfs~~1 ;;"..4-2D2D+DD )*A b=---126 7C  " "s " +CC4"s"r+c|j}|j}|rtj||g}tj|d\}}|dd|ddk(} tj| ddgd} |r| j } tj | } | jd|| } | d|jStj|\} } tj| |} tj| | jk| d}| ||k(}|r|j n|}|j|jS)NT)stabler"rSrF) r.r?r%sortrd logical_notrrr searchsortedrirr)rrrr elements_flattest_elements_flat all_elementssorted_elements sorted_orderduplicate_maskrsorted_test_elementsrVrtest_idxcmps r,rrps?$$&M&..0yy-1C!DE (- <(M%(,0DD..~1vuM +779N/q,?A())"'**-?"@a  !5}E;;s%9%?%?%AA3J"8, =#)coos{{8>>**r+c.|jd}||SrR)r)r{r flatteneds r,takers R I U r+c|tj}|tjk(r t|}tj ||j |Sr%)r?rSpreserve_formatrrresizer)r{rGrOs r, resize_asrsD// ----e4 ;;tU[[ ; FFr+)FF)noner5)r#)rNNr")rSFFrrrN)r"r"F)Fr)rrgN)rr"N)FrC)NNN)rrFT)rrFr )rF(rLrrhrrncollections.abcrenumrrrrrtypingr r r r r r?torch._meta_registrations torch._primsr?rtorch._prims_common _prims_commonrDtorch.nn.functionalrrrrrr torch._decomprr6rrrrrrtorch._prims_common.wrappersrrrr torch.utilsrrBtorch.utils._pytreerr DispatchKeyrrlstr__annotations___opsrrr!rGrrNrHcompute_only_pw_cast_for_opmathpw_cast_for_opmath"pw_cast_for_opmath_non_tensor_argsrpw_cast_for_int_to_realrrWr`rbrkrrwrScalarr}rrrrrrrrrrrrrrrrrrrrrr(r|r _safe_softmaxrrr rrXrrrrr r-r6r9r?rBrDrFrJr^rfslicerzrrerrrrrrrr#r'r0r6r:py_implCompositeImplicitAutogradAutogradr9rLrPrVr^rrlrorsr~rrrrrrrrrrrrrrrrrrr r unsafe_chunkrrrno_statsrrr(r.r2r4_fused_dropoutr7rDrrOrlift lift_freshrGrLrOrRrQrerjrm_adaptive_avg_pool2drrrrrrrrrrrrr Generatorrrrr'rr.r4r_upsample_nearest_exact1dr0r6rr"r)rrArJrNrRr`rhrkror~rrnn_tanhrrrnn_relurrrrrrrrlstmrrrrgrurrrrrrrrrrrrrrrrrrr9rrr"r(r+r-r1r4r:rArrHrSrVr[r]r`rrrrrrTupsample_bicubic2drrreflection_pad1dreflection_pad2dreflection_pad3drreplication_pad1dreplication_pad2dreplication_pad3dr rreflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardr&r)r,rr<rr1r`r3r7rCrQ+_scaled_dot_product_flash_attention_for_cpur`rfrlrnrprrurrOrxr}rrrrrraddbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r hardswish_ hardtanh_hardtanh hardsigmoid___iand____and__ __ilshift__ __lshift__rr index_reduce_ index_reduce__ior____or__ __irshift__ __rshift____ixor____xor__ leaky_relu_ leaky_relulogit_logitrelu_rrenorm_renormround_rscatter_r scatter_add_ scatter_addscatter_reduce_scatter_reducesilu_r*r+r,rsM $%$77 #,,07 *(hh"" c zz~~   %$) ''99''" 'T#*88@@# uDDLL&-88@@ &" "uDDQQ   c f **+ \2F2v2,2--. \4v4&4/4../ \SS6SS5S0S ))* \           + 2))*+(,())*+"V","(() @f@@*@112 \fF3../ \PP%P05P@EP0P ' GFGvG(G//0 F&V1//0 \:F:&:U:1:001 \LL%L7<LNRL2L **+ \))f)3),)<**+5v5f5,5 " &v&&&#&**+ \>v>V>>,> **+66666,6334%% % % 66> %5%667                8 $112 \>3G3G1 1 1-01 1'1../ \11 &1061CF101**+0,0++, ^^)) 1 1 11  1-144<<=  % /5 BE MR > 44??@ R R  R  R R  R  RA R00889  % /5 BE NS : 00445 R R  R  R R  R  R6 R$$ $ $ V  $  $  $$ $@))* \NfNFNNN+N$../ \'' ' ' V  '  '  '' '0'T001 \   V       2B112  $^^)) 1 1 1 V 1 1  131&::; \ $^^))       V      <  --. ^^))1 1 11 1/1667 \ ^^))         8  " ))v)%)#),,- &&F&v&.&++,  O Oc O  O  O O  O- O ))* 6? 6? 6? C= 6? # 6?  6?+6?r  ( 4B  % ,4UO   5 & %kCCDk2236eHTN4E&++, VV A& AU A8D> A- A & S'*))* FT+,' $        (&556     7: DI  &\  &\ *T&\ tF| # " &\" ""J00$//2E2EFG !  &\     &    H   '')C)C)G)GH "&   c    $v,   d6l   ))00155C5c5%PS BT52544<<=67B B $S B03B 63;B>B  ))*//C/c/% :L/+/"--55)) / / &/ / 63; //@ # fF##$"../            0" #   f 6   $ 77??@S&S& S& S&  S& F  S&  S& S& S& S&dS& 8F Xf-x/? ?@S&AS&n77;;<     F       d ,, ,, ,, 8F Xf-x/? ?@=8 8F# x/? 77??@OO O3iO  O  O V  O 6 OdO 8F Xf-x/? ?@OAOf77;;< 3i     V   6 d ,, ,, ,, 8F Xf-x/? ?@=455==>GG G3iG  G V  G d G 8F Xf- -.G?GTO O V O 6 O6" O &! O  OO OO 6668F#3Xf5E EFOd../ UK/ (  ( V  ( 6  (6" ( &! (  ( (  ( 666 !" (00 (4'' (<(<='' (M(MN    V    6   6"  &!          666 !"  O>  F"";#H#HIG4<GJGAAIIJ  V  6        666 !"K*55==> (  ( V  ( 6  ( (  (  ( (  ( 666 !" (? ( 55>>? (  ( V  ( 6  ( (  (  ( 666 !" (@ (@@HHIK K V K 6 K K  K  KK K 66666 12KJK0  V  6       :44<<=1 1 V 1 6 1 1  1  1 1 6666 )*1>14??GGHC C V C 6 C C  C  C C 666666 9:CIC422::;1 1 V 1 6 1 1  1  1 1 6666 )*1<14++, VV- & $( %)37& VZ  & EKK & U\\ " &  &&E//0&'&XdiiAB C &&{';';<--. VVVV,   6 6"  &!   !&-/=> 00889  V 6"  &!   &!  d 68F#Xf%5 56:677??@dd d V d6" d &! d  d&!d d ddd 68F#Xf%5 56dAdP77;;<"" " V "6" " &! "  "&!" " "d" ,," ,," ,," 68F#Xf%5 56"="J778 VVV$  6"  &!   v%90667 VVV$  6"  &!   v%82112 c'vc'E#s(Oc'3c'L )8(!2( % $'$'+/    tS%   c5 ! ( &   &;'; 8 //334//334//334$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9: 3  3$s)$ 3DK( 3 3;L;L;L555 366::;66::;66::;##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A ?  ?$s)$ ?DK( ? ?BSBSBS<<< ?@0088$:Q:Q:U:UVW  (()N)NO  (()=)=> Dd;#; ;c; UO; ;<?PX; ##++T-K-K-O-OP''// 0U0UV''// 0D0DE Dd;#G GcG UOG G<FW G0088$:Q:Q:U:UVW  (()N)NO  (()=)=> Dd;!% $ G GcGuoGuo G  G<?PXG ##++T-K-K-O-OP''// 0U0UV''// 0D0DE Dd;!% $ S ScSuoSuo S  S<FW S0088$:Q:Q:U:UVW  (()N)NO  (()=)=> Dd;!% $ $ Q QcQuoQuo Q uo Q  Q<?PXQ ##++T-K-K-O-OP''// 0U0UV''// 0D0DE Dd;!% $ $   c uo uo  uo   <FW    c % !    4  >/ FK+\&,/-d& R ++,[BBC[112.3D-.8 ++,[BBC[112.3D-.8 **+KAABK001.2C,.@ **+KAABK001.2C,.@ /6=@/d (>>?--.S/@)S> ' ==> ,,-S.?(S<;; &{<<={++,.->'.6' ==> ,,-..?(.644889!!))+*O*OP!!))+*>*>?@Q:33778  (()N)NO  (()=)=>?P90044511556##K$I$IJ##K$8$89%%k&K&KL%%k&:&:;&&{'L'LM&&{';';<A=N<M:K76A //779O9O9S9STU  !% K KcKKuo K  KVK %%t'?'?'C'CD!!))+*>*>?  !% $ U UcUUuo U uo U  U@ U &&(A(A(E(EF  !% $ $   c  uo  uo  uo    V/7 & 7$,V$47IO7 7-'9-f-E EcEE % ! E  EER))112Fv$3,,d.?.?@A B++,-"."//019292234C5C6AABCMDM22 2 2  V 2  2  2  66> 2 j--. X~&L L L V L L  L  66> L'/L6//0 X~&L L L V L L  L  66> L'1L/F/u//5F5u55  3E (F%7FVFF !Xf%!&!NN#'N05 NEJ\\N&f&&&T&"/f////T/"!V!49!T!$V$49$T$223 SStCySS4S  c4 c4 c4c4 c4  c4  c4 c4L,,-             .     #! #==> $)..:N:N 1? 1 dt@[BBC>>? ',tW@DtWn0088$:Q:Q:U:UVW  (()=)=>  $# P PsCxPPe_ P e_ P  P?XPf//334$$[%J%JK$$[%9%9:  48 W W%S/*WWE%,/0 W  W;L5 W.--.--.--.  v c3h F /// ../../../   sCx V 000   38_ c3_f, - 8667667667 \T888Tn % UEu&  $ X4X%X,,dkkoo>? $( ==%)    EKK   LL  U\\ "   @ **+, $( ==%)      EKK  LL  U\\ "  -  ",#, ../'' (<(<= #^^)) ) ) ))  ) V  )  ) )>0)X;;<$$,,44[5I5IJ X{#- - -- 66> -$K=-xHHPPQ  A#'!A A A A A  AA E?A 66>ARAH'  ( ))* 9+9'6(6))488<<89$( D  D EKK  D &  D  D: D--t||/?/?@A.&.x}.B. ==>,?, " 38 # $../,0 ,, (  \\  0 $5:#