L i;ddlmZddlmZmZmZmZerddlZddlm Z ddl Z ddl m Z ddlmZerddlmZej$eZe j*de j,fdZd!d ej0d ed eej0ej0ffd Ze j*d e j,de j,de j,de j,fdZej:fdej0dej0dej0dej0d eedej>d ej0fdZ ejBdej:fdej0dej0dej0dej0d eeeefdej>d ej0fdZ"Gdde jFZ$ d"dZ% d#d Z&y)$)Optional)is_accelerate_availableis_torch_accelerator_availableis_torch_availableloggingN) functional)init_empty_weights BLOCK_SIZEctjd}||ztjd|z}tj||zj tj }tj tj|dz }||z }|j |jj}tj||z|tj||z|y)Nraxisg|@) tl program_idarangeloadtofloat32maxabsdtype element_tystore) x_ptry_ptrs_ptrr pidoffsxsys o/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/integrations/finegrained_fp8.pyact_quant_kernelr#$s --Q C  bii:6 6D    ,A rvvayE!A AA U[[ # #$AHHUT\1HHUS[!r block_sizereturncfjsJjd|zdk(sJtjtj}j gj ddj d|zdtji}fd}t||||||fS)NrrrcTtjj|dfS)Nr )tritoncdivnumel)metars r"gridzact_quant..grid6s" AGGItL'9:<sbJ --Q C<(I<(I#i/&&H\)Ky;. =L 3- .E # # 4E|#bii<&@@AEG|#bii<&@@AEG YYq, 'F '!T'"Y.a91LL MF &D/I-a0@90LL MF7[((G'!H8k))G((L,7rzzJK 1bgga. / + GGFa1q<7G3G!Gs S GGF41q<7G3G!Gs Sl"W$ggg+ 556ggg+ 556rvva|c!T'l2Sq\AA ,**,** + wwR[[( NN2;; '   rzz ) NN2:: & NN2:: &l"RYYq,%??Gl"RYYq,%??G WQW-- - GD!G.grids1 AtN34v{{1d>FZ7[[]]r$)r8r9r:r;) lenr1r0r+r,r-ndimr5next_power_of_2rrvstride)rErFrHrIr%rwblock_nblock_krLC_shaperGr8r:r9r/rJrKs @@r"w8a8_block_fp8_matmul_tritonrsG. z?a  !!}jmWG 772;!''"+ %% % 773B<288CR= (Q__->> > ;;qwwr{G , << <  QWWR[ A 66Q;1??,A= = 77DAq ;;q' "bhhqk 11 1 ;;q' "bhhqk 11 1ggcrlaT!G G< 0AL<--a0 <, L \ !Q && &L^4                      "  "  !  ! !!!16 Hr$input_qweight_q input_scale weight_scalec ~|jdk(r |jnd|jd|jdf\}}}|jd} |jd|} |j|jdd} | |dz} ||dz} tj||z| ftj |j }t| D]}||dz}||dz}t| D]}||dz}||dz}| dd||f}|||||f}| dd||dzf}|||f}tj||jtjdtj |j |||z}|dd||fxx|z cc<|j||| }|j|S)a Performs blocked matrix multiplication with FP8 quantized matrices. Args: input_q: Quantized input tensor with 1x128 block quantization weight_q: Quantized weight tensor with 128x128 block quantization input_scale: Scaling factors for input blocks weight_scale: Scaling factors for weight blocks block_size: Tuple of (M, N) for weight block dimensions output_dtype: Desired output dtype ryrr(rdeviceN)scale_ascale_b out_dtype) rr1viewr2r@rrrA _scaled_mmttensorr)rrrrr%rw batch_sizeseq_len hidden_dim out_featuresinput_reshapedinput_scale_reshapednum_weight_blocks_mnum_weight_blocks_noutputim_startm_endjn_startn_end input_block weight_blockcurr_input_scalecurr_weight_scale block_results r"w8a8_block_fp8_matmul_compilers(8?||q7HgmmqRYR_R_`aRbdkdqdqrsdtNu#J>>!$L\\"j1N&++K,=,=a,@"E&*Q-7$ 1 5 [[*w. =U]][b[i[i jF & '5jm#*Q-'*+ 5A*Q-'Gjm+E)GEM)9:K#GEM75=$@AL 4Aq1q5yLA  ,QT 2    NN$!LL%--W-* ##  1gem# $ 4 $/ 5 5:[[Wl ;F 99\ ""r$c eZdZejZ d dedededee eefffd Z dejdejfdZ xZ S) FP8Linear in_featuresrbiasr%ct |||||_||_tj j t j||tj||_ |jjdk(rb||dzdz |dz}||dzdz |dz} t j t j|| tj||_ n|jdd||_||_|r8t j t j|j|_y|jddy)Nrryrweight_scale_invr)super__init__rrr2nn Parameteremptyrrweight element_sizerrregister_parameterr%activation_schemer) selfrrrrr%rrscale_out_featuresscale_in_features __class__s r"rzFP8Linear.__init__)s  l3&(hh((\;V_VeVent)uv ;; # # % *".A">"BzRS}!T !,z!}!  )%1C D 5KK))OO!&   % 0 0 2yy$$))+995;;9/ /  s 7AEE%)FNNNdynamic)__name__ __module__ __qualname__r2r4rintboolrtuplerTensorr __classcell__)rs@r"rr&ss   E 04# 2 2 2 2 U38_- 2D0U\\0ell0r$rc  |g}|jD]<\}}|j|t|tjr||xsgvrdj | t fd|xsgDst5t|j|j|jdu|jj|jj|j|j |j"|<d}dddt%t'|j)dkDrt+||||||\}}|j-d?||fS#1swYZxYw) z%Replace Linear layers with FP8Linear.N.c3&K|]}|v yw)N).0keycurrent_key_name_strs r" z+_replace_with_fp8_linear..us]ss22]s)rrrrrrr%Tr)has_been_replacedr()named_childrenappend isinstancerLinearjoinanyr rrrrrrrrweight_block_size_modulesrlistchildren_replace_with_fp8_linearpop) modeltp_planmodules_to_not_convertcurrent_key_namequantization_configrnamemodule_rs @r"rrdsW,,.! f% fbii (T:P:VTV-W#&88,<#= ]?U?[Y[]]') -+4$*$6$6%+%8%8#[[4%}}33$mm11*=*O*O#6#H#H,ENN4()-% - tFOO%& '! +#;& #"3 $ A  R ;!> # ##3 - -s A>EE# c|dgn|}|j|j|jtt|}t ||j ||\}}|st jd|S)z:Helper function to replace model layers with FP8 versions.lm_head)rrrzYou are loading your model using fp8 but no linear modules were found in your model. Please double check your model architecture.)rextendrsetr_tp_planloggerwarning)rrrrs r"replace_with_fp8_linearrs -C,Ji[Pf11=%%&9&P&PQ!#&<"=>7 5/  E   < Lr$)rz)NNNNF)NN)'typingrutilsrrrrr2torch.nnrr+triton.languagelanguagerr r accelerater get_loggerrrjit constexprr#rrrr7rvrrrrcompilerrrrrrr$r"rs$ hh (-   H %bll   3 u||U\\?Y9Z Q%4,,5Q%6,,7Q%8,,9Q%:,,;Q% Q%t!& M  ||M  ||M   M   M S M ++ M  \\M b -1 % ># \\>#ll>#>#,, ># sCx) ># ++ ># \\>#>#B;0 ;0@  +$` r$