L i00ddlmZddlmZmZmZmZer ddlZddlmZerddl m Z erddl Z ejeZGddejj ZGd d ej$Z d d Z dd Zy))ACT2FN)is_accelerate_availableis_fbgemm_gpu_availableis_torch_availableloggingN)nn)init_empty_weightsc<eZdZejffd ZdZxZS)FbgemmFp8LinearcVt||||||_||_tj j t j||ftj|_ tj j t j|df||_ |jdt jdgtjd|rDtj j t j|j||_ yd|_ y)Ndtypeinput_scale_ubF persistent)super__init__ in_features out_featurestorchr Parameterzeros float8_e4m3fnweight weight_scaleregister_bufferfloatbias)selfrrr weight_dtype __class__s j/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/integrations/fbgemm_fp8.pyrzFbgemmFp8Linear.__init__!s lD9&(hh((lK5PX]XkXk)lm !HH..u{{L!;LT`/ab -u{{A3ekk/R_de **5;;8I8IR^+_`DIDIcLg|jddd}tjjj |j d|jdj |j\}}|jjtj}tjjj||j||d}|j||jzn|}|j|j}|j|}~~|S)N)scale_ubTuse_fast_accum)shaperopsfbgemmquantize_fp8_per_rowview contiguousrrtofloat32f8f8bf16_rowwiserr devicereshape)r!x output_shape x_quantizedx_scaleweight_scale_float32outputs r$forwardzFbgemmFp8Linear.forward/s*"*r*  %yy//DD FF2qwwr{ # . . 04;N;N E  W $0033EMMB!!22 g/CTX3 (,yy'<$))#&188$ -  r%__name__ __module__ __qualname__rr2rr< __classcell__r#s@r$r r sEJ]] r%r c<eZdZejffd ZdZxZS)FbgemmFp8Llama4TextExpertscnt||j|_|j|_|j |_|j|_t|j|_ tjjtj|j|j d|j zftj|_tjjtj|jd|j dzftj |_tjjtj|j|j |j ftj|_tjjtj|j|j dftj |_|j)dtjdgtj*dy)NrrrrFr)rrnum_local_experts num_expertsintermediate_size hidden_size expert_dimr hidden_actact_fnrr rrr gate_up_projr2gate_up_proj_scale down_projdown_proj_scalerr)r!configrr#s r$rz#FbgemmFp8Llama4TextExperts.__init__Hs !33!'!9!9!--00V../ !HH.. KK))4+;+;Q=PQY^YlYl m #((("4"4 KK))1doo.AB%-- X# ++ KK))4??D'*//A6AAC# >M+KNU* +V"nn]%9%9: D$4$455r%r=rBs@r$rDrDGs%*]]f0:6r%rDc ddl}|g}|jD]/\} } |j| t| tj r| |vrdj |tfd|Dstd5| j} | j} t| | | jdu|j| <d}|j| jddddtj |j"gtj$|j| _| j(j*d k(r| |vrdj |tfd |Dstd5d||j-d d d z<t/|j0|j| <dddtj |j"gtj$|j| _t3t5| j7dkDrt9| |||||||\} }|j;d2||fS#1swYsxYw#1swYxYw)z Private method that wraps the recursion for module replacement. Returns the converted model and a boolean that indicates if the conversion has been successful or not. rN.c3:K|]}|dzvxs|k(ywrhN.0keycurrent_key_name_strs r$ z2_replace_with_fbgemm_fp8_linear...Y\s22T?S8STT)include_buffersFrLlama4TextExpertsc3:K|]}|dzvxs|k(ywrjrkrls r$rpz2_replace_with_fbgemm_fp8_linear..rqrrz\d+*z.down_proj_scale)has_been_replaced pre_quantizedrQtp_planr')renamed_childrenappend isinstancer Linearjoinanyr rrr r _modulesrequires_grad_rtensoractivation_scale_ubrrr#r>subrD text_configlenlistchildren_replace_with_fbgemm_fp8_linearpop)modelmodules_to_not_convertcurrent_key_namequantization_configrwrxrQryrznamemodulerr_ros @r$rrsR ,,.4! f% vryy )t;Q/Q#&88,<#= `v(= ?"("4"4K#)#6#6L+:#$ 4/,ENN4( )-%NN4(77> ?7> # E    Lr%)NNNFFNN)NNNFNN) activationsrutilsrrrrrr accelerater fbgemm_gpu.experimental.gen_ai fbgemm_gpu get_loggerr>rr~r ModulerDrrrkr%r$rs!aa-)   H %$ehhoo$NS6S6p   J$^   2r%