L iHrddlmZmZmZer ddlZddlmZerddlmZddlZddl m Z eje Z gdZe dZd Zd Zej$d d d ej&dedej*fdZGddej.ZdZdZdZdZdZ ddZ ddZy))is_accelerate_availableis_torch_availableloggingN)nn)init_empty_weights)contextmanager)gg?g?g?g@g@g@g@ggggggggc#Ktrddl}t||jr |j}n"t|t r|j|}t |dd}|dk(r*|jj |5d dddy|dk(r6t|dr*|jj |5d dddydy#1swYIxYw#1swYxYww)Nrtypecudaxpu) rtorch isinstanceTensordevicestrgetattrr hasattrr )devrdev_types e/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/integrations/mxfp4.py on_devicer3s c5<< (**C S !%,,s#C3- v ""3'    u !6!!#&        s6BC*C 5C*>CC*CC*C'#C*c|jjj}||jtj tj d\}}||fS)N)axis)numerics_detailsmxfpdowncast_to_mxfp_torchtorbfloat16uint8)wtriton_kernels_hubrw_scales rquantize_to_mxfp4r%JsH/@@EE\\'U^^(tjj r |j}|j}|j tj dz }|jdd|jk(s$Jd|jddd|jtjt||j}|j^}}}|j||z} |j| |}|j| d}tj| |d z||j} td| |D]} t| |z| } || | } || | }| d zj tj }| d z j tj }| | | }|||ddddd f<|||ddddd f<tj"||| ~~~ ~~| jg|||d zj$g|||zd z} ~~~| j'dd j)S) zw Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward pass of GPT_OSS. rNzblocks.shape[:-1]=z does not match scales.shape=)r*rrr)out)mathis_cudarr is_availablerint32shaper+ FP4_VALUESrprodreshapeemptyrangeminlongldexpview transpose contiguous)blocksscalesr*r6r>lut prefix_shapeGB rows_totalr=r0r1blkexpidx_loidx_hisubs rconvert_moe_packed_tensorsr\ds( >>ejj557 YYu{{ #c )F <<  ,d1Ccr1B0DDbU[UaUaTc.dd , ,,zv}} EC ,,\1a<(1,J ^^J *F ^^J *F ++j!a%uV]] KCAz>2* n$j 1RmRm*,(uzz*"Rj6{Aqt!tG 6{Aqt!tG  C#& FCc*" 4+#++ .| .Q .A . 3 3 M\ M1q519 MC ==A  ) ) ++r&cVeZdZfdZdej dej fdZxZS)Mxfp4GptOssExpertsc rt||j|_|j|_|j |_t jtj|jd|jz|j dzdtjd|_ t jtj|jd|jz|j dztjd|_ t jtj|jd|jztjd|_t jtj|j|j |jdzdftjd|_t jtj|j|j |jdztjd|_t jtj|j|j tjd|_d|_t'|dd |_d|_d|_t'|dd |_y) Nr r)F requires_gradgZd;? swiglu_limitg@)super__init__num_local_experts num_expertsintermediate_size hidden_sizer Parameterrzerosr!gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalpharlimitgate_up_proj_precision_configdown_proj_precision_config)selfconfig __class__s rrfzMxfp4GptOssExperts.__init__s !33!'!9!9!--#%<< KK((!d.D.D*DdFVFVZ\F\^`hmhshs t$  $&<< KK((!d.D.D*DdFVFVZ\F\didodo p$  "$ KK((!d.D.D*DEMM Zjo" !# KK))4+;+;T=S=SWY=Y[]^fkfqfq r! !# KK(($*:*:DCEE)__name__ __module__ __qualname__rfrrr __classcell__)rzs@rr^r^s'":H#U\\#]b]i]i#r&r^cBddl}tjjtjjtjj tjj f\}}}}t|j5tjj}t|jjdd}d} |jd} |jd} | |z} || z} |dz| z}| |z}d}|||\}}tj |d}tj"|d\}}tj$|d|}|j'd}tj(|| | dz | |}|j+dj-tj.}d }tj0|| k||}tj2|d j-tj.}tj2|j-tj.}tj0||k|| }tj0| |k|| }tj0|| k(| |}||}tj0||| k(| |}||j|j }||j|j }||| |}|}ddd| fS#1swYxYw) Nr LOCAL_RANK0r:rctj| ddddd|f}|j}tj||d}||j fS)NrT)dimstabler)rargsortrItake_along_dimint)valsktk_indxtk_vals rtopkz routing_torch_dist..topksSmmTEq>q"1"uEGllnG))$Q?F7;;=( (r&r)binsmaxiT)r)src_indxdst_indx)osr#routing GatherIndx RoutingData ScatterIndxcompute_expt_data_torchrrr distributedget_world_sizerenvirongetrBsoftmaxsortgatherrEhistcrKrrAwherer)logits n_expts_actrrrrr world_sizerank replace_valuen_tokens n_expts_totn_local_expertslocal_expert_startlocal_expert_end n_gates_padr expt_scal expt_indx sort_indiceshistvar topk_indx gate_indxrr~r expt_data hit_expertss rrouting_torch_distrs ""--"".."".."":: EAJ [*A 6== !3"&&557 2::>>,45 <<?ll1o %3!O3 1H7,  ) $FK8 9MM)4 "'**YA"> <LLA|< %%b) {{9;K!OLM_`pqNN2&))%++6 KK ,> >YO MM)D9<z(should_convert_module..=s>dgC523_rxx3%J^7__s !^^M63K3Ka3OQSTF^^M2v7O7OST7TUF =&- 8E A"M=)446=)446 } % 1>  R(&*:*:2r*BDV2 . ,  > !).]FDVDVX^XpXpstXt4u)v &).]FD\D\^d^p^p4q)r & 23 f% & Q[Q]@^ _   $ $ A)E#  s 1M  Mc n|g}|jD]\}}|j|t||s|jd6|jj dk(r9|j s-t5t||j|<d}ddd|jj dk(r$|j sddl m }|t||_ tt|j!dkDrt#||||||\} }|jd||fS#1swYxYw)Nr: GptOssExpertsT GptOssMLPr) MethodType)has_been_replacedry)named_childrenappendrpoprzrrrr^_modulestypesrrrlenlistchildren_replace_with_mxfp4_linear) rmodules_to_not_convertrquantization_configrrynamerr_s rr#r#s1,,.! f%$%57MN   $     $ $ 7@S@^@^#% )'9&'At$$(! )    $ $ 39   E    Lr&)NNNFN)NNNN) utilsrrrrr acceleraterr contextlibr get_loggerrr.rCrr%r5r r*rrr\Moduler^rrrrrr#r0r&rr7sIH- %   H % (  , 0& 3, ;; 3,  3,  \\ 3,lD#D#RAlH%( -F@J   "$N   "r&