L i6{ &dZddlmZddlmZddlmZmZmZer ddl Z ddl m Z erddl m Z dd l mZmZerdd lmZdd Zd ed eddfdZdded ededefdZGdde jj,Z ddZddZy)zZHIGGS through FLUTE (Flexible Lookup Table Engine for LUT-quantized LLMs) integration file)sqrt)Optional)is_flute_availableis_hadamard_availableis_torch_availableN)nn)prepare_data_transposed) TuneMetaDataqgemm_v2)hadamard_transformctdt|jzDcgc]}d}}|D]/}|j|}|dz |zdz|z}||z } | |d|zdz <1tjj ||d|Scc}w)Nrrconstant)rangelenshaper functionalpad) tensordimshad_block_sizevalue_pad_dimsdimsizenext_multiple_of_1024deltas e/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/integrations/higgs.py pad_to_blockr"'s S%6!678a8H8'||C "&(~!=!A^ S%,!&cA ' ==  VXz5 AA9s Bpnreturnz torch.Tensorc||fdk(rtjgddgddgddgdd gd d gd d gddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gdd?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgdd gd d gd d gddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gd<d=gd>d?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgdĐdgdƐdgdȐdgdʐdgd̐dgdΐdgdАdgdҐdgdԐdgd֐dgdؐdgdڐdgdܐdgdސdgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgS||fdk(rtjgddgddgddgd d gd d gd dgddgddgddgddgddgddgddgddgdd gd!d"gd#d$gd%d&gd'd(gd)d*gd+d,gd-d.gd/d0gd1d2gd3d4gd5d6gd7d8gd9d:gd;dgd?d@gdAdBgdCdDgdEdFgdGdHgdIdJgdKdLgdMdNgdOdPgdQdRgdSdTgdUdVgdWdXgdYdZgd[d\gd]d^gd_d`gdadbgdcddgdedfgdgdhgdidjgdkdlgdmdngdodpgdqdrgdsdtgdudvgdwdxgdydzgd{d|gd}d~gddgddgS||fdk(retjddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddggS||fdk(rEtjdgdgdgdgdgdgdgdgdgdgdgdgdgdgdgdggS||fdk(r-tjdgdgdgdgdgdgdgdggS||fdk(r!tjdgdgdgdggStd|d|(N)rgg f?gBg]?g_g\?g9g@g?gg@y?gЂ @g@?g?g`Qcg2g:7gdig c?g`ֿg`ؿg?g+@g?g`(f?g@8ig=g򩣿g g+gfg gLgx?g`*(g?g/տg@/aſgEggg ?gGg{?gh?g_a?g?g?g3@g#?gygѱܿg @g׿g d,gg&gٿg`{X?gYb?g@"gيg3gTg@@gkgg`{ڿg?g>g ?gg`5g ?g?g5?g?gIؿg A?g?gѿg9?g@@gzg j g g@g@N?g}g?gZg`@gcg ҿg@=Dgg?g`gb@g{@gh@gg?g]ƿgK?ggw?g`?g 4?g b?gg`ſgp?g@?g@Q=s?g@տgg@@g1u?g͔ƿg@?g䉫?g`@g_g@g,?g`dg`T?gL?gb?g3?gȃgxg v?g`?g섥gK3g/ۿg`Eg@#?g3?g g c?g1g?g`%?g%?g@w@gI?gkg g@J߿g?gǿgFֿg9?g`g8?g` @gxg@wg x?gg@ҿg͆gvg?gn@g`9ǿgY?g g@?g[m?g F?g@ ?g`Ьg`Ggc?g@gPug@?gg@Hg4g@gwg#?gd?gMg g ?g?g@vPgſgr?gLg ?g@?g:ڿg ?g?g` ?g@_g$Rg Z?g3g`g ?g@Ogew@g LgV?gߩҿgV?g?ggŠ?g`;?g Կg`@g-g?g`gZY?gg ?g8;?g?gaοg@Rh?g`F ?gA?g@?gg`g8?g`>?g?g@pgi?g ?g`пg@@gB?gbg@@gg`\gg)W?g`?g`g`@g 0$ӿg >gi?gcg?gH?g T?g ]g.ܿgyؿg*g`F?g@g`Is?g_;?gE?g g`޿g.g@\?g`[?g #?g ?g?ggN?gjg=?gR޿g耎?g`gLg@g=g@g@?gR@grgg?g`v_?g@T?g?g@?g8?g{}gi?gSg@k?gs?g?g?g _g@3@g?g@ ?g`Xrg[gg?g`?g@?g5g?)r@ggx?g gk?g gl?gy?g6g?gпg`5?gk@g$gz?g 1J@g ?go?g.ghgӿg_?g?g$?gg6@g@T?g-?g g@ggù?g.?g?gfg?g,'?g?g^@g@Lg>ۿg4bg?g Ng޿g ?gnZgtgi~@g Jf?g^Gg`mg`6@g/?g ?gOܿg?gggg^DgFg|g1g`8g@*?gֿg%&9:?A @$%78AA B%&89CA D%&89EA F%&9:GA H$%78IA J#$67KA L#$67MA N$%78OA P&';$%89?A @%&9:AA B%&9:CA D%&89EA F%&89GA H%&89IA J$%78KA L$%89MA N$%89OA P&';>bits group_size hadamard_sizec t|jdk(sJdt|d||zzj|j}t j j|ddz}|j}|j}|jdt j}t|dg|}|jd|z} |jd| |}t j j|d} t|d| dddddfz }t|dg|j|jd| d|}t j|jdd|t j } td|jdd D]U} t j d|| | d zz|j"z|z d jt j| | | d zW~| j| jdd} | t%|z } t'| t j(| j|||zd |j||||||d \}} } }}|| | |j+t j,|dS)Nrz%Only 2D weights are supported for now)axisT)copydtyperrdevicer7r))rF)num_bitsr1 vector_sizer7r9check_correctnessr7)weightscalestablestables2 tune_metadata)rrr.tor9r,linalgnormr7float32r"reshaper emptyuint8rargmaxTrr repeat_interleaveviewfloat16)r>r0r#r1r2grid grid_norm_2r9r7multr?codesir@rArBs r!quantize_with_higgsrTsY v||  !J#JJ ! !Q1t8_ - 0 0 ?D,,##Dr#2a7K ]]F LLE YYD Y 6F &1#} 5F <<?m +D ^^Bm 4F \\  vB  /F  *VAq$J-? ?F&1#q ) 1 1&,,q/4Q OF KK Sb)& LE 1fll1or *p!LLVAB-?)?$&&)H;)V\^_bbchcncnoa!b&p MM%++a." -E d=) )F5L   % 0-:2MSTU  62FFFG]<r?r@rAr\register_parameter workspacerB) selfrWrXr:r\r7r9r1r2 __class__s r!r^zHiggsLinear.__init__s^ &(  $*Z'1,,,9$$$ll KK0B6 DEKK`f g  ll KK{j'@AW] ^ns  ll5;;8 ~USY#Zjop || KKHak15U6 Rbg    U[[fTY%ZjopDI  # #FD 1+/r/c Nt|dg|j}|j tdt ||j |j |j|jjtj|j|j|jS)Nr4z,Workspace must be set before calling forwardr=)r2) r"r2rb Exceptionr r>r?r@rArMr,rFrB)rcxs r!forwardzHiggsLinear.forwards RD$"4"4 5 >> !JK K KK KK KK LL  EMM  2 NN   ,,  r/)TNNr') __name__ __module__ __qualname__intrr,r7r9r^rh __classcell__)rds@r!rVrVsu '+)-!'0'0'0 '0  $ '0&'0'0'0R r/rVc  ddlm}|jD]P\}}|g}|j|t |t j rdj| t fd|Ds|5|j}|j} t|| |jdu|j|j|j|j |<d}t#||j |_|j |j'ddddt)t+|j-dkDrt/||||| \} }|j1d S||fS#1swYYxYw) a~ Public method that recursively replaces the Linear layers of the given model with HIGGS quantized layers. `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the conversion has been successful or not. Args: model (`torch.nn.Module`): The model to convert, can be any `torch.nn.Module` instance. quantization_config (`HiggsConfig`): The quantization config object that contains the quantization parameters. current_key_name (`list`, *optional*): A list that contains the current key name. This is used for recursion and should not be passed by the user. has_been_replaced (`bool`, *optional*): A boolean that indicates if the conversion has been successful or not. This is used for recursion and should not be passed by the user. r)init_empty_weightsN.c3@K|]}j|ywN)endswith).0keycurrent_key_name_strs r! z,replace_with_higgs_linear..Ks\c+44S9\s)r\r:r2r1TF)quantization_configcurrent_key_namehas_been_replacedmodules_to_not_convertr4) acceleraterpnamed_childrenappend isinstancer LinearjoinanyrWrXrVr\r0r2r1_modulestype source_clsrequires_grad_rlistchildrenreplace_with_higgs_linearpop) modelryrzr{r|rpnamemodulerWrXrrws @r!rr)sl0.,,.$! f  #! % fbii (#&88,<#= \E[\\')?"("4"4K#)#6#6L+6#$#[[4!4!9!9&9&G&G#6#A#A ,ENN4()-%7;6lENN4(3NN4(77>#?$ tFOO%& '! +#<$7!1"3'= $ A  R I$!J # ##9??s :BE00E9 c Ltj5|jD]e\}}|g}|j|t |t r|j }|j}tjj|||jdu|jj|jj|j|<|tj||jj|jjj j#|j|j$_t)t+|j-dkDr t/||}|j1dh|cdddS#1swYyxYw)a Dequantizes the HiggsLinear layers in the given model by replacing them with standard torch.nn.Linear layers. Args: model (torch.nn.Module): The model containing HiggsLinear layers to be dequantized. current_key_name (list, optional): A list to keep track of the current module names during recursion. Defaults to None. Returns: torch.nn.Module: The model with HiggsLinear layers replaced by torch.nn.Linear layers. N)r\r9r7r8r)rzr4)r,no_gradr~rrrVrWrXr rr\r?r9r7reyerK contiguousr>datarrrdequantize_higgsr)rrzrrrWrXrs r!rrksT !002 %LD&'#%  # #D )&+.$00 %22 ',xx D0!==// ---- (7(t$4:IIk&--2F2FfmmNaNab4!JJLt$++04)*+a/$%5   $7 %8;s E;FF#)r)r+rr'ri)NNFNrs)__doc__mathrtypingrutilsrrrr,r flute.integrations.higgsr flute.tuner r fast_hadamard_transformr r"rmr.rTModulerVrrr/r!rs]@1:BE?cE?cE?nE?P 0c0#0s0ad0f9 %((//9 | ?$D'r/