L i,ddlmZddlmZddlmZddlmZmZm Z ddl m Z ddl m Z erdd lmZerdd lZerdd lmZed Zee_e j*eZGd de Zy )) defaultdict) TYPE_CHECKING)prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging) HfQuantizer)get_module_from_name)PreTrainedModelN) HQQLinearcZtjd|j|jS)Nr)dtypedevice)torchempty compute_dtyperselfs k/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_hqq.pyweightr%s{{1D$6$6t{{KKc eZdZdZdZdZdZdgZfdZddde e d e d e e fd Z ddd e e d e e d e e fdZ ddde d e fdZddddde ddfdZdZ ddZddZddZed e fdZxZS)HqqHfQuantizerz HQQ quantizer base HF class. nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading(). FThqqc ts tdt| |fi|d|_d|_t ddjdhz |_|jdds|jddr td|j9d|vr |d|_n*tj|_tjd|jd }t|t rZd |j#vsd |j#vr td t%t'|j#d kD|_yy)NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbiasfrom_tf from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.rzOSetting dtype to torch.float32 as the default value since it was not specified. device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r )r ImportErrorsuper__init__rusing_multi_gpurstate_dict_keyshqq_keysget ValueErrorrfloat32loggerinfo isinstancedictvalueslenset)rquantization_configkwargsr! __class__s rr&zHqqHfQuantizer.__init__9s5!T  ,77 $!$-==?6(J ::i '6::k5+I;  :: & #G_ "]]  mnZZ - j$ ' ))++v9J9J9L/L h (+3z/@/@/B+C'Dq'H$ (rmodelr missing_keysprefixreturnc R|jr|Dcgc] }d|vs| c}S|Scc}w)Nr) pre_quantized)rr7r8r9r5keys rupdate_missing_keysz"HqqHfQuantizer.update_missing_keys[s1   #/ICHC4GCI I Js $$ expected_keys loaded_keysc|js|Sfdt|}|jD] \}}||_t}||t}|D]6} |jj dD]} | | vs|j | 8||z}tddtjddjdhz } t} |D](tfd|Ds| j *|| z}|D]_} | dz|vr|j | dzn%|j| D chc] } | d z| z c} | d z|vsL|j | d zat|Scc} w) Nc|jD]M\}}t|tjjr|j |j ||OyN)named_childrenr/rnnLinearaddname)r7layersrHmodule_find_hqq_quantizable_layerss rrKzIHqqHfQuantizer.update_expected_keys.._find_hqq_quantizable_layersksK % 4 4 6 = ffuxx8JJv{{+,VV< =r skip_modulesr"F linear_layer quant_configrrdel_origrc3&K|]}|v ywrC).0_moduler=s r z6HqqHfQuantizer.update_expected_keys..s@g7c>@z.weight.z.bias)r<r3 named_modulesrHconfigr4rGrrfloat16r(anyupdatelist)rr7r?r@new_keysrHrJ_valid_modules_skipped_modulesrT _skip_module _ref_keys_rm_keys_ref_keyrKr=s @@rupdate_expected_keysz#HqqHfQuantizer.update_expected_keysds!!  = }%"//1 LD&FK $UN;5% 2G % @ @ P 2 7*$((1 2 2 **--  / vh ' 5 "C@@@ S! " H& 0G"k1 Wy01) Th3!9 TU K/ Ww./  0H~ !Us9E8 param_namec ht||\}}t|tjjSrC)r r/rrErF)rr7rfr5rJ_s rparam_needs_quantizationz'HqqHfQuantizer.param_needs_quantizations)( ; &%((//22r param_valuez torch.Tensor target_devicez torch.devicec t||\}|jddd}t||\}} |jjd} |jjd} t fd| Dr2j ||j ||jidd y|jr?t|d stt|_ |j|j||i|j|tfd |jDrd vs j t#dd|j|d} | j | j Rt%| j t&j(r.t&j*j-| j | _|j.r|j1| } t3|| | |j|=yj ||idd j4j6j8dk7xr3j duxs#j j6j8dk7} | rdj;j<j?ddd}d| vr| }n || vr| |}t#|j|d } | j Rt%| j t&j(r.t&j*j-| j | _|j.r|j1| } t3|| | yy)NrWr rrOrLc3:K|]}|jvywrC)rH)rS skip_modulerJs rrUz8HqqHfQuantizer.create_quantized_param..sJk{fkk)Js)rrFT)strictassign hqq_paramsc3&K|]}|v ywrCrR)rSkrqs rrUz8HqqHfQuantizer.create_quantized_param..s:q1 ?:rVrrMmetaweight_quant_params)rOrrrP) r rsplitrYr4r[load_state_dicttorr<hasattrrr0rqr\allr)rrr/rTensorrE Parameterr'_patch_layer_for_multigpusetattrrrtypejoinrHsplit)rr7rjrfrkr5 tensor_name module_name parent_modulenoderOrL hqq_layermodule_is_ready module_tagmodule_quant_configrqrJs @@rcreate_quantized_paramz%HqqHfQuantizer.create_quantized_params35*E  ''Q/2 25+F t||77G ||77G  J\J J  " "knnMnTU^clp #     4."-d"3 OOK ( / /k0J K5J:DMM::*@TX^XcXcXk%!%!%"&**("  ))*5>>-*Y^^U\\2Z%*XX%7%7 %GIN'' $ > >y II tY7OOK0&   [9%PTU!--..33v= KK4  D6;;#5#5#:#:f#D  &++"3"3C"8"=>J$ 4&2#|+&2:&>#!0"jj$ I~~)j.V!&!3!3INN!C ## ::9E M4 3+ rc&dfd_S)Nctj|j|j|j j }|j ||j z }|SrC)rmatmulryr dequantizetr)rxouts rforward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu..forward_with_devicesL,,qttDKK0$//2C2E2E2GHCyy$tyy Jrc|SrCrR)rrrs rz:HqqHfQuantizer._patch_layer_for_multigpu..s&9)Q&Gr)forward)rrrs `@rr~z(HqqHfQuantizer._patch_layer_for_multigpus  H rc 2t||j}y)N)r4)rr4rr7r5s r$_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loadings'u$BZBZ[rc >d|_|j|_|SNT)is_hqq_quantizedis_serializableis_hqq_serializablers r#_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading s !%$($8$8$:! rcyrrR)rsafe_serializations rrzHqqHfQuantizer.is_serializablesrcyrrRrs r is_trainablezHqqHfQuantizer.is_trainablesr)r7r rC)__name__ __module__ __qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr&r]strr>reboolrirr~rrrpropertyr __classcell__)r6s@rrr.s %'+$  ID & 6:3i IL c 9&97;Cy9OSTWy9 c9v3.?3S3_c3 P4 P4$P4 P4 & P4d\ \ drr) collectionsrtypingr integrationsrutilsrrr baser quantizers_utilsr modeling_utilsr rhqq.core.quantizerrr get_loggerrr-rrRrrrsx$ 1AA20+ LLI   H %f[fr