L iddlZddlmZmZmZddlmZddlmZddl m Z erddl m Z dd l mZmZmZmZdd lmZerddlZej*eZGd d eZy) N) TYPE_CHECKINGOptionalUnion)version) HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging) QuantoConfigc"eZdZdZddgZdZdZdeffd ZdZ d Z d Z d!d Z d e eded e efdZddded efdZdeeeeeffd eeeeefffdZdddddeddfdZd"dZ d#dddee efdZdZed efdZd#d ZxZS)$QuantoHfQuantizerz* Quantizer for the quanto library quanto accelerateTFquantization_configc Ft||fi||jyN)super__init__ post_init)selfrkwargs __class__s n/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_quanto.pyrzQuantoHfQuantizer.__init__2s  ,77 c`|jj|js tdyy)z Safety checker NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r activations pre_quantized ValueErrorrs rrzQuantoHfQuantizer.post_init6s;  # # / / ;DDVDVO EW ;rcXts tdts tdy)NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r ImportErrorr )rargsrs rvalidate_environmentz&QuantoHfQuantizer.validate_environment@s5*,z '(r )rc<|ddi}tjd|S)NcpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r device_maps rupdate_device_mapz#QuantoHfQuantizer.update_device_mapJs+  eJ KK\  rreturncT|%tjdtj}|S)NzPYou did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.)r,r-torchfloat32)rdtypes r update_dtypezQuantoHfQuantizer.update_dtypeTs" = KKj kMME r missing_keysprefixc8trddlm}g}|jD]\\}}t |s|D]E}||vs ||d|vs|j dr#|j dr5|j |G^|D cgc] } | |vs|  c} Scc} w)Nr QModuleMixin.z.weightz.bias)r optimum.quantor: named_modules isinstanceendswithappend) rmodelr6r7r:not_missing_keysnamemodulemissingks rupdate_missing_keysz%QuantoHfQuantizer.update_missing_keysZs & ( 3!//1 9LD&&,/+9GDvhay4I,I ' 0 0 ; ' 0 0 9(//8 9 9(Ea14D+DEEEs  BBrAr param_namec |trddlm}t||\}}t |rd|vr |j Sy)Nrr9weightF)r r<r:r r>frozen)rrArHrr:rD tensor_names rparam_needs_quantizationz*QuantoHfQuantizer.param_needs_quantizationjs= & ( 325*E fl +K0G}}$ $r max_memoryc^|jDcic] \}}||dz }}}|Scc}}w)Ng?)items)rrNkeyvals radjust_max_memoryz#QuantoHfQuantizer.adjust_max_memoryvs66@6F6F6HI(#sc3:oI IJs) param_valuez torch.Tensor target_devicez torch.devicec ddlm}||||j|t||\}}|j d|j _y)Nr )_load_parameter_into_modelF)modeling_utilsrWtor freezerJ requires_grad) rrArTrHrUrrWrD_s rcreate_quantized_paramz(QuantoHfQuantizer.create_quantized_paramzsB @"5*knn]6ST( ;  &+ #rcPtjtjjdtjdkDrTddlm}t j|j|j|jd}||jj}|Std)Nrz0.27.0r) CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)rparse importlibmetadataaccelerate.utilsr_r2r`FP8INT4INT2rweightsr#)r target_dtyper_mappings radjust_target_dtypez%QuantoHfQuantizer.adjust_target_dtypes ==++33LA BW]]S[E\ \ 4 %//#((#(( G #4#;#;#C#CDL P rkeep_in_fp32_modulesc ddlm}|j||jj||_|||j|j\}}|j|j _y)Nr )replace_with_quanto_layers)modules_to_not_convertr) integrationsrqget_modules_to_not_convertrrrconfig)rrArorrqr\s r$_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loadingsf >&*&E&E 4++BBDX' #. $*E*E[_[s[s q,0+C+C (rc |Sr)rrArs r#_process_model_after_weight_loadingz5QuantoHfQuantizer._process_model_after_weight_loadings rcy)NTrxr$s r is_trainablezQuantoHfQuantizer.is_trainablesrcy)NFrx)rsafe_serializations ris_serializablez!QuantoHfQuantizer.is_serializablesr)r4 torch.dtyper0r)rlrr0rr)__name__ __module__ __qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationrrrr(r/r5liststrrGboolrMdictrintrSr]rnrrvrypropertyr{r~ __classcell__)rs@rrr)s@"<0'+$ L FtCyF#FRVWZR[F .? S _c DeCHo1E,F4PSUZ[^`c[cUdPdKe ,  ,$ , , & ,(UY D& D>FtCy>Q Ddrr)retypingrrr packagingrbaserquantizers_utilsr rXr utilsr r rrutils.quantization_configrr2 get_loggerrr,rrxrrrs[1120 5   H %I Ir