L i#yddlZddlZddlZddlZddlZddlZddlmcmcm Z ddl mZddl m Z ddlmZddlmZmZmZmZmZmZddlmZmZmZmZmZmZmZmZddl m!Z!m"Z"ddl#m$Z$dd l%m&Z&m'Z'm(Z(gd Z)eZ*ejVejXjVejZejXjZiejXjVejjVejXjZejjZid Z.d Z/ d!d Z0d"dZ1dZ2dZ3d#dZ4 d$dZ5dZ6dZ7ejpe& d%dZ9dZ:dZ;ejpe&d&dZ<ejpe&dejzddfdZ>ejpe&d&dZ?ejpe&d#dZ@ejpe& d'dZA d(dZB d#dZCd)d ZDy)*N) _FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set) DeQuantStub QuantWrapper)type_before_parametrizations)DEPRECATION_WARNINGget_qparam_dict)has_no_children_ignoring_parametrizations) get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamic prepare_qat quantize_qatconvert swap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classctS)z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICTd/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/ao/quantization/quantize.pyrrGs &&r)c|jt||}|j||}t|d|}tjj j j||t||}||_|jD]T\}}|r|dz|zn|} |3||jdgvr)t||jdgvrGt|||| Vy)aThis is a helper function for `propagate_qconfig_` Args: module: input module qconfig_dict: dictionary that maps from name of submodule to quantization configuration qconfig_parent: quantization config of parent module, we will fallback to this config when there is no specified config for current module prefix: corresponding prefix of the current module, used as key in qconfig_dict prepare_custom_config_dict: dictionary for custom handling of modules see docs for :func:`~torch.ao.quantization.prepare_fx` Return: None, module is modified inplace with qconfig attached qconfig.Nnon_traceable_module_namenon_traceable_module_class) getrgetattrtorchao quantizationr,_assert_valid_qconfigrnamed_childrentype_propagate_qconfig_helper) module qconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechild module_prefixs r*r8r8Ls2"%%$V,nN"%%fn=NVY?N HH!!77O >~v V.FN,,.  e/5 t+4 % - .223NPRS SE{)--.JBOP &|%>   r)c0|i}|i}t|||y)aPropagate qconfig through the module hierarchy and assign `qconfig` attribute on each leaf module Args: module: input module qconfig_dict: dictionary that maps from name or type of submodule to quantization configuration, qconfig applies to all submodules of a given module unless qconfig for the submodules are specified (when the submodule already has qconfig attribute) prepare_custom_config_dict: dictionary for custom handling of modules see docs for :func:`~torch.ao.quantization.prepare_fx` Return: None, module is modified inplace with qconfig attached N)r=)r8)r9r:r=s r*rr}s+  !)%'" 9Sr)c$|j|S)z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs r*_observer_forward_hookrJs  ' ' //r)c*|j|dS)z2Forward pre hook that calls observer on the outputrrE)rGrHs r*_observer_forward_pre_hookrLs  ' 'a 11r)Fct|dsJd|r|jtdy|jtdy)NrFzGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrLregister_forward_hookrJ)r9pre_hooks r*&_register_activation_post_process_hookrSsJ 64 5Q 5(()CT(R$$%;T$Jr)c | t}|i}Gt|}t|dks Jd|t|dkDrtt |ndd d d d  fd }|j D]t\}}t |tjfvr$tt |tjtjfrB |s`t|dsJd t |d  |j|_t!|t"r |s|||t ||vr |s||t%|rt'|} |||  |rbt ||vrU|t |} | j)|} t+||| t| t-t/r\|| ft1||||wt3|r9t!|t4jj6st ||vr||t|d r.get_activation_post_processs=(/    )+    MM& !r)c:t|dxr|jduS)Nr,rOr,)ms r*needs_observationz)_add_observer_..needs_observationsq)$>$)>>r)c|rVt|tsE|jd|j|t |t |jyyy)zmAdds an activation post process module and register a pre or post hook that calls the module rFrRN) isinstancer add_moduler,rSr)r^rZrYr[r_s r*insert_activation_post_processz6_add_observer_..insert_activation_post_processs[ Q  1k(B LL)+IIv'?  35aii@ )C r)rFzfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrV)r_get_unique_devices_lennextiterr6rnnDropout issubclassnnqFloatFunctional QFunctionalrOr,rFrbrr r from_floatsetattrtupler_add_observer_rr2 Sequential)r9qconfig_propagation_listnon_leaf_module_listrYcustom_module_class_mappingdevicesrdr@rArZobserved_classobserved_childr[r_s ` @@r*rsrssz, '#G#I "*&(#~&v.7|q  `ah`i j  ),G q(8d7m$d?&,,./ e ' .2::, >   ( /#2E2Es1W !'u&?@'(DU(K'LLyz@1LMM61-| , '.u5 ,,U37KK '.u5 *5 1'DU'K $ *52J K e $,U37RR8,U3N,66u=N FD. 1neO4E.FG.~> ($+  S/h 2&96588#6#67 ( 04L L&v. +,6588#6#67 ( 04L L&v. M8 -r)c |jDchc](}|jjdk7s|j*c}|jDchc](}|jjdk7s|j*c}zScc}wcc}w)Nmeta) parametersrYr7buffers)r9ps r*rfrf3sn$//1 MQXX]]f5LAHH M ..*Qahhmmv.EQ  MQsBBB 3B ct|r#t|dr|jr t|S|j D]\}}t ||j |<|S)a{Wrap the leaf child module in QuantWrapper if it has a valid qconfig Note that this function will modify the children of module inplace and it can return a new module which wraps the input module as well. Args: module: input module with qconfig attributes for all the leaf modules that we want to quantize Return: Either the inplace modified module with submodules wrapped in `QuantWrapper` based on qconfig or a new `QuantWrapper` module which wraps the input module, the latter case only happens when the input module is a leaf module and we want to quantize it. r,)rrOr,rr6r_modules)r9r@rAs r*rr9s\ 2&9 FI & NNF##,,.9 e 1% 89 Mr)cltjjd| t}|j di}|st j |}|}| t}t|dtd|jDstjdt|||||S)afPrepares a copy of the model for quantization calibration or quantization-aware training. Quantization configuration should be assigned preemptively to individual submodules in `.qconfig` attribute. The model will be attached with observer or fake quant modules, and qconfig will be propagated. Args: `model`: input model to be modified in-place `inplace`: carry out model transformations in-place, the original module is mutated `allow_list`: list of quantizable modules `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer `prepare_custom_config_dict`: customization configuration dictionary for prepare function .. code-block:: python # Example of prepare_custom_config_dict: prepare_custom_config_dict = { # user will manually define the corresponding observed # module class which has a from_float class method that converts # float custom module to observed custom module "float_to_observed_custom_module_class": {CustomModule: ObservedCustomModule} } z!quantization_api.quantize.prepareNr$r:c3PK|]}t|dxr |j yw)r,Nr]).0r^s r* zprepare..s#Lqwq)$22Ls$&zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)rw)r2_C_log_api_usage_oncerr0copydeepcopyrranymoduleswarningswarnrs)modelinplace allow_listobserver_non_leaf_module_listr=rwrus r*rrTsD HH  !DE!)%C%E""<"@"@/#  e$ *#G#I u40 LEMMOL L  K   %$?  Lr)ctdr!tjr tddfd }|d|dy)NrFFc|r jn j}|rtnt}t }|j D]\}}||us |j ||D]}|j|yrV)_forward_pre_hooks_forward_hooksrLrJsetitemsaddpop)rRhook_map observer_hookhandle_ids_to_remove handle_idhook_fnr9s r* remove_hooksz5_remove_activation_post_process..remove_hookss~086,,f>S>S*2 &8N  #u"*.."2 4 Iw-'$((3 4. $I LL # $r)TraF)rOrrFdelattr)r9rs` r*_remove_activation_post_processrsEv016Q&&7 12 $$% r)cv|jD] }t|t|dr|`t |y)zClean up the qconfig left in the module so that new qconfig can be propagated. Args: module: module to be cleaned up r,N)children_remove_qconfigrOr,r)r9rAs r*rrs;"vy! N#F+r)ctjjd| t}|st j |}|j t|d||g|t||d|S)aQuantize the input float model with post training static quantization. First it will prepare the model for calibration, then it calls `run_fn` which will run the calibration step, after that we will convert the model to a quantized model. Args: model: input float model run_fn: a calibration function for calibrating the prepared model run_args: positional arguments for `run_fn` inplace: carry out model transformations in-place, the original module is mutated mapping: correspondence between original module types and quantized counterparts Return: Quantized model. z"quantization_api.quantize.quantizeTr) r2rrrrrevalrr")rrun_fnrun_argsmappingrs r*rrsf$ HH  !EF:<  e$ JJL E4  58 E7D) Lr)c ptjjd||tjk(r|tj t tjt tjt tjt tjt tjt i}n|tjk(r|tj ttjttjttjttjttjti}n(|tjk(r+tjt tj"t i}n|tj$k(rtjt&i}nt)d|dt+|t,r|tjurt }n`|tjurt}nG|tjurt }n.|tj$urt&}nt/dt1|t3t5|t7j8|}| t;}|st=j>|}|jAtC||tE||d|S)avConverts a float model to dynamic (i.e. weights-only) quantized model. Replaces specified modules with dynamic weight-only quantized versions and output the quantized model. For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization by default is performed for layers with large weights size - i.e. Linear and RNN variants. Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`. If `qconfig` is provided, the `dtype` argument is ignored. Args: model: input model qconfig_spec: Either: - A dictionary that maps from name or type of submodule to quantization configuration, qconfig applies to all submodules of a given module unless qconfig for the submodules are specified (when the submodule already has qconfig attribute). Entries in the dictionary need to be QConfig instances. - A set of types and/or submodule names to apply dynamic quantization to, in which case the `dtype` argument is used to specify the bit-width inplace: carry out model transformations in-place, the original module is mutated mapping: maps type of a submodule to a type of corresponding dynamically quantized version with which the submodule needs to be replaced z*quantization_api.quantize.quantize_dynamicz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr)#r2rrqint8rjLinearrLSTMGRULSTMCellRNNCellGRUCellfloat16rquint8 EmbeddingBagr Embeddingquint4x2r ValueErrorrbr RuntimeErrorstrdictzip itertoolsrepeatr rrrrr")r qconfig_specdtyperrdefault_qconfigs r*rrs@ HH  !MN EKK  20/ 4 3 3 Lemm # 20/ 4 3 3 Lell "!B ?Lenn $!GLGwNkl  L# & EKK 5O emm #5O ell "?O enn $DO@#e* C i.>.>.OPQ ;=  e$ JJLul+ E7D) Lr)c2tjjd|jsJd| t }|st j |}t|dt||ddt|t|jd|S) a Prepares a copy of the model for quantization calibration or quantization-aware training and converts it to quantized version. Quantization configuration should be assigned preemptively to individual submodules in `.qconfig` attribute. Args: model: input model to be modified in-place mapping: dictionary that maps float modules to quantized modules to be replaced. inplace: carry out model transformations in-place, the original module is mutated z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNrTF)rrremove_qconfig)rr) r2rrtrainingrrrrr"rrvalues)rrrs r*r r :s}  HH  !HI >>NNN>13  e$u40 E7DG EW^^5E1FPTU Lr)ctjjd|stj|}|j t |d||g|t|d|S)agDo quantization aware training and output a quantized model Args: model: input model run_fn: a function for evaluating the prepared model, can be a function that simply runs the prepared model or a training loop run_args: positional arguments for `run_fn` Return: Quantized model. z&quantization_api.quantize.quantize_qatTr)r2rrrrtrainr r")rrrrs r*r!r!XsW HH  !IJ  e$ KKMt$ 58 E4 Lr)ctjjd|stj|}t ||d||||r t ||S)aConverts submodules in input module to a different module according to `mapping` by calling `from_float` method on the target module class. And remove qconfig at the end if remove_qconfig is set to True. Args: `module`: prepared and calibrated module `mapping`: a dictionary that maps from source module type to target module type, can be overwritten to allow swapping user defined Modules `inplace`: carry out model transformations in-place, the original module is mutated `convert_custom_config_dict`: custom configuration dictionary for convert function `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant .. code-block:: python # Example of convert_custom_config_dict: convert_custom_config_dict = { # user will manually define the corresponding quantized # module class which has a from_observed class method that converts # observed custom module to quantized custom module "observed_to_quantized_custom_module_class": { ObservedCustomModule: QuantizedCustomModule } } z!quantization_api.quantize.convertT)r is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r2rrrr_convertr)r9rrrrrrs r*r"r"psUJ HH  !DE v& !#=#=  Mr)c ||r tn t}| t}|jdi}|st j |}i}|j D]D\}} t| tst| |vrt| |d|||t| |||||<F|jD]\} } | |j| <|S)aoConverts submodules in input module to a different module according to `mapping` by calling `from_float` method on the target module class Args: module: input module mapping: a dictionary that maps from source module type to target module type, can be overwritten to allow swapping user defined Modules inplace: carry out model transformations in-place, the original module is mutated is_reference: a flag to enable quantized reference module use_precomputed_fake_quant: a flag to enable use of precomputed fake quant r%Tr)rrrr0rrr6rbrrrr#rr) r9rrrrrrwreassignr@modkeyvalues r*rrs, ? @9;  ")%C%E""<"@"@3R# v&H**, c3 -,S19TT *+E  % 57Q  &nn&% U$% Mr)c.|}t|dr|jd}t||vr |t|j|}d}nt||vr|t|}t|drd|jrX|jJ|jj }||j t |}|j||}nRtj|j} d| jvr|j||}n|j|}d}|r|jjD]} |j| |jjD]} | tus |j!| t#|} t%| dks/t%| d k(rt'j(d | vs Jd | t%| d kDrt+t-| nd} | r|j/| |S) a Swaps the module if it has a quantized counterpart and it has an `observer` attached. Args: mod: input module mapping: a dictionary that maps from nn module to nnq module Return: The corresponding quantized module of `mod` r,NFT _IS_REFERENCErrrr|zOswap_module only works with cpu or single-device CUDA modules, but got devices r)rOr,r from_observedrweightrrpinspect signaturer}rrrPrrJrQrfrgr2rYrhrirX)rrrwrnew_modswappedqmodweight_post_processweight_qparamssig pre_hook_fnrrxrYs r*r#r#sGsI3;;#: ' ,0K K1,S1mC  G )# .' 97<=Dt_-$2D2D{{...&)kk&8&8&:##CJJ/!01D!E//#~>''8/3>>A"oo8R.G#ooc2GG "55<<> ? 11+> ?--446 ;"8811': ; +3/Gw<1$G !ell6&:g&E bbiajk  -0L1,.get_prefix*s2v76C<7r)rFN)rOrFr6_get_observer_dict)r target_dictr<rr@rArBs r*rr!so8s-.  ' ' Jv&)BBC))+> e5; 6*T1 5+}=>r))NrN)NNr)NNNN)FNNN)NF)NFTFNF)NFFNF)r)Errrtyping_extensionsrr2torch.ao.nn.quantizedr3rj quantizedrmtorch.nntorch.ao.nn.intrinsicrtorch.ao.quantization.observerrtorch.ao.quantization.qconfigrrrrr r +torch.ao.quantization.quantization_mappingsr r r rrrrrtorch.ao.quantization.stubsrrtorch.nn.utils.parametrizerutilsrrr__all__is_activation_post_processr quantizableMultiheadAttentionr'rr8rrJrLrSrsrfr deprecatedrrrrrrr r!r"rr#rr(r)r*rsT  ##.F   BC 9 $$ r~~@@. R\\.. ))2<<+J+J2 ' # .b20 2 K"  $ F/R 612 "&# ?3?D!4 , 123:12EKKuW3Wt123:123.12  #$131l  #$ ;~KP;|>r)