L i.-,ddlmZddlmZddlZddlmZddlmZddl m Z m Z m Z m Z ddlmZddlmZdd lmZmZmZmZej.eZGd d ej4ZeGd d ZeGddZeGddZy))partial)OptionalN)Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput) AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc&eZdZdZdZfdZxZS)GradientCheckpointingLayeraBase class for layers with gradient checkpointing. This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`. Important: When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states) must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients. Example: ```python >>> # Correct - hidden_states passed as positional arg >>> out = self.layer(hidden_states, attention_mask=attention_mask) >>> # Incorrect - hidden_states passed as keyword arg >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask) ``` Fc|jr|jrd}|jj}d|d}d|vr|dr d|d<|dz }d}d|vr|d d|d<|dz }d}d |vr|d  d|d <|d z }d}d |vr|d  d|d <|d z }d}|r)|j d dz}t j ||jtt|(fi|g|St|(|i|S)NFz7Caching is incompatible with gradient checkpointing in z . Setting use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`, layer_pastz `layer_past=None`,,.) gradient_checkpointingtraining __class____name__rstriplogger warning_once_gradient_checkpointing_funcrsuper__call__)selfargskwargsdo_warn layer_namemessagers b/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/modeling_layers.pyr#z#GradientCheckpointingLayer.__call__<sM  & &4==G00JOPZ|[deGf$ )<&+{#00 6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00!..-3##G,4444WUW=M5XQW5X`[_` `w000)r __module__ __qualname____doc__rr# __classcell__rs@r*rr#s,#"1"1r+rceZdZdZfdZee d deejdeejdeejdee deejdeejd ee d eed efd ZxZS) GenericForSequenceClassificationmodelct|||j|_t||jt j |tj|j|jd|_ |jy)NF)bias) r"__init__ num_labelssetattrbase_model_prefixr from_confignnLinear hidden_sizescore post_initr$configrs r*r6z)GenericForSequenceClassification.__init__esd   ++d,,i.C.CF.KLYYv114??O  r+ input_idsattention_mask position_idsr inputs_embedslabelsrr&returnc t||j|f|||||d|} | j} |j| } ||jd} n|jd} |j j | dk7r td|j j d} n|||j j k7j| jtj}tj|jd| jtj}||zjd} n.d} tj|j j"d| tj| | j| f}d}||j%| |||j }t'||| j(| j*| j, S) NrCrDrrErrrz=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rK)logitsrF pooled_logitsrA)lossrMr hidden_states attentions)getattrr9last_hidden_stater>shaperA pad_token_id ValueErrortorKtorchint32arangeargmaxrr rr loss_functionr rrPrQ)r$rBrCrDrrErFrr&transformer_outputsrPrM batch_sizelast_non_pad_token non_pad_mask token_indicesrNrOs r*forwardz(GenericForSequenceClassification.forwardos8]wtTE[E[7\ 8 )%+' 8 8 ,== M*  "+J&,,Q/J ;; # # + a\] ] ;; # # +!#   "%)A)AAEEfmmUZU`U`aL!LL)!F!Fr!J !#    >>**+,ZZ  u||Jv}}MOaab   %%VFR_hlhshs%tD/ /??-;;*55   r+NNNNNNN)rr,r-r9r6rrrrX LongTensorTensorr FloatTensorboolr r r rbr/r0s@r*r2r2as151537+/59-1$(8 E,,-8 !.8 u//0 8 "% 8   1 12 8 ))*8 D>8 +,8  *8 8 r+r2c&eZdZdZfdZdZdZee dde e jde e jde e jde e d e e jd e e jd e e jd eed efdZxZS)GenericForQuestionAnsweringr3ct||t||jt j |t j|jd|_ |jy)N) r"r6r8r9r r:r;r<r= qa_outputsr?r@s r*r6z$GenericForQuestionAnswering.__init__sQ  d,,i.C.CF.KL))F$6$6: r+cBt||jjSNrRr9 embed_tokens)r$s r*get_input_embeddingsz0GenericForQuestionAnswering.get_input_embeddingsstT334AAAr+c:|t||j_yrnro)r$values r*set_input_embeddingsz0GenericForQuestionAnswering.set_input_embeddingss=Bd,,-:r+rBrCrDrrEstart_positions end_positionsr&rGc t||j|f||||d|} | j} |j| } | j dd\} } | j dj } | j dj } d}|||j| | ||fi|}t|| | | j| jS)N)rCrDrrErrJ)dim)rO start_logits end_logitsrPrQ) rRr9rSrlsplitsqueeze contiguousr\rrPrQ)r$rBrCrDrrErurvr&outputssequence_outputrMryrzrOs r*rbz#GenericForQuestionAnswering.forwards,Q749O9O+P , )%+' ,  , "331#)<<r<#: j#++B/::< ''+668   &=+D%4%%lJQ^ibhiD+%!!//))   r+rc)rr,r-r9r6rqrtrrrrXrdrerrfr r rrbr/r0s@r*ririsBC151537+/596:48% E,,-% !.% u//0 % "% %   1 12 % "%"2"23%   0 01% +,%  &% % r+riceZdZdZfdZee d deejdeejdeejdee deejdeejd ee d eed efd ZxZS)GenericForTokenClassificationr3ct|||j|_t||jt j |t|dd |j}nt|dd |j}nd}tj||_ tj|j|j|_|j!y)Nclassifier_dropouthidden_dropoutg?)r"r6r7r8r9r r:rRrrr;Dropoutdropoutr<r=r>r?)r$rArrs r*r6z&GenericForTokenClassification.__init__s   ++d,,i.C.CF.KL 6/ 6 B!'!:!:  V-t 4 @!'!6!6 !$ zz"45 YYv1163D3DE  r+rBrCrDrrErFrr&rGc ,t||j|f|||||d|} | j} |j| } |j | } d} ||j | ||j } t| | | j| jS)NrI)rOrMrPrQ) rRr9rSrr>r\rAr rPrQ) r$rBrCrDrrErFrr&r~rrMrOs r*rbz%GenericForTokenClassification.forwards,Q749O9O+P , )%+' , , "33,,7O,  %%ffdkkBD$!//))   r+rc)rr,r-r9r6rrrrXrdrerrfrgr r r rbr/r0s@r*rrs"151537+/59-1$(! E,,-! !.! u//0 ! "% !   1 12 ! ))*! D>! +,!  ! ! r+r) functoolsrtypingrrXtorch.nnr; cache_utilsrmodeling_outputsrrr r models.autor processing_utilsr utilsr rrr get_loggerrrModulerr2rirr+r*rs  #$PP   H %;1;1|G G G T9 9 9 x7 7 7 r+