"""TF general model utils."""

from __future__ import annotations

import functools
import gc
import inspect
import json
import os
import pickle
import re
import warnings
from collections.abc import Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Union

import h5py
import numpy as np
import tensorflow as tf
from packaging.version import parse

from . import DataCollatorWithPadding, DefaultDataCollator
from .activations_tf import get_tf_activation
from .configuration_utils import PretrainedConfig
from .dynamic_module_utils import custom_object_save
from .generation import GenerationConfig, TFGenerationMixin
from .tf_utils import (
    convert_batch_encoding,
    expand_1d,
    load_attributes_from_hdf5_group,
    save_attributes_to_hdf5_group,
    shape_list,
)
from .utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    TF2_WEIGHTS_INDEX_NAME,
    TF2_WEIGHTS_NAME,
    TF_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    ModelOutput,
    PushToHubMixin,
    cached_file,
    download_url,
    find_labels,
    has_file,
    is_offline_mode,
    is_remote_url,
    is_safetensors_available,
    is_tf_symbolic_tensor,
    logging,
    requires_backends,
    working_or_temp_dir,
)
from .utils.hub import convert_file_size_to_int, get_checkpoint_shard_files


if is_safetensors_available():
    from safetensors import safe_open
    from safetensors.tensorflow import save_file as safe_save_file

if TYPE_CHECKING:
    from . import PreTrainedTokenizerBase

logger = logging.get_logger(__name__)

if "TF_USE_LEGACY_KERAS" not in os.environ:
    os.environ["TF_USE_LEGACY_KERAS"] = "1"
elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
    logger.warning(
        "Transformers is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
        "This may result in unexpected behaviour or errors if Keras 3 objects are passed to Transformers models."
    )

try:
    import tf_keras as keras
    from tf_keras import backend as K
except (ModuleNotFoundError, ImportError):
    import keras
    from keras import backend as K

    if parse(keras.__version__).major > 2:
        raise ValueError(
            "Your currently installed version of Keras is Keras 3, but this is not yet supported in "
            "Transformers. Please install the backwards-compatible tf-keras package with "
            "`pip install tf-keras`."
        )

tf_logger = tf.get_logger()

TFModelInputType = Union[
    list[tf.Tensor],
    list[np.ndarray],
    dict[str, tf.Tensor],
    dict[str, np.ndarray],
    np.ndarray,
    tf.Tensor,
]


def dummy_loss(y_true, y_pred):
    if y_pred.shape.rank <= 1:
        return y_pred
    else:
        reduction_axes = list(range(1, y_pred.shape.rank))
        return tf.reduce_mean(y_pred, axis=reduction_axes)


class TFModelUtilsMixin:
    """
    A few utilities for `keras.Model`, to be used as a mixin.
    """

    def num_parameters(self, only_trainable: bool = False) -> int:
        """
        Get the number of (optionally, trainable) parameters in the model.

        Args:
            only_trainable (`bool`, *optional*, defaults to `False`):
                Whether or not to return only the number of trainable parameters

        Returns:
            `int`: The number of parameters.
        """
        if only_trainable:
            return int(sum(np.prod(w.shape.as_list()) for w in self.trainable_variables))
        else:
            return self.count_params()


def keras_serializable(cls):
    """
    Decorate a Keras Layer class to support Keras serialization.

    This is done by:

    1. Adding a `transformers_config` dict to the Keras config dictionary in `get_config` (called by Keras at
       serialization time).
    2. Wrapping `__init__` to accept that `transformers_config` dict (passed by Keras at deserialization time) and
       convert it to a config object for the actual layer initializer.
    3. Registering the class as a custom object in Keras (if the Tensorflow version supports this), so that it does
       not need to be supplied in `custom_objects` in the call to `keras.models.load_model`.

    Args:
        cls (a `keras.layers.Layer` subclass):
            Typically a `TF.MainLayer` class in this project, in general must accept a `config` argument to its
            initializer.

    Returns:
        The same class object, with modifications for Keras deserialization.
    """
    initializer = cls.__init__

    config_class = getattr(cls, "config_class", None)
    if config_class is None:
        raise AttributeError("Must set `config_class` to use @keras_serializable")

    @functools.wraps(initializer)
    def wrapped_init(self, *args, **kwargs):
        config = args[0] if args and isinstance(args[0], PretrainedConfig) else kwargs.pop("config", None)

        if isinstance(config, dict):
            config = config_class.from_dict(config)
            initializer(self, config, *args, **kwargs)
        elif isinstance(config, PretrainedConfig):
            if len(args) > 0:
                initializer(self, *args, **kwargs)
            else:
                initializer(self, config, *args, **kwargs)
        else:
            raise ValueError("Must pass either `config` (PretrainedConfig) or `config` (dict)")

        self._config = config
        self._kwargs = kwargs

    cls.__init__ = wrapped_init

    if not hasattr(cls, "get_config"):
        raise TypeError("Only use @keras_serializable on keras.layers.Layer subclasses")
    if hasattr(cls.get_config, "_is_default"):

        def get_config(self):
            cfg = super(cls, self).get_config()
            cfg["config"] = self._config.to_dict()
            cfg.update(self._kwargs)
            return cfg

        cls.get_config = get_config

    cls._keras_serializable = True
    if hasattr(keras.utils, "register_keras_serializable"):
        cls = keras.utils.register_keras_serializable()(cls)
    return cls
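
# A minimal usage sketch for `@keras_serializable` (illustrative only -- `MyConfig` and
# `TFMyMainLayer` are hypothetical names, not part of this module). A decorated layer can
# round-trip through `keras.models.load_model` without passing `custom_objects`:
#
#     @keras_serializable
#     class TFMyMainLayer(keras.layers.Layer):
#         config_class = MyConfig  # required by the decorator
#
#         def __init__(self, config: MyConfig, **kwargs):
#             super().__init__(**kwargs)
#             self.hidden_size = config.hidden_size
#
#     layer = TFMyMainLayer(MyConfig())                   # config object, at build time
#     layer = TFMyMainLayer(config={"hidden_size": 32})   # plain dict, at deserialization time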
N)rXrYrZr[r\rCrArrDsrCrceZdZdZdZy)TFNextSentencePredictionLossz Loss function suitable for next sentence prediction (NSP), that is, the task of guessing the next sentence. Any label of -100 will be ignored (along with the corresponding logits) in the loss computation. cftjjdtjjj}|j j rtjtj|dd}tjtj|d|}tjtj|d|}|||S|tjj||}tj|dk7|j}||z} | S)NTrrr)rr4)r>r?r)r}rrrrr`rr<rrrrrrr) rSrrrnext_sentence_active_lossnext_sentence_reduced_logitsnext_sentence_labelunmasked_ns_loss ns_loss_maskmasked_ns_losss rArz,TFNextSentencePredictionLoss.hf_compute_loss[s,,<<Y^YeYeYoYoYtYt<u ;; % %)+ RZZ5NPT(U %+-??2::fg;VXq+r ("$//"**VU2KMf"g .0LM M #"%%**V*DM=R=^F=$9djdvdvN=!f#)+#6#BF; PVXceiHj {# rCc~tjtjfd}|_|S)a Decorator that processes the inputs to a Keras layer, passing them to the layer as keyword arguments. This enables downstream use of the inputs by their variable name, even if they arrive packed as a dictionary in the first input (common case in Keras). Args: func (`callable`): The callable function of the TensorFlow model. Returns: A callable that wraps the original `func` with the behavior described above. c |jDcic] \}}|t jvs||"}}}|jDcic] \}}||vs ||}}}|jd|i|jtt j j dd|d|jjvrd}n |j}t |fi|} |fi|Scc}}wcc}}w)N kwargs_callr EncoderDecoder) itemsrc parametersrrzip__code__ co_varnames __class__rXr`input_processing) rSrirjkeyvalrfn_args_and_kwargsr`unpacked_inputsfuncoriginal_signatures rArun_call_with_unpacked_inputsz4unpack_inputs..run_call_with_unpacked_inputss17 qHC#TRdRoRoMpBpsCxq q7=||~`83T_I_c3h``!!=+">? !!$s4==+D+DQR+H$'O"PQ t~~66 6F[[F*4N;MND,O,,r`sC0C0 C6C6)inspect signatureryrz __signature__)rrrs` @rA unpack_inputsrsC!**40__T--*3E!/ ((rCc  ttj|j}t |j dd}|j ddt |j}|d}|j |d}i}tjttttt ttjf} d|dvr1tj dt"|dj d|d<d |dvr1tj d t"|dj d |d <d |dvr6d |vr2tj d t"|dj d |d <n"d |dvrd |vr|dj d |d <|r|j di|d<nKt%|ddkDr)t'dt |djd|j d|j)D]K\} } t+| | stj,| s| | || </t'dt/| d| d| dt+|tt frt1|D]x\} } t3| r1| j4j7dd}||vr| ||<9| ||| <Bt+| | s| | ||| <Yt'dt/| d| d|| dn't+|t8rd|vr.tj dt"|j d|d<d |vr.tj d t"|j d |d <t|j)D][\} } t+| | s| | || <| |vr!d|vrt:j=d| d|d?t'dt/| d| d| dn;tj,|s||||<nt'dt/|d| d|d|D]F}|t |jvs|dk7s%|j |||j>||<Hd|vrH|d8t3|dr*|dj4j7dd}|d||<n|d|d<|d=d|vr|d=i}|j)D]\}}t+|tjrE|j@tjBk(r(tjD|tjF||<et+|tjr@|j@tjBk(r#|jItjF||<|||<|}~|C|j)D cic] \} } | dvr| | }} } |jKtMdd|i||Scc} } w)a Process the input of each TensorFlow model including the booleans. In case of a list of symbolic inputs, each input has to be named accordingly to the parameters name, i.e. `input_ids = keras.Input(shape=(128,), dtype='int32', name="input_ids")` otherwise the order of the tensors will not be guaranteed during the training. Args: func (`callable`): The callable function of the TensorFlow model. config ([`PretrainedConfig`]): The config of the running model. **kwargs: The inputs of the model. Returns: Two lists, one for the missing layers, and another one for the unexpected layers. rjNrSrinputsrzeThe `inputs` argument is deprecated and will be removed in a future version, use `input_ids` instead. 

def input_processing(func, config, **kwargs):
    """
    Process the input of each TensorFlow model including the booleans. In case of a list of symbolic inputs, each
    input has to be named accordingly to the parameters name, i.e. `input_ids = keras.Input(shape=(128,),
    dtype='int32', name="input_ids")` otherwise the order of the tensors will not be guaranteed during the training.

    Args:
        func (`callable`):
            The callable function of the TensorFlow model.
        config ([`PretrainedConfig`]):
            The config of the running model.
        **kwargs:
            The inputs of the model.

    Returns:
        A dictionary of the processed (and type-cast) inputs, keyed by parameter name.
    """
    signature = dict(inspect.signature(func).parameters)
    has_kwargs = bool(signature.pop("kwargs", None))
    signature.pop("self", None)
    parameter_names = list(signature.keys())
    main_input_name = parameter_names[0]
    main_input = kwargs.pop(main_input_name, None)
    output = {}
    allowed_types = (tf.Tensor, bool, int, ModelOutput, tuple, list, dict, np.ndarray)

    if "inputs" in kwargs["kwargs_call"]:
        warnings.warn(
            "The `inputs` argument is deprecated and will be removed in a future version, use `input_ids` instead.",
            FutureWarning,
        )
        output["input_ids"] = kwargs["kwargs_call"].pop("inputs")

    if "decoder_cached_states" in kwargs["kwargs_call"]:
        warnings.warn(
            "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use"
            " `past_key_values` instead.",
            FutureWarning,
        )
        output["past_key_values"] = kwargs["kwargs_call"].pop("decoder_cached_states")

    if "past" in kwargs["kwargs_call"] and "past_key_values" in parameter_names:
        warnings.warn(
            "The `past` argument is deprecated and will be removed in a future version, use `past_key_values`"
            " instead.",
            FutureWarning,
        )
        kwargs["past_key_values"] = kwargs["kwargs_call"].pop("past")
    elif "past_key_values" in kwargs["kwargs_call"] and "past" in parameter_names:
        kwargs["past"] = kwargs["kwargs_call"].pop("past_key_values")

    if has_kwargs:
        output["kwargs"] = kwargs.pop("kwargs_call", {})
    else:
        if len(kwargs["kwargs_call"]) > 0:
            raise ValueError(
                "The following keyword arguments are not supported by this model:"
                f" {list(kwargs['kwargs_call'].keys())}."
            )
        kwargs.pop("kwargs_call")

    for k, v in kwargs.items():
        if isinstance(v, allowed_types) or tf.is_tensor(v) or v is None:
            output[k] = v
        else:
            raise ValueError(f"Data of type {type(v)} is not allowed only {allowed_types} is accepted for {k}.")

    if isinstance(main_input, (tuple, list)):
        for i, input in enumerate(main_input):
            # EagerTensors don't allow to use the .name property so we check for a real Tensor
            if is_tf_symbolic_tensor(input):
                # Tensor names always follow the pattern `name:id`, so we check only the `name` part
                tensor_name = input.name.split(":")[0]

                if tensor_name in parameter_names:
                    output[tensor_name] = input
                else:
                    output[parameter_names[i]] = input
            elif isinstance(input, allowed_types) or input is None:
                output[parameter_names[i]] = input
            else:
                raise ValueError(
                    f"Data of type {type(input)} is not allowed only {allowed_types} is accepted for"
                    f" {parameter_names[i]}."
                )
    elif isinstance(main_input, Mapping):
        if "inputs" in main_input:
            warnings.warn(
                "The `inputs` argument is deprecated and will be removed in a future version, use `input_ids`"
                " instead.",
                FutureWarning,
            )
            output["input_ids"] = main_input.pop("inputs")

        if "decoder_cached_states" in main_input:
            warnings.warn(
                "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use"
                " `past_key_values` instead.",
                FutureWarning,
            )
            output["past_key_values"] = main_input.pop("decoder_cached_states")

        for k, v in dict(main_input).items():
            if isinstance(v, allowed_types) or v is None:
                output[k] = v
            elif k not in parameter_names and "args" not in parameter_names:
                logger.warning(
                    f"The parameter {k} does not belong to the parameter list {parameter_names} and will be ignored."
                )
                continue
            else:
                raise ValueError(f"Data of type {type(v)} is not allowed only {allowed_types} is accepted for {k}.")
    else:
        if tf.is_tensor(main_input) or main_input is None:
            output[main_input_name] = main_input
        else:
            raise ValueError(
                f"Data of type {type(main_input)} is not allowed only {allowed_types} is accepted for"
                f" {main_input_name}."
            )

    # Populates any unspecified argument with their default value, according to the signature.
    for name in parameter_names:
        if name not in list(output.keys()) and name != "args":
            output[name] = kwargs.pop(name, signature[name].default)

    # When creating a SavedModel, TF calls the method with LayerCall.__call__(args, **kwargs),
    # so we have to handle the `args` entry specially to respect the proper output.
    if "args" in output:
        if output["args"] is not None and is_tf_symbolic_tensor(output["args"]):
            tensor_name = output["args"].name.split(":")[0]
            output[tensor_name] = output["args"]
        else:
            # `args` in this case is always the first parameter, then `input_ids`
            output["input_ids"] = output["args"]

        del output["args"]

    if "kwargs" in output:
        del output["kwargs"]

    cast_output = {}
    for key, val in output.items():
        if isinstance(val, tf.Tensor) and val.dtype == tf.int64:
            cast_output[key] = tf.cast(val, tf.int32)
        elif isinstance(val, np.ndarray) and val.dtype == np.int64:
            cast_output[key] = val.astype(np.int32)
        else:
            cast_output[key] = val

    output = cast_output
    del cast_output

    if config is not None:
        boolean_dict = {
            k: v
            for k, v in output.items()
            if k in ["return_dict", "output_attentions", "output_hidden_states", "use_cache"]
        }
        output.update(booleans_processing(config=config, **boolean_dict))

    return output


def strip_model_name_and_prefix(name, _prefix=None):
    if _prefix is not None and name.startswith(_prefix):
        name = name[len(_prefix) :]
        if name.startswith("/"):
            name = name[1:]
    if "model." not in name and len(name.split("/")) > 1:
        name = "/".join(name.split("/")[1:])
    return name


def tf_shard_checkpoint(weights, max_shard_size="10GB", weights_name: str = TF2_WEIGHTS_NAME):
    """
    Splits a model state dictionary in sub-checkpoints so that the final size of each sub-checkpoint does not exceed a
    given size.

    The sub-checkpoints are determined by iterating through the `state_dict` in the order of its keys, so there is no
    optimization made to make each sub-checkpoint as close as possible to the maximum size passed. For example, if the
    limit is 10GB and we have weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB],
    [6+2+2GB] and not [6+2+2GB], [6+2GB], [6GB].

    If one of the model's weights is bigger than `max_shard_size`, it will end up in its own sub-checkpoint which will
    have a size greater than `max_shard_size`.

    Args:
        weights (`dict[str, tf.ResourceVariable]`):
            The list of `tf.ResourceVariable` of a model to save.
        max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
            The maximum size of each sub-checkpoint. If expressed as a string, needs to be digits followed by a unit
            (like `"5MB"`).
    """
    max_shard_size = convert_file_size_to_int(max_shard_size)

    sharded_state_dicts = []
    current_block = []
    current_block_size = 0
    total_size = 0

    for item in weights:
        weight_size = item.numpy().size * item.dtype.size

        # If this weight is going to tip up over the maximal size, we split.
        if current_block_size + weight_size > max_shard_size:
            sharded_state_dicts.append(current_block)
            current_block = []
            current_block_size = 0

        current_block.append(item)
        current_block_size += weight_size
        total_size += weight_size

    # Add the last block
    sharded_state_dicts.append(current_block)

    # If we only have one shard, we return it
    if len(sharded_state_dicts) == 1:
        return {weights_name: sharded_state_dicts[0]}, None

    # Otherwise, let's build the index
    weight_map = {}
    shards = {}
    for idx, shard in enumerate(sharded_state_dicts):
        shard_file = weights_name.replace(".h5", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.h5")
        shard_file = shard_file.replace(
            ".safetensors", f"-{idx + 1:05d}-of-{len(sharded_state_dicts):05d}.safetensors"
        )
        shards[shard_file] = shard
        for weight in shard:
            weight_name = weight.name
            weight_map[weight_name] = shard_file

    # Add the metadata
    metadata = {"total_size": total_size}
    index = {"metadata": metadata, "weight_map": weight_map}
    return shards, index
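
# Sharding sketch, following the docstring example above: with `max_shard_size="10GB"`
# and weights of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB], the greedy loop yields shards of
# [6GB], [6+2GB] and [6+2+2GB], plus an index mapping each weight name to its shard file:
#
#     shards, index = tf_shard_checkpoint(model.weights, max_shard_size="10GB")
#     if index is None:
#         ...  # single checkpoint, saved under `weights_name` directly
#     else:
#         ...  # one file per entry in `shards`; `index` is written to the *_index.json file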

def load_tf_sharded_weights(model, shard_files, ignore_mismatched_sizes=False, strict=False, _prefix=None):
    """
    This is the same as `load_tf_weights` but for a sharded checkpoint. Detect missing and unexpected layers and load
    the TF weights from the shard file accordingly to their names and shapes.

    This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being
    loaded in the model.

    Args:
        model (`keras.models.Model`): The model in which to load the checkpoint.
        shard_files (`list[str]`): A list containing the sharded checkpoint names.
        ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`):
            Whether or not to ignore the mismatch between the sizes.
        strict (`bool`, *optional*, defaults to `False`):
            Whether to strictly enforce that the keys in the model state dict match the keys in the sharded
            checkpoint.

    Returns:
        Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
        mismatched layers.
    """

    # Load the index
    unexpected_keys = set()
    saved_keys = set()
    mismatched_keys = set()

    # Since TF adds the name of the class to its weights, and uses the index and not the name of the layer to load
    # the weight, we have to get rid of the first prefix of the name of the layer.
    model_keys = set()
    model_layer_map = {}
    for i, k in enumerate(model.weights):
        layer_name = k.name
        if _prefix is not None and layer_name.startswith(_prefix):
            layer_name = layer_name[len(_prefix) :]
            layer_name = layer_name.lstrip("/")
        if not ("model." in layer_name or len(layer_name.split("/")) == 1):
            layer_name = "/".join(layer_name.split("/")[1:])
        model_keys.add(layer_name)
        model_layer_map[layer_name] = i

    for shard_file in shard_files:
        saved_weight_names_set, unexpected_keys_set, mismatched_keys_set = load_tf_shard(
            model,
            model_layer_map,
            shard_file,
            ignore_mismatched_sizes=ignore_mismatched_sizes,
            _prefix=_prefix,
        )
        saved_keys.update(saved_weight_names_set)
        unexpected_keys.update(unexpected_keys_set)
        mismatched_keys.update(mismatched_keys_set)
        gc.collect()

    missing_keys = model_keys - saved_keys
    if strict and (len(missing_keys) > 0 or len(unexpected_keys) > 0):
        error_message = f"Error(s) in loading state_dict for {model.__class__.__name__}"
        if len(missing_keys) > 0:
            str_missing_keys = ",".join([f'"{k}"' for k in missing_keys])
            error_message += f"\nMissing key(s): {str_missing_keys}."
        if len(unexpected_keys) > 0:
            str_unexpected_keys = ",".join([f'"{k}"' for k in unexpected_keys])
            error_message += f"\nUnexpected key(s): {str_unexpected_keys}."
        raise RuntimeError(error_message)

    return missing_keys, unexpected_keys, mismatched_keys


def load_tf_shard(model, model_layer_map, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
    """
    Loads a shard from a sharded checkpoint file. Can be either H5 or Safetensors. Handles missing keys and unexpected
    keys.

    Args:
        model (`keras.models.Model`): Model in which the weights are loaded
        model_layer_map (`dict`): A dictionary mapping the layer name to the index of the layer in the model.
        resolved_archive_file (`str`): Path to the checkpoint file from which the weights will be loaded
        ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`): Whether to ignore the mismatched keys

    Returns:
        Three lists, one for the layers that were found and successfully restored (from the shard file), one for the
        mismatched layers, and another one for the unexpected layers.
    """
    saved_weight_names_set = set()
    saved_weights = {}
    mismatched_keys = set()
    unexpected_keys = set()
    # Read the H5 file
    try:
        with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file:
            # Retrieve the name of each layer from the H5 file
            saved_h5_model_layers_name = set(
                load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names")
            )
            weight_value_tuples = []

            # Compute missing and unexpected sub layers
            # Store the weights in a list of tuples that looks like [(weight_object, value_of_weight),...]
            for layer_name in saved_h5_model_layers_name:
                h5_layer_object = sharded_checkpoint_file[layer_name]
                saved_weights[layer_name] = np.asarray(h5_layer_object)

                saved_weight_names_set.add(layer_name)

                if layer_name not in model_layer_map:
                    unexpected_keys.add(layer_name)
                else:
                    symbolic_weight = model.weights[model_layer_map[layer_name]]

                    saved_weight_value = saved_weights[layer_name]
                    # If the current weight is found
                    if saved_weight_value is not None:
                        # Check if the shape of the current weight and the one from the H5 file are different
                        if K.int_shape(symbolic_weight) != saved_weight_value.shape:
                            # If yes we reshape the weight from the H5 file accordingly to the current weight.
                            # If the two shapes are not compatible we raise an issue.
                            try:
                                array = np.reshape(saved_weight_value, K.int_shape(symbolic_weight))
                            except ValueError as e:
                                if ignore_mismatched_sizes:
                                    mismatched_keys.add(
                                        (layer_name, saved_weight_value.shape, K.int_shape(symbolic_weight))
                                    )
                                    continue
                                else:
                                    raise e
                        else:
                            array = saved_weight_value

                    # We create the tuple that will be loaded and add it to the final list
                    weight_value_tuples.append((symbolic_weight, array))

        K.batch_set_value(weight_value_tuples)

        return saved_weight_names_set, unexpected_keys, mismatched_keys

    except Exception as e:
        try:
            with open(resolved_archive_file) as f:
                if f.read().startswith("version"):
                    raise OSError(
                        "You seem to have cloned a repository without having git-lfs installed. Please install "
                        "git-lfs and run `git lfs install` followed by `git lfs pull` in the folder "
                        "you cloned."
                    )
                else:
                    raise ValueError(
                        f"Unable to locate the file {resolved_archive_file} which is necessary to load this"
                        " pretrained model. Make sure you have saved the model properly."
                    ) from e
        except (UnicodeDecodeError, ValueError):
            raise OSError(
                f"Unable to load weights from TF checkpoint file for '{resolved_archive_file}' "
                f"at '{resolved_archive_file}'. "
                "If you tried to load a TF model from a sharded checkpoint, you should try converting the model "
                "by loading it in pytorch and saving it locally. A conversion script should be released soon."
            )


def load_tf_sharded_weights_from_safetensors(
    model, shard_files, ignore_mismatched_sizes=False, strict=False, _prefix=None
):
    """
    This is the same as `load_tf_weights_from_safetensors` but for a sharded TF-format safetensors checkpoint.
    Detect missing and unexpected layers and load the TF weights from the shard file accordingly to their names and
    shapes.

    This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being
    loaded in the model.

    Args:
        model (`keras.models.Model`): The model in which to load the checkpoint.
        shard_files (`list[str]`): A list containing the sharded checkpoint names.
        ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`):
            Whether or not to ignore the mismatch between the sizes.
        strict (`bool`, *optional*, defaults to `False`):
            Whether to strictly enforce that the keys in the model state dict match the keys in the sharded
            checkpoint.

    Returns:
        Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
        mismatched layers.
    """

    # Load the index
    unexpected_keys = set()
    all_missing_keys = []
    mismatched_keys = set()

    for shard_file in shard_files:
        missing_layers, unexpected_layers, mismatched_layers = load_tf_weights_from_safetensors(
            model,
            shard_file,
            ignore_mismatched_sizes=ignore_mismatched_sizes,
            _prefix=_prefix,
        )
        all_missing_keys.append(set(missing_layers))
        unexpected_keys.update(unexpected_layers)
        mismatched_keys.update(mismatched_layers)
        gc.collect()
    missing_keys = set.intersection(*all_missing_keys)

    if strict and (len(missing_keys) > 0 or len(unexpected_keys) > 0):
        error_message = f"Error(s) in loading state_dict for {model.__class__.__name__}"
        if len(missing_keys) > 0:
            str_missing_keys = ",".join([f'"{k}"' for k in missing_keys])
            error_message += f"\nMissing key(s): {str_missing_keys}."
        if len(unexpected_keys) > 0:
            str_unexpected_keys = ",".join([f'"{k}"' for k in unexpected_keys])
            error_message += f"\nUnexpected key(s): {str_unexpected_keys}."
        raise RuntimeError(error_message)

    return missing_keys, unexpected_keys, mismatched_keys
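
# Loading-flow sketch (assumes `shard_files` has already been resolved, e.g. via
# `get_checkpoint_shard_files`): each shard is loaded and freed one at a time, and
# strictness is only enforced on the aggregate result across all shards:
#
#     missing, unexpected, mismatched = load_tf_sharded_weights(
#         model, shard_files, ignore_mismatched_sizes=False, strict=False
#     )
#     if missing:
#         logger.warning(f"Weights not found in checkpoint: {sorted(missing)}")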

def load_tf_weights(model, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
    """
    Detect missing and unexpected layers and load the TF weights from the shard file accordingly to their names and
    shapes.

    Args:
        model (`keras.models.Model`):
            The model to load the weights into.
        resolved_archive_file (`str`):
            The location of the H5 file.
        ignore_mismatched_sizes (`bool`, *optional*, defaults to `False`):
            Whether or not to ignore weights with shapes that don't match between the checkpoint and the model.

    Returns:
        Three lists, one for the missing layers, another one for the unexpected layers, and a last one for the
        mismatched layers.
    """
    if resolved_archive_file.endswith(".safetensors"):
        load_function = load_tf_weights_from_safetensors
    else:
        load_function = load_tf_weights_from_h5

    return load_function(
        model, resolved_archive_file, ignore_mismatched_sizes=ignore_mismatched_sizes, _prefix=_prefix
    )


def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
    mismatched_layers = []

    # Read the H5 file
    with h5py.File(resolved_archive_file, "r") as sharded_checkpoint_file:
        # Retrieve the name of each layer from the H5 file
        saved_h5_model_layers_name = set(load_attributes_from_hdf5_group(sharded_checkpoint_file, "layer_names"))

        # Find the missing layers from the high level list of layers
        missing_layers = list({layer.name for layer in model.layers} - saved_h5_model_layers_name)

        # Find the unexpected layers from the high level list of layers
        unexpected_layers = list(saved_h5_model_layers_name - {layer.name for layer in model.layers})
        saved_weight_names_set = set()
        symbolic_weights_names = set()
        weight_value_tuples = []

        # Compute missing and unexpected sub layers
        # Store the weights in a list of tuples that looks like [(weight_object, value_of_weight),...]
        for layer in model.layers:
            # if layer_name from the H5 file belongs to the layers from the instantiated model
            if layer.name in saved_h5_model_layers_name:
                # Get the H5 layer object from its name
                h5_layer_object = sharded_checkpoint_file[layer.name]
                # Get all the weights as a list from the layer object
                symbolic_weights = layer.trainable_weights + layer.non_trainable_weights
                saved_weights = {}

                # Create a dict from the H5 saved model that looks like {"weight_name": weight_value}
                # And a set with only the names
                for weight_name in load_attributes_from_hdf5_group(h5_layer_object, "weight_names"):
                    # TF names always start with the model name so we ignore it
                    name = "/".join(weight_name.split("/")[1:])

                    if _prefix is not None:
                        name = _prefix + "/" + name

                    saved_weights[name] = np.asarray(h5_layer_object[weight_name])

                    # Add the updated name to the final list for computing missing/unexpected values
                    saved_weight_names_set.add(name)

                # Loop over each weight from the instantiated model and compare with the weights from the H5 file
                for symbolic_weight in symbolic_weights:
                    # TF names always start with the model name so we ignore it
                    if _prefix is not None:
                        delimiter = len(_prefix.split("/"))
                        symbolic_weight_name = "/".join(
                            symbolic_weight.name.split("/")[:delimiter]
                            + symbolic_weight.name.split("/")[delimiter + 1 :]
                        )
                    else:
                        symbolic_weight_name = "/".join(symbolic_weight.name.split("/")[1:])

                    # here we check if the current weight is among the weights from the H5 file.
                    # If yes, get the weight_value of the corresponding weight from the H5 file.
                    # If not, make the value None.
                    saved_weight_value = saved_weights.get(symbolic_weight_name, None)

                    # Retrocompatibility patch: some embeddings are stored with the weights name (e.g. Bart's
                    # `model.shared/embeddings:0` are stored as `model.shared/weight:0`)
                    if saved_weight_value is None and symbolic_weight_name.endswith("embeddings:0"):
                        symbolic_weight_name = symbolic_weight_name[:-12] + "weight:0"
                        saved_weight_value = saved_weights.get(symbolic_weight_name, None)

                    # Add the updated name to the final list for computing missing/unexpected values
                    symbolic_weights_names.add(symbolic_weight_name)

                    # If the current weight is found
                    if saved_weight_value is not None:
                        # Check if the shape of the current weight and the one from the H5 file are different
                        if K.int_shape(symbolic_weight) != saved_weight_value.shape:
                            # If yes we reshape the weight from the H5 file accordingly to the current weight.
                            # If the two shapes are not compatible we raise an issue.
                            try:
                                array = np.reshape(saved_weight_value, K.int_shape(symbolic_weight))
                            except ValueError as e:
                                if ignore_mismatched_sizes:
                                    mismatched_layers.append(
                                        (
                                            symbolic_weight_name,
                                            saved_weight_value.shape,
                                            K.int_shape(symbolic_weight),
                                        )
                                    )
                                    continue
                                else:
                                    raise e
                        else:
                            array = saved_weight_value

                        # We create the tuple that will be loaded and add it to the final list
                        weight_value_tuples.append((symbolic_weight, array))

    # Load all the weights
    K.batch_set_value(weight_value_tuples)

    # Compute the missing and unexpected layers
    missing_layers.extend(list(symbolic_weights_names - saved_weight_names_set))
    unexpected_layers.extend(list(saved_weight_names_set - symbolic_weights_names))

    return missing_layers, unexpected_layers, mismatched_layers


def load_tf_weights_from_safetensors(model, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
    # Read the safetensors file
    with safe_open(resolved_archive_file, framework="tf") as safetensors_archive:
        mismatched_layers = []
        weight_names = [strip_model_name_and_prefix(w.name, _prefix=_prefix) for w in model.weights]
        loaded_weight_names = list(safetensors_archive.keys())
        # Find the missing layers from the high level list of layers
        missing_layers = list(set(weight_names) - set(loaded_weight_names))
        # Find the unexpected layers from the high level list of layers
        unexpected_layers = list(set(loaded_weight_names) - set(weight_names))

        for weight in model.weights:
            weight_name = strip_model_name_and_prefix(weight.name, _prefix=_prefix)
            if weight_name in loaded_weight_names:
                weight_value = safetensors_archive.get_tensor(weight_name)
                # Check if the shape of the current weight and the one from the checkpoint are different
                if K.int_shape(weight) != weight_value.shape:
                    # If yes we reshape the weight from the checkpoint accordingly to the current weight.
                    # If the two shapes are not compatible we raise an issue.
                    try:
                        weight_value = tf.reshape(weight_value, K.int_shape(weight))
                    except (ValueError, tf.errors.InvalidArgumentError) as e:
                        if ignore_mismatched_sizes:
                            mismatched_layers.append((weight_name, weight_value.shape, K.int_shape(weight)))
                            continue
                        else:
                            raise e

                K.set_value(weight, weight_value)  # weight.assign() might break if weight is a DTensor
    return missing_layers, unexpected_layers, mismatched_layers


def init_copy_embeddings(old_embeddings, new_num_tokens):
    r"""
    This function aims to reduce the embeddings in case new_num_tokens < old_num_tokens, or to pad with -1 in case
    new_num_tokens > old_num_tokens. A mask is also computed in order to know which weight in the embeddings should be
    kept or not. Example:

    - if new_num_tokens=5 and old_num_tokens=4 and old_embeddings=[w1,w2,w3,w4]

        - mask=[True,True,True,True,False] and current_weights=[w1,w2,w3,w4,-1]

    - if new_num_tokens=4 and old_num_tokens=5 and old_embeddings=[w1,w2,w3,w4,w5]

        - mask=[True,True,True,True] and current_weights=[w1,w2,w3,w4]
    """
    old_num_tokens, old_embedding_dim = shape_list(old_embeddings)
    size_diff = new_num_tokens - old_num_tokens

    # initialize new embeddings
    # Copy token embeddings from the previous ones
    if tf.math.greater(size_diff, 0):
        # if the new size is greater than the old one, we extend the current embeddings with a padding until getting
        # the new size, and directly afterwards the padded values are replaced by the new vocab ones
        current_weights = tf.pad(
            old_embeddings.value(), tf.convert_to_tensor([[0, size_diff], [0, 0]]), constant_values=-1
        )
        num_tokens_to_copy = min(old_num_tokens, new_num_tokens)
        mask = tf.fill(tf.convert_to_tensor([num_tokens_to_copy, 1]), True)
        mask = tf.pad(mask, tf.convert_to_tensor([[0, size_diff], [0, 0]]), constant_values=False)
    else:
        # if the new size is lower than the old one, we take the current embeddings until the new size
        current_weights = tf.slice(
            old_embeddings.value(),
            tf.convert_to_tensor([0, 0]),
            tf.convert_to_tensor([new_num_tokens, old_embedding_dim]),
        )
        mask = tf.fill(tf.convert_to_tensor([new_num_tokens, 1]), True)

    return mask, current_weights
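
# Worked example for `init_copy_embeddings` (shapes only; the values are illustrative).
# Growing a (4, 8) embedding matrix to 6 rows pads two rows with -1 and masks them out:
#
#     old = tf.Variable(tf.random.uniform((4, 8)))
#     mask, current = init_copy_embeddings(old, new_num_tokens=6)
#     # mask.shape == (6, 1): [True] * 4 + [False] * 2
#     # current.shape == (6, 8): rows 0-3 copied, rows 4-5 filled with -1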

class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushToHubMixin):
    r"""
    Base class for all TF models.

    [`TFPreTrainedModel`] takes care of storing the configuration of the models and handles methods for loading,
    downloading and saving models as well as a few methods common to all models to:

        - resize the input embeddings,
        - prune heads in the self-attention layers.

    Class attributes (overridden by derived classes):

        - **config_class** ([`PretrainedConfig`]) -- A subclass of [`PretrainedConfig`] to use as configuration class
          for this model architecture.
        - **base_model_prefix** (`str`) -- A string indicating the attribute associated to the base model in derived
          classes of the same architecture adding modules on top of the base model.
        - **main_input_name** (`str`) -- The name of the principal input to the model (often `input_ids` for NLP
          models, `pixel_values` for vision models and `input_values` for speech models).
    """

    config_class = None
    base_model_prefix = ""
    main_input_name = "input_ids"
    _auto_class = None
    _using_dummy_loss = None
    _label_to_output_map = None

    # a list of re pattern of tensor names to ignore from the model when loading the model weights
    # (and avoid unnecessary warnings).
    _keys_to_ignore_on_load_missing = None
    # a list of re pattern of tensor names to ignore from the weights when loading the model weights
    # (and avoid unnecessary warnings).
    _keys_to_ignore_on_load_unexpected = None
    _requires_load_weight_prefix = False

    @property
    def dummy_inputs(self) -> dict[str, tf.Tensor]:
        """
        Dummy inputs to build the network.

        Returns:
            `dict[str, tf.Tensor]`: The dummy inputs.
        """
        dummies = {}
        for key, spec in self.input_signature.items():
            # 2 is an arbitrary size for the unspecified dimensions
            dummy_shape = [dim if dim is not None else 2 for dim in spec.shape]
            if spec.shape[0] is None:
                # But let's make the batch size 1 to save memory anyway
                dummy_shape[0] = 1
            dummies[key] = tf.ones(shape=dummy_shape, dtype=spec.dtype)
            if key == "token_type_ids":
                # Some models have token_type_ids but with a vocab_size of 1
                dummies[key] = tf.zeros_like(dummies[key])
        if self.config.add_cross_attention and "encoder_hidden_states" in inspect.signature(self.call).parameters:
            if "encoder_hidden_states" not in dummies:
                if self.main_input_name == "input_ids":
                    dummies["encoder_hidden_states"] = tf.ones(
                        shape=(1, 2, self.config.hidden_size), dtype=tf.float32, name="encoder_hidden_states"
                    )
                else:
                    raise NotImplementedError(
                        "Model has cross-attention but we couldn't infer the shape for the encoder hidden states. "
                        "Please manually override dummy_inputs!"
                    )
        return dummies

    def build_in_name_scope(self):
        with tf.name_scope(self.name):
            self.build(input_shape=None)

    @property
    def framework(self) -> str:
        """
        :str: Identifies that this is a TensorFlow model.
        """
        return "tf"

    def build(self, input_shape=None):
        pass  # This is just here to make sure we don't call the superclass build()

    def __init__(self, config, *inputs, **kwargs):
        super().__init__(*inputs, **kwargs)
        if not isinstance(config, PretrainedConfig):
            raise TypeError(
                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class "
                "`PretrainedConfig`. To create a model from a pretrained model use "
                f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
            )
        # Save config and origin of the pretrained weights if given in model
        self.config = config
        self.name_or_path = config.name_or_path
        self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
        self._set_save_spec(self.input_signature)
        logger.warning_once(
            "TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We "
            "recommend migrating to PyTorch classes or pinning your version of Transformers."
        )

    def get_config(self):
        return self.config.to_dict()

    @functools.wraps(keras.Model.fit)
    def fit(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().fit(*args, **kwargs)

    @functools.wraps(keras.Model.train_on_batch)
    def train_on_batch(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().train_on_batch(*args, **kwargs)

    @functools.wraps(keras.Model.test_on_batch)
    def test_on_batch(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().test_on_batch(*args, **kwargs)

    @functools.wraps(keras.Model.predict_on_batch)
    def predict_on_batch(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().predict_on_batch(*args, **kwargs)

    @functools.wraps(keras.Model.predict)
    def predict(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().predict(*args, **kwargs)

    @functools.wraps(keras.Model.evaluate)
    def evaluate(self, *args, **kwargs):
        args, kwargs = convert_batch_encoding(*args, **kwargs)
        return super().evaluate(*args, **kwargs)

    @classmethod
    def from_config(cls, config, **kwargs):
        if isinstance(config, PretrainedConfig):
            return cls._from_config(config, **kwargs)
        return cls._from_config(cls.config_class.from_dict(config, **kwargs))

    @classmethod
    def _from_config(cls, config, **kwargs):
        """
        All context managers that the model should be initialized under go here.
        """
        return cls(config, **kwargs)

    def get_head_mask(self, head_mask: tf.Tensor | None, num_hidden_layers: int) -> tf.Tensor:
        """
        Prepare the head mask if needed.

        Args:
            head_mask (`tf.Tensor` with shape `[num_heads]` or `[num_hidden_layers x num_heads]`, *optional*):
                The mask indicating if we should keep the heads or not (1.0 for keep, 0.0 for discard).
            num_hidden_layers (`int`):
                The number of hidden layers in the model.

        Returns:
            `tf.Tensor` with shape `[num_hidden_layers x batch x num_heads x seq_length x seq_length]` or list with
            `[None]` for each layer.
        """
        if head_mask is not None:
            head_mask = self._convert_head_mask_to_5d(head_mask, num_hidden_layers)
        else:
            head_mask = [None] * num_hidden_layers

        return head_mask

    def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
        """-> [num_hidden_layers x batch x num_heads x seq_length x seq_length]"""
        if head_mask.shape.rank == 1:
            head_mask = head_mask[None, None, :, None, None]
            head_mask = tf.repeat(head_mask, repeats=num_hidden_layers, axis=0)
        elif head_mask.shape.rank == 2:
            head_mask = head_mask[:, None, :, None, None]
        assert head_mask.shape.rank == 5, f"head_mask.dim != 5, instead {head_mask.shape.rank}"
        head_mask = tf.cast(head_mask, tf.float32)  # switch to float if needed + fp16 compatibility
        return head_mask
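
    # Head-mask broadcasting sketch (values are illustrative): a per-head 1D mask is
    # tiled across layers, while a per-layer 2D mask keeps its leading dimension:
    #
    #     mask_1d = tf.constant([1.0, 0.0, 1.0, 1.0])   # [num_heads]
    #     five_d = model._convert_head_mask_to_5d(mask_1d, num_hidden_layers=12)
    #     # five_d.shape == (12, 1, 4, 1, 1)
    #
    #     mask_2d = tf.ones((12, 4))                    # [num_hidden_layers, num_heads]
    #     # -> shape (12, 1, 4, 1, 1), one row per layer, no tiling needed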