L i(LUddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl m Z ddlmZddlmZmZddlmZddlmZddlmZmZddlmZdd lmZmZmZmZmZm Z dd l!m"Z"ddl#Z#dd l$m%Z%dd l&m'Z'dd l(m)Z)ddl*m+Z,ddl*m-Z.ddl#m/Z/m0Z0ddl1m2Z2ddl3m4Z4ddl5m6Z6ddl7m8Z8ddl9m:Z:ddl;mm?Z?m@Z@mAZAmBZBddlCmDZDmEZEddlFmGZGddlHmIZIddlJmKZKddlLmMZMddlNmOZOddlPmQZQmRZRdd lSmTZTdd!lUmVZVdd"lWmXZXmYZYmZZZm[Z[m\Z\m]Z]m^Z^dd#l_m`Z`dd$lambZbdd%lcmdZddd&lemfZfdd'lgmhZhdd(limjZjdd)lkmlZldd*lmmnZnmoZompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZxmyZymzZzm{Z{m|Z|m}Z}m~Z~mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZdd+lmZmZmZdd,lmZmZdd-lmZmZmZmZmZdd.lmZmZergdd/lmZmZdd0lmZdd1lmZmZmZmZmZmZe'jZej\jOd2Zee'jZd3k\rdd4lmZerdd5lmmZe#jnjgZexred6Zerdd7lmZer7ddlmcm#Zdd8lmZe'jZee'jZd9k\Znd:Zej~eZejjd;d<jZejjd=d<jZed>d?@ZdAad:ad:adBZe0jje0jje0jje0jje0jje0jje0jje0jje0jje0jje0jje0jje0jje0jjdCZgdDZedEZedFZedGZdHZdIZdJee0jdKffdLZdJee0jdKffdMZdNZddOZe#je#je#je#je#je#je#je#je#je#je#je#jdP ZedQr0e#jedR<e#jedS<e#jedT< ddUeeejfdVedWeeee#jfdXefdYZdZe#j^d[efd\Zdd]e0jfd^Zd_eeed`eee#j^fd[eeeeeeffdaZd_eeed`eee#j^fd[eeeeeeffdbZ ddcd?ddedee#j^dfee jdgeefd[eeee#j ff dhZdcd?ddedZe#j^fdiZe#j ddcd?d`edjedkeeefdleedmeedneedgeefdfee jdoedpd[eeeeeffdqZ drZ dsZ ddtedueed[efdvZ ddweeeejfdxedueedyeedzed{ed|ed}ed~edeeeefdedeeeefdededeededeed[eeeeeeff$dZ deeee#j efdeeede6deed`eedXed[ee6ee#j ee#j ffdZdcd?dleeeefdeedgeefdee#j dfee jd[efdZdcd?deedeededgeefd[eeeeeff dZdcd?d`eedeeededeeefdVedXed[eeeeeeefffdZGddeZGddKZGddZGdd?e0jeeeze?Ze}ej,e_ej,j.>ej,j.j1dcddej,_ddce0jded[e0jfdZdZdeeee#jfd[efdZdcededgeeffdZdZGddeZeZee d<GddeZ!y)N)abstractmethod) defaultdict)ThreadPoolExecutor as_completed)contextmanager)Enum)partialwraps)Thread)AnyCallableOptionalTypeVarUnionget_type_hints) is_zipfile)"split_torch_state_dict_into_shards)version) safe_open) load_file) save_file)Tensornn) constraints) checkpoint)PretrainedConfig)DistributedConfig)custom_object_save) CompileConfigGenerationConfig)PeftAdapterMixindeepspeed_configis_deepspeed_zero3_enabledis_fsdp_enabled)find_tied_parametersinit_empty_weights)!_load_state_dict_into_zero3_model)eager_paged_attention_forward)flash_attention_forward)paged_attention_forward)flex_attention_forward) is_kernelload_and_register_kernel)sdpa_attention_forward)sdpa_attention_paged_forward)_get_parameter_tp_plandistribute_modelinitialize_tensor_parallelismrepack_weights%replace_state_dict_local_with_dtensorshard_and_distribute_moduleverify_tp_plan) LOSS_MAPPING)lazy_import_flash_attention)id_tensor_storage) HfQuantizer)get_hf_quantizer)get_module_from_name)auto_conversion)#ADAPTER_SAFE_WEIGHTS_NAMEADAPTER_WEIGHTS_NAME CONFIG_NAME DUMMY_INPUTSFLAX_WEIGHTS_NAMESAFE_WEIGHTS_INDEX_NAMESAFE_WEIGHTS_NAMETF2_WEIGHTS_NAMETF_WEIGHTS_NAMEWEIGHTS_INDEX_NAME WEIGHTS_NAMEContextManagersPushToHubMixin cached_filecheck_torch_load_is_safe copy_func download_urlextract_commit_hashhas_fileis_accelerate_availableis_bitsandbytes_availableis_flash_attn_2_availableis_flash_attn_3_availableis_kernels_availableis_offline_modeis_optimum_availableis_peft_available is_remote_urlis_torch_flex_attn_availableis_torch_greater_or_equalis_torch_mlu_availableis_torch_npu_availableis_torch_xla_availableis_torch_xpu_availablelogging)_CAN_RECORD_REGISTRYGeneralInterfaceOutputRecorder)create_and_tag_model_cardget_checkpoint_shard_files)ENV_VARS_TRUE_VALUES#is_huggingface_hub_greater_or_equalis_sagemaker_mp_enabledis_torch_fx_proxyis_torchdynamo_compiling)BitsAndBytesConfigQuantizationMethod)dispatch_modelinfer_auto_device_map)add_hook_to_module)$check_tied_parameters_on_same_deviceextract_model_from_parallelget_balanced_memoryget_max_memoryoffload_weightsave_offload_index accelerate0.31)get_state_dict_from_offload)find_adapter_config_filez2.5)DTensor __version__z1.10F XLA_USE_BF160XLA_DOWNCAST_BF16SpecificPreTrainedModelTypePreTrainedModel)boundTctjjxrLtjjxr,t t j jdddk(S)N LOCAL_RANKz-1r)torch distributed is_availableis_initializedintosenvirongeta/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/modeling_utils.pyis_local_dist_rank_0rsQ &&( 9    , , . 9  |T2 3q 8r)uniform_normal_ trunc_normal_ constant_xavier_uniform_xavier_normal_kaiming_uniform_kaiming_normal_uniformnormalxavier_uniform xavier_normalkaiming_uniformkaiming_normal)aria ayavisioncolpaliemu3fuyugotocr2gemma3internvlllavamistral3mllama paligemma shieldgemma2qwen2vl qwen2_5_vl videollavavipllavac#Kt}dad}tjD]*\}}ttj j ||, d|atjD]*\}}ttj j ||,y#|atjD]*\}}ttj j ||,wxYww)ze Context manager to globally disable weight initialization to speed up loading large models. FcyNr)argskwargss r _skip_initz#no_init_weights.._skip_inits rN) _init_weightsTORCH_INIT_FUNCTIONSitemssetattrrrinit)old_init_weightsrname init_funcs rno_init_weightsrs %M 05571i tZ014 ( 399; 4OD) EHHMM4 3 4) 399; 4OD) EHHMM4 3 4s A C BAC ACC c#,Kda dday#dawxYwwNTF) _is_quantizedrrrset_quantized_statersM    c#,Kda dday#dawxYwwr)_is_ds_init_calledrrrset_zero3_staters"# "Urc.tfd}|S)z Decorator to restore the default torch dtype at the end of the function. Serves as a backup in case calling the function raises an error after the function has changed the default dtype but before it could restore it. ctj} |i|tj|S#tj|wxYwr)rget_default_dtypeset_default_dtype)rr old_dtypefuncs r_wrapperz'restore_default_dtype.._wrappers@++-  /((  # #I .E # #I .s 4A )r )rrs` rrestore_default_dtyper s" 4[// Orctjgj}tdrtjntjd}||k(r|tjdk7r|Sy|S)z Test if a device context manager is currently in use, or if it is not the case, check if the default device is not "cpu". This is used to infer the correct device to load the model on, in case `device_map` is not provided. z2.3cpuN)rtensordevicer\get_default_device)device_in_contextdefault_devices r*get_torch_context_manager_or_global_devicerse  R(//3LU3SU--/Y^YeYefkYlNN* U\\%0 0! ! r parameterModuleUtilsMixinc t|jjS#t$r]dtj dt tttffd}|j|}t|}|djcYSwxYw)Nmodulereturnc|jjDcgc]\}}tj|s||f!}}}|Scc}}wr__dict__rr is_tensorrkvtupless rfind_tensor_attributesz4get_parameter_device..find_tensor_attributes2sA)/)>)>)@WAEOOTUDVq!fWFWMX AAget_members_fnr) next parametersr StopIterationrModulelisttuplestrr_named_members)rrgen first_tuples rget_parameter_devicer,s %I((*+222  % 299 eCK>P9Q &&6L&M3i 1~$$$ %s"%A#B  B cFd}|jD]}|j}|js ttvrt rt jcSttvrht r^|jt jk(rt jcS|jt jk(rt jcS|jcS||Sdtjdttt t"ffd}|j%|}d}|D](}|}|djs|djcS||djS|j'D],}|j}|js |jcS|S)zz Returns the first found floating dtype in parameters if there is one, otherwise returns the last dtype it found. Nrrc|jjDcgc]\}}tj|s||f!}}}|Scc}}wrrrs rrz3get_parameter_dtype..find_tensor_attributesUsA%+__%:%:%<STQPQ@R1a&SS Trrr)rdtypeis_floating_pointr~rgr_rbfloat16rfloatdoublefloat32rrrrrrrbuffers)r last_dtypetrr last_tuple gen_tuples rget_parameter_dtyper;swJ  ! ! #WW   338N8P~~% $88=S=U77ekk) >>)77ell* ==(77N ryyT%V :L5M  " "2H " ICJ&  Q< ) ) +Q<%% %& !}"""   WW   77N rc|jD] }|js|jcSt|jjS)zt Returns the first found floating dtype in `state_dict` if there is one, otherwise returns the first dtype. )valuesrrr) state_dictrs rget_state_dict_dtyperlsM      77N  !!# $ * **rctjj|t}tjj|t}tjj |}tjj |}|s/|s-ttf}t ddj|d|d|xr|xs| } | r|n|} t| dd5} tj| } dddtt d j} | d j}|jj}|Dcgc] }||vs| }}|Dcgc] }||vs| }}|rt|d kDst|d kDrd |j j"}t|d kDr,d j|Dcgc]}d |d  c}}|d|dz }t|d kDr,d j|Dcgc]}d |d  c}}|d|dz }t%|| rt&}n&t)t+t,jdd}| D]P}|tjj||}|j/|d~t1j2Rt,j4j6j8j;||S#1swYxYwcc}wcc}wcc}wcc}w)a This is the same as [`torch.nn.Module.load_state_dict`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=load_state_dict#torch.nn.Module.load_state_dict) but for a sharded checkpoint. This load is performed efficiently: each checkpoint shard is loaded one by one in RAM and deleted after being loaded in the model. Args: model (`torch.nn.Module`): The model in which to load the checkpoint. folder (`str` or `os.PathLike`): A path to a folder containing the sharded checkpoint. strict (`bool`, *optional*, defaults to `True`): Whether to strictly enforce that the keys in the model state dict match the keys in the sharded checkpoint. prefer_safe (`bool`, *optional*, defaults to `False`): If both safetensors and PyTorch save files are present in checkpoint and `prefer_safe` is True, the safetensors files will be loaded. Otherwise, PyTorch files are always loaded when possible. Returns: `NamedTuple`: A named tuple with `missing_keys` and `unexpected_keys` fields - `missing_keys` is a list of str containing the missing keys - `unexpected_keys` is a list of str containing the unexpected keys zCan't find a checkpoint index ( or z) in .rutf-8encodingN weight_mapr#Error(s) in loading state_dict for ,"z Missing key(s): rT map_location weights_onlyF)strict)rpathjoinrHrDisfile ValueErroropenjsonloadrsetrkeysrlen __class____name__ RuntimeErrorsafe_load_filerMr rload_state_dictgccollectrmodulesr_IncompatibleKeys)modelfolderr prefer_safe index_filesafe_index_file index_presentsafe_index_present filenames load_safe load_indexfindex shard_files loaded_keys model_keyskey missing_keysunexpected_keys error_messagerstr_missing_keysstr_unexpected_keysloader shard_filers rload_sharded_checkpointr?xs0f&89Jggll6+BCOGGNN:.M8 !3')@A :6;;y;Q:RRWX^W__`abb"I (H=7HI$-:J j# 0A ! s5.55789K %**,K!!#((*J#-HCK1GCHLH&1KsS 5JsKOK 3|$q(C,@1,D=eoo>V>V=WX | q "xx<(Ha1QCq(HI  12B1C1E EM  ! #"%((o+Nas!H+N"O  12E1FaH HM=)) "%dK! BGGLL<=  j7   88   " " 4 4\? SSGIK)I,Os0K! K.K.# K3-K3 K8 K=!K+) BOOLU8I8I16F16BF16I32F32F64I64F8_E4M3F8_E5M2z2.3.0U16U32U64checkpoint_file is_quantizedrrc x|jdrt|d5}|j}|"|jddvrt d|di}|j D]}|d k(rk|j |}|j} | tvr t| } ntd | tj|j| d ||<s|j|||<|cdddS|r t |dtr?tj j#r!tj j%d kDst'rt)s|sd }nd }i} t+|t,r|d k7rt/|rddi} tj0|f||d| S#1swYxYw#t2$rx} t5|5}|j7ddk(r t dtd|d| #1swYnxYwn%#t8tf$rt d|d|dwxYwYd} ~ yd} ~ wwxYw)zg Reads a `safetensor` or a `.bin` checkpoint file. We load the checkpoint on "cpu" by default. .safetensorspt frameworkNformat)rStfflaxmlxz"The safetensors archive passed at zf does not contain the valid metadata. Make sure you save your model with the `save_pretrained` method.metaz)Cannot load safetensors of unknown dtype )sizerrrrmmapTrrzYou seem to have cloned a repository without having git-lfs installed. Please install git-lfs and run `git lfs install` followed by `git lfs pull` in the folder you cloned.zUnable to locate the file z_ which is necessary to load this pretrained model. Make sure you have saved the model properly.z9Unable to load weights from pytorch checkpoint file for 'z' at 'zZ'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.)endswithrmetadatarOSErrorr get_slice get_dtypestr_to_torch_dtyperrempty get_shape get_tensorrMr$rrget_rankr%r isinstancerrr ExceptionrreadUnicodeDecodeError) rOrPrrr2r_rr_slicek_dtyper extra_argses rr#r#s/ $ 7 1zz|H# X(>Fa(a88IJMMJVVX 46)[[^F$..0G"44 27 ;(+TU\T])^__$)KKV5E5E5Gu]c$dJqM$%LLOJqM 4)  . "*  /0))88:))224q8#%.B.D"% $  os + 0F:VeKf $Jzz  %%    U  `  o& !66!9 )!& %4_4EFNN   #J/ KOK\]&'(jj   sICF,B$F8,F58 H9 H  0G==H H  H4 "H,,H44H9rrc|jr5|jddj|jz}|S|j}|S)N)nelementviewdata_ptr element_size)rstops r_end_ptrrw*sO {{2r"++-0C0C0EE K  Krrcg}t|dd3|jDcgc] }|r|d|n|}}|j|t|dd3|jDcgc] }|r|d|n|}}|j||j D],\}}|r|d|n|}|jt ||.|Scc}wcc}w)N_tied_weights_keysr_dynamic_tied_weights_keys)prefix)getattrryextendrznamed_children_get_tied_weight_keys)rr{tied_weight_keysrnamesr submodule local_prefixs rrr3sv+T2>;A;T;TUaFF81QC1UU&v3T:F;A;\;\]aFF81QC1]]&!002Wi-3&4&)  5i UVW V^s CC tensorsrc0g}|D]}t|dkr|j|#g}|D]2}||}|j|jt||f4|j |d\}}} |j| h|ddD]4\} } }| |k\r|j|hn|dj || }6g} g} |D]A}t|dk(r | j|j 1| j|C| | fS)Nrrrq)rappendrtrwsortaddpop)rrfiltered_tensorssharedareasrr_ last_stop last_namestartrvdisjoint_tensorsshared_tensorss r_find_disjointrAsA v;?  # #F +  FD%F LL&//+Xf-=tD E F  "'(9i ,!&qr  E4 ! ''/ $((.I  &N#+ w<1   # #GKKM 2  ! !' * + + ++rc^g}g}|D]}t|dkrtjt}|D]A}||}|j|j t |f}||j|Ct|dk(r|j||j|||fS)Nrr) r collectionsrrrrtrwrr) rrr identicalrrrrareas r_find_identicalr`sNI * v;? '', "D%FMM6??#4hv6FGD $KOOD ! " u:?   V $  ! !& ) * 9 $$rr( param_name empty_paramkeep_in_fp32_regex hf_quantizercZ |j|}ttd}d}|xr|jtjk(} |jjrK| sI|"|j|rtj}n%||j j"}n |j}|duxr|j%|fS#t$rb}|Y|jjtj tj tjtjhvrYd}~y|d}~wwxYw)N)TN float8_e4m3fn)get_parameter_or_bufferriquantization_config quant_methodrmHQQQUARKMXFP4BITS_AND_BYTEShasattrrrrrsearchrconfig_pre_quantization_dtype is_contiguous) r(rrrr old_paramrois_torch_e4m3fn_available casting_dtypeis_param_float8_e4m3fns r_infer_parameter_dtyperss 11*= !( ?M6c;;L;LPUPcPc;c**3I  ).@.G.G .S!MMM  %!LL@@M%OOM D >Y%<%<%> MM1    # (H(H(U(U  " "  $ $  $ $  - - Z ) G sB?? D*AD%#D%%D*cNt||\}}|j||iddy)zKCast a single parameter `param_name` into the `model`, with value `tensor`.FT)rassignN)r=r#)r(rrr param_types r_load_parameter_into_modelrs--eZ@FJ J/dKrr>reverse_renaming_mapping device_mapdisk_offload_folderdisk_offload_index device_mesh(torch.distributed.device_mesh.DeviceMeshc d} |_|jddM|ddtjdfvr1t|dtjr|djn|d} |Kdj t |jdD cgc]} tj| c} } |du} |jd}|}|rt|d| nd}t|j}|D]h}||}|r||}|j|}n|j| }t|||||\}}| r| r|j!||s"t#||||||| j%| n|||| j%| d }|j&|||| j%fi|n|d }||j|}|r|j)}|d}n9tj* |}|st-|d ||j/}|d k(r|s9t1||||}n)| r|j!||s5t3rt5rdnd}t7|||j|n|j'||||t3s t9r|j;|}t=||\}}t?||}|jj@dk(r|jB}|jEsd|d<t3r t5sdnd}tA||jFj|fi|}tI||||rf||=k||jKddd|Scc} w)aLoad parameters from `meta_state_dict` into the model. The parameters of the `meta_state_dict` are on the meta device in order to easily infer the shapes and dtypes that they will have. Then proper parameters are then loaded from `shard_file`, which is the actual state dict file on disk. This function takes care of correctly casting dtypes, devices, and sharding tensors in case of tensor parallelism. rN|T)reverserRrS)rUr)rr to_contiguousrankr.z doesn't have any device set.diskrZF requires_grad)&rrrrhr3rsortedrreescaper^rrratorparam_needs_quantizationr6get_local_rankcreate_quantized_param contiguousrrgrouprur%rrr$get_param_namer=r|typerrdatar__exit__) r(rr>rrrrrrr tensor_devicerdevice_map_regexrPis_safetensorsis_meta_state_dict file_pointerparams_to_loadrrserialized_param_nameparamrrsharding_kwargs param_device module_layerrrvalue val_kwargsrs r _load_state_dict_into_meta_modelrs$M*..T":"F b>%e)z"~u||4\JrN00blmobpM886*//BS]a;b$caRYYq\$cdt+L((8N'Rd9Z4 NjnL*//+,N$^'  , $TU33E5*l[ #$(B(D!-!rdisk_only_shard_filesrPrrkey_renaming_mappingrr(reverse_key_renaming_mappingrrrrrrr error_msgss rload_shard_filer#s<  $**%%%L>*4N4PYe R$ \ [g :D9I9I9KiAqThOh&q)1,iJiJ!#L7zJJ ) ))(<(>|=    (! 31%1#   ) )))js " C /C c 2ttjjdd}t t ||}t jd|dg}t|5}tjt |d5}|Dcgc]}|jt|}}t|D]+}|j\}} ||z }|jd- dddddd| fScc}w#1swYxYw#1swY| fSxYw) NHF_PARALLEL_LOADING_WORKERS8z'Loading model weights in parallel with z workers...) max_workersLoading checkpoint shards)totaldescr)rrrrminrloggerinforratqdmsubmitrrresultupdate) args_list num_workersrexecutorpbarargfuturesfuture _error_msgsrs r load_shard_files_with_threadpoolr[sbjjnn%BCHIKc)nk2K KK9+kRSJ  4 \\I5P Q UYHQRx<RGR&w/ 28--// /k)  A    ) ))S   ) ))s6&!D C> C9);C>%D 9C>>D D  D weights_namevariantcH||jdd\}}|d|d|}|S)Nrr)rsplit)rrrrs r _add_variantrrs:!((a0 dq 4&1 rpretrained_model_name_or_path subfolder gguf_filefrom_tf from_flaxuse_safetensors cache_dirforce_downloadproxieslocal_files_onlytoken user_agentrevision commit_hashis_remote_codetransformers_explicit_filenamecbd}||t|}tjj|}|r|4tjj |||}|j d}nN|rotjj tjj ||tdzr*tjj ||tdz}n|ritjj tjj ||tr'tjj ||t}nr|ritjj tjj ||tr'tjj ||t}n|dur}tjj tjj ||tt|r1tjj ||tt|}n|durtjj tjj ||tt|r3tjj ||tt|}d}n|s}tjj tjj ||tt|r1tjj ||tt|}n|stjj tjj ||tt|r3tjj ||tt|}d}n|stjj tjj ||tdzsBtjj tjj ||tr tdtt|d|d|sbtjj tjj ||tr tdtt|d|d |r tdtt|d|d tdtt|d tt|d td tdzd td|d tjj tjj ||r|}d}nHtjj tjj ||dzr;|st!d |dzdtjj ||dz}d}nt#|r|}t%|}n||}|j d}n7|rt}n.|rt}n%|durtt|}ntt|} ||| | | | | |dd|d }t'||fi|}||tt|k(rt'|tt|fi|}|d}nk|rL| dk(rt)|fi|\}} }| |d<|Mt|dtt|d tt|dtt|}t'||fi|}|2|tt|k(rt'|tt|fi|}|d}| sst+sh||ttfvrWtj,dddk7r=|rtnt}| | | || d}||| | |dd|d|}t/||fi|s |s t1t(|fddi|dj3n| | | || d}t/|tfi|rt|dtt|dt/|tfi|rt|dtt|d |3t/|tfi|r"t|dtt|d|dt|dtt|d tt|d td td td |rt6j9d |}n[t6j9d d!n?|r=tjj |r|}n||| | | | | |dd|d }t'||fi|}d}|rt;|||| | | | | ||" \}}||fS|gnd}||fS#t$rt4$r>}td|d|dtt|d td td td |d}~wwxYw)#zGet all the checkpoint filenames based on `pretrained_model_name_or_path`, and optional metadata if the checkpoints are sharded. This function will download the data if necessary. FN.safetensors.index.json.indexTzError no file named z found in directory zf but there is a file for TensorFlow weights. Use `from_tf=True` to load this model from those weights.zb but there is a file for Flax weights. Use `from_flax=True` to load this model from those weights.rz, rz$We found a TensorFlow checkpoint at z:, please set from_tf to True to load from this checkpoint.) rrr r r r r r _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries _commit_hashmainr z& does not appear to have a file named z and thus cannot be loaded with `safetensors`. Please make sure that the model has been saved with `safe_serialization=True` or do not set `use_safetensors=True`.DISABLE_SAFETENSORS_CONVERSIONtrue)r r r rr )rrr r rrrrignore_errors_during_conversionzThread-auto_conversion)targetrrrz) but there is a file without the variant z;. Use `variant=None` to load this model from those weights.zCan't load the model for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z=' is the correct path to a directory containing a file named zloading weights file z from cache at ) rrr r r r r rr)rrrisdirrr^rrGrFrCrrErDrIrHr`rrZrOrLr>rWgetenvrQr rrirrrf)rrrrrrrrrr r r r r rrr is_shardedis_local archive_filefilenameresolved_archive_filecached_file_kwargssafe_weights_namehas_file_kwargsrosharded_metadatacheckpoint_filess r_get_resolved_checkpoint_filesr(ysG .J$0Y5F(+,I(J%77==!>? -9!ww||,I9Vtu ;DDE^_ RWW^^ :IYaGab "ww||,I9VehpVpq RWW^^BGGLL9VXacs,tu!ww||,I9Vfg rww~~ :IGXY  "ww||,I9Vgh  -"''.. :I|TegnGop3 "ww||19lK\^e>f  !-"''.. :I|TkmtGuv3 "ww||19lKbdk>l  " $ :I|T`biGjk* "ww||19la  % :I|TfhoGpq* "ww||19lK]_f>g  " $rww||,I9VehpVpqr77>>"'',,/LiYi"jk*< g+N*OP567MM % :IGXY**< g+N*OP567>> !*<8I7+S*TU56a9 *< g+N*OrR^_pryRzQ{|()Oh,F+GtL]K^_56a9 WW^^BGGLL4QR S8LH WW^^BGGLL4QT\4\] ^ :;X[c;c:deDD77<< 3PS[3[\LH 8 94H$01N$O !.99;DDE^_ +, -'(97C' g>P "+&4&(8"", (!*8==B$/ &")44QS[(r_q(r%)0XN_ahAi5i,75$% ?$0 ! KK/zI^H_` a  77>>) $$- ! '"0"$4($&499> + " %00My$o\n$o !-G ) !)-!$ . ** - --7T6_12ei - --}  01N0OP99V8WX::F|U\:]9^_()O+$>{$KejjNiNijuNv$vM#K0MA0%+*C367J;7WYcdoYp3q#K0 r+>,'*5Z ZHYZ  #  - - - D   *51 ,[*E qqs I 1I original_checkpoint_keyscheckpoint_keys'loading_base_model_from_task_state_dictc|j}t|jj}||j |||}t t |t |z }t |t |z }|r5|D cgc]} | j|dr| } } |j| |jD chc]\} } |  } } } t || z }t|}|D]Q}|D cgc] } | |vs|  }} t|dkDs&t|t|ks>|D cgc] } | |vs|  }} S|%|j|||}|j||}||fScc} wcc} } wcc} wcc} w)zFind missing keys (keys that are part of the model parameters but were NOT found in the loaded state dict keys) and unexpected keys (keys found in the loaded state dict keys, but that are NOT part of the model parameters) rr)base_model_prefixrrrupdate_expected_keysrr startswithr named_buffersr&rupdate_missing_keysupdate_unexpected_keys)r(rQrRrSrr{ expected_keysr8r9rtask_specific_keysnr model_buffersrOrmissing_in_groups r!_find_missing_and_unexpected_keysr`_s $ $F))+0023M$99%P_` #m,s?/CCDL/*S-??O.)AdAY_X``aVbIcadd12$)#6#6#8941aQ9M9_}<=O&u-KR'3B!qEzABB  1 $-=)>U)K'3Q!q@P7PAQLQR #77|VT &==e_U  ((%e : CQs*E3 E3 E8; E>E>6 FFignore_mismatched_sizeskeys_to_rename_mappingcB|sggfS|dg}|j}g}g} |D]} | dk7rt| |d|}|jD cic]\} } | |vs || | } } } | jD]\}}||vs |j||jk7s(|r:|jddk(r(|j dz||j k(rd|j || j |j||jf|| fScc} } w)a  Find potential shape mismatch between the different state dicts and the model parameters, but only if `ignore_mismatched_sizes` is True. Otherwise, return immediately and any shape mismatch that may exist will be raised later on. This avoids checking every parameter in advance, as shape mismatch are extremely rare in practice. If we want to ignore them however, we do need to check in advance as we need to know which parameters we need to move back from meta to cpu, and initialize correctly. Indeed, as our model initialization takes place at the module level, and not the weight level, in the case of a sharded checkpoint we cannot correctly initialize the weights according to `model._init_weights()` if we perform this check on each state dict at loading time (after the first loaded checkpoint, there are no way to initialize only the mismatched weights if any, without overwriting the previously loaded weights as well because all the module will be initialized, not only the weights that are mismatched). rrZrrqrr)rr#rshapenumelr)r(rr'rarbrPrmodel_state_dictmismatched_keysmismatched_shapesr>rrnew_state_dictr7rs r_find_mismatched_keysrjsX. #2v 4'')O&Z  (FYeJ DNCSCSCUu41aYZ^tYt03Q6uu)//1 ZKC&&6<<;KC;P;V;V+V!V\\"%5%:v||~PQ?QUefiUjUpUpUr?r#**3/%,,fll