L i UddlZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z m Z ddlmZmZmZmZddlmZddlmZddlmZddlmZddlmZdd lmZmZmZmZm Z m!Z!m"Z"dd l#m$Z$ddl%Z%dd l%m&Z&dd l'm(Z(m)Z)dd l*m+Z+ddl,m-Z-m.Z.ddl/m0Z0m1Z1m2Z2m3Z3m4Z4ddl5m6Z6m7Z7m8Z8ddl9m:Z:m;Z;mZ>m?Z?m@Z@mAZAddlBmCZCddlDmEZEmFZFmGZGddlHmIZIddlJmKZKgdZLdZMeNeOd<dZPeeNeOd<eGddZQeGddZRGdd eZSd!ZTd"eNfd#ZUGd$d%e ZVGd&d'eVZWGd(d)eVZXGd*d+ZYd,e@d"eZfd-Z[d.eZd/e\e@d"e\e\e@fd0Z]d1eYd2ejd3e"eje%jLfd4e@d5eNd6eSd"eGfd7Z`d8ed9ejd:ejd;e?d1eYdeZd6eSd"dfd?ZcGd@dAe ZdGdBdCedZeGdDdEeFZfGdFdGZgGdHdIeEZhGdJdKefeCZiy)LN)ABCabstractmethod) GeneratorIterableIteratorSequence)contextmanager) dataclass)Enum)UnsupportedOperation)Path)AnyCallablecastFinalIOOptionalUnion)Buffer)Tensor)_get_available_device_type_get_device_module)narrow_tensor_by_index)ExtensionRegistryStreamTransformExtension)CUSTOM_METADATA_KEYDCP_VERSION_KEY FORMAT_KEY FORMAT_VALUEHF_DCP_VERSION)MetadataSTATE_DICT_TYPE StorageMeta) LoadItemTypeLoadPlan LoadPlannerReadItemSavePlan SavePlanner WriteItem WriteItemType)BlockingAsyncStager) StorageReader StorageWriter WriteResult)_create_file_view)Future)FileSystemWriterFileSystemReader FileSystemFileSystemBaseSerializationFormatz .metadata _metadata_fnz1.0.0CURRENT_DCP_VERSIONcPeZdZUdZeed<eed<eed<dZee eed<dZ y) _StorageInfoz#This is the per entry storage info. relative_pathoffsetlengthNtransform_descriptorscp|jjDcic] \}}| || c}}Scc}}wN)__dict__items)selfkvs m/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/distributed/checkpoint/filesystem.py __getstate__z_StorageInfo.__getstate__Ss/!%!4!4!6HA!-1HHHs 22) __name__ __module__ __qualname____doc__str__annotations__intr>rrrGrFr:r:Js/- K K598HSM29IrPr:ceZdZUeed<y)_StoragePrefixprefixN)rHrIrJrLrMrOrPrFrRrRWs KrPrRceZdZdZdZy)r6 torch_save safetensorsN)rHrIrJ TORCH_SAVE SAFETENSORSrOrPrFr6r6\s JKrPr6z.distcpreturnc<ttjSr@)rLuuiduuid4rOrPrF_generate_uuidr]ds tzz| rPcveZdZedededdfdZeddZedee e jeffdZ y) _TensorLoadersizeobjrYNcyr@rOrCr`ras rFaddz_TensorLoader.addi rPcyr@rOrCs rF start_loadingz_TensorLoader.start_loadingmrerPcyr@rOrgs rFvaluesz_TensorLoader.valuesqrerPrYN) rHrIrJrrNobjectrdrhrtupletorchrrjrOrPrFr_r_hsk  & T     u||V';!<=  rPr_cheZdZdeddfdZdededdfdZd dZde e e jeffd Z y) _SerialCpuLoader resolve_funrYNc ||_g|_yr@)rqrB)rCrqs rF__init__z_SerialCpuLoader.__init__ws&/1 rPr`rac>|jj||fyr@)rBappendrcs rFrdz_SerialCpuLoader.add{s 4+&rPcyr@rOrgs rFrhz_SerialCpuLoader.start_loading~s rPc#K|jD]y\}}|j|j}|j}|j j |j k7r|j}||f{ywr@)rBrqdetachcpustorager`numelclonerC_ratensors rFrjz_SerialCpuLoader.valuesszjj FAs%%c*113FZZ\F~~$$&&,,.8   sB B rk)rHrIrJrrsrNrlrdrhrrmrnrrjrOrPrFrprpvsS2H22''&'T'  u||V';!<= rPrpc eZdZ ddedeej deddfdZe de fdZ de e ejeffdZdd Zdee ejeffd Zd ed eddfd ZddZdee ejeffdZy)_OverlappingCpuLoaderNrqstreaminflight_threshholdrYc8||_g|_||_d|_t j |_d|_d|_|r |jn t|_ t|j|_ ttjj |xs|jj#|_|j$|jj#k7r4|j$j'|jj#yy)NrF)rqrBrin_flight_data collectionsdeque current_itemsidxstarted device_typerr device_modulerrncudaStreamcurrent_streamr wait_stream)rCrqrrs rFrsz_OverlappingCpuLoader.__init__s '/1 #6 0;0A0A0C "(F  .H.J 00@0@A JJ  vL););)J)J)L   ;;$,,;;= = KK # #D$6$6$E$E$G H >rPcF|jt|jk\Sr@)rlenrBrgs rF_donez_OverlappingCpuLoader._donesxx3tzz?**rPcg}|j|jk\r|jj|j|jk\r|jj }|xj|dj |djzzc_|j||j|jk\r|SNr) rrr synchronizerpopleftr{ element_sizeru)rCdrainedvals rF_drainz_OverlappingCpuLoader._drains   $":": : KK # # %!!T%=%==$$,,.C   3q6<<>CF4G4G4I#I I  NN3 !!T%=%==rPc|jj|j5|js|j|jkrm|j |j \}}|xj dz c_|j|j}|jj|jk(r|jdd}nn|jtjdk(rL|jj|j!|j"zk7r|j%}|j&j)||f|xj|j!|j+zz c_|js|j|jkrmdddy#1swYyxYw)NryT)device non_blocking)rrrrrrBrrqrxrtypertornuntyped_storager`r{itemsizer|rrurr}s rF_refillz_OverlappingCpuLoader._refillsc    & &t{{ 3 NjjT%8%84;S;S%SDHH-3A ))#.557==%%)9)99#YYe$YGF]]ell5&99..0557!<<>FOO;<"("")) ##v||~8K8K8M'MM#)jjT%8%84;S;S%S N N Ns FGG c|jsJt|jdkDr|jj |jSr)rrrrrrgs rF_finishz_OverlappingCpuLoader._finishs=zzz t!! "Q & KK # # %!!!rPr`racl|jr td|jj||fy)Nz&cannot add items after loading started)r RuntimeErrorrBrurcs rFrdz_OverlappingCpuLoader.adds+ <<GH H 4+&rPc|jryd|_|jjtjd|j y)NTrkey)rrBsortoperator itemgetterrrgs rFrhz#_OverlappingCpuLoader.start_loadings9 <<   H//23 rPc#K|j|js7|j}|j|Ed{|js7|j Ed{y7*7wr@)rhrrrr)rCrs rFrjz_OverlappingCpuLoader.valuessY **kkmG LLN  ** <<>!! !s*AA3A/A3A3)A1*A31A3)Ni@Brk)rHrIrJrrrnrrNrspropertyboolrlistrmrrlrrrrrdrhrrjrOrPrFrrs*.#, II&I! I  I.+t++U5<<#789N0"% f(<"=>" ''&'T' "u||V';!<="rPrc neZdZdZ d deeeddfdZdede jde e e eeffdZy) _StorageWriterTransforms This is experimental, and will likely move elsewhere in the future. It lives here to minimize changes while we are still learning and gathering feedback. N extensionsrYc&|d|_y||_y)a If the extensions arg is None, this means the implementation should provide whatever defaults it chooses. An empty sequence indicates no extensions should be used. At this time, the default extensions sequence is empty. NrO)r)rCrs rFrsz!_StorageWriterTransforms.__init__s!+ 2" rP write_item raw_streamc"Gddtj}ttt||}|j D]}|j |}|t|j Dcgc]}|jc}fScc}w)NcNeZdZdejfdZdefdZdede fdZ dZ y) E_StorageWriterTransforms.transform_save_stream..NoCloseWriterrawc||_yr@)r)rCrs rFrszN_StorageWriterTransforms.transform_save_stream..NoCloseWriter.__init__s rPrYcyNTrOrgs rF writeablezO_StorageWriterTransforms.transform_save_stream..NoCloseWriter.writeablesrPbc8|jj|Sr@)rwrite)rCrs rFrzK_StorageWriterTransforms.transform_save_stream..NoCloseWriter.write sxx~~a((rPcX|j|jjyr@)flushrrgs rFclosezK_StorageWriterTransforms.transform_save_stream..NoCloseWriter.close s  rPN) rHrIrJioIOBasersrrrrNrrrOrPrF NoCloseWriterrs6 BII  4  )v )# ) !rPr) rrrrbytesr transform_toreversedget_descriptor)rCrrrrexs rFtransform_save_streamz._StorageWriterTransforms.transform_save_streamsw !BII !BuI}Z'@A // 9B??<8L 9Xdoo=VWrr002WXXWs1B r@)rHrIrJrKrrrrsr*rrrmrrrrLrrOrPrFrrshJN C"8,D#EF C  CY#Y13Y r%y$s)# $YrPritemcd}|jJ|jjD]}||z} |jjj}|tj j |zSNr) tensor_datar` propertiesdtypern_utils _element_size)rr`srs rF _item_sizersp D    '' '    " "      ' ' - -E %,,,,U3 33rPbinsrBc|dk(r|gS|Dcgc]"}|jtjk(s!|$}}|Dcgc]"}|jtjk7s!|$}}t|Dcgc]}g}}t|Dcgc]}d}}|j t dt |D]\}}|||zj||D]X}tt |tjdd} || j||| xxt |z cc<Z|Scc}wcc}wcc}wcc}w)NrrT)rreverser) rr+BYTE_IOrangerr enumerateruminrr) rrBwibytes_wtensor_wr~buckets bucket_sizesirs rF_split_by_size_and_typer%s3 qyw!FbRWW 0E0E%ErFGF"Grbgg1F1F&FGHG27+%>Qb%>G%>$T{+!A+L+ MMj$M/7#%2D  $%,)L)x/B/B1/EFqI BSZ^+ , N#GG%>+s!"D3D3"D8D80 D= E transformsrdatar storage_keyserialization_formatc "|j}|j||\}}|jtjk(r)index size_in_bytes storage_data)tellrrr+r isinstancerBytesIOr getbufferrnrrr6rWsaverr{rrr/rr:) rrrrrrr<rr>r=info_transform_descriptorss rF _write_itemr=sS[[]F,6,L,LF-)\(-///$ +++4>>+,$ ---{{ell51111 #6#A#A A JJt\ *2=== bjjB' 1 1 33 )*a/5J !   "<    rP create_stream file_queue result_queueplannerr use_fsync thread_countc P |j\} } } tjj} t t| d} |dk(rMtj j s| r-| j r|dkDrt|j|}nt|j}| Dcgc]"}|jtjk7s!|$}}|D]}|jt|||j| Dcgc]"}|jtjk(s!|$}}g}|| d5}|D]2}|j|}|j!t#||||| |4i}i}|j%D]\}}|j&sJ|j!t#||||| ||||j(j*<d|j,j.j0i||j(j*<|t2j4k(rUddlm}|j;||t<t?j@|tBtEtFtHtJi|r$ tMjN|jQ|jYddd|j[|cc}wcc}w#tRtTf$rtMjVYZwxYw#1swYOxYw#t\j^$rYywxYw)Nrr)rwb saved_offsets)r)metadata)0 get_nowaitrn_C_get_privateuse1_backend_namegetattrr is_availabler resolve_datarprr+rrdrrhrurrjis_cpurfqnrchunkoffsetsr6rXsafetensors.torchrrrjsondumpsrrLr rrosfsyncfilenoAttributeErrorr syncrputqueueEmpty)rrrrrrrrr file_namer write_itemscustom_backend_namecustom_device_modloaderrrrr write_resultsrr tensor_dict metadata_dictrrs rF_write_files_from_queueros[ 2<2G2G2I /I{K#((("H"H"J  '/BD I  !JJ++-).?.L.L.N'!+.(((; *((&1UrBGG}?T?T4TUHU& ?  :j1:> ?  "$/Tb277m>S>S3SrTGTMy$/4 6") J"// ;D!((#&" &'0   ! " *0--/&FJ!==(=!((#&""&'0  9?K 0 0 4 45')?)?)E)E)M)M;M*"2"2"6"67"(+>+J+JJ6LL' 3TZZ 5N /^1D *L& "1 i4 j   ] +q6V Uj+,@A" "e4 4 l ;;   ssB/L1"KK8L"K3K7 LD=L#K%L5%L#L=L?LLL LL%$L%c feZdZeedeeejfdede e jddffdZ edeeejfdedeeejffdZ edeeejfdeeejfddfd Zedeeejfdeeejffd Zedeeejfddfd Zeed eeejfdefd ZedeeejfdefdZedeeejfddfdZy)r5pathmoderYNcyr@rO)rCr!r"s rFrzFileSystemBase.create_streams,/rPsuffixcyr@rOrCr!r$s rF concat_pathzFileSystemBase.concat_paths#&rPnew_pathcyr@rOrCr!r(s rFrenamezFileSystemBase.renamesrPcyr@rOrCr!s rF init_pathzFileSystemBase.init_pathsSVrPcyr@rOr-s rFmkdirzFileSystemBase.mkdirsArP)rHrIrJr rrrLrPathLikerrrrr'r+r.r0 classmethodrr5r7r9rOrPrFr5r5s/#r{{*+/36/ 299dD( )//&#r{{*+&58& sBKK &&#r{{*+7eZdZdZddddddej fdeeejfde de d e d e d e d e e ed edededdffd Zd'deeejdfddfdZde dededdfdZde fdZdedefdZdeedeefdZdededeeefdZdedej:deeefdZded eeeddfd!Z de e!fd"Z"d'd#e e dejfd$Z#e$deeejffd%Z%e&deeejfde fd&Z'xZ(S)(_FileSystemWriteraa Basic implementation of StorageWriter using file IO. This implementation makes the following assumptions and simplifications: * The checkpoint path is an empty or non-existing directory. * File creation is atomic The checkpoint consist of one file per write request plus a `.metadata` file with the serialized metadata. Tr逖Nr!single_file_per_rank sync_filesrper_thread_copy_ahead overwrite _extensionsrargskwargsrYc .t |t|_|jj ||_||_||_||_||_ t|_ ||_ t||_||_d|_d|_y)a= Initialize the writer pointing to `path`. Args: path: directory where the checkpoint will be written to. single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True. sync_files : force files to be synced to permanent storage. Default to True. thread_count: Number of IO threads to use to write. Default to 1. per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb. overwrite: Whether to allow overwriting existing checkpoints. Defaults to True. _extensions: Extensions to apply to output streams (EXPERIMENTAL) N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure. NT)superrsr4fsr.r!rSrTrrUr]save_idrVrrrrankuse_collectives) rCr!rSrTrrUrVrWrrXrY __class__s rFrsz_FileSystemWriter.__init__Is6 ,GG%%d+ $8!$(%:"%' "2;?$8!#' %)rPr1cf|r |jj||_t|_yr@)r\r.r!r]r]rCr1s rFresetz_FileSystemWriter.resetrs% ))-8DI%' rPis_coordinatorc`|jdd|_|jdd|_yNr^r_T)getr^r_)rCrdrXrYs rFset_up_storage_writerz'_FileSystemWriter.set_up_storage_writerws*JJvt, %zz*;TBrPc|jr|jd}n|jd}|jj|S)N)r^r)r__get_metadata_pathr\r7)rC metadata_paths rF_metadata_existsz"_FileSystemWriter._metadata_exists}sF    333>M!333;Mww~~m,,rPplanc|jj|j|jrV|jr1t j d|jd|jdntd|jd|j:|js.tj|td|jd}|S) Nz#Detected an existing checkpoint in z#, overwriting since self.overwrite=z. Past version 2.5 of PyTorch, `overwrite` will default to False. Set this variable to True to maintain this functionality or False to raise when an existing checkpoint is found.z-Checkpoint already exists and self.overwrite=.__r~r) r\r0r!rlrVwarningswarnrr^r_ dataclassesreplacerRrCrms rFprepare_local_planz$_FileSystemWriter.prepare_local_plans  dii  "~~ 9$))DhY]YgYgXijkk #%SDNNCTTU#VWW 99 )=)=&&>Btyyk2C#DD rPplansc t|Dcgc]7\}}|j$tj|t d|dn|9}}}|Scc}}w)Nrpr~rq)rrrtrurR)rCrxrrm new_planss rFprepare_global_planz%_FileSystemWriter.prepare_global_plansi %U+  4  (   >Bqc)3L M     sjt}dz |Sr)rSDEFAULT_SUFFIX)r file_count storage_plans rFgen_filez._FileSystemWriter.write_data..gen_files,'../ |N;KLI !OJ rP) rrQueuerSrrrBr\r'r!r _write_data) rCrmrrrbucketrr!rrrs @@rF write_dataz_FileSystemWriter.write_datas (,'8'8    #(++-  $ $1$2C2CTZZP :$J ww**499i@i89 :   :$J ww**499i@i$89 : 44rPrc tj}g}td|jD]}t j t |jj||||j|j|j|j|jf }|j|j|t |jj||||j|j|j|j|j |D]}|jg} ||j!z }#tj"$r t%}|j'||cYSwxYw)Nr)targetrX) rrrrrrrrr)rrrr threadingThreadrr\rrrUrTrstartrujoinrrr1 set_result) rCrrrthreadsr~tresfuts rFrz_FileSystemWriter._write_datasS %*KKM q$++, A  .GG)) OO..OO%%--  A GGI NN1 ! $ ''//!% $ : :oo**!%!:!:  A FFH  |..00{{ -3XC NN3 J s1E0E98E9rresultsc8tj|t}i}|D]6}|j|Dcic]}|j|j c}8||_|j |_|js"|jd|jtdntd}tt|jj|j|}|jj|d5}t!j"|||j$r$ t'j(|j+ddd|js(|j|j3|j} n|j3} |jj5| r|jj7| |jj9|| ycc}w#t,t.f$rt'j0YwxYw#1swYxYw)N)versionrpz.tmpr)rtrur8updaterr storage_metar_r^r7rr r\r'r!rpickledumprTrrrrr rrjr7r9r+) rCrr storage_mdwr_listwr tmp_filenametmp_path metadata_filerks rFfinishz_FileSystemWriter.finishs&&x9LM  MG   7KRrxx8K L M * $ 1 1 3''DII,A L> . >&  dgg11$))\JK WW " "8T 2 m KK- 0HH]1134  ## (= 33DII>M 335M 77>>- ( GGOOM * x/7L'(<=GGI   s/G" <#H #G''#H  H H  HHcDt|j|jS)N)r1r])r#r1r]rgs rFrz_FileSystemWriter.storage_metas););T\\RRrPr^c|tn d|t}tt|jj |j |SNrpr7rr r\r'r!rCr^filenames rFrjz$_FileSystemWriter._get_metadata_path<(, l^Btf\N:SD$''--diiBCCrPc|jS)zT return the checkpoint_id that will be used to save the checkpoint. r!rgs rFr1z_FileSystemWriter.checkpoint_id yyrPc,tj|Sr@r4r5r3s rFr5z(_FileSystemWriter.validate_checkpoint_id"00??rPr@))rHrIrJrKr6rWrrLrr:rrNrrrrrsrcrhrlr(rwrr{r)r1r/rrrrr!rr#rrjrr1r;r5 __classcell__r`s@rFrQrQ;sE  &*%/DH4G4R4R'*C$%'*#'* '*  '* # '*'*h'?@A'*2'*'*'* '*R(5bkk4)?#@(D( C"C+.C:=C C -$ -xH&hDN555 [! " 5800KK0 [! " 0d 0x 0$tK7H2I 0d 0DSh{3SDx}D DuS"++%56 @5bkk9I3J@t@@rPrQc ReZdZdZd deeddfdZdedee de e de e fd Z y) _StorageReaderTransformsrNextension_registryrYc6|t|_y||_yr@)rr)rCrs rFrsz!_StorageReaderTransforms.__init__.s!#5#=   CU rP read_itemr>rc|jj|}|}|D]$}t|ts|j |}&|Sr@)rfrom_descriptor_listrrtransform_from)rCrr>rrrrs rFtransform_load_streamz._StorageReaderTransforms.transform_load_stream3sT ,,AABWX # CB"67!#!2!2>!B CrPr@) rHrIrJrKrrrsr'rrLrrrrOrPrFrr'sX  84E+F RV   (} uI  E rPrc eZdZ ddeeej fdeeddffd Z de de e fdZ ddeeej dfddfd Zd ed ededfd Zdd eedej fdZdededefdZdededededdf dZd edefdZdeedeefdZedeeej ffdZedeeej fdefdZ xZ!S)r3Nr!_extension_registryrYct|t|_|jj ||_i|_t|_t||_ d|_ d|_ yr) r[rsr4r\r.r!rr]load_idrrr^r_)rCr!rr`s rFrszFileSystemReader.__init__Bs] ,GG%%d+ ,.%' 23FG #rPsinfocntttt||j|j Sr@)rrrr0r<r=)rCfilers rF _slice_filezFileSystemReader._slice_filePs%BuI0u||U\\RSSrPr1cti|_|r |jj||_t |_yr@)rr\r.r!r]rrbs rFrczFileSystemReader.resetSs- ))-8DI%' rPrmrc i}|jD]H}|j|j}|j}|j |gj |J|jD]>\}}|j j|j|} |j j| d5} |D]} |j| j}|j| |} |jj| |jxsd| } | jtj k(rHt#j$| j'd}|j)d|j+| || j-r| }n5t#j$| j'd}|j)dt/t0t3j4|dd}t7|| j8| j:}|j=| j?}|jA|jAk(s6Jd| jd |jAd |jA|jC||jE| | dddAtG}|jId|S#1swYixYw) NrbrOrryT) map_location weights_onlyzreq z mismatch sizes z vs )%rBr storage_indexr; setdefaultrur\r'r!rrrrr>rr$rrrreadseek load_bytesseekablerrrnloadrstorage_offsetslengthsresolve_tensorrxr`copy_ commit_tensorr1r)rCrmrper_fileritem_mdr!r;reqsr(rreq file_slicer read_bytesrr target_tensorrs rF read_datazFileSystemReader.read_dataYs.0 >#3- B M4ww**499mDH&&x6+ B&)BC"//0A0ABG!%!1!1&'!BJ%)__%J%J 55;" &Nxx<#7#77%'ZZ0C0CB0G%H "***3 ;)224'5H(*zz.2E2Eb2I'JH$MM!,!%"!JJ (-2-1""8"C$7$7")0(>(>s(C(J(J(L ,113v{{}D"3#4#4"55EmFXFXFZE[[_`f`k`k`m_noD&++F3--c=AS)B+ B+ B- B^h t _+ B+ Bs 3G-K  K r^c|tn d|t}tt|jj |j |Srrrs rFrjz#FileSystemReader._get_metadata_pathrrPrXrYcF|jdd}|j|}|jj|d5}t j |}dddt ddt|_|j|j_ |S#1swYBxYw)Nr^rr) rgrjr\rrrrr#rr)rCrXrYr^r!rrs rF read_metadatazFileSystemReader.read_metadataszz&$'&&t, WW " "4 . 2-{{=1H 2 8^T 2 :$/MH !(, % 2 2s BB rrdc|j|_|jdd|_|jdd|_|jJyrf)rrgr^r_)rCrrdrXrYs rFset_up_storage_readerz&FileSystemReader.set_up_storage_readersJ%11JJvt, %zz*;TB  ,,,rPc|Sr@rOrvs rFrwz#FileSystemReader.prepare_local_plans rPrxc|Sr@rO)rCrxs rFr{z$FileSystemReader.prepare_global_plans rPc|jS)zT return the checkpoint_id that will be used to load the checkpoint. rrgs rFr1zFileSystemReader.checkpoint_idrrPc,tj|Sr@rr3s rFr5z'FileSystemReader.validate_checkpoint_idrrPr@)"rHrIrJrrLrr:rrrsr:rrrrcr%r&r1rrNrjrr!rrrrwrr{rr1r;r5rrs@rFr3r3As<@ $C$% $&&78 $  $T|T5 T(5bkk4)?#@(D( 9h999vDx}D D 3 # ( - -26-?B-NQ- -xHhDNuS"++%56 @5bkk9I3J@t@@rPr3ceZdZdZdddddddej fdeeejfde d e d e d e d e d e de e ededdfdZdedeffd ZxZS)r2a Basic implementation of StorageWriter using file IO. This implementation makes the following assumptions and simplifications: * The checkpoint path is an empty or non-existing directory. * File creation is atomic The checkpoint consist of one file per write request plus a global `.metadata` file with the serialized metadata if rank coordination is enabled. a rank local `__{rank}.metadata` file with the serialized metadata if rank coordination is NOT enabled. TrrRFNr!rSrTrrUcache_staged_state_dictrVrWrrYc ntj|||||||||  tj||y)a Initialize the writer pointing to `path`. Args: path: directory where the checkpoint will be written to. single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True. sync_files : force files to be synced to permanent storage. Default to True. thread_count: Number of IO threads to use to write. Default to 1. per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb. cache_staged_state_dict: Whether to cache the staged state_dict. This option decreases staging latency at the cost of increases memory usage. Additionally, if this parameter is set to True, it's the expectation that the stager is maintained and reused for multiple dcp.async_save calls. Default to False. overwrite: Whether to allow overwriting existing checkpoints. Defaults to True. _extensions: Extensions to apply to output streams (EXPERIMENTAL) N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure. )r!rSrTrrUrVrWr)rN)rQrsr,) rCr!rSrTrrUrrVrWrs rFrszFileSystemWriter.__init__sH: "" !5!%"7#!5 #  $$ $; rP state_dictc0d|_t| |S)zOverride of AsyncStager.stager)rUr[stage)rCrr`s rFrzFileSystemWriter.stages&'"w}Z((rP)rHrIrJrKr6rWrrLrr:rrNrrrrsr"rrrs@rFr2r2s "&*%/(-DH4G4R4R+ C$%+ #+  +  + # + "&+ + h'?@A+ 2+  + Z))O))rPr2)jrrtrr rrrrrr[rrabcrrcollections.abcrrrr contextlibr r enumr r pathlibr typingrrrrrrrtyping_extensionsrrnr torch._utilsrrtorch.distributed._shard._utilsr'torch.distributed.checkpoint._extensionrr&torch.distributed.checkpoint._hf_utilsrrrrr %torch.distributed.checkpoint.metadatar!r"r#$torch.distributed.checkpoint.plannerr$r%r&r'r(r)r*r+$torch.distributed.checkpoint.stagingr,$torch.distributed.checkpoint.storager-r.r/"torch.distributed.checkpoint.utilsr0 torch.futuresr1__all__r7rLrMr8r:rRr6r~r]r_rprrrNrrrrrrrrrr5r4rQrr3r2rOrPrFrs   #CC%!#BBB% GBYX   E A   c")U3Z)  I I  I   $   C  }.W"MW"t0Y0Yf4Y434#d9o$tIBW0/(/ II/  ELL( )/ /  / . //df f  f ++f  f ) f  f f f .f  f RBSBD>0>0Bi@ i@X4{@}{@|A)(*=A)rP