import logging
from collections import defaultdict
from threading import Lock
from typing import Optional

import torch
import torch.distributed.autograd as dist_autograd
import torch.distributed.rpc as rpc
import torch.jit as jit
import torch.nn as nn
from torch import Tensor
from torch.distributed.rpc import RRef

from .utils import functional_optim_map

__all__ = ["DistributedOptimizer"]

logger = logging.getLogger(__name__)


# Common TorchScript interface type for the per-worker optimizer ScriptModules,
# so that RRefs to them can be typed uniformly inside scripted functions.
@jit.interface
class _ScriptLocalOptimizerInterface:
    def step(self, autograd_ctx_id: int) -> None:
        pass


class _ScriptLocalOptimizer(nn.Module):
    # TorchScript does not support concurrent compilation from multiple
    # threads, so compilation is serialized with this lock.
    compile_lock = Lock()

    def __init__(self, optim_cls, local_params_rref, *args, **kwargs):
        super().__init__()
        self._local_params = [rref.local_value() for rref in local_params_rref]
        self.optim = optim_cls(self._local_params, *args, **kwargs)

    @jit.export
    def step(self, autograd_ctx_id: int):
        all_local_grads = dist_autograd.get_gradients(autograd_ctx_id)
        # Apply the functional optimizer step with one gradient entry per
        # local parameter (None if no gradient was produced for it).
        grads: list[Optional[Tensor]] = [
            all_local_grads[p] if p in all_local_grads else None
            for p in self._local_params
        ]

        self.optim.step(grads)


class _LocalOptimizer:
    # All _LocalOptimizer instances on a worker share a single lock, under the
    # simplifying assumption that they all optimize the same parameters.
    global_lock = Lock()

    def __init__(self, optim_cls, local_params_rref, *args, **kwargs):
        self._local_params = [rref.local_value() for rref in local_params_rref]
        self.optim = optim_cls(self._local_params, *args, **kwargs)

    def step(self, autograd_ctx_id):
        all_local_grads = dist_autograd.get_gradients(autograd_ctx_id)

        with _LocalOptimizer.global_lock:
            for param, grad in all_local_grads.items():
                param.grad = grad
            self.optim.step()


def _new_local_optimizer(optim_cls, local_params_rref, *args, **kwargs):
    return rpc.RRef(_LocalOptimizer(optim_cls, local_params_rref, *args, **kwargs))


def _local_optimizer_step(local_optim_rref, autograd_ctx_id):
    local_optim = local_optim_rref.local_value()
    local_optim.step(autograd_ctx_id)


# GIL-free counterparts of the two functions above, backed by a TorchScript
# optimizer module.
def _new_script_local_optimizer(optim_cls, local_params_rref, *args, **kwargs):
    optim = _ScriptLocalOptimizer(optim_cls, local_params_rref, *args, **kwargs)

    with _ScriptLocalOptimizer.compile_lock:
        script_optim = jit.script(optim)
        return rpc.RRef(script_optim, _ScriptLocalOptimizerInterface)


@jit.script
def _script_local_optimizer_step(
    local_optim_rref: RRef[_ScriptLocalOptimizerInterface], autograd_ctx_id: int
) -> None:
    local_optim = local_optim_rref.local_value()
    local_optim.step(autograd_ctx_id)


def _wait_for_all(rpc_futs):
    # Wait on every future; if any of them raised, re-raise one of the
    # exceptions only after all futures have completed.
    exception = None
    results = []
    for fut in rpc_futs:
        try:
            results.append(fut.wait())
        except Exception as e:
            results.append(e)
            exception = e
    if exception is not None:
        raise exception
    return results


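# Illustrative sketch (comment only, not executed): the helpers above are the
# per-worker endpoints that ``DistributedOptimizer`` below drives over RPC.
# For a single worker named "worker1" (the name, and the variables
# ``optim_cls``, ``param_rrefs``, and ``context_id``, are assumptions for the
# example), construction and stepping roughly reduce to:
#
#     optim_rref = rpc.rpc_sync(
#         "worker1", _new_local_optimizer, args=(optim_cls, param_rrefs)
#     )
#     rpc.rpc_sync("worker1", _local_optimizer_step, args=(optim_rref, context_id))
#
# ``DistributedOptimizer`` issues the same calls with ``rpc.rpc_async``, once
# per worker that owns parameters, and collects the futures via
# ``_wait_for_all``.
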
class DistributedOptimizer:
    """
    DistributedOptimizer takes remote references to parameters scattered
    across workers and applies the given optimizer locally for each parameter.

    This class uses :meth:`~torch.distributed.autograd.get_gradients` in order
    to retrieve the gradients for specific parameters.

    Concurrent calls to
    :meth:`~torch.distributed.optim.DistributedOptimizer.step`,
    either from the same or different clients, will
    be serialized on each worker -- as each worker's optimizer can only work
    on one set of gradients at a time. However, there is no guarantee that
    the full forward-backward-optimizer sequence will execute for one client
    at a time. This means that the gradients being applied may not correspond
    to the latest forward pass executed on a given worker. Also, there is no
    guaranteed ordering across workers.

    `DistributedOptimizer` creates the local optimizer with TorchScript enabled
    by default, so that optimizer updates are not blocked by the Python Global
    Interpreter Lock (GIL) in the case of multithreaded training (e.g. Distributed
    Model Parallel). This feature is currently enabled for most optimizers. You
    can also follow `the recipe`__ in PyTorch tutorials to enable TorchScript support
    for your own custom optimizers.

    Args:
        optimizer_class (optim.Optimizer): the class of optimizer to
            instantiate on each worker.
        params_rref (list[RRef]): list of RRefs to local or remote parameters
            to optimize.
        args: arguments to pass to the optimizer constructor on each worker.
        kwargs: arguments to pass to the optimizer constructor on each worker.

    Example::
        >>> # xdoctest: +SKIP("distributed")
        >>> import torch.distributed.autograd as dist_autograd
        >>> import torch.distributed.rpc as rpc
        >>> from torch import optim
        >>> from torch.distributed.optim import DistributedOptimizer
        >>>
        >>> with dist_autograd.context() as context_id:
        >>>     # Forward pass.
        >>>     rref1 = rpc.remote("worker1", torch.add, args=(torch.ones(2), 3))
        >>>     rref2 = rpc.remote("worker1", torch.add, args=(torch.ones(2), 1))
        >>>     loss = rref1.to_here() + rref2.to_here()
        >>>
        >>>     # Backward pass.
        >>>     dist_autograd.backward(context_id, [loss.sum()])
        >>>
        >>>     # Optimizer.
        >>>     dist_optim = DistributedOptimizer(
        >>>         optim.SGD,
        >>>         [rref1, rref2],
        >>>         lr=0.05,
        >>>     )
        >>>     dist_optim.step(context_id)

    __ https://github.com/pytorch/tutorials/pull/1465
    """

    def __init__(self, optimizer_class, params_rref, *args, **kwargs):
        torch._C._log_api_usage_once("torch.distributed.optim.DistributedOptimizer")
        # Group the parameter RRefs by the worker that owns them.
        per_worker_params_rref = defaultdict(list)
        for param in params_rref:
            per_worker_params_rref[param.owner()].append(param)

        # Prefer the functional (TorchScript-compatible) counterpart of the
        # optimizer when one exists and TorchScript is enabled; otherwise fall
        # back to the plain Python optimizer.
        if optimizer_class in functional_optim_map and jit._state._enabled:
            optim_ctor = functional_optim_map.get(optimizer_class)
        else:
            optim_ctor = optimizer_class
        self.is_functional_optim = optim_ctor != optimizer_class

        if self.is_functional_optim:
            optimizer_new_func = _new_script_local_optimizer
        else:
            logger.warning(
                "Creating the optimizer %s without TorchScript support, "
                "this might result in slow computation time in a multithreaded "
                "environment (i.e. Distributed Model Parallel training on CPU) "
                "due to Python's Global Interpreter Lock (GIL). Please file an "
                "issue if you need this optimizer in TorchScript.",
                optimizer_class,
            )
            optimizer_new_func = _new_local_optimizer

        # Create one local optimizer per worker, asynchronously.
        remote_optim_futs = []
        for worker, param_rrefs in per_worker_params_rref.items():
            remote_optim_rref_fut = rpc.rpc_async(
                worker,
                optimizer_new_func,
                args=(optim_ctor, param_rrefs) + args,
                kwargs=kwargs,
            )
            remote_optim_futs.append(remote_optim_rref_fut)

        self.remote_optimizers = _wait_for_all(remote_optim_futs)

    def step(self, context_id):
        """
        Performs a single optimization step.

        This will call :meth:`torch.optim.Optimizer.step` on each worker
        containing parameters to be optimized, and will block until all workers
        return. The provided ``context_id`` will be used to retrieve the
        corresponding :class:`~torch.distributed.autograd.context` that
        contains the gradients that should be applied to the parameters.

        Args:
            context_id: the autograd context id for which we should run the
                optimizer step.
        """
        dist_autograd._is_valid_context(context_id)

        optimizer_step_func = (
            _script_local_optimizer_step
            if self.is_functional_optim
            else _local_optimizer_step
        )

        rpc_futs = [
            rpc.rpc_async(
                optimizer.owner(),
                optimizer_step_func,
                args=(optimizer, context_id),
            )
            for optimizer in self.remote_optimizers
        ]
        _wait_for_all(rpc_futs)
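

# Minimal launch sketch (comment only, illustrative): the docstring example of
# ``DistributedOptimizer`` above runs inside an RPC process group. The helper
# name ``run_worker``, the two-process world size, and the requirement that
# MASTER_ADDR/MASTER_PORT be set in the environment are assumptions for the
# example, not part of this module.
#
#     import torch.multiprocessing as mp
#
#     def run_worker(rank, world_size):
#         rpc.init_rpc(f"worker{rank}", rank=rank, world_size=world_size)
#         if rank == 0:
#             ...  # trainer logic, e.g. the DistributedOptimizer docstring example
#         rpc.shutdown()
#
#     if __name__ == "__main__":
#         mp.spawn(run_worker, args=(2,), nprocs=2)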