L iO$4dZddlmZmZmZddlZddlmZddlmZm Z m Z m Z m Z m Z mZmZmZmZmZmZmZmZddgZGd deZd d ed ed e d ed e d ze_deedeedeedeedeedeededededededededededef dZdeedeedeedeedeedeededededededededededef d Ze e! d$deedeedeedeedeedeed"eededededededededededef"d#Zy)%z)Implementation for the RMSprop algorithm.)castOptionalUnionN)Tensor)_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported _foreach_doc!_get_capturable_supported_devices_get_scalar_dtype _maximize_doc _params_doc _to_scalar_use_grad_for_differentiable _view_as_real OptimizerParamsTRMSproprmspropceZdZ ddedeeefdedededededed eed ed effd Z fd Z dZ e ddZ xZS)rparamslralphaeps weight_decaymomentumcentered capturableforeachmaximizedifferentiablec Zt|tr|jdk7r tdd|kstd|d|kstd|d|kstd|d|kstd|d|kstd||||||||| | | d } t ||| y) NrzTensor lr must be 1-elementgzInvalid learning rate: zInvalid epsilon value: zInvalid momentum value: zInvalid weight_decay value: zInvalid alpha value: ) rrrrrrr r!r"r#) isinstancernumel ValueErrorsuper__init__)selfrrrrrrrr r!r"r#defaults __class__s Y/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/optim/rmsprop.pyr)zRMSprop.__init__s b& !bhhjAo:; ;by6rd;< <cz6se<= =h7zBC Cl";L>JK Ke|4UG<= =  ($ ,   *c|t|||jD]}|jdd|jdd|jdd|jdd|jdd|jdd|d D]}|jj |g}t |dk7s.tj|d rGt|d }|dr*tj|t|j ntj|t |d <y) NrrrFr!r"r#r rstepdtypedevicer2) r( __setstate__ param_groups setdefaultstategetlentorch is_tensorfloattensorrr3)r*r8grouppp_statestep_valr,s r-r5zRMSprop.__setstate__Hs U#&& E   Z +   Z /   Y -   Z /   -u 5   \5 18_ **..B/w<1$U__WV_-M$WV_5H !. $,=,?#\\(:K:MN FO  r.cd}|dD]} | j|tj| z}|j| | jjr t d|j| j|j | } t| dk(r|dr*tjdt| jntjdt| d <tj| tj | d <|d dkDr(tj| tj | d <|dr(tj| tj | d<|j| d |j| d |d dkDr|j| d |ds|j| d|S)NFrz)RMSprop does not support sparse gradientsrr r1r4r0) memory_format square_avgrmomentum_bufferrgrad_avg) gradr; is_complexappend is_sparse RuntimeErrorr8r:zerosrr3 zeros_likepreserve_format) r*r?params_with_gradgrads square_avgsmomentum_buffer_list grad_avgs state_steps has_complexr@r8s r- _init_groupzRMSprop._init_group]s x$ 4Avv~ 5++A. .K  # #A &vv"#NOO LL JJqME5zQ\*KK*;*=ahhOR/@/BCf ',&6&6U%:%:'l#$q(/4/?/?)>)>0E+,$(-(8(8)>)>)E*%   u\2 3   uV} -Z 1$$++E2C,DEZ   z!23I$ 4Lr.cj|jd}|$tj5|}ddd|jD]_}g}g}g}g}g}g} |j ||||||| } t |||||| |d|d|d|d|d|d|d|d |d |d | a|S#1swYyxYw) zPerform a single optimization step. Args: closure (Callable, optional): A closure that reevaluates the model and returns the loss. Nrrrrrrr!r"r#r ) rrrrrrr!r"r#r rW) _cuda_graph_capture_health_checkr; enable_gradr6rXr) r*closurelossr?rQrRrSrUrTrVrWs r-r0z RMSprop.steps --/  ""$ !y !&&$ E-/ "$E(*K&(I13 (*K** $K  $;Gn%L">2z*z*i(z*$%56 .'# %$ L S ! !s B))B2) g{Gz?gGz?g:0yE>rrFFNFFN)__name__ __module__ __qualname__rrr=rboolrr)r5rXrr0 __classcell__)r,s@r-rrs$( "&$'+'+ %- '+ '+  '+  '+'+'+'+$'+'+'+R*1f"4"4r.aj Implements RMSprop algorithm. .. math:: \begin{aligned} &\rule{110mm}{0.4pt} \\ &\textbf{input} : \alpha \text{ (alpha)}, \: \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)} \\ &\hspace{13mm} \lambda \text{ (weight decay)},\: \mu \text{ (momentum)}, \: centered, \: \epsilon \text{ (epsilon)} \\ &\textbf{initialize} : v_0 \leftarrow 0 \text{ (square average)}, \: \textbf{b}_0 \leftarrow 0 \text{ (buffer)}, \: g^{ave}_0 \leftarrow 0 \\[-1.ex] &\rule{110mm}{0.4pt} \\ &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do} \\ &\hspace{5mm}g_t \leftarrow \nabla_{\theta} f_t (\theta_{t-1}) \\ &\hspace{5mm}if \: \lambda \neq 0 \\ &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1} \\ &\hspace{5mm}v_t \leftarrow \alpha v_{t-1} + (1 - \alpha) g^2_t \hspace{8mm} \\ &\hspace{5mm} \tilde{v_t} \leftarrow v_t \\ &\hspace{5mm}if \: centered \\ &\hspace{10mm} g^{ave}_t \leftarrow g^{ave}_{t-1} \alpha + (1-\alpha) g_t \\ &\hspace{10mm} \tilde{v_t} \leftarrow \tilde{v_t} - \big(g^{ave}_{t} \big)^2 \\ &\hspace{5mm}if \: \mu > 0 \\ &\hspace{10mm} \textbf{b}_t\leftarrow \mu \textbf{b}_{t-1} + g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big) \\ &\hspace{10mm} \theta_t \leftarrow \theta_{t-1} - \gamma \textbf{b}_t \\ &\hspace{5mm} else \\ &\hspace{10mm}\theta_t \leftarrow \theta_{t-1} - \gamma g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big) \hspace{3mm} \\ &\rule{110mm}{0.4pt} \\[-1.ex] &\bf{return} \: \theta_t \\[-1.ex] &\rule{110mm}{0.4pt} \\[-1.ex] \end{aligned} For further details regarding the algorithm we refer to `lecture notes `_ by G. Hinton. and centered version `Generating Sequences With Recurrent Neural Networks `_. The implementation here takes the square root of the gradient average before adding epsilon (note that TensorFlow interchanges these two operations). The effective learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma` is the scheduled learning rate and :math:`v` is the weighted moving average of the squared gradient. z Args: a0 lr (float, Tensor, optional): learning rate (default: 1e-2) alpha (float, optional): smoothing constant (default: 0.99) eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8) weight_decay (float, optional): weight decay (L2 penalty) (default: 0) momentum (float, optional): momentum factor (default: 0) centered (bool, optional) : if ``True``, compute the centered RMSProp, the gradient is normalized by an estimation of its variance z z rrRrSrUrTrVrrrrrrr"r#r rWc tjjs t|}t |D]'\}}||}tj j s\|rZt}|jj|jjk(r|jj|vs Jd|d||}| s|n| }||}|dz }| dk7r|j|| }tj|}|r?tj|}tj|}tj|}|j|j||d|z | rT||}|rtj|}|j|d|z |j!||dj#}n|j%}| r|j|}n|j'|}| dkDrS||}|rtj|}|j| j)|||j'|| |j)||| *y)NIIf capturable=True, params and state_steps must be on supported devices: .rrrvalue)r;jit is_scriptingr enumeratecompiler is_compilingr r3typeaddrJ view_as_realmul_addcmul_lerp_addcmulsqrt_sqrtadd_addcdiv_)rrRrSrUrTrVrrrrrrr"r#r rWiparamr0capturable_supported_devicesrIrFis_complex_paramrHavgbufs r-_single_tensor_rmspropr s2& 99 ! ! # ^f%3151~~~**,+L+N ( !!T[[%5%55LL%%)EE \\x[yyz{  F Qx#t$ ^    1 88E86D ++E2 &&u-E%%d+D++J7J''d!e)'D  |H --h7 NN4U +$$Xxr$BHHJC//#C ''#,C((3-C a<&q)C((- HHX  ' 'c 2 JJs2#J & NN4RCN 0g31r.c H!t|dk(ry| rJdtjjs5|r3t !t !fdt ||Ds Jd!dt|}tj||||||g}|jD]|\\}}}}}}}ttt|}ttt|}ttt|}ttt|}|rg||g}| dkDr(ttt|}|j|| r(ttt|}|j|t|g|| rtj |}tjjs=|dj"r.tj$|tj&ddd ntj$|d | dk7r3| rtj$||| ntj(||| }tj*||tj,|||d |z | rvttt|}tj.||d |z tj0|||d }tj2|tj$||n+tj4|}tj$||| dkDrttt|}tj*|| tj6||||rIt9|tjr/tj:|| } tj$|| tj$||| |rJt9|tjr0tj<|| tj6|||ctj6|||| y) Nrz#_foreach ops don't support autogradc3K|]N\}}|jj|jjk(xr|jjvPywr^)r3rp).0r@r0r}s r- z(_multi_tensor_rmsprop..psQ 4 HHMMT[[-- - > !== > sAArerfg?cpu)r3rgrrhrj)r:r;rnror allziprr"_group_tensors_by_device_and_dtypevaluesrlistrrKr _foreach_negis_cpu _foreach_add_r> _foreach_add _foreach_mul__foreach_addcmul__foreach_lerp__foreach_addcmul_foreach_sqrt_ _foreach_sqrt_foreach_addcdiv_r% _foreach_mul _foreach_div_)"rrRrSrUrTrVrrrrrrr"r#r rWgrouped_tensorsgrouped_params_grouped_grads_grouped_square_avgs_grouped_grad_avgs_grouped_momentum_buffer_list_grouped_state_steps__grouped_params grouped_gradsgrouped_square_avgsgrouped_state_stepsstate_and_gradsgrouped_momentum_buffer_listgrouped_grad_avgsr momentum_lrr}s" @r-_multi_tensor_rmsproprUs& 6{aDDD  >> & & (Z'H'J$ v{3   XXtWuuv w    BBBB  Y0DkRO  " " $YW     ) d6lO<T&\>: "4<1EF"4<1EF ,.ABO!|/3L"?0, &&'CD$(f7I$J!&&'89 . ;? ; !..}=M ~~**,1DQ1G1N1N   #U\\#e%DC     3Q 7 1 ##M>V % 2 2!>!  /7   QY   $T&\3E F   !2M1u9 M((#%68IQSC   %   S )%%&9:C   S ) a<+/V ;, (    .s 5() 1ell#5s$&zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF) use_fusedz6torch.jit.script not supported with foreach optimizers) rrrrrrr"r r#rW) r;rnrorrMr rkrlrr)rrRrSrUrTrVr!r"r#r rWrrrrrrrfuncs r-rrs: >> & & (5-852 ^  1 Ne 7599))+STTuyy--/$%   !%!r.)NFFFF)__doc__typingrrrr;r optimizerrr r r r r rrrrrrrr__all__rrr=rbrrrrDr.r-rs?0(( $ i gigV+X       Y<BI1 LI1 <I1fI1F| I1 v, I1 f I1 I1 I1 I1I1I1I1I1I1 !I1"#I1XAW LAW <AWfAWF| AW v, AW f AW AW AW AWAWAWAWAWAW !AW"#AWH 1GH# A LA <AfAF| A v, A f Ad^AAAAA A !A" #A$%A&'A()AIAr.