L i?""ddlmZmZmZddlZddlmZddlmZmZm Z m Z m Z m Z m Z mZmZmZmZmZmZmZmZddgZGddeZd ed e d ed e d ed e_d eedeedeedeedeedeedededededededededefdZd eedeedeedeedeedeedededededededededefdZe e d!d eedeedeedeedeedeedeedededededededededef d Zy)")castOptionalUnionN)Tensor)_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported _foreach_doc!_get_capturable_supported_devices_get_scalar_dtype _get_value _maximize_doc _params_doc _to_scalar_use_grad_for_differentiable _view_as_real OptimizerParamsTASGDasgdceZdZ ddedeeefdededededeeded ed effd Z fd Z d Z e ddZ xZS)rparamslrlambdalphat0 weight_decayforeachmaximizedifferentiable capturablec t|tr|jdk7r tdd|kstd|d|kstd||||||||| | d } t ||| y)NrzTensor lr must be 1-elementgzInvalid learning rate: zInvalid weight_decay value: ) rrrrrr r!r"r#) isinstancernumel ValueErrorsuper__init__) selfrrrrrrr r!r"r#defaults __class__s V/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/optim/asgd.pyr)z ASGD.__init__s b& !bhhjAo:; ;by6rd;< <l";L>JK K( ,$   *ct|||jD]f}|jdd|jdd|jdd|jdd|dD]}|jj |g}t |dk7s/tj|ds;t|d}tj|t|j |d<tj|d s0tj|d t|j |d <tj|d rtj|d t|j |d <iy) Nr r!Fr"r#rrstep)dtypedeviceetamu) r( __setstate__ param_groups setdefaultstategetlentorch is_tensorfloattensorrr2)r*r8grouppp_statestep_valr,s r-r5zASGD.__setstate__?sF U#&& E   Y -   Z /   -u 5   \5 18_ **..B/w<1$ ??76?;#(#9*/,,$,=,?+!??75>:).#EN2C2Eahh*!??74=9(- #DM1B1DQXX)    r.cd}|dD]} | j|tj| z}|j| | jjr t d|j| j|j | } t| dk(rtjd| jt| d<tjt|d| jtjj| d <tjd| jt| d <tj | tj" | d <|j| d |j| d |j| d |j| d|S) NFrz&ASGD does not support sparse gradientsr)r2r1r0rr3r4) memory_formatax)gradr; is_complexappend is_sparse RuntimeErrorr8r:zerosr2r as_tensorrclonedetachones zeros_likepreserve_format) r*r?params_with_gradgradsmusaxsetas state_steps has_complexr@r8s r- _init_groupzASGD._init_groupWs{ x! 2Avv!u//22  ''*66##&'OPP QVV$ 1 u:?$)KK1883D3F%E&M&uT{3#$88"3"5 %L#(**1883D3F#E$K#("2"2)>)>#E$K 5;' 5;' E%L)""5=1C! 2Dr.cb|jd}|$tj5|}ddd|jD][}g}g}g}g}g}g} |j ||||||| } t |||||| |d|d|d|d|d|d|d|d |d | ]|S#1swYuxYw) zPerform a single optimization step. Args: closure (Callable, optional): A closure that reevaluates the model and returns the loss. Nrrrrrr r!r"r#) rrrrrr r!r"r#rY) _cuda_graph_capture_health_checkr; enable_gradr6rZr) r*closurelossr?rSrTrUrVrWrXrYs r-r0z ASGD.step}s --/  ""$ !y !&& E-/ "$E "C "C!#D(*K**'S$ K  Gn;;Gn">2i(z*$%56 .'!  > E ! !s B%%B.) g{Gz?g-C6?g?g.ArNFFFN)__name__ __module__ __qualname__rrr=rrboolr)r5rZrr0 __classcell__)r,s@r-rrs$("&$ ++ %- + +  +  ++$++++B0$L"-"-r.zImplements Averaged Stochastic Gradient Descent. It has been proposed in `Acceleration of stochastic approximation by averaging`_. Args: am lr (float, Tensor, optional): learning rate (default: 1e-2) lambd (float, optional): decay term (default: 1e-4) alpha (float, optional): power for eta update (default: 0.75) t0 (float, optional): point at which to start averaging (default: 1e6) weight_decay (float, optional): weight decay (L2 penalty) (default: 0) z z .. _Acceleration of stochastic approximation by averaging: https://meyn.ece.ufl.edu/wp-content/uploads/sites/77/archive/spm_files/Courses/ECE555-2011/555media/poljud92.pdf rrTrVrUrWrXrrrrrr!r"r#rYc (tjjs t|}t |D]\}}||}| s|n| }||}||}||}||}tj j s| rt}|jj|jjcxk(r3|jjcxk(r|jjk(rnn|jj|vs Jd|dtj|r?tj|}tj|}tj|}|dz }| dk7r|j|| }| r,|jd||zz |j||dn6t|}|jd||zz |j!|| | s|j#dk7r0|j!|j%|j|n|j'|| r`|j'|d||z|zz| zz |j'dtj(||z tj*|z et|}tj,|d||z|zz| zz }|j'|tj,dt/d||z z }|j'|y)NUIf capturable=True, params, mus, etas, and state_steps must be on supported devices: .rrrvalue)r;jit is_scriptingr enumeratecompiler is_compilingr r2typerH view_as_realaddmul_addcmul_radd_itemsubcopy_maximum ones_likerMmax)rrTrVrUrWrXrrrrrr!r"r#rYiparamrGr4rFr3step_tcapturable_supported_devices eta_valuer0new_etanew_mus r-_single_tensor_asgdrs$ 99 ! ! # ^f%75Qx#t$ V V1gQ~~**,+L+N ( !!99>>&::??&==%%&LL%%)EE ))E(FaI F   E "%%d+D&&u-E##B'B !  1 88E86D  JJq53; ' NN4BN /"3I JJq59,, - JJtI:J . a GGEIIbM&&r* + HHUO  IIbQf!44>? @ HHQv{EOOF4KLL Mf%DoobQd1B-Bu,L&MNG IIg __QQr ):%:;F HHV o7r.c \"t|dk(ry| rJdtjjs9| r7t d"t "fdt ||||Ds Jd"dt|}tj||||||g}|jD]x\\}}\\}}}}}}}ttt|}ttt|}ttt|}ttt|}ttt|}ttt|}|r t|||| rtj|}tjjs=|dj r.tj"|tj$dd d ntj"|d | dk7rN| rtj"||| |}ntj&||| }tj"||| ntj&||| }tj(|||d ~tj*||}tj(|||~| rtj*||}tj,|dtj.|tj0||~tj2||} tj4| |tj"| d tj6| | tj.| tj4| |tj0|| |D!cgc](}!tj8|d ||z|!zz| zz | *} }!|D!cgc]2}!tj8d t;d t=|!|z z | 4}}!tj0|| tj0||{ycc}!wcc}!w)Nrz#_foreach ops don't support autogradF) supports_xlac3,K|]\}}}}|jj|jjcxk(xr5|jjcxk(xr|jjk(ncxr|jjvywr`)r2rr).0r@r4r3r0rs r- z%_multi_tensor_asgd..0so !2sD HHMMRYY^^ Rszz R$++BRBR R > !== > sBBrgrhg?cpu)r2rirrjrk)r:r;rprqr allziprr"_group_tensors_by_device_and_dtypeitemsrlistrr _foreach_negis_cpu _foreach_add_r> _foreach_add_foreach_addcmul_ _foreach_sub_foreach_maximum__foreach_reciprocal__foreach_copy_ _foreach_mul _foreach_mul_ _foreach_pow_rMr}r)#rrTrVrUrWrXrrrrrr!r"r#rYgrouped_tensorsr2_grouped_params_grouped_grads_ grouped_axs_ grouped_mus_ grouped_etas_grouped_state_steps_grouped_params grouped_grads grouped_axs grouped_mus grouped_etasgrouped_state_steps intermediatenew_musnew_etasr0rs# @r-_multi_tensor_asgdrs$ 6{aDDD  >> & & (Z'H( $ %(T;$G   deAdBBC D    BBBB S$ 4O    c7          d6lO<T&\>: 4<6 4<6 DL-8 "4<1EF  .- E !..}=M ~~**,1DQ1G1N1N   #U\\#e%DC     3Q 7 1 ##M>V, $11!>      nE J --~UL  lRTU )).+F   \;G  (()rs(( & 6 N9Nb       .L LL <L fL f L v, L f L L L L LLLLL !L^L7 LL7 <L7 fL7 f L7 v, L7 f L7 L7 L7 L7 L7L7L7L7L7 !L7^ 1DE# 6 L6 <6 f6 f 6 v, 6 f 6d^66666 6 !6" #6$ %6&'6F6r.