"""Learning rate schedulers for use with :class:`torch.optim.Optimizer`."""

from __future__ import annotations

import math
import types
import warnings
from bisect import bisect_right
from collections import Counter
from functools import partial, wraps
from typing import Any, cast, Optional, TYPE_CHECKING
from weakref import ref

from torch import inf, Tensor

from .optimizer import Optimizer


if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

__all__ = [
    "LambdaLR",
    "MultiplicativeLR",
    "StepLR",
    "MultiStepLR",
    "ConstantLR",
    "LinearLR",
    "ExponentialLR",
    "SequentialLR",
    "CosineAnnealingLR",
    "ChainedScheduler",
    "ReduceLROnPlateau",
    "CyclicLR",
    "CosineAnnealingWarmRestarts",
    "OneCycleLR",
    "PolynomialLR",
    "LRScheduler",
]


def _format_param(name: str, optimizer: Optimizer, param):
    """Return correctly formatted lr/momentum for each param group."""

    def _copy(_param):
        return _param.clone() if isinstance(_param, Tensor) else _param

    if isinstance(param, (list, tuple)):
        if len(param) != len(optimizer.param_groups):
            raise ValueError(
                f"{name} must have the same length as optimizer.param_groups. "
                f"{name} has {len(param)} values, param_groups has "
                f"{len(optimizer.param_groups)}."
            )
        return list(map(_copy, param))

    return [_copy(param) for _ in optimizer.param_groups]


class LRScheduler:
    """Adjusts the learning rate during optimization."""

    _get_lr_called_within_step: bool = False
    _is_initial: bool = False

    def __init__(self, optimizer: Optimizer, last_epoch: int = -1) -> None:
        if not isinstance(optimizer, Optimizer):
            raise TypeError(f"{type(optimizer).__name__} is not an Optimizer")
        self.optimizer = optimizer

        if last_epoch == -1:
            for group in optimizer.param_groups:
                initial_lr = group["lr"]
                if isinstance(initial_lr, Tensor):
                    initial_lr = initial_lr.clone()
                group.setdefault("initial_lr", initial_lr)
        else:
            for i, group in enumerate(optimizer.param_groups):
                if "initial_lr" not in group:
                    raise KeyError(
                        "param 'initial_lr' is not specified "
                        f"in param_groups[{i}] when resuming an optimizer"
                    )

        self.base_lrs = [group["initial_lr"] for group in optimizer.param_groups]
        self.last_epoch = last_epoch

        # Wrap ``optimizer.step`` so the scheduler can tell whether the
        # optimizer has already been stepped (used to detect calls made in the
        # wrong order).
        def patch_track_step_called(opt: Optimizer) -> None:
            if hasattr(opt.step, "_wrapped_by_lr_sched"):
                # already patched
                return

            def wrap_step(step_fn):
                opt_ref = ref(opt)
                func = step_fn.__func__

                @wraps(func)
                def wrapper(*args, **kwargs):
                    opt = opt_ref()
                    opt._opt_called = True
                    return func.__get__(opt, opt.__class__)(*args, **kwargs)

                wrapper._wrapped_by_lr_sched = True
                return wrapper

            opt.step = wrap_step(opt.step)

        patch_track_step_called(self.optimizer)
        self._initial_step()

    def _initial_step(self) -> None:
        """Initialize step counts and perform a step."""
        self._step_count = 0
        with _initial_mode(self):
            self.step()

    def state_dict(self) -> dict[str, Any]:
        """Return the state of the scheduler as a :class:`dict`.

        It contains an entry for every variable in self.__dict__ which
        is not the optimizer.
        """
        return {
            key: value for key, value in self.__dict__.items() if key != "optimizer"
        }

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """Load the scheduler's state.

        Args:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        self.__dict__.update(state_dict)

    def get_last_lr(self) -> list[float]:
        """Return last computed learning rate by current scheduler."""
        return self._last_lr

    def get_lr(self) -> list[float]:
        """Compute the learning rate of each parameter group."""
        raise NotImplementedError

    def step(self, epoch: Optional[int] = None) -> None:
        """Perform a scheduler step and update the learning rates."""
        self._step_count += 1
        self._update_lr(epoch)

    def _update_lr(self, epoch: Optional[int] = None) -> None:
        with _enable_get_lr_call(self):
            if epoch is None:
                self.last_epoch += 1
                values = self.get_lr()
            else:
                self.last_epoch = epoch
                if hasattr(self, "_get_closed_form_lr"):
                    values = cast(list[float], self._get_closed_form_lr())
                else:
                    values = self.get_lr()

        for param_group, lr in zip(self.optimizer.param_groups, values):
            if isinstance(param_group["lr"], Tensor):
                param_group["lr"].fill_(lr)
            else:
                param_group["lr"] = lr

        self._last_lr = [group["lr"] for group in self.optimizer.param_groups]


def _warn_get_lr_called_within_step(lr_scheduler: LRScheduler) -> None:
    if not lr_scheduler._get_lr_called_within_step:
        warnings.warn(
            "To get the last learning rate computed by the scheduler, "
            "please use `get_last_lr()`.",
            UserWarning,
            stacklevel=2,
        )


# Kept for backwards compatibility with code that imports the old name.
class _LRScheduler(LRScheduler):
    pass


class _enable_get_lr_call:
    def __init__(self, o: LRScheduler) -> None:
        self.o = o

    def __enter__(self):
        self.o._get_lr_called_within_step = True
        return self

    def __exit__(self, type, value, traceback) -> None:
        self.o._get_lr_called_within_step = False


class _initial_mode:
    def __init__(self, o: LRScheduler) -> None:
        self.o = o

    def __enter__(self):
        self.o._is_initial = True

    def __exit__(self, type, value, traceback) -> None:
        self.o._is_initial = False


class LambdaLR(LRScheduler):
    """Sets the initial learning rate.

    The learning rate of each parameter group is set to the initial lr
    times a given function. When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lr_lambda (function or list): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in optimizer.param_groups.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer has two groups.
        >>> num_epochs = 100
        >>> lambda1 = lambda epoch: epoch // 30
        >>> lambda2 = lambda epoch: 0.95**epoch
        >>> scheduler = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2])
        >>> for epoch in range(num_epochs):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
        >>>
        >>> # Alternatively, you can use a single lambda function for all groups.
        >>> scheduler = LambdaLR(opt, lr_lambda=lambda epoch: epoch // 30)
        >>> for epoch in range(num_epochs):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/LambdaLR.png
    """

    def __init__(self, optimizer, lr_lambda, last_epoch=-1):
        self.optimizer = optimizer
        if not isinstance(lr_lambda, (list, tuple)):
            self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
        else:
            if len(lr_lambda) != len(optimizer.param_groups):
                raise ValueError(
                    f"Expected {len(optimizer.param_groups)} lr_lambdas, "
                    f"but got {len(lr_lambda)}"
                )
            self.lr_lambdas = list(lr_lambda)
        super().__init__(optimizer, last_epoch)

    def state_dict(self):
        """Return the state of the scheduler as a :class:`dict`.

        The learning rate lambda functions will only be saved if they are
        callable objects and not if they are functions or lambdas.
        """
        state_dict = {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("optimizer", "lr_lambdas")
        }
        state_dict["lr_lambdas"] = [None] * len(self.lr_lambdas)
        for idx, fn in enumerate(self.lr_lambdas):
            if not isinstance(fn, types.FunctionType):
                state_dict["lr_lambdas"][idx] = fn.__dict__.copy()
        return state_dict

    def load_state_dict(self, state_dict):
        """Load the scheduler's state.

        Args:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        lr_lambdas = state_dict.pop("lr_lambdas")
        self.__dict__.update(state_dict)
        state_dict["lr_lambdas"] = lr_lambdas
        for idx, fn in enumerate(lr_lambdas):
            if fn is not None:
                self.lr_lambdas[idx].__dict__.update(fn)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        return [
            base_lr * lmbda(self.last_epoch)
            for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)
        ]


class MultiplicativeLR(LRScheduler):
    """Multiply the learning rate of each parameter group by the factor given in the specified function.

    When last_epoch=-1, set initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lr_lambda (function or list): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in optimizer.param_groups.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> lmbda = lambda epoch: 0.95
        >>> scheduler = MultiplicativeLR(optimizer, lr_lambda=lmbda)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/MultiplicativeLR.png
    """

    def __init__(self, optimizer, lr_lambda, last_epoch=-1):
        self.optimizer = optimizer
        if not isinstance(lr_lambda, (list, tuple)):
            self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
        else:
            if len(lr_lambda) != len(optimizer.param_groups):
                raise ValueError(
                    f"Expected {len(optimizer.param_groups)} lr_lambdas, "
                    f"but got {len(lr_lambda)}"
                )
            self.lr_lambdas = list(lr_lambda)
        for fn in self.lr_lambdas:
            if not callable(fn):
                raise TypeError(
                    f"lr_lambda should be a function, but got {type(fn).__name__}"
                )
        super().__init__(optimizer, last_epoch)

    # ``state_dict``/``load_state_dict`` treat ``lr_lambdas`` exactly as
    # :class:`LambdaLR` does: only callable objects (not plain functions or
    # lambdas) are serialized.

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if not self._is_initial:
            return [
                group["lr"] * lmbda(self.last_epoch)
                for lmbda, group in zip(self.lr_lambdas, self.optimizer.param_groups)
            ]
        return [group["lr"] for group in self.optimizer.param_groups]
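
# The difference between ``LambdaLR`` and ``MultiplicativeLR`` is easiest to
# see numerically: the former multiplies the *initial* lr by ``lr_lambda(epoch)``,
# the latter multiplies the *current* lr by ``lr_lambda(epoch)`` on every step.
# The helper below is a minimal illustrative sketch, not part of the original
# module; it assumes a throwaway SGD optimizer over a single dummy parameter.
def _demo_lambda_vs_multiplicative() -> None:
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    opt_a = torch.optim.SGD(params, lr=1.0)
    opt_b = torch.optim.SGD(params, lr=1.0)

    lam = torch.optim.lr_scheduler.LambdaLR(opt_a, lr_lambda=lambda epoch: 0.9)
    mul = torch.optim.lr_scheduler.MultiplicativeLR(opt_b, lr_lambda=lambda epoch: 0.9)

    for _ in range(3):
        opt_a.step()
        opt_b.step()
        lam.step()
        mul.step()

    # LambdaLR keeps returning 0.9 * initial_lr = 0.9, while MultiplicativeLR
    # compounds the factor: 1.0 * 0.9 * 0.9 * 0.9 = 0.729.
    print(lam.get_last_lr(), mul.get_last_lr())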
class StepLR(LRScheduler):
    """Decays the learning rate of each parameter group by gamma every step_size epochs.

    Notice that such decay can happen simultaneously with other changes to the
    learning rate from outside this scheduler. When last_epoch=-1, sets initial
    lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        step_size (int): Period of learning rate decay.
        gamma (float): Multiplicative factor of learning rate decay.
            Default: 0.1.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.05     if epoch < 30
        >>> # lr = 0.005    if 30 <= epoch < 60
        >>> # lr = 0.0005   if 60 <= epoch < 90
        >>> # ...
        >>> scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/StepLR.png
    """

    def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1):
        self.step_size = step_size
        self.gamma = gamma
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self.last_epoch == 0 or self.last_epoch % self.step_size != 0:
            return [group["lr"] for group in self.optimizer.param_groups]
        return [group["lr"] * self.gamma for group in self.optimizer.param_groups]

    def _get_closed_form_lr(self):
        return [
            base_lr * self.gamma ** (self.last_epoch // self.step_size)
            for base_lr in self.base_lrs
        ]


class MultiStepLR(LRScheduler):
    """Decays the learning rate of each parameter group by gamma once the number of epoch reaches one of the milestones.

    Notice that such decay can happen simultaneously with other changes to the
    learning rate from outside this scheduler. When last_epoch=-1, sets initial
    lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        milestones (list): List of epoch indices. Must be increasing.
        gamma (float): Multiplicative factor of learning rate decay.
            Default: 0.1.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.05     if epoch < 30
        >>> # lr = 0.005    if 30 <= epoch < 80
        >>> # lr = 0.0005   if epoch >= 80
        >>> scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/MultiStepLR.png
    """

    def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
        self.milestones = Counter(milestones)
        self.gamma = gamma
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self.last_epoch not in self.milestones:
            return [group["lr"] for group in self.optimizer.param_groups]
        return [
            group["lr"] * self.gamma ** self.milestones[self.last_epoch]
            for group in self.optimizer.param_groups
        ]

    def _get_closed_form_lr(self):
        milestones = sorted(self.milestones.elements())
        return [
            base_lr * self.gamma ** bisect_right(milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]


class ConstantLR(LRScheduler):
    """Multiply the learning rate of each parameter group by a small constant factor.

    The multiplication is done until the number of epoch reaches a pre-defined
    milestone: total_iters. Notice that such multiplication of the small
    constant factor can happen simultaneously with other changes to the
    learning rate from outside this scheduler. When last_epoch=-1, sets initial
    lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        factor (float): The number we multiply learning rate until the
            milestone. Default: 1./3.
        total_iters (int): The number of steps that the scheduler multiplies
            the learning rate by the factor. Default: 5.
        last_epoch (int): The index of the last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.025   if epoch == 0
        >>> # lr = 0.025   if epoch == 1
        >>> # lr = 0.025   if epoch == 2
        >>> # lr = 0.025   if epoch == 3
        >>> # ...
        >>> # lr = 0.05    if epoch >= 40
        >>> scheduler = ConstantLR(optimizer, factor=0.5, total_iters=40)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/ConstantLR.png
    """

    def __init__(self, optimizer, factor=1.0 / 3, total_iters=5, last_epoch=-1):
        if factor > 1.0 or factor < 0:
            raise ValueError(
                "Constant multiplicative factor expected to be between 0 and 1."
            )
        self.factor = factor
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self.last_epoch == 0:
            return [group["lr"] * self.factor for group in self.optimizer.param_groups]
        if self.last_epoch != self.total_iters:
            return [group["lr"] for group in self.optimizer.param_groups]
        return [
            group["lr"] * (1.0 / self.factor) for group in self.optimizer.param_groups
        ]

    def _get_closed_form_lr(self):
        return [
            base_lr
            * (self.factor + (self.last_epoch >= self.total_iters) * (1 - self.factor))
            for base_lr in self.base_lrs
        ]


class LinearLR(LRScheduler):
    """Decays the learning rate of each parameter group by linearly changing small multiplicative factor.

    The multiplication is done until the number of epoch reaches a pre-defined
    milestone: total_iters. Notice that such decay can happen simultaneously
    with other changes to the learning rate from outside this scheduler.
    When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        start_factor (float): The number we multiply learning rate in the first
            epoch. The multiplication factor changes towards end_factor in the
            following epochs. Default: 1./3.
        end_factor (float): The number we multiply learning rate at the end of
            linear changing process. Default: 1.0.
        total_iters (int): The number of iterations that multiplicative factor
            reaches to 1. Default: 5.
        last_epoch (int): The index of the last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.003687 if epoch == 0
        >>> # lr = 0.004875 if epoch == 1
        >>> # lr = 0.006062 if epoch == 2
        >>> # lr = 0.00725  if epoch == 3
        >>> # ...
        >>> # lr = 0.05     if epoch >= 40
        >>> scheduler = LinearLR(optimizer, start_factor=0.05, total_iters=40)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/LinearLR.png
    """

    def __init__(self, optimizer, start_factor=1.0 / 3, end_factor=1.0, total_iters=5, last_epoch=-1):
        if start_factor > 1.0 or start_factor <= 0:
            raise ValueError(
                "Starting multiplicative factor expected to be greater than 0 and less or equal to 1."
            )
        if end_factor > 1.0 or end_factor < 0:
            raise ValueError(
                "Ending multiplicative factor expected to be between 0 and 1."
            )
        self.start_factor = start_factor
        self.end_factor = end_factor
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self.last_epoch == 0:
            return [
                group["lr"] * self.start_factor for group in self.optimizer.param_groups
            ]
        if self.last_epoch > self.total_iters:
            return [group["lr"] for group in self.optimizer.param_groups]
        return [
            group["lr"]
            * (
                1.0
                + (self.end_factor - self.start_factor)
                / (
                    self.total_iters * self.start_factor
                    + (self.last_epoch - 1) * (self.end_factor - self.start_factor)
                )
            )
            for group in self.optimizer.param_groups
        ]

    def _get_closed_form_lr(self):
        return [
            base_lr
            * (
                self.start_factor
                + (self.end_factor - self.start_factor)
                * min(self.total_iters, self.last_epoch)
                / self.total_iters
            )
            for base_lr in self.base_lrs
        ]


class ExponentialLR(LRScheduler):
    """Decays the learning rate of each parameter group by gamma every epoch.

    When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        gamma (float): Multiplicative factor of learning rate decay.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> scheduler = ExponentialLR(optimizer, gamma=0.95)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/ExponentialLR.png
    """

    def __init__(self, optimizer, gamma, last_epoch=-1):
        self.gamma = gamma
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self._is_initial:
            return [group["lr"] for group in self.optimizer.param_groups]
        return [group["lr"] * self.gamma for group in self.optimizer.param_groups]

    def _get_closed_form_lr(self):
        return [base_lr * self.gamma**self.last_epoch for base_lr in self.base_lrs]
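
# The step-wise decay documented above can be checked against the closed form
# lr = base_lr * gamma ** (epoch // step_size).  The helper below is an
# illustrative sketch only (it assumes a dummy SGD optimizer), not part of the
# original module.
def _demo_steplr_closed_form() -> None:
    import torch

    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.05)
    sched = torch.optim.lr_scheduler.StepLR(opt, step_size=30, gamma=0.1)

    observed = []
    for epoch in range(90):
        observed.append(sched.get_last_lr()[0])
        opt.step()
        sched.step()

    # Matches the docstring: 0.05 for epoch < 30, 0.005 for 30 <= epoch < 60,
    # 0.0005 for 60 <= epoch < 90.
    assert abs(observed[0] - 0.05) < 1e-12
    assert abs(observed[30] - 0.005) < 1e-9
    assert abs(observed[60] - 0.0005) < 1e-9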
class SequentialLR(LRScheduler):
    """Contains a list of schedulers expected to be called sequentially during the optimization process.

    Specifically, the schedulers will be called according to the milestone
    points, which should provide exact intervals by which each scheduler should
    be called at a given epoch.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        schedulers (list): List of chained schedulers.
        milestones (list): List of integers that reflects milestone points.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.005    if epoch == 0
        >>> # lr = 0.005    if epoch == 1
        >>> # lr = 0.005    if epoch == 2
        >>> # ...
        >>> # lr = 0.05     if epoch == 20
        >>> # lr = 0.045    if epoch == 21
        >>> # lr = 0.0405   if epoch == 22
        >>> scheduler1 = ConstantLR(optimizer, factor=0.1, total_iters=20)
        >>> scheduler2 = ExponentialLR(optimizer, gamma=0.9)
        >>> scheduler = SequentialLR(
        ...     optimizer,
        ...     schedulers=[scheduler1, scheduler2],
        ...     milestones=[20],
        ... )
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/SequentialLR.png
    """

    def __init__(self, optimizer, schedulers, milestones, last_epoch=-1):
        if len(schedulers) < 1:
            raise ValueError(
                f"{self.__class__.__name__} expects at least one scheduler, but got no scheduler."
            )

        for scheduler_idx, scheduler in enumerate(schedulers):
            if not hasattr(scheduler, "optimizer"):
                raise TypeError(
                    f"{self.__class__.__name__} at index {scheduler_idx} should have "
                    "`optimizer` as its attribute."
                )
            if isinstance(scheduler, ReduceLROnPlateau):
                raise ValueError(
                    f"{self.__class__.__name__} does not support `ReduceLROnPlateau` scheduler as it "
                    "requires additional kwargs to be specified when calling `step`, "
                    f"but got one at index {scheduler_idx} in the given schedulers sequence."
                )
            if optimizer != scheduler.optimizer:
                raise ValueError(
                    f"{self.__class__.__name__} expects all schedulers to belong to the same optimizer, but "
                    f"got scheduler {scheduler.__class__.__name__} at index {scheduler_idx} has "
                    f"{scheduler.optimizer}, which is different from {optimizer.__class__.__name__}."
                )

        if len(milestones) != len(schedulers) - 1:
            raise ValueError(
                "Sequential Schedulers expects number of schedulers provided to be one more "
                f"than the number of milestone points, but got number of schedulers {len(schedulers)} "
                f"and the number of milestones to be equal to {len(milestones)}"
            )

        self._schedulers = schedulers
        self._milestones = milestones
        self.last_epoch = last_epoch + 1
        self.optimizer = optimizer

        # Undo the lr changes performed when the wrapped schedulers were
        # constructed, then perform the initial step of the first scheduler.
        for group in self.optimizer.param_groups:
            group["lr"] = group["initial_lr"]
        self.recursive_undo()
        self._schedulers[0]._initial_step()
        self._last_lr = self._schedulers[0].get_last_lr()

    def recursive_undo(self, sched=None):
        """Recursively undo any step performed by the initialisation of schedulers."""
        scheduler = sched if sched is not None else self
        if hasattr(scheduler, "_schedulers"):
            for s in scheduler._schedulers:
                self.recursive_undo(s)
        elif hasattr(scheduler, "last_epoch"):
            scheduler.last_epoch -= 1

    def step(self):
        """Perform a step."""
        self.last_epoch += 1
        idx = bisect_right(self._milestones, self.last_epoch)
        scheduler = self._schedulers[idx]
        if idx > 0 and self._milestones[idx - 1] == self.last_epoch:
            scheduler.step(0)
        else:
            scheduler.step()
        self._last_lr = scheduler.get_last_lr()

    def state_dict(self):
        """Return the state of the scheduler as a :class:`dict`.

        The wrapped scheduler states will also be saved.
        """
        state_dict = {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("optimizer", "_schedulers")
        }
        state_dict["_schedulers"] = [None] * len(self._schedulers)
        for idx, s in enumerate(self._schedulers):
            state_dict["_schedulers"][idx] = s.state_dict()
        return state_dict

    def load_state_dict(self, state_dict):
        """Load the scheduler's state."""
        _schedulers = state_dict.pop("_schedulers")
        self.__dict__.update(state_dict)
        state_dict["_schedulers"] = _schedulers
        for idx, s in enumerate(_schedulers):
            self._schedulers[idx].load_state_dict(s)


class PolynomialLR(LRScheduler):
    """Decays the learning rate of each parameter group using a polynomial function in the given total_iters.

    When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        total_iters (int): The number of steps that the scheduler decays the
            learning rate. Default: 5.
        power (float): The power of the polynomial. Default: 1.0.
        last_epoch (int): The index of the last epoch. Default: -1.

    Example:
        >>> # xdoctest: +SKIP("undefined vars")
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.0490   if epoch == 0
        >>> # lr = 0.0481   if epoch == 1
        >>> # lr = 0.0472   if epoch == 2
        >>> # ...
        >>> # lr = 0.0      if epoch >= 50
        >>> scheduler = PolynomialLR(optimizer, total_iters=50, power=0.9)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/PolynomialLR.png
    """

    def __init__(self, optimizer, total_iters=5, power=1.0, last_epoch=-1):
        self.total_iters = total_iters
        self.power = power
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)
        if self._is_initial or self.last_epoch > self.total_iters:
            return [group["lr"] for group in self.optimizer.param_groups]
        decay_factor = (
            (1.0 - self.last_epoch / self.total_iters)
            / (1.0 - (self.last_epoch - 1) / self.total_iters)
        ) ** self.power
        return [group["lr"] * decay_factor for group in self.optimizer.param_groups]

    def _get_closed_form_lr(self):
        return [
            base_lr
            * (1.0 - min(self.total_iters, self.last_epoch) / self.total_iters)
            ** self.power
            for base_lr in self.base_lrs
        ]
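
# ``SequentialLR`` is commonly used to glue a short warmup onto a longer decay
# schedule.  The helper below is a hedged sketch (the hyper-parameters are
# illustrative, not part of the original module): a 5-step linear warmup
# followed by cosine annealing for the remaining steps.
def _demo_warmup_then_cosine() -> None:
    import torch

    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
    warmup = torch.optim.lr_scheduler.LinearLR(opt, start_factor=0.1, total_iters=5)
    decay = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=95)
    sched = torch.optim.lr_scheduler.SequentialLR(
        opt, schedulers=[warmup, decay], milestones=[5]
    )

    for _ in range(100):
        opt.step()
        sched.step()  # linear warmup for 5 steps, cosine decay for the other 95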
class CosineAnnealingLR(LRScheduler):
    r"""Set the learning rate of each parameter group using a cosine annealing schedule.

    The learning rate is updated recursively using:

    .. math::
        \eta_{t+1} = \eta_{\min} + (\eta_t - \eta_{\min}) \cdot
        \frac{1 + \cos\left(\frac{(T_{cur}+1) \pi}{T_{max}}\right)}
        {1 + \cos\left(\frac{T_{cur} \pi}{T_{max}}\right)}

    This implements a recursive approximation of the closed-form schedule
    proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`_:

    .. math::
        \eta_t = \eta_{\min} + \frac{1}{2}(\eta_{\max} - \eta_{\min})
        \left(1 + \cos\left(\frac{T_{cur} \pi}{T_{max}}\right)\right)

    where:

    - :math:`\eta_t` is the learning rate at step :math:`t`
    - :math:`T_{cur}` is the number of epochs since the last restart
    - :math:`T_{max}` is the maximum number of epochs in a cycle

    Note:
        Although SGDR includes periodic restarts, this implementation performs
        cosine annealing **without restarts**, so :math:`T_{cur} = t` and
        increases monotonically with each call to :meth:`step`.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        T_max (int): Maximum number of iterations.
        eta_min (float): Minimum learning rate. Default: 0.
        last_epoch (int): The index of the last epoch. Default: -1.

    .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
        https://arxiv.org/abs/1608.03983

    Example:
        >>> # xdoctest: +SKIP
        >>> num_epochs = 100
        >>> scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
        >>> for epoch in range(num_epochs):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/CosineAnnealingLR.png
    """

    def __init__(self, optimizer, T_max, eta_min=0.0, last_epoch=-1):
        self.T_max = T_max
        self.eta_min = eta_min
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Retrieve the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)

        if self._is_initial:
            return [group["lr"] for group in self.optimizer.param_groups]
        elif self._step_count == 1 and self.last_epoch > 0:
            return [
                self.eta_min
                + (base_lr - self.eta_min)
                * (1 + math.cos(self.last_epoch * math.pi / self.T_max))
                / 2
                for base_lr in self.base_lrs
            ]
        elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0:
            return [
                group["lr"]
                + (base_lr - self.eta_min) * (1 - math.cos(math.pi / self.T_max)) / 2
                for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
            ]
        return [
            (1 + math.cos(math.pi * self.last_epoch / self.T_max))
            / (1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max))
            * (group["lr"] - self.eta_min)
            + self.eta_min
            for group in self.optimizer.param_groups
        ]

    def _get_closed_form_lr(self):
        return [
            self.eta_min
            + (base_lr - self.eta_min)
            * (1 + math.cos(math.pi * self.last_epoch / self.T_max))
            / 2
            for base_lr in self.base_lrs
        ]
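
# The recursive update above tracks the closed-form SGDR schedule.  The helper
# below is a small numerical sketch of that closed form (pure ``math``, no
# optimizer involved); it is illustrative only and not part of the original
# module.
def _demo_cosine_closed_form(eta_max: float = 0.1, eta_min: float = 0.0, t_max: int = 100) -> None:
    for t in (0, 25, 50, 75, 100):
        eta_t = eta_min + 0.5 * (eta_max - eta_min) * (1 + math.cos(math.pi * t / t_max))
        print(t, round(eta_t, 4))
    # Prints 0.1, ~0.0854, 0.05, ~0.0146, 0.0: the lr follows half a cosine
    # wave from eta_max down to eta_min over T_max steps.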
class ChainedScheduler(LRScheduler):
    """Chains a list of learning rate schedulers.

    Takes in a sequence of chainable learning rate schedulers and calls their
    step() functions consecutively in just one call to step().

    Args:
        schedulers (sequence): sequence of chained schedulers.
        optimizer (Optimizer, optional): Wrapped optimizer. Default: None.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.05     if epoch == 0
        >>> # lr = 0.0450   if epoch == 1
        >>> # lr = 0.0405   if epoch == 2
        >>> # ...
        >>> # lr = 0.00675  if epoch == 19
        >>> # lr = 0.06078  if epoch == 20
        >>> # lr = 0.05470  if epoch == 21
        >>> scheduler1 = ConstantLR(optimizer, factor=0.1, total_iters=20)
        >>> scheduler2 = ExponentialLR(optimizer, gamma=0.9)
        >>> scheduler = ChainedScheduler([scheduler1, scheduler2], optimizer=optimizer)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/ChainedScheduler.png
    """

    def __init__(self, schedulers, optimizer=None):
        if len(schedulers) < 1:
            raise ValueError(
                f"{self.__class__.__name__} expects at least one scheduler to be chained, "
                "but got no scheduler."
            )

        optimizer = optimizer or schedulers[0].optimizer
        for scheduler_idx, scheduler in enumerate(schedulers):
            if not hasattr(scheduler, "optimizer"):
                raise TypeError(
                    f"{self.__class__.__name__} at index {scheduler_idx} should have "
                    "`optimizer` as its attribute."
                )
            if isinstance(scheduler, ReduceLROnPlateau):
                raise ValueError(
                    f"{self.__class__.__name__} does not support `ReduceLROnPlateau` scheduler as it "
                    "requires additional kwargs to be specified when calling `step`, "
                    f"but got one at index {scheduler_idx} in the given schedulers sequence."
                )
            if optimizer != scheduler.optimizer:
                raise ValueError(
                    f"{self.__class__.__name__} expects all schedulers to belong to the same optimizer, but "
                    f"got scheduler {scheduler.__class__.__name__} at index {scheduler_idx} has "
                    f"{scheduler.optimizer}, which is different from {optimizer.__class__.__name__}."
                )

        self._schedulers = schedulers
        self.optimizer = optimizer
        self._last_lr = [
            group["lr"] for group in self._schedulers[-1].optimizer.param_groups
        ]

    def step(self):
        """Perform a step by stepping every chained scheduler in order."""
        for scheduler in self._schedulers:
            scheduler.step()
        self._last_lr = [
            group["lr"] for group in self._schedulers[-1].optimizer.param_groups
        ]

    def state_dict(self):
        """Return the state of the scheduler as a :class:`dict`.

        The wrapped scheduler states will also be saved.
        """
        state_dict = {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("optimizer", "_schedulers")
        }
        state_dict["_schedulers"] = [None] * len(self._schedulers)
        for idx, s in enumerate(self._schedulers):
            state_dict["_schedulers"][idx] = s.state_dict()
        return state_dict

    def load_state_dict(self, state_dict):
        """Load the scheduler's state."""
        _schedulers = state_dict.pop("_schedulers")
        self.__dict__.update(state_dict)
        state_dict["_schedulers"] = _schedulers
        for idx, s in enumerate(_schedulers):
            self._schedulers[idx].load_state_dict(s)


class ReduceLROnPlateau(LRScheduler):
    """Reduce the learning rate when a metric has stopped improving.

    Models often benefit from reducing the learning rate by a factor of 2-10
    once learning stagnates. This scheduler reads a metric quantity and, if no
    improvement is seen for a ``patience`` number of epochs, the learning rate
    is reduced.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        mode (str): One of `min`, `max`. In `min` mode, lr will be reduced
            when the quantity monitored has stopped decreasing; in `max` mode
            it will be reduced when the quantity monitored has stopped
            increasing. Default: 'min'.
        factor (float): Factor by which the learning rate will be reduced.
            new_lr = lr * factor. Default: 0.1.
        patience (int): The number of allowed epochs with no improvement after
            which the learning rate will be reduced. Default: 10.
        threshold (float): Threshold for measuring the new optimum, to only
            focus on significant changes. Default: 1e-4.
        threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
            dynamic_threshold = best * ( 1 + threshold ) in 'max' mode or
            best * ( 1 - threshold ) in `min` mode. In `abs` mode,
            dynamic_threshold = best + threshold in `max` mode or
            best - threshold in `min` mode. Default: 'rel'.
        cooldown (int): Number of epochs to wait before resuming normal
            operation after lr has been reduced. Default: 0.
        min_lr (float or list): A scalar or a list of scalars. A lower bound on
            the learning rate of all param groups or each group respectively.
            Default: 0.
        eps (float): Minimal decay applied to lr. If the difference between new
            and old lr is smaller than eps, the update is ignored.
            Default: 1e-8.

    Example:
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = ReduceLROnPlateau(optimizer, "min")
        >>> for epoch in range(10):
        >>>     train(...)
        >>>     val_loss = validate(...)
        >>>     # Note that step should be called after validate()
        >>>     scheduler.step(val_loss)

    .. image:: ../scripts/lr_scheduler_images/ReduceLROnPlateau.png
    """

    def __init__(self, optimizer, mode="min", factor=0.1, patience=10,
                 threshold=1e-4, threshold_mode="rel", cooldown=0, min_lr=0, eps=1e-8):
        if factor >= 1.0:
            raise ValueError("Factor should be < 1.0.")
        self.factor = factor

        if not isinstance(optimizer, Optimizer):
            raise TypeError(f"{type(optimizer).__name__} is not an Optimizer")
        self.optimizer = optimizer

        if isinstance(min_lr, (list, tuple)):
            if len(min_lr) != len(optimizer.param_groups):
                raise ValueError(
                    f"expected {len(optimizer.param_groups)} min_lrs, got {len(min_lr)}"
                )
            self.default_min_lr = None
            self.min_lrs = list(min_lr)
        else:
            self.default_min_lr = min_lr
            self.min_lrs = [min_lr] * len(optimizer.param_groups)

        self.patience = patience
        self.cooldown = cooldown
        self.eps = eps
        self.last_epoch = 0
        self._last_lr = [group["lr"] for group in self.optimizer.param_groups]
        self._init_is_better(mode=mode, threshold=threshold, threshold_mode=threshold_mode)
        self._reset()

    def _reset(self):
        """Reset num_bad_epochs counter and cooldown counter."""
        self.best = self.mode_worse
        self.cooldown_counter = 0
        self.num_bad_epochs = 0

    def step(self, metrics, epoch=None):
        """Perform a step."""
        current = float(metrics)
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch

        if self.is_better(current, self.best):
            self.best = current
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1

        if self.in_cooldown:
            self.cooldown_counter -= 1
            self.num_bad_epochs = 0  # ignore any bad epochs in cooldown

        if self.num_bad_epochs > self.patience:
            self._reduce_lr(epoch)
            self.cooldown_counter = self.cooldown
            self.num_bad_epochs = 0

        self._last_lr = [group["lr"] for group in self.optimizer.param_groups]

    def _reduce_lr(self, epoch):
        if len(self.optimizer.param_groups) != len(self.min_lrs):
            if self.default_min_lr is None:
                raise RuntimeError(
                    f"The number of param groups in the `optimizer` "
                    f"({len(self.optimizer.param_groups)}) differs from when "
                    f"`ReduceLROnPlateau` was initialized ({len(self.min_lrs)}), "
                    "usually due to a new param group being added to the optimizer. "
                    "Please modify the `min_lrs` field to match the length of the "
                    "`optimizer` param groups."
                )
            self.min_lrs = [self.default_min_lr] * len(self.optimizer.param_groups)

        for i, param_group in enumerate(self.optimizer.param_groups):
            old_lr = float(param_group["lr"])
            new_lr = max(old_lr * self.factor, self.min_lrs[i])
            if old_lr - new_lr > self.eps:
                param_group["lr"] = new_lr

    @property
    def in_cooldown(self):
        return self.cooldown_counter > 0

    def is_better(self, a, best):
        if self.mode == "min" and self.threshold_mode == "rel":
            rel_epsilon = 1.0 - self.threshold
            return a < best * rel_epsilon
        elif self.mode == "min" and self.threshold_mode == "abs":
            return a < best - self.threshold
        elif self.mode == "max" and self.threshold_mode == "rel":
            rel_epsilon = self.threshold + 1.0
            return a > best * rel_epsilon
        else:  # mode == 'max' and threshold_mode == 'abs'
            return a > best + self.threshold

    def _init_is_better(self, mode, threshold, threshold_mode):
        if mode not in {"min", "max"}:
            raise ValueError("mode " + mode + " is unknown!")
        if threshold_mode not in {"rel", "abs"}:
            raise ValueError("threshold mode " + threshold_mode + " is unknown!")

        if mode == "min":
            self.mode_worse = inf
        else:
            self.mode_worse = -inf

        self.mode = mode
        self.threshold = threshold
        self.threshold_mode = threshold_mode

    def load_state_dict(self, state_dict):
        """Load the scheduler's state."""
        self.__dict__.update(state_dict)
        self._init_is_better(
            mode=self.mode, threshold=self.threshold, threshold_mode=self.threshold_mode
        )
class CyclicLR(LRScheduler):
    r"""Sets the learning rate of each parameter group according to cyclical learning rate policy (CLR).

    The policy cycles the learning rate between two boundaries with a constant
    frequency, as detailed in the paper `Cyclical Learning Rates for Training
    Neural Networks`_. The distance between the two boundaries can be scaled on
    a per-iteration or per-cycle basis.

    Cyclical learning rate policy changes the learning rate after every batch.
    `step` should be called after a batch has been used for training.

    This class has three built-in policies, as put forth in the paper:

    * "triangular": A basic triangular cycle without amplitude scaling.
    * "triangular2": A basic triangular cycle that scales initial amplitude by
      half each cycle.
    * "exp_range": A cycle that scales initial amplitude by
      :math:`\text{gamma}^{\text{cycle iterations}}` at each cycle iteration.

    This implementation was adapted from the github repo: `bckenstler/CLR`_

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        base_lr (float or list): Initial learning rate which is the lower
            boundary in the cycle for each parameter group.
        max_lr (float or list): Upper learning rate boundaries in the cycle for
            each parameter group. Functionally, it defines the cycle amplitude
            (max_lr - base_lr). The lr at any cycle is the sum of base_lr and
            some scaling of the amplitude; therefore max_lr may not actually be
            reached depending on scaling function.
        step_size_up (int): Number of training iterations in the increasing
            half of a cycle. Default: 2000
        step_size_down (int): Number of training iterations in the decreasing
            half of a cycle. If step_size_down is None, it is set to
            step_size_up. Default: None
        mode (str): One of {triangular, triangular2, exp_range}. Values
            correspond to policies detailed above. If scale_fn is not None,
            this argument is ignored. Default: 'triangular'
        gamma (float): Constant in 'exp_range' scaling function:
            gamma**(cycle iterations). Default: 1.0
        scale_fn (function): Custom scaling policy defined by a single argument
            lambda function, where 0 <= scale_fn(x) <= 1 for all x >= 0. If
            specified, then 'mode' is ignored. Default: None
        scale_mode (str): {'cycle', 'iterations'}. Defines whether scale_fn is
            evaluated on cycle number or cycle iterations (training iterations
            since start of cycle). Default: 'cycle'
        cycle_momentum (bool): If ``True``, momentum is cycled inversely to
            learning rate between 'base_momentum' and 'max_momentum'.
            Default: True
        base_momentum (float or list): Lower momentum boundaries in the cycle
            for each parameter group. Note that momentum is cycled inversely to
            learning rate; at the peak of a cycle, momentum is 'base_momentum'
            and learning rate is 'max_lr'. Default: 0.8
        max_momentum (float or list): Upper momentum boundaries in the cycle
            for each parameter group. Functionally, it defines the cycle
            amplitude (max_momentum - base_momentum). The momentum at any cycle
            is the difference of max_momentum and some scaling of the
            amplitude; therefore base_momentum may not actually be reached
            depending on scaling function. Note that momentum is cycled
            inversely to learning rate; at the start of a cycle, momentum is
            'max_momentum' and learning rate is 'base_lr'. Default: 0.9
        last_epoch (int): The index of the last batch. This parameter is used
            when resuming a training job. Since `step()` should be invoked
            after each batch instead of after each epoch, this number
            represents the total number of *batches* computed, not the total
            number of epochs computed. When last_epoch=-1, the schedule is
            started from the beginning. Default: -1

    Example:
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = torch.optim.lr_scheduler.CyclicLR(
        ...     optimizer,
        ...     base_lr=0.01,
        ...     max_lr=0.1,
        ...     step_size_up=10,
        ... )
        >>> data_loader = torch.utils.data.DataLoader(...)
        >>> for epoch in range(10):
        >>>     for batch in data_loader:
        >>>         train_batch(...)
        >>>         scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/CyclicLR.png

    .. _Cyclical Learning Rates for Training Neural Networks: https://arxiv.org/abs/1506.01186
    .. _bckenstler/CLR: https://github.com/bckenstler/CLR
    """

    def __init__(self, optimizer, base_lr, max_lr, step_size_up=2000,
                 step_size_down=None, mode="triangular", gamma=1.0, scale_fn=None,
                 scale_mode="cycle", cycle_momentum=True, base_momentum=0.8,
                 max_momentum=0.9, last_epoch=-1):
        if not isinstance(optimizer, Optimizer):
            raise TypeError(f"{type(optimizer).__name__} is not an Optimizer")
        self.optimizer = optimizer

        base_lrs = _format_param("base_lr", optimizer, base_lr)
        if last_epoch == -1:
            for lr, group in zip(base_lrs, optimizer.param_groups):
                if isinstance(group["lr"], Tensor):
                    group["lr"].fill_(lr.item() if isinstance(lr, Tensor) else lr)
                else:
                    group["lr"] = lr

        self.max_lrs = _format_param("max_lr", optimizer, max_lr)

        step_size_up = float(step_size_up)
        step_size_down = (
            float(step_size_down) if step_size_down is not None else step_size_up
        )
        self.total_size = step_size_up + step_size_down
        self.step_ratio = step_size_up / self.total_size

        if mode not in ("triangular", "triangular2", "exp_range") and scale_fn is None:
            raise ValueError("mode is invalid and scale_fn is None")

        self.mode = mode
        self.gamma = gamma
        self._scale_fn_custom = scale_fn
        self.scale_mode = scale_mode
        self._init_scale_fn()

        self.cycle_momentum = cycle_momentum
        if cycle_momentum:
            if (
                "momentum" not in optimizer.defaults
                and "betas" not in optimizer.defaults
            ):
                raise ValueError(
                    "optimizer must support momentum or beta1 with `cycle_momentum` option enabled"
                )
            self.use_beta1 = "betas" in self.optimizer.defaults
            self.base_momentums = _format_param("base_momentum", optimizer, base_momentum)
            self.max_momentums = _format_param("max_momentum", optimizer, max_momentum)
            if last_epoch == -1:
                for m_momentum, b_momentum, group in zip(
                    self.max_momentums, self.base_momentums, optimizer.param_groups
                ):
                    if self.use_beta1:
                        group["betas"] = (m_momentum, *group["betas"][1:])
                    else:
                        group["momentum"] = m_momentum
                    group["max_momentum"] = m_momentum
                    group["base_momentum"] = b_momentum

        super().__init__(optimizer, last_epoch)
        self.base_lrs = base_lrs

    def _init_scale_fn(self):
        if self._scale_fn_custom is not None:
            return
        if self.mode == "triangular":
            self._scale_fn_ref = self._triangular_scale_fn
            self.scale_mode = "cycle"
        elif self.mode == "triangular2":
            self._scale_fn_ref = self._triangular2_scale_fn
            self.scale_mode = "cycle"
        elif self.mode == "exp_range":
            self._scale_fn_ref = partial(self._exp_range_scale_fn, self.gamma)
            self.scale_mode = "iterations"

    def scale_fn(self, x):
        """Get the scaling policy."""
        if self._scale_fn_custom is not None:
            return self._scale_fn_custom(x)
        return self._scale_fn_ref(x)

    @staticmethod
    def _triangular_scale_fn(x):
        return 1.0

    @staticmethod
    def _triangular2_scale_fn(x):
        return 1 / (2.0 ** (x - 1))

    @staticmethod
    def _exp_range_scale_fn(gamma, x):
        return gamma**x

    def get_lr(self):
        """Calculate the learning rate at batch index.

        This function treats `self.last_epoch` as the last batch index.
        If `self.cycle_momentum` is ``True``, this function has a side effect
        of updating the optimizer's momentum.
        """
        _warn_get_lr_called_within_step(self)

        cycle = math.floor(1 + self.last_epoch / self.total_size)
        x = 1.0 + self.last_epoch / self.total_size - cycle
        if x <= self.step_ratio:
            scale_factor = x / self.step_ratio
        else:
            scale_factor = (x - 1) / (self.step_ratio - 1)

        lrs = []
        for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
            base_height = (max_lr - base_lr) * scale_factor
            if self.scale_mode == "cycle":
                lr = base_lr + base_height * self.scale_fn(cycle)
            else:
                lr = base_lr + base_height * self.scale_fn(self.last_epoch)
            lrs.append(lr)

        if self.cycle_momentum:
            momentums = []
            for base_momentum, max_momentum in zip(
                self.base_momentums, self.max_momentums
            ):
                base_height = (max_momentum - base_momentum) * scale_factor
                if self.scale_mode == "cycle":
                    momentum = max_momentum - base_height * self.scale_fn(cycle)
                else:
                    momentum = max_momentum - base_height * self.scale_fn(self.last_epoch)
                momentums.append(momentum)
            for param_group, momentum in zip(self.optimizer.param_groups, momentums):
                if self.use_beta1:
                    param_group["betas"] = (momentum, *param_group["betas"][1:])
                else:
                    param_group["momentum"] = momentum

        return lrs

    # ``state_dict``/``load_state_dict`` drop the bound ``_scale_fn_ref`` (and
    # serialize a custom ``scale_fn`` only when it is a callable object rather
    # than a function or lambda); ``_init_scale_fn()`` restores the reference
    # when the state is loaded.
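
# When ``scale_fn`` is supplied, ``mode`` is ignored and the cycle amplitude is
# shaped entirely by that callable, evaluated per cycle or per iteration
# according to ``scale_mode``.  The helper below is a hedged sketch of a custom
# per-iteration amplitude decay (hyper-parameters are illustrative only); it is
# not part of the original module.
def _demo_custom_scale_fn() -> None:
    import torch

    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.01, momentum=0.9)
    sched = torch.optim.lr_scheduler.CyclicLR(
        opt,
        base_lr=0.001,
        max_lr=0.01,
        step_size_up=50,
        scale_fn=lambda it: 0.999**it,  # shrink the amplitude every iteration
        scale_mode="iterations",
    )

    for _ in range(200):
        opt.step()
        sched.step()  # CyclicLR is stepped per batch, not per epoch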
class CosineAnnealingWarmRestarts(LRScheduler):
    r"""Set the learning rate of each parameter group using a cosine annealing schedule with warm restarts.

    The :math:`\eta_{max}` is set to the initial lr, :math:`T_{cur}` is the
    number of epochs since the last restart and :math:`T_{i}` is the number of
    epochs between two warm restarts in SGDR:

    .. math::
        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})
        \left(1 + \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)

    When :math:`T_{cur}=T_{i}`, set :math:`\eta_t = \eta_{min}`.
    When :math:`T_{cur}=0` after restart, set :math:`\eta_t=\eta_{max}`.

    It has been proposed in
    `SGDR: Stochastic Gradient Descent with Warm Restarts`_.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        T_0 (int): Number of iterations until the first restart.
        T_mult (int, optional): A factor by which :math:`T_{i}` increases after
            a restart. Default: 1.
        eta_min (float, optional): Minimum learning rate. Default: 0.
        last_epoch (int, optional): The index of the last epoch. Default: -1.

    .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
        https://arxiv.org/abs/1608.03983

    Example:
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
        >>> scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        ...     optimizer, T_0=20
        ... )
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/CosineAnnealingWarmRestarts.png
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_min=0.0, last_epoch=-1):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError(f"Expected positive integer T_0, but got {T_0}")
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError(f"Expected integer T_mult >= 1, but got {T_mult}")
        if not isinstance(eta_min, (float, int)):
            raise ValueError(
                f"Expected float or int eta_min, but got {eta_min} of type {type(eta_min)}"
            )
        self.T_0 = T_0
        self.T_i = T_0
        self.T_mult = T_mult
        self.eta_min = eta_min
        self.T_cur = last_epoch
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the initial learning rate."""
        _warn_get_lr_called_within_step(self)
        return [
            self.eta_min
            + (base_lr - self.eta_min)
            * (1 + math.cos(math.pi * self.T_cur / self.T_i))
            / 2
            for base_lr in self.base_lrs
        ]

    def step(self, epoch=None):
        """Step could be called after every batch update.

        Example:
            >>> # xdoctest: +SKIP("Undefined vars")
            >>> scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
            >>> iters = len(dataloader)
            >>> for epoch in range(20):
            >>>     for i, sample in enumerate(dataloader):
            >>>         inputs, labels = sample['inputs'], sample['labels']
            >>>         optimizer.zero_grad()
            >>>         outputs = net(inputs)
            >>>         loss = criterion(outputs, labels)
            >>>         loss.backward()
            >>>         optimizer.step()
            >>>         scheduler.step(epoch + i / iters)

        This function can be called in an interleaved way.

        Example:
            >>> # xdoctest: +SKIP("Undefined vars")
            >>> scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
            >>> for epoch in range(20):
            >>>     scheduler.step()
            >>> scheduler.step(26)
            >>> scheduler.step()  # scheduler.step(27), instead of scheduler(20)
        """
        if epoch is None and self.last_epoch < 0:
            epoch = 0

        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                self.T_cur = self.T_cur - self.T_i
                self.T_i = self.T_i * self.T_mult
        else:
            if epoch < 0:
                raise ValueError(f"Expected non-negative epoch, but got {epoch}")
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                else:
                    n = int(
                        math.log(
                            (epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult
                        )
                    )
                    self.T_cur = epoch - self.T_0 * (self.T_mult**n - 1) / (
                        self.T_mult - 1
                    )
                    self.T_i = self.T_0 * self.T_mult**n
            else:
                self.T_i = self.T_0
                self.T_cur = epoch
        self.last_epoch = math.floor(epoch)

        with _enable_get_lr_call(self):
            for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
                param_group["lr"] = lr

        self._last_lr = [group["lr"] for group in self.optimizer.param_groups]


class OneCycleLR(LRScheduler):
    r"""Sets the learning rate of each parameter group according to the 1cycle learning rate policy.

    The 1cycle policy anneals the learning rate from an initial learning rate
    to some maximum learning rate and then from that maximum learning rate to
    some minimum learning rate much lower than the initial learning rate. This
    policy was initially described in the paper `Super-Convergence: Very Fast
    Training of Neural Networks Using Large Learning Rates`_.

    The 1cycle learning rate policy changes the learning rate after every
    batch. `step` should be called after a batch has been used for training.
    This scheduler is not chainable.

    Note also that the total number of steps in the cycle can be determined in
    one of two ways (listed in order of precedence):

    #. A value for total_steps is explicitly provided.
    #. A number of epochs (epochs) and a number of steps per epoch
       (steps_per_epoch) are provided. In this case, the number of total steps
       is inferred by total_steps = epochs * steps_per_epoch.

    You must either provide a value for total_steps or provide a value for
    both epochs and steps_per_epoch.

    The default behaviour of this scheduler follows the fastai implementation
    of 1cycle, which claims that "unpublished work has shown even better
    results by using only two phases". To mimic the behaviour of the original
    paper instead, set ``three_phase=True``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        max_lr (float or list): Upper learning rate boundaries in the cycle for
            each parameter group.
        total_steps (int): The total number of steps in the cycle.
            Default: None
        epochs (int): The number of epochs to train for. Default: None
        steps_per_epoch (int): The number of steps per epoch to train for.
            Default: None
        pct_start (float): The percentage of the cycle (in number of steps)
            spent increasing the learning rate. Default: 0.3
        anneal_strategy (str): {'cos', 'linear'} Specifies the annealing
            strategy. Default: 'cos'
        cycle_momentum (bool): If ``True``, momentum is cycled inversely to
            learning rate between 'base_momentum' and 'max_momentum'.
            Default: True
        base_momentum (float or list): Lower momentum boundaries in the cycle
            for each parameter group. Default: 0.85
        max_momentum (float or list): Upper momentum boundaries in the cycle
            for each parameter group. Default: 0.95
        div_factor (float): Determines the initial learning rate via
            initial_lr = max_lr / div_factor. Default: 25
        final_div_factor (float): Determines the minimum learning rate via
            min_lr = initial_lr / final_div_factor. Default: 1e4
        three_phase (bool): If ``True``, use a third phase of the schedule to
            annihilate the learning rate. Default: False
        last_epoch (int): The index of the last batch. When last_epoch=-1, the
            schedule is started from the beginning. Default: -1

    Example:
        >>> # xdoctest: +SKIP
        >>> data_loader = torch.utils.data.DataLoader(...)
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
        >>> scheduler = torch.optim.lr_scheduler.OneCycleLR(
        ...     optimizer, max_lr=0.01, steps_per_epoch=len(data_loader), epochs=10
        ... )
        >>> for epoch in range(10):
        >>>     for batch in data_loader:
        >>>         train_batch(...)
        >>>         optimizer.step()
        >>>         scheduler.step()

    .. image:: ../scripts/lr_scheduler_images/OneCycleLR.png

    .. _Super-Convergence\: Very Fast Training of Neural Networks Using Large Learning Rates:
        https://arxiv.org/abs/1708.07120
    """

    def __init__(self, optimizer, max_lr, total_steps=None, epochs=None,
                 steps_per_epoch=None, pct_start=0.3, anneal_strategy="cos",
                 cycle_momentum=True, base_momentum=0.85, max_momentum=0.95,
                 div_factor=25.0, final_div_factor=1e4, three_phase=False,
                 last_epoch=-1):
        if not isinstance(optimizer, Optimizer):
            raise TypeError(f"{type(optimizer).__name__} is not an Optimizer")
        self.optimizer = optimizer

        # Resolve the total number of steps.
        if total_steps is not None:
            if total_steps <= 0 or not isinstance(total_steps, int):
                raise ValueError(
                    f"Expected positive integer total_steps, but got {total_steps}"
                )
            self.total_steps = total_steps
        elif epochs is not None and steps_per_epoch is not None:
            if not isinstance(epochs, int) or epochs <= 0:
                raise ValueError(f"Expected positive integer epochs, but got {epochs}")
            if not isinstance(steps_per_epoch, int) or steps_per_epoch <= 0:
                raise ValueError(
                    f"Expected positive integer steps_per_epoch, but got {steps_per_epoch}"
                )
            self.total_steps = epochs * steps_per_epoch
        else:
            raise ValueError(
                "You must define either total_steps OR (epochs AND steps_per_epoch)"
            )

        # Phase boundaries: two phases (warmup, anneal) by default, or three
        # phases (warmup, anneal back to initial_lr, final anneal) when
        # ``three_phase=True``.
        if three_phase:
            self._schedule_phases = [
                {"end_step": float(pct_start * self.total_steps) - 1,
                 "start_lr": "initial_lr", "end_lr": "max_lr",
                 "start_momentum": "max_momentum", "end_momentum": "base_momentum"},
                {"end_step": float(2 * pct_start * self.total_steps) - 2,
                 "start_lr": "max_lr", "end_lr": "initial_lr",
                 "start_momentum": "base_momentum", "end_momentum": "max_momentum"},
                {"end_step": self.total_steps - 1,
                 "start_lr": "initial_lr", "end_lr": "min_lr",
                 "start_momentum": "max_momentum", "end_momentum": "max_momentum"},
            ]
        else:
            self._schedule_phases = [
                {"end_step": float(pct_start * self.total_steps) - 1,
                 "start_lr": "initial_lr", "end_lr": "max_lr",
                 "start_momentum": "max_momentum", "end_momentum": "base_momentum"},
                {"end_step": self.total_steps - 1,
                 "start_lr": "max_lr", "end_lr": "min_lr",
                 "start_momentum": "base_momentum", "end_momentum": "max_momentum"},
            ]

        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError(
                f"Expected float between 0 and 1 pct_start, but got {pct_start}"
            )

        if anneal_strategy not in ("cos", "linear"):
            raise ValueError(
                f"anneal_strategy must be one of 'cos' or 'linear', instead got {anneal_strategy}"
            )
        self._anneal_func_type = anneal_strategy

        # Initialize learning rates and, optionally, cyclical momentum.
        max_lrs = _format_param("max_lr", self.optimizer, max_lr)
        if last_epoch == -1:
            for idx, group in enumerate(self.optimizer.param_groups):
                group["initial_lr"] = max_lrs[idx] / div_factor
                group["max_lr"] = max_lrs[idx]
                group["min_lr"] = group["initial_lr"] / final_div_factor

        self.cycle_momentum = cycle_momentum
        if cycle_momentum:
            if (
                "momentum" not in self.optimizer.defaults
                and "betas" not in self.optimizer.defaults
            ):
                raise ValueError(
                    "optimizer must support momentum or beta1 with `cycle_momentum` option enabled"
                )
            self.use_beta1 = "betas" in self.optimizer.defaults
            max_momentums = _format_param("max_momentum", optimizer, max_momentum)
            base_momentums = _format_param("base_momentum", optimizer, base_momentum)
            if last_epoch == -1:
                for m_momentum, b_momentum, group in zip(
                    max_momentums, base_momentums, optimizer.param_groups
                ):
                    if self.use_beta1:
                        group["betas"] = (m_momentum, *group["betas"][1:])
                    else:
                        group["momentum"] = m_momentum
                    group["max_momentum"] = m_momentum
                    group["base_momentum"] = b_momentum

        super().__init__(optimizer, last_epoch)

    def _anneal_func(self, *args, **kwargs):
        if self._anneal_func_type == "cos":
            return self._annealing_cos(*args, **kwargs)
        elif self._anneal_func_type == "linear":
            return self._annealing_linear(*args, **kwargs)
        raise ValueError(f"Unknown _anneal_func_type: {self._anneal_func_type}")

    @staticmethod
    def _annealing_cos(start, end, pct):
        """Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."""
        cos_out = math.cos(math.pi * pct) + 1
        return end + (start - end) / 2.0 * cos_out

    @staticmethod
    def _annealing_linear(start, end, pct):
        """Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0."""
        return (end - start) * pct + start

    def get_lr(self):
        """Compute the learning rate of each parameter group."""
        _warn_get_lr_called_within_step(self)

        lrs = []
        step_num = self.last_epoch

        if step_num > self.total_steps:
            raise ValueError(
                f"Tried to step {step_num} times. "
                f"The specified number of total steps is {self.total_steps}."
            )

        for group in self.optimizer.param_groups:
            start_step = 0.0
            for i, phase in enumerate(self._schedule_phases):
                end_step = phase["end_step"]
                if step_num <= end_step or i == len(self._schedule_phases) - 1:
                    pct = (step_num - start_step) / (end_step - start_step)
                    computed_lr = self._anneal_func(
                        group[phase["start_lr"]], group[phase["end_lr"]], pct
                    )
                    if self.cycle_momentum:
                        computed_momentum = self._anneal_func(
                            group[phase["start_momentum"]],
                            group[phase["end_momentum"]],
                            pct,
                        )
                    break
                start_step = phase["end_step"]

            lrs.append(computed_lr)
            if self.cycle_momentum:
                if self.use_beta1:
                    group["betas"] = (computed_momentum, *group["betas"][1:])
                else:
                    group["momentum"] = computed_momentum

        return lrs
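
# ``OneCycleLR`` changes the learning rate after every batch; inspecting the
# trajectory shows the warmup to ``max_lr`` over roughly ``pct_start`` of the
# run followed by the anneal down towards ``initial_lr / final_div_factor``.
# The helper below is a hedged, illustrative sketch (dummy optimizer, made-up
# hyper-parameters), not part of the original module.
def _demo_one_cycle_shape() -> None:
    import torch

    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1, momentum=0.9)
    sched = torch.optim.lr_scheduler.OneCycleLR(
        opt, max_lr=0.1, total_steps=100, pct_start=0.3
    )

    lrs = []
    for _ in range(100):
        lrs.append(sched.get_last_lr()[0])
        opt.step()
        sched.step()

    peak_step = max(range(len(lrs)), key=lrs.__getitem__)
    print(peak_step, lrs[0], max(lrs), lrs[-1])
    # The peak sits near step 30 (pct_start * total_steps); the first lr is
    # max_lr / div_factor = 0.004 and the final lr approaches
    # initial_lr / final_div_factor.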