from typing import cast

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: float | Tensor = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: bool | None = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: bool | None = None,
    ) -> None:
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")
            self._need_device_dtype_check_for_fused = True

        # Per-parameter state: a scalar step counter plus the running sum of squared
        # gradients, seeded with initial_accumulator_value.
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (), dtype=_get_scalar_dtype(is_fused=group["fused"]), device=p.device
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        # Older checkpoints stored `step` as a plain number; convert it to a tensor.
        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(float(s["step"]), dtype=_get_scalar_dtype(fused))

    def share_memory(self):
        """Calls tensor.share_memory_() on the state sum tensors."""
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", False
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None

        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            state_sums: list[Tensor] = []
            state_steps: list[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad, grads, state_sums, state_steps,
                lr=group["lr"], weight_decay=group["weight_decay"], lr_decay=group["lr_decay"],
                eps=group["eps"], has_sparse_grad=has_sparse_grad, foreach=group["foreach"],
                maximize=group["maximize"], differentiable=group["differentiable"],
                has_complex=has_complex, fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss


Adagrad.__doc__ = (
    r"""Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    """
)


def adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    fused: bool | None = None,
    grad_scale: Tensor | None = None,
    found_inf: Tensor | None = None,
    has_sparse_grad: bool = False,
    foreach: bool | None = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
) -> None:
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Respect an explicit user choice of foreach/fused; only pick a default when
    # neither has been specified.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(params, differentiable, use_fused=False)
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params, grads, state_sums, state_steps,
        lr=lr, weight_decay=weight_decay, lr_decay=lr_decay, eps=eps,
        has_sparse_grad=has_sparse_grad, maximize=maximize, differentiable=differentiable,
        has_complex=has_complex, grad_scale=grad_scale, found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: list[Tensor], grads: list[Tensor], state_sums: list[Tensor], state_steps: list[Tensor],
    grad_scale: Tensor | None, found_inf: Tensor | None, *,
    lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
) -> None:
    if grad_scale is not None or found_inf is not None:
        raise AssertionError("Expected grad_scale and found_inf to be None")

    if not torch.jit.is_scripting():
        lr = _to_scalar(lr)

    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # update step
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            # The update is non-linear, so indices must be unique.
            grad = grad.coalesce()
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)


def _multi_tensor_adagrad(
    params: list[Tensor], grads: list[Tensor], state_sums: list[Tensor], state_steps: list[Tensor],
    grad_scale: Tensor | None, found_inf: Tensor | None, *,
    lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
) -> None:
    assert not differentiable, "_foreach ops don't support autograd"
    if grad_scale is not None or found_inf is not None:
        raise AssertionError("Expected grad_scale and found_inf to be None")

    if len(params) == 0:
        return

    lr = _to_scalar(lr)

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )
        if device_has_sparse_grad:
            # Sparse gradients fall back to the single-tensor path.
            _single_tensor_adagrad(
                device_params, device_grads, device_state_sums, device_state_steps,
                lr=lr, weight_decay=weight_decay, lr_decay=lr_decay, eps=eps,
                has_sparse_grad=True, maximize=maximize, differentiable=differentiable,
                has_complex=has_complex, grad_scale=grad_scale, found_inf=found_inf,
            )
            continue

        # Handle complex parameters as views over their real representation.
        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        # Update steps. If the steps live on CPU, foreach falls back to a slow
        # per-tensor loop, so wrap the 1 into a tensor once up front.
        if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            # Reuse the intermediate already allocated for maximize when possible.
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay) for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1.0)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Reuse the intermediate memory (device_grads) already allocated above.
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)

        torch._foreach_addcdiv_(device_params, numerator, std)


def _fused_adagrad(
    params: list[Tensor], grads: list[Tensor], state_sums: list[Tensor], state_steps: list[Tensor],
    grad_scale: Tensor | None, found_inf: Tensor | None, *,
    lr: float, weight_decay: float, lr_decay: float, eps: float,
    has_sparse_grad: bool, maximize: bool, differentiable: bool, has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")
    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    lr = _to_scalar(lr)

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (device_params_, device_grads_, device_state_sums_, device_state_steps_),
        _,
    ) in grouped_tensors.items():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        # Move the scaler tensors to the right device once per device group.
        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params, device_grads, device_state_sums, device_state_steps,
            lr=lr, lr_decay=lr_decay, weight_decay=weight_decay, eps=eps,
            maximize=maximize, grad_scale=device_grad_scale, found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )
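

# The dense per-parameter update, written out as a compact reference. This is an
# illustrative sketch only: `_adagrad_reference_step` is a hypothetical helper, not part
# of the torch.optim API or of this module's public surface. It mirrors the math in
# ``Adagrad.__doc__``:
#     state_sum_t = state_sum_{t-1} + g_t**2
#     clr         = lr / (1 + (t - 1) * lr_decay)
#     theta_t     = theta_{t-1} - clr * g_t / (sqrt(state_sum_t) + eps)
def _adagrad_reference_step(
    param: Tensor,
    grad: Tensor,
    state_sum: Tensor,
    step: int,
    *,
    lr: float = 1e-2,
    lr_decay: float = 0.0,
    weight_decay: float = 0.0,
    eps: float = 1e-10,
) -> None:
    if weight_decay != 0:
        # L2 penalty is folded into the gradient before accumulation.
        grad = grad.add(param, alpha=weight_decay)
    clr = lr / (1 + (step - 1) * lr_decay)      # decayed learning rate for step t
    state_sum.addcmul_(grad, grad, value=1.0)   # accumulate squared gradients in place
    param.addcdiv_(grad, state_sum.sqrt().add(eps), value=-clr)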
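

# Typical usage of the class-based API (a minimal sketch; `model`, `loss_fn` and
# `data_loader` are placeholders, not names defined in this module):
#
#     optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-2, lr_decay=1e-4)
#     for input, target in data_loader:
#         optimizer.zero_grad()
#         loss = loss_fn(model(input), target)
#         loss.backward()
#         optimizer.step()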