
    ia                        d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	 ddl
Z
ddl
mZ g dZ ed      Z e	d	      Zed
   Zed   Z	 d2dededede
j$                  dz  def
dZ	 d2dededede
j$                  dz  def
dZ	 d2dededededede
j$                  dz  defdZdededefdZdedefdZ	 d2dedeez  dz  defdZ	 	 	 d3dededede
j$                  dz  def
dZ	 	 	 d3dededede
j$                  dz  def
dZ	 	 	 	 	 d4dededededede
j$                  dz  defdZdededefdZdedefd Zdedefd!Zdedefd"Z d5ded#edefd$Z!dede"eef   fd%Z#	 	 d6ded&ede
j$                  dz  defd'Z$	 	 d6ded&ede
j$                  dz  defd(Z%ded)edefd*Z&	 	 	 	 d7deded)edede
j$                  dz  defd+Z'	 	 	 	 d7deded)edede
j$                  dz  defd,Z(	 	 d8ded&ede
j$                  dz  defd-Z)	 	 d9ded.edede
j$                  dz  def
d/Z*d0eeef   deeef   fd1Z+ e+e      Z, e+e      Z- e+e      Z. e+e       Z/ e+e!      Z0 e+e$      Z1 e+e%      Z2 e+e'      Z3 e+e(      Z4 e+e)      Z5 e+e*      Z6y):zHThis file contains utilities for initializing neural network parameters.    N)Callable)LiteralTypeVar)	ParamSpec)Tensor)calculate_gainuniform_normal_trunc_normal_	constant_ones_zeros_eye_dirac_xavier_uniform_xavier_normal_kaiming_uniform_kaiming_normal_orthogonal_sparse_uniformnormalconstanteyediracxavier_uniformxavier_normalkaiming_uniformkaiming_normal
orthogonalsparse_R_P)linearconv1dconv2dconv3dconv_transpose1dconv_transpose2dconv_transpose3dsigmoidtanhrelu
leaky_reluselu)fan_infan_outtensorab	generatorreturnc                 ~    t        j                         5  | j                  |||      cd d d        S # 1 sw Y   y xY wNr5   )torchno_gradr	   r2   r3   r4   r5   s       G/var/www/html/engine/venv/lib/python3.12/site-packages/torch/nn/init.py_no_grad_uniform_r>   E   s4     
 :q!y9: : :   3<meanstdc                 ~    t        j                         5  | j                  |||      cd d d        S # 1 sw Y   y xY wr8   )r:   r;   r
   r2   r@   rA   r5   s       r=   _no_grad_normal_rD   L   s4     
 >~~dC9~=> > >r?   c                    dt         dt         fd}||d|z  z
  k  s||d|z  z   kD  rt        j                  dd       t        j                         5   |||z
  |z        } |||z
  |z        }| j                  d|z  dz
  d|z  dz
  |       | j                          | j                  |t        j                  d	      z         | j                  |       | j                  ||
       | cd d d        S # 1 sw Y   y xY w)Nxr6   c                 d    dt        j                  | t        j                  d      z        z   dz  S )N      ?       @)matherfsqrt)rF   s    r=   norm_cdfz(_no_grad_trunc_normal_.<locals>.norm_cdf_   s(    dhhq499S>122c99       zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.
stacklevel   r9   rI   )minmax)floatwarningswarnr:   r;   r	   erfinv_mul_rJ   rL   add_clamp_)	r2   r@   rA   r3   r4   r5   rM   lus	            r=   _no_grad_trunc_normal_r^   V   s    :E :e : 	q1s7{q1s7{ 2;	
 
  a$h#%&a$h#%& 	A	1q519	B 	 	C$))C.()D 	!#+  s   BC55C>valc                 x    t        j                         5  | j                  |      cd d d        S # 1 sw Y   y xY wN)r:   r;   fill_r2   r_   s     r=   _no_grad_fill_rd      s,    	 !||C ! ! !s   09c                 v    t        j                         5  | j                         cd d d        S # 1 sw Y   y xY wra   )r:   r;   zero_r2   s    r=   _no_grad_zero_rh      s)    	 ||~  s   /8nonlinearityparamc                 \   g d}| |v s| dk(  ry| dk(  ry| dk(  rt        j                  d      S | dk(  re|d	}nBt        |t              st        |t              st        |t
              r|}nt        d
| d      t        j                  dd|dz  z   z        S | dk(  r	 yt        d|        )a  Return the recommended gain value for the given nonlinearity function.

    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    SELU              :math:`\frac{3}{4}`
    ================= ====================================================

    .. warning::
        In order to implement `Self-Normalizing Neural Networks`_ ,
        you should use ``nonlinearity='linear'`` instead of ``nonlinearity='selu'``.
        This gives the initial weights a variance of ``1 / N``,
        which is necessary to induce a stable fixed point in the forward pass.
        In contrast, the default gain for ``SELU`` sacrifices the normalization
        effect for more stable gradient flow in rectangular layers.

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function

    Examples:
        >>> gain = nn.init.calculate_gain(
        ...     "leaky_relu", 0.2
        ... )  # leaky_relu with negative_slope=0.2

    .. _Self-Normalizing Neural Networks: https://papers.nips.cc/paper/2017/hash/5d44ee6f2c3f71b73125876103c8f6c4-Abstract.html
    )r$   r%   r&   r'   r(   r)   r*   r+   rR   r,   g?r-   rI   r.   {Gz?znegative_slope z not a valid numberrO   r/   g      ?zUnsupported nonlinearity )rJ   rL   
isinstanceboolintrU   
ValueError)ri   rj   
linear_fnsnegative_slopes       r=   r   r      s    LJ z!\Y%>				yy~		%=!N5$'5#&%' #Nug5HIJJyyNA$5 5677			
 4\NCDDrN   c                     t         j                  j                  |       r*t         j                  j                  t        | f| |||      S t        | |||      S )a  Fill the input Tensor with values drawn from the uniform distribution.

    :math:`\mathcal{U}(a, b)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the lower bound of the uniform distribution
        b: the upper bound of the uniform distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.uniform_(w)
    r<   )r:   	overrideshas_torch_function_variadichandle_torch_functionr	   r>   r<   s       r=   r	   r	      sT    ( 226:44vi!qI 5 
 	
 VQ955rN   c                     t         j                  j                  |       r*t         j                  j                  t        | f| |||      S t        | |||      S )a  Fill the input Tensor with values drawn from the normal distribution.

    :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)
    rC   )r:   rt   ru   rv   r
   rD   rC   s       r=   r
   r
      sT    ( 226:44fYvDcY 5 
 	
 FD#y99rN   c                 $    t        | |||||      S )a  Fill the input Tensor with values drawn from a truncated normal distribution.

    The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    r9   )r^   )r2   r@   rA   r3   r4   r5   s         r=   r   r     s    8 "&$QYOOrN   c                     t         j                  j                  |       r(t         j                  j                  t        | f| |      S t        | |      S )zFill the input Tensor with the value :math:`\text{val}`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        val: the value to fill the tensor with

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.constant_(w, 0.3)
    rc   )r:   rt   ru   rv   r   rd   rc   s     r=   r   r   +  sL     226:44yS 5 
 	
 &#&&rN   c                     t        | d      S )zFill the input Tensor with the scalar value `1`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.ones_(w)
    rH   )rd   rg   s    r=   r   r   =  s     &#&&rN   c                     t        |       S )zFill the input Tensor with the scalar value `0`.

    Args:
        tensor: an n-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.zeros_(w)
    )rh   rg   s    r=   r   r   J  s     &!!rN   c                     | j                         dk7  rt        d      t        j                         5  t        j                  | j
                  | | j                  d ddd       | S # 1 sw Y   | S xY w)a=  Fill the 2-dimensional input `Tensor` with the identity matrix.

    Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    rO   ,Only tensors with 2 dimensions are supported)outrequires_gradN)
ndimensionrp   r:   r;   r   shaper   rg   s    r=   r   r   W  sa     aGHH	 Q		6<<V6;O;OPQMQMs   +A((A2groupsc                    | j                         }|dvrt        d      | j                         }|d   |z  dk7  rt        d      |d   |z  }t        ||d         }t	        j
                         5  | j                          t        |      D ]  }t        |      D ]  }|dk(  r!d| ||z  |z   || j                  d      dz  f<   )|dk(  r4d| ||z  |z   || j                  d      dz  | j                  d      dz  f<   bd| ||z  |z   || j                  d      dz  | j                  d      dz  | j                  d      dz  f<     	 d	d	d	       | S # 1 sw Y   | S xY w)
aF  Fill the {3, 4, 5}-dimensional input `Tensor` with the Dirac delta function.

    Preserves the identity of the inputs in `Convolutional`
    layers, where as many input channels are preserved as possible. In case
    of groups>1, each group of channels preserves identity

    Args:
        tensor: a {3, 4, 5}-dimensional `torch.Tensor`
        groups (int, optional): number of groups in the conv layer (default: 1)
    Examples:
        >>> w = torch.empty(3, 16, 5, 5)
        >>> nn.init.dirac_(w)
        >>> w = torch.empty(3, 24, 5, 5)
        >>> nn.init.dirac_(w, 3)
    )         z5Only tensors with 3, 4, or 5 dimensions are supportedr   z!dim 0 must be divisible by groupsrR   r   rO   r   N)r   rp   sizerS   r:   r;   rf   range)r2   r   
dimensionssizesout_chans_per_grpmin_dimgds           r=   r   r   l  s     ""$J"PQQKKMEQx&A<==aF*#U1X.G	 v 	A7^ ?PQF10014aQ19LLM1_  --1A!+A!+-  --1A!+A!+A!+	-	, M-, Ms   1CEEc                     | j                         }|dk  rt        d      | j                  d      }| j                  d      }d}| j                         dkD  r| j                  dd  D ]  }||z  }	 ||z  }||z  }||fS )NrO   zNFan in and fan out can not be computed for tensor with fewer than 2 dimensionsrR   r   )dimrp   r   r   )r2   r   num_input_fmapsnum_output_fmapsreceptive_field_sizesr0   r1   s           r=   _calculate_fan_in_and_fan_outr     s    JA~\
 	
 kk!nO{{1~zz|a ab! 	&A A% 	&33F!55G7?rN   gainc                     t        |       \  }}|t        j                  dt        ||z         z        z  }t        j                  d      |z  }t	        | | ||      S )a  Fill the input `Tensor` with values using a Xavier uniform distribution.

    The method is described in `Understanding the difficulty of training
    deep feedforward neural networks` - Glorot, X. & Bengio, Y. (2010).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain("relu"))
    rI         @)r   rJ   rL   rU   r>   )r2   r   r5   r0   r1   rA   r3   s          r=   r   r     sY    4 4F;OFG
3v'7!889
9C		#AVaRI66rN   c                     t        |       \  }}|t        j                  dt        ||z         z        z  }t	        | d||      S )a  Fill the input `Tensor` with values using a Xavier normal distribution.

    The method is described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010). The resulting tensor
    will have values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        gain: an optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.xavier_normal_(w)
    rI           )r   rJ   rL   rU   rD   )r2   r   r5   r0   r1   rA   s         r=   r   r     sE    2 4F;OFG
3v'7!889
9CFCi88rN   modec                     |j                         }ddg}||vrt        d| d|       t        |       \  }}|dk(  r|S |S )Nr0   r1   zMode z" not supported, please use one of )lowerrp   r   )r2   r   valid_modesr0   r1   s        r=   _calculate_correct_fanr     sX    ::<DY'K;5&HVWW3F;OFGX%6272rN   c           	         t         j                  j                  |       r+t         j                  j                  t        | f| ||||      S d| j
                  v rt        j                  dd       | S t        | |      }t        ||      }|t        j                  |      z  }t        j                  d      |z  }t        j                         5  | j                  | ||      cddd       S # 1 sw Y   yxY w)	a  Fill the input `Tensor` with values using a Kaiming uniform distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_uniform_(w, mode="fan_in", nonlinearity="relu")

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_uniform_(w.T, ...)``.
    )r2   r3   r   ri   r5   r   ,Initializing zero-element tensors is a no-oprO   rP   r   r9   N)r:   rt   ru   rv   r   r   rV   rW   r   r   rJ   rL   r;   r	   )	r2   r3   r   ri   r5   fanr   rA   bounds	            r=   r   r     s    V 226:44I% 5 
 	
 	FLLDQRS
 
.C,*D
3
CIIcNS E	 Cvu	BC C Cs   C--C6c                 ,   d| j                   v rt        j                  dd       | S t        | |      }t	        ||      }|t        j                  |      z  }t        j                         5  | j                  d||      cddd       S # 1 sw Y   yxY w)a  Fill the input `Tensor` with values using a Kaiming normal distribution.

    The method is described in `Delving deep into rectifiers: Surpassing
    human-level performance on ImageNet classification` - He, K. et al. (2015).
    The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.kaiming_normal_(w, mode="fan_out", nonlinearity="relu")

    Note:
        Be aware that ``fan_in`` and ``fan_out`` are calculated assuming
        that the weight matrix is used in a transposed manner,
        (i.e., ``x @ w.T`` in ``Linear`` layers, where ``w.shape = [fan_out, fan_in]``).
        This is important for correct initialization.
        If you plan to use ``x @ w``, where ``w.shape = [fan_in, fan_out]``,
        pass in a transposed weight matrix, i.e. ``nn.init.kaiming_normal_(w.T, ...)``.
    r   r   rO   rP   r9   N)
r   rV   rW   r   r   rJ   rL   r:   r;   r
   )r2   r3   r   ri   r5   r   r   rA   s           r=   r   r   B  s    V 	FLLDQRS
 
.C,*D
3
C	 ;~~a	~:; ; ;s   ,B

Bc                    | j                         dk  rt        d      | j                         dk(  r| S | j                  d      }| j                         |z  }| j	                  ||f      j                  dd|      }||k  r|j                          t        j                  j                  |      \  }}t        j                  |d      }|j                         }	||	z  }||k  r|j                          t        j                         5  | j                  |      j                  |       | j                  |       ddd       | S # 1 sw Y   | S xY w)a  Fill the input `Tensor` with a (semi) orthogonal matrix.

    Described in `Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks` - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    rO   z4Only tensors with 2 or more dimensions are supportedr   rR   r9   N)r   rp   numelr   	new_emptyr
   t_r:   linalgqrdiagsignr;   view_ascopy_rY   )
r2   r   r5   rowscols	flattenedqrr   phs
             r=   r   r   w  s   , QOPP||~;;q>D<<>T!D  $.66q!y6QId{ <<??9%DAq

1aA	
BGAd{		 q"D M Ms   2EEsparsityc                 p   | j                         dk7  rt        d      | j                  \  }}t        j                  ||z        }t        j                         5  | j                  d||       t        |      D ]#  }t        j                  |      }|d| }	d| |	|f<   % 	 ddd       | S # 1 sw Y   | S xY w)a  Fill the 2D input `Tensor` as a sparse matrix.

    The non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in `Deep learning via
    Hessian-free optimization` - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values
        generator: the torch Generator to sample from (default: None)

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    rO   r}   r   r9   N)
r   rp   r   rJ   ceilr:   r;   r
   r   randperm)
r2   r   rA   r5   r   r   	num_zeroscol_idxrow_indiceszero_indicess
             r=   r   r     s    . aGHHJD$		(T/*I	 .q#3T{ 	.G...K&z	2L,-F<()	.. M. Ms   AB++B5methc                       j                   d d dt        j                  dt        j                  dt        f fd}d d d d	|_        |_         |S )
Nargskwargsr6   c                  Z    t        j                  d d dt        d        | i |S )Nz	`nn.init.z)` is now deprecated in favor of `nn.init.z`.rO   rP   )rV   rW   FutureWarning)r   r   r   new_nameold_names     r=   deprecated_initz(_make_deprecate.<locals>.deprecated_init  s;    z!J8*TVW	

 T$V$$rN   z
    z_(...)

    .. warning::
        This method is now deprecated in favor of :func:`torch.nn.init.z"`.

    See :func:`~torch.nn.init.z` for details.)__name__r#   r   r   r"   __doc__)r   r   r   r   s   ` @@r=   _make_deprecater     sy    }}H}H%rww %")) % %$J H IQz R'j:O  (OrN   ra   )r   rH   N)r   rH   g       rI   N)rR   )rH   N)r   r0   r.   N)rR   N)rl   N)7r   rJ   rV   collections.abcr   typingr   r   typing_extensionsr   r:   r   __all__r"   r#   _NonlinearityType_FanModerU   	Generatorr>   rD   r^   rd   rh   ro   r   r	   r
   r   r   r   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!    rN   r=   <module>r      sb   N   $ # '  > T]t_  &' MQ:::!&:38??T3I:: )-	>>
> 
> %	>
 >  )-))
) 
) 	)
 ) %) )X!6 ! !& !
6 f  BFGE#GE,/%K$,>GE
GEX (,	666 6 %	6
 6: (,	::
: 
: %	:
 :: (,PP
P 
P 	P
 P %P P>'f '5 'V '$
'& 
'V 
'
"6 
"f 
" F *26 23 2v 2j& U38_ . (,77
7 %7 	7F (,99
9 %9 	9>36 3 3c 3 &2(,>C>C>C >C $	>C
 %>C >CF &2(,2;2;2; 2; $	2;
 %2; 2;n (,00
0 %0 	0l (,	### 
# %	#
 #N(2r6* xB/? . (
#		!9%d 1/!"23 1[)
		!rN   