
    iR                         U d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	  G d de      Z
 G d d	e      Z G d
 de      Z G d de      Zdaeej                   dz     dz  ed<   dej$                  fdZy)    N)chain)_get_device_index)Function)commc                   ,    e Zd Zed        Zed        Zy)	Broadcastc                    t        d |D              sJ d       |D cg c]  }t        |d       }}|| _        t        |      dk(  ryt        |      | _        |d   j                         | _        t        j                  || j                        }g }t        | j                  dd        D ]"  \  }|r	|j                  fd|D               $  | j                  |  t        t        j                  |            S c c}w )Nc              3   N   K   | ]  }|j                   j                  d k7    ywcpuNdevicetype.0is     V/var/www/html/engine/venv/lib/python3.12/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>        :a188==E):   #%z2Broadcast function not implemented for CPU tensorsTr       c              3   (   K   | ]	  }|     y wNr   )r   outputidxs     r   r   z$Broadcast.forward.<locals>.<genexpr>   s     *M66#;*Ms   )allr   target_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradextendmark_non_differentiabletupler   from_iterable)ctxr   inputsxoutputsnon_differentiablesinput_requires_gradr   s          @r   forwardzBroadcast.forward   s    :6:: 	
@	
: <GGa(D1GG%v;!V!!9//1**63??C (1#2F2Fqr2J(K 	N$C$&#***MW*MM	N 	$##%89U((122 Hs   C>c                 ^    dt        j                  | j                  | j                  g| z   S )Nr   )ReduceAddCoalescedapplyr"   r    r*   grad_outputss     r   backwardzBroadcast.backward   s4    +11cnn
/;
 
 	
    N__name__
__module____qualname__staticmethodr0   r6   r   r7   r   r   r   
   s(    3 3$ 
 
r7   r   c                   ,    e Zd Zed        Zed        Zy)r2   c                    t        dt        |      |      D cg c]  }||   j                          c}| _        t        dt        |      |      D cg c]
  }||||z     }}t	        j
                  ||      S c c}w c c}w )Nr   )ranger   r!   r   r   reduce_add_coalesced)r*   destinationr    gradsr   grads_s         r   r0   zReduceAddCoalesced.forward&   s     ,1CJ
+K
&'E!H!
 6;1c%j*5UV%A
N+VV((==
 Ws   A:A?c                 H    dt        j                  | j                  g| z   S )NNN)r   r3   r   r4   s     r   r6   zReduceAddCoalesced.backward/   s(    
 OOCOO;l;< 	<r7   Nr8   r   r7   r   r2   r2   %   s(    > > < <r7   r2   c                   ,    e Zd Zed        Zed        Zy)Gatherc                     t        d |D              sJ d       |dk(  rd _        nt        |d      }| _        | _        t	        d |D               _        t        d |D              r6|dk(  r1t	        d |D              }t        j                  d	d
       d _        nd _        t	         fd|D               _	        t        j                  | j                   j                        S )Nc              3   N   K   | ]  }|j                   j                  d k7    ywr   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>:   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   <   K   | ]  }|j                           y wr   )r!   r   s     r   r   z!Gather.forward.<locals>.<genexpr>C   s     >!q||~>s   c              3   B   K   | ]  }|j                         d k(    ywr   N)dimr   ts     r   r   z!Gather.forward.<locals>.<genexpr>D   s     ,quuw!|,s   r   c              3   >   K   | ]  }|j                  d         yw)r   N)viewrN   s     r   r   z!Gather.forward.<locals>.<genexpr>E   s     5166!95s   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc              3   T   K   | ]  }|j                  j                         ! y wr   )sizerM   )r   r   r*   s     r   r   z!Gather.forward.<locals>.<genexpr>O   s     @Asww@s   %()r   target_devicer   rM   r(   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r*   rV   rM   r+   s   `   r   r0   zGather.forward8   s    :6:: 	
=	
: E! %C-mTBM -C>v>>,V,,5f55FMM' 	 %)C!$)C!@@@{{6377C,=,=>>r7   c                     t         j                  | j                  | j                  | j                  |      }| j
                  rt        d |D              }d|z   S )Nc              3   &   K   | ]	  }|d      ywrL   r   )r   gs     r   r   z"Gather.backward.<locals>.<genexpr>X   s     #BQAaD#Bs   rE   )Scatterr3   rW   r[   rM   rZ   r(   )r*   grad_outputscattered_gradss      r   r6   zGather.backwardR   sK    !--NNCOOSWWk
   ##B/#BBOo--r7   Nr8   r   r7   r   rG   rG   7   s(    ? ?2 . .r7   rG   c                   ,    e Zd Zed        Zed        Zy)r`   c                    |D cg c]  }t        |d       }}|| _        |j                  j                  dk7  r|j	                         nd| _        d }t        j                  j                         r:| j
                  dk(  r+|D cg c]   }t        t        j                  |            " }}t        j                  |||| j                  |      }|t        |      D ]s  \  }	}
t        j                  j                  ||	         5  t        j                  j                         }|j                  ||	          |
j!                  |       d d d        u |S c c}w c c}w # 1 sw Y   xY w)NTr   )r   rM   r   r   r!   r"   torchacceleratoris_available_get_streamr   scatterr$   device_indexcurrent_streamwait_streamrecord_stream)r*   r   chunk_sizesrM   inputr,   streamsr   r-   r   r   main_streams               r   r0   zScatter.forward]   s?   ;FGa(D1GG161B1Be1K5++-QS))+0@0@B0FGRSV{5<<#78SGS,,uk;Q&w/ 6	6&&33KNC 6"'"3"3"B"B"DK++GAJ7((56 66
  H T
6 6s   E%EAEE&	c                 `    d d d t        j                  | j                  | j                  g| fS r   )rG   r3   r"   rM   )r*   ra   s     r   r6   zScatter.backwardp   s+    T4c.>.>!V+!VVVr7   Nr8   r   r7   r   r`   r`   \   s*     $ W Wr7   r`   _streamsr   c                    | j                   dk(  st        j                  j                         syt        j                  j	                         j                   | j                   k(  sJ t
        "dgt        j                  j                         z  at
        | j                     0t        j                  | j                        t
        | j                  <   t
        | j                     S )zBGet a background stream for copying between CPU and target device.r   N)	r   rf   rg   rh   current_acceleratorrt   device_countindexStream)r   s    r   ri   ri   y   s     {{e5#4#4#A#A#C002776;;FFF6E--::<<%!&fll!;FLL!!r7   )rX   	itertoolsr   rf   torch._utilsr   torch.autogradr   torch.nn.parallelr   r   r2   rG   r`   rt   listry   __annotations__r   ri   r   r7   r   <module>r      sy       * # "
 
6< <$".X ".JWh W4 .2$u||d"
#d
* 1
" 
"r7   