
    i#                     V   d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ ddlmZmZ dZ G d	 d
e
      ZdededefdZ G d d      Z ee      	 ddej&                  deej                  j&                     dz  dej&                  fd       Zdej&                  defdZy)    N)Iterable)AnyNoReturn)_State)DistributedDataParallel   )_get_registrycontract c                       e Zd ZU ej                  ed<   d fdZefdej                  de
ej                     de
ej                     deddf
d	Zdd
Zdej                  de
ej                     ddfdZddZddZdej                  deedf   deeef   defdZdej                  deej.                     dej.                  dej.                  fdZ xZS )_ReplicateState_ddp_weakrefreturnNc                     t         |           t        j                         | _        d| _        t        j                         | _        | j                  | _        g | _        d| _	        d | _
        i | _        g | _        y )NF)super__init__nnParameterListmodulehas_initialized_param_list_orig_module_param_names_no_sync
_init_args_init_kwargs_comm_hook_args)self	__class__s    a/var/www/html/engine/venv/lib/python3.12/site-packages/torch/distributed/_composable/replicate.pyr   z_ReplicateState.__init__   sk    !#!1!1!3%*-/-=-=-? !KK')#26,.*,    r   ignored_modulesignored_paramsprefixc                 b   t        |      ry ||v ry |t        k7  r| dnt        }|j                  d      D ]D  \  }}||vs| j                  j	                  |       | j
                  j	                  | |        F |j                         D ]  \  }}	| j                  |	||| |          y )N.F)recurse)r$   )_is_fully_sharded_ROOT_MODULE_PREFIXnamed_parametersr   appendr   named_children_collect_params)
r   r   r"   r#   r$   recurse_prefixnpnamechild_modules
             r    r-   z_ReplicateState._collect_params"   s     V$ _$ #&99vhaL?R 	 ++E+: 	ADAq&  ''*!!((N+;A3)?@	A
 #)"7"7"9 	D,  ()$0	 ! 	r!   c                 P     t        j                  d       fd       } |        y )NT)	recursivec                       j                   J   j                   j                   i  j                    j                          d _         i  _        y )N )r   initr   register_comm_hook)r   s   r    
_lazy_initz-_ReplicateState.lazy_init.<locals>._lazy_initC   sK    ??...DIIt<$*;*;<##% DO "Dr!   )torch_disable_dynamo)r   r9   s   ` r    	lazy_initz_ReplicateState.lazy_initB   s'    				.	# 
/	# 	r!   c                    | j                   ry d| _         || _        |D ch c]  }|j                         D ]  }|  }}}|j                         D ]-  }t	        |      s|j                  |j                                / ddlm}  |||       | j                  |||       d|v rV|d   ;|d   }	t        |	t        j                        r|	j                  dk(  rd |d<   n|	g|d<   nd |d<   |j                  d       t        | j                  fi || _        t#        j$                  | j                         t&        j)                  | j                        _        y c c}}w )NTr   )_localize_dtensor)r#   	device_idcpu
device_ids)r   r   
parametersmodulesr(   update%torch.distributed.tensor.parallel.ddpr>   r-   
isinstancer:   devicetypepopr   r   _ddpweakrefref	replicatestater   )
r   r   r"   kwargsmr0   r#   	submoduler>   r?   s
             r    r7   z_ReplicateState.initM   s@    #%4MallnM!M!MM) 	>I +%%i&:&:&<=	> 	L&@V_nE& 
 k".";/	i69>>U;R+/F<( -6;F<('+|$JJ{#+D,<,<GG	4;KK		4J	$1; Ns   Ec                     | j                   D ]!  \  }} | j                  j                  |i | # | j                   j                          y N)r   rJ   r8   clear)r   	comm_argscomm_kwargss      r    r8   z"_ReplicateState.register_comm_hookw   sI    &*&:&: 	D"I{(DII(()C{C	D""$r!   c                      || _         || _        y rS   )r   r   r   argsrO   s      r    record_init_argsz _ReplicateState.record_init_args|   s    "r!   rY   .rO   c                     | j                   s| j                  r| j                          | j                   | j                  _         | j                  j                  |i |S rS   )r   r   r<   r   rJ   require_backward_grad_sync_pre_forward)r   r   rY   rO   s       r    forward_pre_hookz _ReplicateState.forward_pre_hook   sL     ??d//NN37==/@		,%tyy%%t6v66r!   inputoutputc                 8    | j                   j                  |      S rS   )rJ   _post_forward)r   r   r_   r`   s       r    forward_post_hookz!_ReplicateState.forward_post_hook   s     yy&&v..r!   r   N)__name__
__module____qualname__rK   rL   __annotations__r   r)   r   Moduleset	Parameterstrr-   r<   r7   r8   rZ   tupler   dictr^   r:   Tensorrc   __classcell__)r   s   @r    r   r      s   ++-& *		 RYY BLL)	
  
@	(K		(K RYY(K
 
(KT%
#7ii7',S#X7@DS#X7	7/		/ U\\"/ 	/
 
/r!   r   rY   rO   r   c                      t        d      )NzGDDP does not support deepcopy. Please use state dict for serialization.)AssertionError)rY   rO   s     r    unimplemented_deepcopyrs      s    
Q r!   c                   *    e Zd Zd ZdeddfdZddZy)DDPc                 L    | j                   d   } |j                  |g|i |S )z
        Override ``__new__`` to remove the DDP class and directly construct
        the original class for cases like indexing into a container module.
           )__mro____new__)clsrY   rO   orig_clss       r    ry   zDDP.__new__   s.     ;;q>x:4:6::r!   requires_gradient_syncr   Nc                 :    | t         j                  |       _        y)a  
        Sets if the module should sync gradients. This can be used to implement
        gradient accumulation without communication.

        Args:
            requires_gradient_sync (bool): Whether to reduce gradients for the
                module's parameters.
        N)rM   rN   r   )r   r|   s     r    set_requires_gradient_synczDDP.set_requires_gradient_sync   s     .D)C	&r!   c                 d    t         j                  |       j                  j                  ||f       y rS   )rM   rN   r   r+   rX   s      r    r8   zDDP.register_comm_hook   s#    --44dF^Dr!   rd   )re   rf   rg   ry   boolr~   r8   r6   r!   r    ru   ru      s#    ;	D 	D$ 	DEr!   ru   )	state_clsr   r"   c                    t         j                  j                  d       d|v r=t        |d   t        t         j
                  f      st        dt        |d                t        |       rt        d      |i }nt        |      }t        j                  |       }| j                  |j                  d       |j                  d      }|?|j                         }||k7  r*dd	lm}m} | j                  |       | j'                  |       | j'                  |j(                          |j*                  | |fi | | j,                  }d
t.        i}	t        d|j0                   t2        |f|	      }
|
| _        | S )zReplicates a module

    Args:
        module (torch.nn.Module): module to replicate

    Example::
        >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
        >>> module = nn.Linear(3, 3)
        >>> replicate(module)
    ztorch.distributed.replicater?   z6Expected device_id to be int or torch.device, but got zGCannot apply `replicate()` on a Module already managed by `fully_shard`T)with_kwargsdevice_meshr   )r>   _reconstruct_dtensor__deepcopy__ru   )r:   _C_log_api_usage_oncerF   intrG   RuntimeErrorrH   r(   rj   rM   rN   register_forward_pre_hookr^   get_get_root_meshrE   r>   r   register_forward_hookrc   rZ   r   rs   re   ru   )r   r"   rO   rN   r   	root_meshr>   r   rz   dctnew_clss              r    rM   rM      sv     
HH  !>? f&-U\\/BC{ 3457 
  U
 	
 o.OOF#E
$$U%;%;$N**]+K..0	 #
 ,,-AB(():;
  !8!89E6?=f= 

C1
2CS'#sS9GFMr!   c                 &    t        |       }|yd|v S )z+Check if module is marked with fully_shard.Ffully_shard)r	   )r   registrys     r    r(   r(      s     V$HH$$r!   rS   )rK   collections.abcr   typingr   r   r:   torch.nnr   #torch.distributed._composable_stater   torch.nn.parallelr   r
   r	   r)   r   rs   ru   ri   rM   r   r(   r6   r!   r    <module>r      s     $     6 5 -  }/f }/@#   E E4 
O$ 9=CIICehhoo.5C YY	C %CL%bii %D %r!   