
    i[F                        d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ g dZ e j                   e      Z	 dZ G d	 d
      Z G d de      Z e ej,                  d      ej.                        Zd Z G d d      Z G d d      Zde
dedee
   fdZdej>                  dededeej>                     fdZ d Z!	 	 d"de"edf   de#e$ef   dz  dede"edf   dz  de#e$ef   dz  de"ee"   ee#   f   fd Z%dee   fd!Z&y)#    N)Sequence)Anymap_aggregate)	BlockMask)tree_flattentree_maptree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd Zy)_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                      || _         || _        y N)
init_value	reduce_fn)selfr   r   s      a/var/www/html/engine/venv/lib/python3.12/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__+   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r      s    
#r   r   c                       e Zd Zy)_LossReducerNr   r   r   r   r   r   r   r   0       r   r   g        c                   n    e Zd ZU dZd Zeed<   d Zd Ze	de
edf   fd       Ze	deeef   fd	       Zy
)r   z2
    Class used to specify chunking of inputs
    c                     || _         y r   	split_dim)r   r$   s     r   r   zTensorChunkSpec.__init__@   s	    "r   r$   c                 |    | j                   j                   d| j                   j                   d| j                   dS )N.())	__class__r   r   r$   r   s    r   __repr__zTensorChunkSpec.__repr__E   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    d| j                    dS )NzTensorChunkSpec(r(   r#   r*   s    r   __str__zTensorChunkSpec.__str__J   s    !$..!133r   
chunk_dims.c                      t        | d       }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                     t        |       S r   r   dims    r   <lambda>z,TensorChunkSpec.from_tuple.<locals>.<lambda>\       , r   r   )r.   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleM   s     (,
 r   c                      t        | d       }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                     t        |       S r   r1   r2   s    r   r4   z+TensorChunkSpec.from_dict.<locals>.<lambda>n   r5   r   r   )r.   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dict`   s     *,
 ! r   N)r   r   r   r   r   int__annotations__r+   r-   staticmethodtupler7   dictstrr;   r   r   r   r   r   ;   se    # N

4 #s(O $ !cN! !r   r   c                       e Zd Zy)
_ReplicateNr   r   r   r   rC   rC   t   r    r   rC   
block_mask
num_chunksreturnc                      j                   j                  d      dk(  r g|z  S  j                   j                  d      |k\  sJ d       d}t        j                   j                   ||      }t        j                   j                  ||      } j
                  !t        j                   j
                  ||      ndg|z  } j                  !t        j                   j                  ||      ndg|z  }g }d}t        |      D ]o  }	 fd}
|j                  t        j                  ||	   ||	   ||	   ||	    j                   |
|       j                               |||	   j                  d      z  }q |S )a	  Given a block mask, split the block mask along the batch dimension (dim0).

    Args:
        block_mask: Block mask to split
        num_chunks: Number of chunks to split the block mask into

    Returns:
        chunk_block_masks: List of chunked block masks
    r      z;Block mask has fewer batch size than the number of chunks. Nc                       fd}|S )Nc                 ^    t        j                  |       }j                  | |z   |||      S r   )torch	full_likemask_mod)bhq_idxkv_idxb_offsetrD   idxs        r   batch_offset_mask_modzI_split_block_mask.<locals>.create_mask_mod.<locals>.batch_offset_mask_mod   s.     ??1c2!**1x<E6JJr   r   )rS   rT   rD   s   ` r   create_mask_modz*_split_block_mask.<locals>.create_mask_mod   s    K )(r   )kv_num_blocks
kv_indicesfull_kv_num_blocksfull_kv_indices
BLOCK_SIZErM   seq_lengths)rV   sizerK   tensor_splitrW   rX   rY   rangeappendr   from_kv_blocksrZ   r[   )rD   rE   	batch_dimkv_num_blocks_chunkskv_indices_chunksfull_kv_num_blocks_chunksfull_kv_indices_chunkschunk_block_masksbatch_offset	chunk_idxrU   s   `          r   _split_block_maskri   x   s    $$Q'1,|j((##((+z9 E9 I --  *i **:+@+@*iX ((4 	:88*iPVj   %%1 	:55z9MVj   L:& @		) 	  $$29=,Y7#<Y#G 6y A%00(6&22
	
 	,Y7<<Q??)@* r   tensorspecc                 (   | j                  |j                        |k\  s$J d| j                  |j                         d       t        j                  | ||j                        }t        s|S g }d}|D ]  }t        j
                  |       }||j                  |j                        z   }t        ddd      g|j                  z  }	t        ||      |	|j                  <   |||	<   |j                  |       ||j                  |j                        z  } |S )zGiven a tensor, and a chunking spec, split the tensor.
    Args:

        tensor: Tensor to split
        spec: Chunking spec
        num_chunks: Number of chunks to split the tensor into

    Returns:
        chunk_tensors: List of chunked tensors
    zTensor size z is smaller than num_chunksr   N)	r\   r$   rK   r]   _debug_mask_minibatches
zeros_likeslicendimr_   )
rj   rk   rE   chunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indicess
             r   _split_tensorrx      s     ;;t~~&*4 
v{{4>>233NO4 &&vz4>>JM"OM% 
;""6*!L$5$5dnn$EE	tT401GLL@(-mY(Gdnn%!-w'**4>>::
; r   c                    | st        |      D cg c]  }i  c}S t        |       t        |      k(  s;J dt        | j                                dt        |j                                       |J t	        | d       \  }}t	        |d       \  }}g }t        ||d      D ]+  \  }}	|	t        u st        |	t              r|j                  |       1t        |t        j                        r=t        |	t              sJ |j                  |j                  |	j                               t        |t              rt        |	t              sJ |	j                  dk(  sJ d	       |j                  j                  d      d
k(  r|j                  |       |j                  |j                  j                  d             t!        d|	 d| d       t#        g || }
t        |
      D cg c]  }g  }}t        ||d      D ]  \  }}	g }|	t        u st        |	t              r|g|
z  }nWt        |t        j                        rt%        ||	|
      }n/t        |t              rt'        ||
      }nt!        d|	 d| d      t        ||d      D ]  \  }}|j                  |         |D cg c]  }t)        ||       c}S c c}w c c}w c c}w )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    zargs_dict.keys() = z args_chunk_spec.keys() = c                 "    t        | t              S r   
isinstancer   xs    r   r4   z%_shard_dict_of_args.<locals>.<lambda>   s    Z9%= r   is_leafc                 "    t        | t              S r   r{   r}   s    r   r4   z%_shard_dict_of_args.<locals>.<lambda>   s    :a+C r   Tstrictr   z#BlockMask only supports split_dim=0rH   zUnsupported chunk spec: z and value: z combination.)r^   lenlistkeysr   ziprC   r|   r_   rK   Tensorr   r\   r$   r   rV   
ValueErrorminrx   ri   r
   )	args_dictr6   rE   _values	tree_specchunk_specssplit_sizesvrk   result_num_chunksflat_split_resultsv_splits_flat_split_result_v_splits                  r   _shard_dict_of_argsr      s   $ !*-.q..y>S11 
d9>>#345 6$$()=)=)?$@#A	C1 &&&$=FI "!CNK
 Kv{48 4 :D*!=z*5<<(dO444qvvdnn569%dO444>>Q&M(MM&##A&!+"":.""1??#7#7#:;*4&QC}M #( 5[5*5167H1I$JAR$J$Jv{48 04"$:D*!=s..H5<<($Q.?@H9%(,=>H*4&QC}M  -0-
 	0( %%h/	00( #5 	)95 u /N %K&s   	K-	K1Kargs.kwargschunksr6   r:   c                 j  	 |i }d }|t        || d       }|t        ||d       }t        t        t        |             t        t        |            |      }t	        |      }t        |||      }t	        |      |k  r<t	        |      }t        t        t        |             t        t        |            |      }t	        |      t	        |      k7  r#t        dt	        |       dt	        |             |D 		cg c](  	t        	fdt        t	        	            D              * }
}	|
|fS c c}	w )a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    c                 v    t        | t        j                  t        z        rt	        t
              S t               S r   )r|   rK   r   r   r   DEFAULT_CHUNK_DIMrC   r   s    r   default_specz3split_args_kwargs_into_chunks.<locals>.default_specq  s)    a	12"#455<r   c                 "    t        | t              S r   r{   r   s    r   r4   z/split_args_kwargs_into_chunks.<locals>.<lambda>y  s    *Q	2J r   r   c                 "    t        | t              S r   r{   r   s    r   r4   z/split_args_kwargs_into_chunks.<locals>.<lambda>~  s    Jq)4L r   z;args and kwargs are split into different number of chunks: z, c              3   (   K   | ]	  }|     y wr   r   ).0i
chunk_argss     r   	<genexpr>z0split_args_kwargs_into_chunks.<locals>.<genexpr>  s     <jm<s   )r	   r   r@   	enumerater   RuntimeErrorr?   r^   )r   r   r   r6   r:   r   args_split_dictreal_num_chunkskwargs_splitr   
args_splits            ` r   r   r   4  sW   p ~  "$(J
  $&*L
 *Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	
 * 	<U3z?%;<<J 
 |##s   =-D0c                    |t        |      \  }}n-t        | d         \  }}t        t              gt        |      z  }g }| D ]I  }t        |      \  }}t        |      t        |      k7  rt	        d| d|       |j                  |       K g }	t        |      D ]  \  }
}t        |t              rht        t        |            D cg c]
  }||   |
    }}t        r|d   j                  }|dd D ]  }|j                  |k(  rJ  t        j                  t        j                  |ddit        |      |j                        }g }d}t        |      t        |      k(  sJ t        ||d	
      D ]o  \  }}||j!                  |j                        z   }t#        ddd      g|j$                  z  }t#        ||      ||j                  <   ||   }|j                  |       |}q n|}|	j                  t        j&                  ||j                               t        |t(              rP|j*                  }t        t        |            D ]  }|j-                  |||   |
         } |	j                  |       |d   |
   }t        dt        |            D ]  }||   |
   |k(  rJ  |	j                  |       " t/        |	|      S c c}w )z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec rH   devicemeta)sectionsr3   Tr   r2   )r   r   r   r   r   r_   r   r|   r^   rm   shaperK   r]   emptyr$   r   r\   ro   rp   catr   r   r   r
   )r   
chunk_specspec_flattenedflatten_specchunk0_flatchunks_flattenedchunkchunk_flattenedr   args_flattenedarg_idxargrh   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxrw   slicedreduced_valvalues                             r   r   r     s   Z '3J'?$ %1$;!\)*;<=K@PP  1)%03~#66veW,FzlSTT01 N!.1 2)c?+ "'s+;'<!= !+G4N 
 ' .q 1 7 7)!"- 6C995556#00KK>v> 0 !#"#>*c+.>>>>14"K2 
4-M: %4joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O
4 !/!!%))Ms}}"MN^,..K"3'7#89 	!mm!1)!<W!E
 !!+.$Q'0E"1c*:&;< E	'	27;uDDDE!!%(e2)j .,77gs   
K)NN)'loggingoperatorcollections.abcr   typingr   rK   torch.fx.noder   !torch.nn.attention.flex_attentionr   torch.utils._pytreer   r	   r
   __all__	getLoggerr   loggerrm   r   r   rj   addsum_reducerr   r   rC   r<   r   ri   r   rx   r   r?   r@   rA   r   r   r   r   r   <module>r      s     $   ' 7 F F 
		8	$
   # #$	> 	 <5<<,hll;  5! 5!r	 	=== 
)_=@&LL&
& & ell	&RPn ;?;?p$
S/p$cNT!p$ p$ ?C/047	p$
 C01D8p$ 4;T
"#p$fy8Iy8r   