
from __future__ import annotations

import contextlib
import functools
import warnings
from collections import deque
from dataclasses import dataclass
from typing import cast, overload, Protocol, TYPE_CHECKING
from typing_extensions import TypeIs

import torch
import torchgen
import torchgen.model
from torch._C import (
    _get_dispatch_stack_at,
    _len_torch_dispatch_stack,
    _pop_torch_dispatch_stack,
    _push_on_torch_dispatch_stack,
    DispatchKey,
)
from torch._C._dynamo.guards import set_is_in_mode_without_ignore_compile_internals


if TYPE_CHECKING:
    from collections.abc import Sequence


_is_in_torch_dispatch_mode = False
_is_in_non_infra_torch_dispatch_mode = False
_is_in_any_mode_without_ignore_compile_internals = False


def is_in_torch_dispatch_mode(include_infra_modes: bool = True) -> bool:
    return (
        _is_in_torch_dispatch_mode
        if include_infra_modes
        else _is_in_non_infra_torch_dispatch_mode
    )


def is_in_any_mode_without_ignore_compile_internals() -> bool:
    return _is_in_any_mode_without_ignore_compile_internals


class TorchDispatchMode:
    """
    A ``TorchDispatchMode`` allows you to override the meaning of all
    ``__torch_dispatch__`` overrideable functions within a dynamic scope,
    without having to actually create a tensor subclass or manually
    monkey-patch functions in the PyTorch API.  Some common situations
    where you should use a mode:

        * You want to override the meaning of factory functions, or other
          functions that do not otherwise take a tensor as an argument
          (these cannot be overridden with tensor subclasses).

        * You want to override the behavior of all functions without needing
          to wrap your inputs in tensor subclasses; e.g., if you are just
          interested in logging intermediate computations.

        * You want to control the order of execution of various tensor
          subclasses explicitly, rather than implicitly via the return of
          ``NotImplemented``.

    Independent subclasses of :class:`TorchDispatchMode` are compositional:
    modes can be pushed onto a stack using ``with MyMode():``.
    When you call functions in the PyTorch API inside your
    ``__torch_dispatch__`` implementation, by default, they will forward on to
    the next mode on the mode stack.  If you want to recursively call back into
    your current ``__torch_dispatch__`` implementation, either explicitly
    invoke ``self.__torch_dispatch__(...)``, or use the context manager
    ``self`` to make PyTorch
    API self-referential (beware of infinite loops, in this case!)
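
    As a quick illustration (a minimal sketch; ``LoggingMode`` is a hypothetical
    example, not a class defined in this module)::

        class LoggingMode(TorchDispatchMode):
            def __torch_dispatch__(self, func, types, args=(), kwargs=None):
                print(f"calling {func}")
                return func(*args, **(kwargs or {}))

        with LoggingMode():
            torch.ones(2) + 1   # prints the factory call and the add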
    """

    # Set to True by subclasses whose ``__torch_dispatch__`` can also handle
    # HigherOrderOperators.
    supports_higher_order_operators = False

    def __init__(self, _dispatch_key=None):
        if _dispatch_key is not None:
            if not isinstance(_dispatch_key, torch._C.DispatchKey):
                raise AssertionError("_dispatch_key must be a torch._C.DispatchKey")
            self.__dict__["_dispatch_key"] = _dispatch_key

        self.old_dispatch_mode_flags: deque[bool] = deque()
        self.old_non_infra_dispatch_mode_flags: deque[bool] = deque()
        self.old_without_ignore_compile_internals_dispatch_mode_flags: deque[bool] = (
            deque()
        )

    def _lazy_init_old_dispatch_mode_flags(self):
        # Modes created before these attributes existed (or that skip
        # ``super().__init__()``) get their bookkeeping deques lazily.
        if not hasattr(self, "old_dispatch_mode_flags"):
            self.old_dispatch_mode_flags = deque()
        if not hasattr(self, "old_non_infra_dispatch_mode_flags"):
            self.old_non_infra_dispatch_mode_flags = deque()
        if not hasattr(
            self, "old_without_ignore_compile_internals_dispatch_mode_flags"
        ):
            self.old_without_ignore_compile_internals_dispatch_mode_flags = deque()

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        raise NotImplementedError

    def __enter__(self):
        global _is_in_torch_dispatch_mode
        global _is_in_non_infra_torch_dispatch_mode
        global _is_in_any_mode_without_ignore_compile_internals
        self._lazy_init_old_dispatch_mode_flags()
        self.old_dispatch_mode_flags.append(_is_in_torch_dispatch_mode)
        _is_in_torch_dispatch_mode = True
        self.old_non_infra_dispatch_mode_flags.append(
            _is_in_non_infra_torch_dispatch_mode
        )
        _is_in_non_infra_torch_dispatch_mode = (
            _is_in_non_infra_torch_dispatch_mode or not self.is_infra_mode()
        )
        self.old_without_ignore_compile_internals_dispatch_mode_flags.append(
            _is_in_any_mode_without_ignore_compile_internals
        )
        _is_in_any_mode_without_ignore_compile_internals = (
            _is_in_any_mode_without_ignore_compile_internals
            or not self.ignore_compile_internals()
        )
        set_is_in_mode_without_ignore_compile_internals(
            _is_in_any_mode_without_ignore_compile_internals
        )
        _push_mode(self)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        global _is_in_torch_dispatch_mode
        global _is_in_non_infra_torch_dispatch_mode
        global _is_in_any_mode_without_ignore_compile_internals
        mb_dk_or_mode_key = self.__dict__.get("_dispatch_key", None)
        if mb_dk_or_mode_key is None:
            # Mode keys are not used in the per-dispatch-key (pre-dispatch) logic,
            # so fall back to the mode key if no dispatch key was given.
            mb_dk_or_mode_key = self.__dict__.get("_mode_key", None)
        _is_in_torch_dispatch_mode = self.old_dispatch_mode_flags.pop()
        _is_in_non_infra_torch_dispatch_mode = (
            self.old_non_infra_dispatch_mode_flags.pop()
        )
        _is_in_any_mode_without_ignore_compile_internals = (
            self.old_without_ignore_compile_internals_dispatch_mode_flags.pop()
        )
        set_is_in_mode_without_ignore_compile_internals(
            _is_in_any_mode_without_ignore_compile_internals
        )
        _pop_mode(mb_dk_or_mode_key)

    @classmethod
    def push(cls, *args, **kwargs):
        warnings.warn(
            "`Mode.push()` is no longer necessary and can be replaced with just `with Mode()`",
            stacklevel=2,
        )
        instance = cls(*args, **kwargs)
        return instance

    @classmethod
    def is_infra_mode(cls):
        return False

    @classmethod
    def ignore_compile_internals(cls) -> bool:
        """Ignore operators that are compiled via torch.compile.

        If ``True``, then this TorchDispatchMode ignores operators that
        are optimized by :func:`torch.compile`. Mechanically, this involves
        turning off the TorchDispatchMode throughout the whole compilation process,
        and turning it back on for the runtime of the compiled artifact(s).
        For example::

        @torch.compile
        def f(x):
            return x.sin().cos()

        with LoggingMode():
            f(x)

        The above example will not log anything if
        ``LoggingMode.ignore_compile_internals()`` is True.
        torch.compile will fuse sin() and cos() into a single operation
        and this TorchDispatchMode will not be passed sin and cos.

        If ``False`` (default), :func:`torch.compile` will respect
        the eager semantics of passing this TorchDispatchMode all
        operators that would have run during eager execution.
        The way this will usually happen is that :func:`torch.compile`
        will just fallback to eager-mode PyTorch.
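
        A mode opts in by overriding this classmethod (a minimal sketch of a
        hypothetical subclass, not part of this module)::

            class QuietUnderCompileMode(TorchDispatchMode):
                @classmethod
                def ignore_compile_internals(cls):
                    return True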
        """
        if cls.is_infra_mode():
            return True

        return False


def _get_current_dispatch_mode() -> TorchDispatchMode | None:
    """
    Return the top user mode on the stack (the next one that would be
    executed) if there are any.
    """
    stack_len = _len_torch_dispatch_stack()
    if stack_len > 0:
        return _get_dispatch_stack_at(stack_len - 1)
    return None


def _detect_infra_mode(key):
    if key not in [
        torch._C._TorchDispatchModeKey.FUNCTIONAL,
        torch._C._TorchDispatchModeKey.PROXY,
    ]:
        raise AssertionError(
            f"key must be either FUNCTIONAL ({torch._C._TorchDispatchModeKey.FUNCTIONAL}) "
            f"or PROXY ({torch._C._TorchDispatchModeKey.PROXY}) _TorchDispatchModeKey, "
            f"got {key}"
        )
    from torch._ops import _get_dispatch_mode_pre_dispatch

    pre_dispatch_mode = _get_dispatch_mode_pre_dispatch(key)
    post_dispatch_mode = torch._C._get_dispatch_mode(key)

    if pre_dispatch_mode is not None and post_dispatch_mode is not None:
        raise AssertionError(
            "At most one of pre_dispatch_mode and post_dispatch_mode may be active"
        )

    if pre_dispatch_mode is None:
        return post_dispatch_mode

    return pre_dispatch_mode


def _unset_infra_mode(key):
    from torch._ops import _get_dispatch_mode_pre_dispatch, unset_mode_pre_dispatch

    pre_dispatch_mode = _get_dispatch_mode_pre_dispatch(key)
    post_dispatch_mode = torch._C._get_dispatch_mode(key)
    if pre_dispatch_mode and post_dispatch_mode:
        raise AssertionError(
            "Can't have active infra mode on both pre and post dispatch mode stack"
        )

    if pre_dispatch_mode:
        mode = unset_mode_pre_dispatch(key)
        return mode
    if post_dispatch_mode:
        return torch._C._unset_dispatch_mode(key)


@contextlib.contextmanager
def _disable_infra_mode(key):
    if key not in (
        torch._C._TorchDispatchModeKey.FUNCTIONAL,
        torch._C._TorchDispatchModeKey.PROXY,
    ):
        raise AssertionError(
            "key must be either FUNCTIONAL or PROXY _TorchDispatchModeKey"
        )
    mode_unset = _unset_infra_mode(key)
    try:
        yield mode_unset
    finally:
        if mode_unset is not None:
            _push_mode(mode_unset)


def _get_current_dispatch_mode_stack() -> list[TorchDispatchMode]:
    """
    Returns the current stack of dispatch modes, with the most recent
    (i.e., the one that will be processed first) at the end of the
    list (standard stack convention).
    """
    stack_len = _len_torch_dispatch_stack()
    return [_get_dispatch_stack_at(i) for i in range(stack_len)]


def _push_mode(mode: TorchDispatchMode) -> None:
    k = mode._dispatch_key if hasattr(mode, "_dispatch_key") else None
    if k is not None and k != torch._C.DispatchKey.PreDispatch:
        raise AssertionError(
            "mode._dispatch_key must be None or DispatchKey.PreDispatch"
        )
    if k is None:
        _push_on_torch_dispatch_stack(mode)
        return

    from torch._ops import _set_mode_pre_dispatch, get_cached_ops

    # Per-dispatch-key modes do not participate in the cached dispatch of an op,
    # so clear the cache entries of every op that has been used so far for this key.
    ks = torch._C._functionality_to_backend_keys(k)
    for op in get_cached_ops():
        for key in ks:
            op._uncache_dispatch(key)
    _set_mode_pre_dispatch(mode)


def _pop_mode(k: DispatchKey | torch._C._TorchDispatchModeKey | None = None):
    if k == torch._C.DispatchKey.PreDispatch:  # type: ignore[attr-defined]
        from torch._ops import _pop_mode_from_pre_dispatch

        return _pop_mode_from_pre_dispatch()

    if k is None or isinstance(k, torch._C._TorchDispatchModeKey):
        return _pop_torch_dispatch_stack(k)


@contextlib.contextmanager
def _pop_mode_temporarily(k: DispatchKey | None = None):
    old = _pop_mode(k)
    try:
        yield old
    finally:
        _push_mode(old)


@contextlib.contextmanager
def _disable_current_modes():
    from torch._ops import (
        _len_torch_dispatch_stack_pre_dispatch,
        _pop_mode_from_pre_dispatch,
    )
    from torch._subclasses.functional_tensor import FunctionalTensorMode
    from torch._subclasses.schema_check_mode import SchemaCheckMode
    from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode

    mode_len_pre_dispatch = _len_torch_dispatch_stack_pre_dispatch()
    old_pre_dispatch_modes = [
        _pop_mode_from_pre_dispatch() for _ in range(mode_len_pre_dispatch)
    ]

    has_proxy_mode_in_pre_dispatch = False
    has_functional_mode_in_pre_dispatch = False
    has_schema_check_mode_in_pre_dispatch = False

    for i in old_pre_dispatch_modes:
        if isinstance(i, ProxyTorchDispatchMode):
            has_proxy_mode_in_pre_dispatch = True
        if isinstance(i, FunctionalTensorMode):
            has_functional_mode_in_pre_dispatch = True
        if isinstance(i, SchemaCheckMode):
            has_schema_check_mode_in_pre_dispatch = True

    mode_len = _len_torch_dispatch_stack()
    old_modes = [_pop_mode() for _ in range(mode_len)]

    for old in old_modes:
        if (
            isinstance(old, FunctionalTensorMode)
            and has_functional_mode_in_pre_dispatch
        ):
            raise AssertionError(
                "Can't have FunctionalMode available both in PreDispatch and Python Key"
            )
        if isinstance(old, ProxyTorchDispatchMode) and has_proxy_mode_in_pre_dispatch:
            raise AssertionError(
                "Can't have ProxyTorchDispatchMode available both in PreDispatch and Python Key"
            )
        if isinstance(old, SchemaCheckMode) and has_schema_check_mode_in_pre_dispatch:
            raise AssertionError(
                "Can't have SchemaCheckMode available both in PreDispatch and Python Key"
            )

    # All popped modes are restored (in reverse order) on exit.
    try:
        yield old_pre_dispatch_modes + old_modes
    finally:
        for mode in reversed(old_modes):
            _push_mode(mode)
        for mode in reversed(old_pre_dispatch_modes):
            _push_mode(mode)


class BaseTorchDispatchMode(TorchDispatchMode):
    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        return func(*args, **kwargs)


class TensorWithFlatten(Protocol):
    def __tensor_flatten__(self) -> tuple[Sequence[str], object]:
        ...

    @staticmethod
    def __tensor_unflatten__(
        inner_tensors: int, flatten_spec: int, outer_size: int, outer_stride: int
    ) -> torch.Tensor:
        ...

    # Protocol classes cannot inherit from non-Protocol classes such as
    # torch.Tensor, so spell out the handful of tensor attributes and methods
    # that callers of this protocol rely on.
    shape: torch._C.Size

    @overload
    def stride(self, dim: None = None) -> tuple[int, ...]:
        ...

    @overload
    def stride(self, dim: int) -> int:
        ...

    @overload
    def size(self, dim: None = None) -> tuple[int, ...]:
        ...

    @overload
    def size(self, dim: int) -> int:
        ...

    def storage_offset(self) -> int:
        ...

    def dim(self) -> int:
        ...

    @overload
    def to(
        self,
        dtype: torch.types._dtype,
        non_blocking: bool = False,
        copy: bool = False,
        *,
        memory_format: torch.memory_format | None = None,
    ) -> torch.Tensor:
        ...

    @overload
    def to(
        self,
        device: torch._prims_common.DeviceLikeType | None = None,
        dtype: torch.types._dtype | None = None,
        non_blocking: bool = False,
        copy: bool = False,
        *,
        memory_format: torch.memory_format | None = None,
    ) -> torch.Tensor:
        ...

    @overload
    def to(
        self,
        other: torch.Tensor,
        non_blocking: bool = False,
        copy: bool = False,
        *,
        memory_format: torch.memory_format | None = None,
    ) -> torch.Tensor:
        ...


def is_traceable_wrapper_subclass(t: object) -> TypeIs[TensorWithFlatten]:
    """
    Returns whether or not a tensor subclass that implements __torch_dispatch__
    is 'traceable' with torch.compile.
    In order for a tensor subclass to support TorchDispatchMode-style tracing in PT2,
    it must implement two magic methods: __tensor_flatten__ and __tensor_unflatten__.
    It is also expected to obey some restrictions around traceability and aliasing:
        * The subclass's __torch_dispatch__() implementation should desugar into pytorch
            dispatcher operations that can be traced into a graph.
        * The subclass should use return_and_correct_aliasing(). This is needed today to make
            sure that torch.compile does the right thing in a few cases around input mutation
            and output aliasing.
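
    For orientation, a bare-bones sketch of such a subclass (illustrative only;
    ``WrapperTensor`` is hypothetical, and autograd/aliasing handling is omitted)::

        class WrapperTensor(torch.Tensor):
            @staticmethod
            def __new__(cls, inner):
                return torch.Tensor._make_wrapper_subclass(cls, inner.shape, dtype=inner.dtype)

            def __init__(self, inner):
                self.inner = inner

            def __tensor_flatten__(self):
                return ["inner"], None

            @staticmethod
            def __tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride):
                return WrapperTensor(inner_tensors["inner"])

            @classmethod
            def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
                ...  # unwrap args, run func on the inner tensors, re-wrap outputs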

    Expected magic method signatures:
        attrs, ctx = t.__tensor_flatten__()
            attrs: list of attribute name strings for inner tensors
            ctx: dict containing any other subclass-specific metadata needed for unflattening

        t = MySubClass.__tensor_unflatten__(inner_tensors, ctx, outer_size, outer_stride)
            inner_tensors: dict mapping attribute name -> tensor for each inner tensor
            ctx: dict with subclass metadata in the form that __tensor_flatten__() produces
            outer_size: expected (possibly symbolic) size that the returned subclass
                instance should have. Note that this arg is useful for certain subclasses
                that require the shape info to be constructed. In most cases, this arg can be
                safely ignored.
            outer_stride: expected (possibly symbolic) stride that the returned subclass
                instance should have. Note that this arg is useful for certain subclasses
                that require the stride info to be constructed. In most cases, this arg can be
                safely ignored.
    """
    is_subclass = isinstance(t, torch.Tensor) and type(t) is not torch.Tensor
    return (
        is_subclass
        and hasattr(t, "__tensor_flatten__")
        and hasattr(t, "__tensor_unflatten__")
    )


def is_traceable_wrapper_subclass_type(t: type) -> TypeIs[type[TensorWithFlatten]]:
    """Same as above, but takes a type argument instead of an instance."""
    return (
        issubclass(t, torch.Tensor)
        and t is not torch.Tensor
        and hasattr(t, "__tensor_flatten__")
        and hasattr(t, "__tensor_unflatten__")
    )


def transform_subclass(t, callback, outer_size=None, outer_stride=None):
    """
    Given a traceable, wrapper tensor subclass ``t`` that implements
    ``__torch_dispatch__`` and holds some inner tensors,
    and a callback of type ``Callable[[str, torch.Tensor], torch.Tensor]``,
    `transform_subclass` will construct a fresh instance of the wrapper tensor subclass.
    It will do so by grabbing each inner tensor attribute from the wrapper,
    passing them into ``callback`` to get a transformed tensor,
    and putting each transformed tensor into the fresh tensor subclass instance.
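
    For example (an illustrative sketch; ``w`` is assumed to be an instance of a
    traceable wrapper subclass holding a single inner tensor)::

        # callback receives (attr_name, inner_tensor) and returns the new inner tensor
        detached = transform_subclass(w, lambda attr, inner: inner.detach())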

    Note: this function will not handle ensuring that the fresh subclass
    gets the same (autograd, and aliasing) metadata as the original tensor.
    This is generally handled in other subsystems like AOTAutograd.
    """
    outer_size = outer_size if outer_size is not None else t.size()
    outer_stride = outer_stride if outer_stride is not None else t.stride()

    attrs, ctx = t.__tensor_flatten__()
    transformed_tensors_dict = {}
    for attr in attrs:
        transformed_tensors_dict[attr] = callback(attr, getattr(t, attr))
    sub = type(t).__tensor_unflatten__(
        transformed_tensors_dict, ctx, outer_size, outer_stride
    )

    # NB: Purposefully guard here to simplify the inner / outer symbols.
    # Using sym_eq() for symbolic comparison can result in an expression that's too
    # difficult to guard on, so we use == here.
    if sub.shape != outer_size:
        raise AssertionError(
            f"Expected return value from {type(t)}.__tensor_unflatten__() to have "
            f"shape equal to {outer_size}, but got: {sub.shape}"
        )
    if sub.stride() != outer_stride:
        raise AssertionError(
            f"Expected return value from {type(t)}.__tensor_unflatten__() to have "
            f"stride equal to {outer_stride}, but got: {sub.stride()}"
        )

    return sub


def _correct_storage_aliasing(func, schema_info, args, outs):
    """
    Given: an OpOverload, a SchemaInfo (cached information from torchgen about schema),
    and the inputs/outputs to the OpOverload,
    this function checks to see if func is a view operator
    (by checking if any of the outputs in the op's schema
     are immutable aliases of inputs).
    If so, this function manually aliases the storage of the output tensor
    with its corresponding input tensor alias.
    It does this by unsafely overwriting the storage field of the output tensor
    to be the same storage as the input.
    """
    if not isinstance(func, torch._ops.OpOverload):
        raise AssertionError(f"func must be an OpOverload, got {type(func)}")
    if not isinstance(args, tuple):
        raise AssertionError(f"args must be a tuple, got {type(args)}")
    if not isinstance(outs, (list, tuple)):
        raise AssertionError(f"outs must be a list or tuple, got {type(outs)}")

    def alias_non_inplace_storage(arg, ret):
        # Subclasses that rely on this API for output aliasing are expected to
        # return wrapper tensor subclasses themselves, so the input and output
        # types should agree before we splice their storages together.
        arg_type = type(arg)
        ret_type = type(ret)
        if arg_type is not ret_type and (
            is_traceable_wrapper_subclass(arg) or is_traceable_wrapper_subclass(ret)
        ):
            ret_list = ret if isinstance(ret, list) else [ret]
            for r in ret_list:
                if type(arg) is not type(r):
                    raise AssertionError(
                        f"Called {str(func)} with input of type {type(arg)}\n"
                        f"and output of type {type(ret)}. But expected types to match."
                    )
        # Need to call a non-dispatcher helper, because we explicitly do *not*
        # want the subclass to intercept a set_() call; the storage swap should
        # happen directly on the wrapper.
        if isinstance(ret, list):
            for r in ret:
                torch._functionalize_unsafe_set(r, arg)
        else:
            if not isinstance(ret, torch.Tensor):
                raise AssertionError(f"expected torch.Tensor, got {type(ret)}")
            torch._functionalize_unsafe_set(ret, arg)

    for arg_idx, return_idx in schema_info.read_only_alias_match_indexes:
        alias_non_inplace_storage(args[arg_idx], outs[return_idx])


def _get_write_alias(x):
    alias_set = x.alias_set
    if not alias_set or not x.is_write:
        return None
    # torchscript allows for complicated alias sets, but our dispatcher ops only
    # really involve simple aliasing
    if len(alias_set) != 1:
        raise AssertionError("Expected alias_set to contain exactly one element")
    return next(iter(alias_set))


@dataclass
class AliasInfo:
    alias_set: set[str]
    is_write: bool
    name: str | None


@dataclass
class SchemaInfo:
    args: list[AliasInfo]
    outs: list[AliasInfo]
    is_inplace_view_op: bool
    outs_write_aliases: list[str] | None
    read_only_alias_match_indexes: list[tuple[int, int]]


@functools.cache
def get_alias_info(func) -> SchemaInfo:
    if func.namespace == "aten":
        # For aten ops, use torchgen (since torchscript parsing drops some alias
        # annotation information that we need).
        torchgen_schema_str = str(func._schema)
        if not torchgen_schema_str.startswith("aten::"):
            raise AssertionError(
                "Expected torchgen schema string to start with 'aten::'"
            )
        # remove the aten:: prefix, which torchgen does not expect
        torchgen_schema_str = torchgen_schema_str[6:]
        import re

        # the torchscript parser expands default list args (e.g. int[2]=0 becomes
        # int[2]=[0, 0]), which torchgen does not understand, so undo that here.
        torchgen_schema_str = re.sub(r"=\[[0, ]+\]", "=0", torchgen_schema_str)
        torchgen_schema_str = re.sub(r"=\[[1, ]+\]", "=1", torchgen_schema_str)
        torchgen_schema_str = re.sub(
            r"=\[(-?[0-9]+), (-?[0-9]+)\]", r"=[\1,\2]", torchgen_schema_str
        )
        torchgen_schema = torchgen.model.FunctionSchema.parse(torchgen_schema_str)
        arg_schemas = [
            AliasInfo(
                alias_set=(
                    set() if a.annotation is None else set(a.annotation.alias_set)
                ),
                is_write=a.annotation is not None and a.annotation.is_write,
                name=a.name,
            )
            for a in torchgen_schema.arguments.flat_all
        ]
        out_schemas = [
            AliasInfo(
                alias_set=(
                    set() if a.annotation is None else set(a.annotation.alias_set)
                ),
                is_write=a.annotation is not None and a.annotation.is_write,
                name=a.name,
            )
            for a in torchgen_schema.returns
        ]
    else:
        # For non-aten ops, torchgen is untested so we rely on torchscript schema parsing
        arg_schemas = [
            AliasInfo(
                alias_set=(
                    set() if a.alias_info is None else set(a.alias_info.before_set)
                ),
                is_write=a.alias_info is not None and a.alias_info.is_write,
                name=a.name,
            )
            for a in func._schema.arguments
        ]
        out_schemas = [
            AliasInfo(
                alias_set=(
                    set() if a.alias_info is None else set(a.alias_info.before_set)
                ),
                is_write=a.alias_info is not None and a.alias_info.is_write,
                name=a.name,
            )
            for a in func._schema.returns
        ]

    # Precompute the (input index, output index) pairs where the output is a
    # read-only alias of the input (i.e. func is a view op w.r.t. that pair).
    read_only_alias_match_indexes = []
    for arg_idx, schema_arg in enumerate(arg_schemas):
        for return_idx, schema_out in enumerate(out_schemas):
            is_read_only_alias_match = (
                schema_arg.alias_set & schema_out.alias_set
            ) and not schema_arg.is_write
            if is_read_only_alias_match:
                read_only_alias_match_indexes.append((arg_idx, return_idx))

    outs_write_aliases_list = [_get_write_alias(r) for r in out_schemas]
    non_nones = sum(a is not None for a in outs_write_aliases_list)
    if non_nones == 0:
        outs_write_aliases = None
    elif non_nones != len(outs_write_aliases_list):
        # simplifying assumption: we don't have any ops that return a mix of
        # mutable aliases and plain outputs, e.g. "-> (Tensor(a!), Tensor)"
        raise RuntimeError("Unsupported schema: " + str(func._schema))
    else:
        outs_write_aliases = cast(list[str], outs_write_aliases_list)

    schema_info = SchemaInfo(
        args=arg_schemas,
        outs=out_schemas,
        is_inplace_view_op=torch.Tag.inplace_view in func.tags,
        outs_write_aliases=outs_write_aliases,
        read_only_alias_match_indexes=read_only_alias_match_indexes,
    )
    return schema_info


def autograd_would_have_decomposed(
    func: torch._ops.OpOverload, flat_args: Sequence[torch.Tensor | object]
) -> bool:
    """
    Suppose that an operator has CompositeImplicitAutograd decomp registered.
    Would autograd have used this decomposition?  It will only use it if there
    isn't an explicit backend registration for the device as well.  This function
    will tell if this would have occurred.

    Why do we need to apply these decompositions later?  When inference mode is
    on, the autograd key is bypassed entirely, so a lower level mode cannot rely
    on the decomposition having been applied.  It's easy to accidentally never apply
    the decomposition, resulting in an operator showing up in a graph that
    is unexpected.

    Why do we need to AVOID applying the decomposition when autograd wouldn't
    have decomposed?  If autograd doesn't decompose, this means in eager mode
    we would have run the fused kernel.  It must be possible to trace this
    fused kernel directly into the graph for fidelity with eager (NB: a user
    has the option of then further decomposing at proxy tensor mode via
    decomposition table, but we must preserve it to proxy mode to have the
    choice.)

    Why does functionalization need to also perform the test here?  This is
    because some CompositeImplicitAutograd decompositions are not functional.
    If we are eventually going to decompose, we need to do this while we can
    still turn functionalization back on, so those decompositions get functionalized.
    So an early decomposition in functionalization may still be necessary.  Note that
    if proxy tensor decomposition process could turn functionalization back on, this
    wouldn't be necessary, and maybe that is a useful thing to do anyway because
    the decomposition table is user specified and a user could violate the functional
    decomp requirement with a bad decomp.  If this happened, then you could always
    pass through functionalization.
    """
    has_backend_registration = False
    for a in flat_args:
        if isinstance(a, torch.Tensor):
            backend_key = torch._C._parse_dispatch_key(
                torch._C._dispatch_key_for_device(a.device.type)
            )
            assert backend_key is not None
            # Only the first tensor argument's backend is consulted here: if there
            # is a direct backend kernel for it, autograd would not have used the
            # CompositeImplicitAutograd decomposition.
            has_backend_registration = torch._C._dispatch_has_kernel_for_dispatch_key(
                func.name(), backend_key
            )
            break

    return not has_backend_registration


def return_and_correct_aliasing(func, args, kwargs, out):
    """
    This function should be used by wrapper tensor ``__torch_dispatch__`` subclasses
    that would like to work with torch.compile. It ensures that the subclass
    properly implements the aliasing behavior of every op,
    which is needed for correctness in AOTAutograd.
    This function will handle:

        * When we see a view op, we will alias the storages of any
          input and output tensor subclasses

        * When we see an inplace or out= op, we will directly
          return the corresponding input tensor, instead of returning
          a (potentially) fresh output tensor.
    """

    # Caching here because torchgen parsing is definitely not fast, and this function
    # is called once for every op in the graph during functionalization.
    schema_info = get_alias_info(func)

    def get_arg_from_alias(output_alias, schema_info, args, kwargs):
        new_args, new_kwargs = torch.fx.operator_schemas.normalize_function(
            func, args=args, kwargs=kwargs
        )

        arg_indices = [
            i for i, a in enumerate(schema_info.args) if output_alias in a.alias_set
        ]
        # For any dispatcher op with an output alias, we expect it to map to exactly
        # one alias in the schema's input arguments.
        if len(arg_indices) != 1:
            raise AssertionError(
                "Expected exactly one argument index for the given output alias"
            )
        idx = arg_indices[0]
        arg_info = schema_info.args[idx]
        if arg_info.name is not None and arg_info.name in new_kwargs:
            return new_kwargs[arg_info.name]
        return new_args[idx]

    # Fix up the storages of any outs so that they point to the same storage as the
    # inputs, if func is a view op.
    _correct_storage_aliasing(
        func, schema_info, args, (out,) if not isinstance(out, tuple) else out
    )

    # For inplace_view ops in particular, we'll try hard to make sure that the wrapper
    # subclass's metadata is set correctly.
    if schema_info.is_inplace_view_op:
        # no_dispatch() to make sure that we secretly change the metadata on the
        # wrapper, but don't end up dispatching the op anywhere else.
        mutated_args = [
            x
            for i, x in enumerate(args)
            if _get_write_alias(schema_info.args[i]) is not None
        ]
        # Assumption: we have a very small number of inplace_view ops that follow a
        # strict schema: there is only a single argument that gets its metadata mutated.
        if len(mutated_args) != 1:
            raise AssertionError(
                "expected exactly one mutated arg for inplace_view ops"
            )
        # This check exists because we generally *do* want to update the metadata of
        # any wrapper subclasses, but FunctionalTensor is special: it overrides all
        # size/stride calls to plumb to the inner tensor, so mutating its metadata
        # here is unnecessary (and errors).
        from torch._subclasses.functional_tensor import FunctionalTensor

        if not isinstance(mutated_args[0], FunctionalTensor):
            with torch.utils._mode_utils.no_dispatch():
                # See Note: [Fake Tensor Dispatch Keys]: we're borrowing the way fake
                # tensor modifies dispatch key TLS.
                meta_in_tls = torch._C._meta_in_tls_dispatch_include()
                torch._C._set_meta_in_tls_dispatch_include(True)
                try:
                    func(*args, **kwargs)
                finally:
                    torch._C._set_meta_in_tls_dispatch_include(meta_in_tls)

    # Next: return inputs directly whenever an output is a mutable alias of an
    # input (e.g. add_(), or the out= variant of an op).
    schema_info_outs_write_aliases = schema_info.outs_write_aliases

    # simple case: none of our outputs are mutable aliases, so we can return the
    # output of the op as-is.
    if schema_info_outs_write_aliases is None:
        return out

    if len(schema_info.outs) == 1:
        return get_arg_from_alias(
            schema_info_outs_write_aliases[0], schema_info, args, kwargs
        )

    outs_to_return = type(out)(
        [
            get_arg_from_alias(write_alias, schema_info, args, kwargs)
            for write_alias in schema_info_outs_write_aliases
        ]
    )
    return outs_to_return