
    iM              #       J   U d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	Z	ddl
mZ  ed      Z ed      Zi Zee	j                   j"                  ef   ed	<    eh d
      Zdededeeef   deeeef   geeef   f   fdZ	 d9ddddde	j0                  de	j0                  de	j0                  dee	j0                     dededede	j0                  fdZ edde      	 d9ddddde	j0                  de	j0                  de	j0                  dee	j0                     dededede	j0                  fd       Zdee   dedefdZd e	j0                  d!edede	j0                  fd"Zd#e	j0                  d$e	j0                  d%ed&edee   d'ede	j0                  fd(Zd%ed&eddfd)Z d#e	j0                  d$e	j0                  d%ed&edee   de	j0                  fd*Z!	 	 	 d:dddddd+dd,d#e	j0                  d$e	j0                  d-e	j0                  d.ee	j0                     d/ee	j0                     d0ee	j0                     d1ed2ed3ed'edee   d4ed5ee   de"e	j0                  e	j0                  e	j0                  e	j0                  f   fd6Z# ed7de#      	 	 	 d:dddddd+dd,d#e	j0                  d$e	j0                  d-e	j0                  d.ee	j0                     d/ee	j0                     d0ee	j0                     d1ed2ed3ed'edee   d4ed5ee   de"e	j0                  e	j0                  e	j0                  e	j0                  f   fd8       Z$y);zImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)OptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                 h     dt         t        t        f   dt         t        t        f   f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                     d }t        j                  j                  d d| d      |       }| t        t	        t	        t         j
                  j                        |      <   |j                         |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr
   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   s      N/var/www/html/engine/venv/lib/python3.12/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator'   s|    =/*==**WIQxj) + 

  	wwuyy~~w'GRS 	y)    )r   r   r	   )r   r   r   r$   s   ``` r#   _onnx_opr&   "   s0    
	R( 	Xb"f-= 	 r%   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr'   r(   r)   c                "    | j                         S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r*   r+   r,   r-   r'   r(   r)   s          r#   _rotary_embedding_23_fake_implr0   5   s     779r%   RotaryEmbedding   c                V   | j                   t              }d   d   t        j                  j	                         dk(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j	                         dk(  xr j	                         dk(  fd	       n@t        j                  j	                         d
k(  xr j	                         d
k(  fd       |dk(  rt        j
                  | d      } nG|d
k(  rBt        j                  |dk7  fd       d   }||z  }	||	g}
t        j                  | |
      } t        j                  t        | j                         dk(  d        | j                   d
   }	|dk(  r|	}| ddddddd|f   }| dddddd|df   }|dz        nt        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  d      t        j                  d      |r%|dddddddddf   }|dddddddddf   }nt        j                  |dd      \  }}|z  |z  z
  }|z  |z  z   }|rft        j                  |d      }t        j                  |d      }t        j                  ||fd      }t        j                  ||j                         }nt        j                  ||fd      }t        j                  ||fd      }|d
k(  rt        j                  |      S t        j
                  |d      S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                  "    d j                    S )Nz6position_ids must be 2D when provided. Received shape shape)r-   s   r#   <lambda>z%rotary_embedding_23.<locals>.<lambda>Z   s    L\M_M_L`a r%   c                  .    d  dj                   d    S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r7   )
batch_sizer-   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>^   s$    LZLXdeqewewxyezd{| r%   r   c                  .    d d j                   d    S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r;   r   r7   )r-   sequence_lengths   r#   r9   z%rotary_embedding_23.<locals>.<lambda>b   s=    QRaQbbno{  pB  pB  CD  pE  oF  G r%   c                  <    d j                    dj                    S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r7   r+   r,   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>f   &     ((1'88J9??J[] r%      c                  <    d j                    dj                    S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r@   r7   rA   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>l   rB   r%      )r   r5   r   rC   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r#   r9   z%rotary_embedding_23.<locals>.<lambda>y   s    abmano r%   c                       y)Nzx should be a 4D tensor by nowr   r   r%   r#   r9   z%rotary_embedding_23.<locals>.<lambda>       r%   c                  0    dj                    d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r7   )r<   cosr>   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   $    .+@FSbRccij r%   c                  0    dj                    d  d dS )Nzsin has shape rK   rL   rM   r7   )r<   r>   sins   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   rO   r%   c                  0    d j                   d    d dS )NzLast dimension of cos cache (rR   ') should match rotary_embedding_dim/2 ().r7   )rN   rotary_embedding_dim_halfs   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   .    /		">ef  fA  AC  D r%   c                  0    dj                   d    d  dS )NzLast dimension of sin cache (rR   rT   rU   r7   )rV   rQ   s   r#   r9   z%rotary_embedding_23.<locals>.<lambda>   rW   r%   dim)
r8   lenr   _checkrZ   permutereshape	unsqueezechunkcat)r*   r+   r,   r-   r'   r(   r)   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr<   rN   rG   rV   r>   rQ   s    ```               @@@@@@r#   rotary_embedding_23rn   C   s    ''K[!JQJ!"oO !#a	
 	q!Z/|	
 	q!_4 G	
 	MMOq 9Y]]_%9]	
 	MMOq 9Y]]_%9]	
 Q MM!\*	qNo	
 "!n9,	)YG	MM!Y'	LLQWW"$LM
I q (Aq////0HQ12334L 4 9 
 
 	LL		!
"Fsyy|'Fj 
LL		!
"Fsyy|'Fj 
LL		"22 	D 
LL		"22 	D //QC //QC
 aAqt!tm$aAqt!tm$Xqb1B 8cBhD8cBhD  tR(tR())T4Lb9==(..A99dD\r2YY,/R8FQ}}V[11 ==..r%   scalerd   c                 :    | | S dt        j                  |      z  S )z/Get the scale factor for attention computation.g      ?)mathsqrt)ro   rd   s     r#   _get_scale_factorrs      s     %5GC$))I2F,FGr%   tensorr<   c                     | j                   d   | j                   d   }}||z  }| j                  ||||      j                  dd      j                         S )z1Reshape 3D tensor to 4D for multi-head attention.r   r5   )r8   view	transpose
contiguous)rt   r<   r(   r>   rc   rd   s         r#   _reshape_3d_to_4dry      sP     $*<<?FLLO[Oy(IJIF	1a	r%   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk(  rt        | ||||      S t        j                  t        j                  | |j	                  dd                  S )z1Get QK output tensor based on the specified mode.r   r4   rR   )_compute_qk_output_for_mode_0r   
zeros_likematmulrw   )rz   r{   r|   r}   ro   r~   s         r#   _get_qk_output_for_aten_spdar      sO     !,q%';U
 	

 QB0C DEEr%   c                 H     t        j                   z  dk(   fd       y)z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )r}   r|   s   r#   r9   z-_validate_gqa_configuration.<locals>.<lambda>   s    - 344YZnYooxy r%   N)r   r\   )r|   r}   s   ``r#   _validate_gqa_configurationr      s"     
LL22a7yr%   c                     |}||k7  r||z  }|j                  |d      }t        || j                  d         }t        j                  |      }| |z  }	||z  }
t        j                  |	|
j                  dd            S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r   rY   rC   r4   rR   )repeat_interleavers   r8   rq   rr   r   r   rw   )rz   r{   r|   r}   ro   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r#   r   r      s     H22+/CC&&}!&<$UAGGAJ7L<(J:~H*$H<<("4"4R"<==r%           )	is_causalkv_num_headsq_num_headsr~   ro   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                   | j                   d   }t        | j                         dk(  r| j                   d   }| j                   }|4|||j                   d   |j                   d   z   |j                   d   |z  f}n#|||j                   d   |j                   d   |z  f}|}||||d   f}n| j                   d   }| j                   }|K|j                   d   |j                   d   |j                   d   |j                   d   z   |j                   d   f}n|j                   }|}| j                   d   | j                   d   | j                   d   |d   f}t        j                  || j                  | j
                        }t        j                  ||j                  |j
                        }t        j                  ||j                  |j
                        }t        j                  || j                  | j
                        }||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   rC   r   r5   dtypedevice)r8   r[   r   emptyr   r   )rz   r{   r   r   r   r   r   r   r   r~   ro   r   r   r<   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaperm   present_keypresent_value	qk_outputs                          r#   _attention_23_fake_implr     s   " J 177|qGGAJww q!AGGAJ.
l*	! 

l*	! 0 a 	
 GGAJww 

q!AGGAJ.
	! !"/ GGAJGGAJGGAJa 	
 [[QWWQXXFF++/qwwqxxPKKK 3177188TMO177188LI;y88r%   	Attentionc                	   d\  }}}t        | j                        }| j                  d   }t        | j                        dk(  rWt        j                  |dk7  xr |dk7  d        | j                  d   }t	        | ||      } t	        |||      }t	        |||      }t        j                  t        | j                        dk(  xr2 t        |j                        dk(  xr t        |j                        dk(  d        | j                  |   }t        |
|      }
|t        j                  ||g|	      n|j                         }|t        j                  ||g|	      n|j                         }||}}| j                  |   }|j                  |   }| j                  |   }|j                  |   }|d
k(  xr0 |	dk(  xr) |du xr# |du xs |j                  t        j                  k(  }t        ||       |rOt        j                  j                  j                  | |||d
||
t        ||k7              }t        | ||||
|	      }nY||k7  r+||z  }|j                  ||	      }|j                  ||	      }t        j                   ||| j                  | j"                        }|ryt        j                  |du d        t        j$                  t        j&                  ||t        j                  | j"                              }|j)                  | t+        d            }|?|j                  t        j                  k(  r|j)                  | t+        d            }n||z   }t        |
| j                  d         }t-        j.                  |      } | | z  }!|| z  }"t        j0                  |!|"j3                  dd            }#|#}|#|z   }$|	dk(  r|$}|d
kD  r|t        j4                  |$|z        z  }$|	dk(  r|$}|w|t6        v rW|$j                  }%|$j9                  t:        j<                  |         }$t        j>                  |$d	      }&|&j9                  |%      }&n/t        j>                  |$d	      }&nt        j>                  |$d	      }&|	dk(  r|&}t        j0                  |&|      }|dk(  r1|j3                  dd      jA                         jC                  ||d      }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r   r5   rC   r   rC   c                       y)Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  rI   r%   r   rE   c                       y)Nz'Q, K, and V should be 4D tensors by nowr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  rI   r%   NrY   r   )r   	dropout_pr   ro   
enable_gqar   c                       y)Nz'Cannot use both is_causal and attn_maskr   r   r%   r#   r9   zattention_23.<locals>.<lambda>  rI   r%   z-infr4   rR   r5   )"r[   r8   r   r\   ry   rs   ra   r/   r   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatrq   rr   r   rw   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrx   rv   )'rz   r{   r   r   r   r   r   r   r   r~   ro   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr<   r   q_head_sizer   r   r|   r}   kv_sequence_lengthcan_use_sdparm   r   r   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs'                                          r#   attention_23r   c  s   & ,3(L, !''lOJ 177|q12!2Q	
 GGAJa[9a\:a\:	LLAGGEc!''la/ECLA4E9 ''(#Ke[1E
  			8Q-\2WWY  ! 			:q/|4WWY  qA '',/77<0-. 	3 	A!Q&	A%	A $?)//UZZ"?	    35IJ$$AA#';; B 
 1 !
	 "66/3GGM##M|#DA##M|#DA KK1
	
 LLT!#T  **

%&**88	K "--{lE&MJI  %**,%119*eFmL	 &	1	 )
; YY|,
z>z> !<<(2D2DR2LM %	 ()3 A%$I S="UZZw0F%GGL A%$I ( $QQ!-!3!3+#==>OP  #]]<R@
']]>:
"]]<R@
|<J A%"I j!, ! Q"--/44ZARTVW 	 ;y88r%   )N)NNN)%__doc__rq   collections.abcr   typingr   r   typing_extensionsr   r   torch.onnx.opsr   r   r	   r
   dict_ops
OpOverload__annotations__	frozensetr   strintr&   Tensorr   r0   rn   r   rs   ry   r   r   r   tupler   r   r   r%   r#   <module>r      s,    $ $ '  * t_T] AC UZZ22H<= B091 -!$19"b&1AxB (2r6"223. ,0	  !|||| || 5<<(	    \\ 

R!?@
 ,0	D/  !D/||D/||D/ ||D/ 5<<(	D/ D/ D/ D/ \\D/ AD/NHXe_ H H H

LL
&)
69

\\
F||F||F F 	F
 E?F F \\F$47	>||>||> > 	>
 E?> \\>4 )-'+)-Q9 !"!'+Q9||Q9||Q9 ||Q9 %	Q9
 u||$Q9 &Q9 Q9 Q9 Q9 Q9 E?Q9 Q9  }Q9 5<<u||U\\ABQ9h 
+r23
 )-'+)-9 !"!'+9||9||9 ||9 %	9
 u||$9 &9 9 9 9 9 E?9 9  }9 5<<u||U\\AB9 49r%   