
    i                     >   d dl Z d dlZd dlZd dlmZmZ d dlmZ ddlm	Z	 ej                  j                  Z eej                  ej                  ej                  ej                  g      Z eej$                  j'                  dd             d k(  rdndZ eg ej*                  ej,                  ej.                  ej0                  ej2                  ej4                  ej6                  ej8                  ej:                  ej<                  ej>                  ej@                  ejB                  ejD                  ejF                  ejH                  ejJ                  ejL                  ejN                  ejP                  ejR                  ejT                  ejV                  ejX                  ejZ                  ej\                  ej^                  ej`                  ejb                  ejd                  ejf                        Z4 eejj                  ejl                  ejn                  ejp                  ejr                  ejt                  ejv                  ejx                  ejz                  g	      Z>e4e>z  Z?de@fd	ZAd
ej                  defdZCde@fdZDy)    N)get_device_tflopsget_gpu_dram_gbps)
OrderedSet   )flop_registryPYTORCH_NO_CUDA_MEMORY_CACHINGi   returnc                     | t         v rbt        |      dk(  sJ d| d|         |j                         }t        |      dz  }d}||z  }t         |    }	 |	|i |d|idz  }
|
|z  dz  }|S y	)
aw  
    Estimates the compute time of an aten operator.

    Args:
        func_packet: The operator overload packet.
        args: The arguments to the operator.
        kwargs: The keyword arguments to the operator.
        out: The output of the operator.
        out_dtypes: The output data types.

    Returns:
        float: The estimated compute time in nanoseconds.
    r   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r   lenpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_times               Y/var/www/html/engine/venv/lib/python3.12/site-packages/torch/utils/_runtime_estimation.pyget_compute_timer   R   s     m#:!# 	
0E+O	
#  *51D8%6'4$dBfBcBQF
"%99S@    tc                     | j                         j                         }t        j                  |t        z        t        z  }|S )z
    Calculates the memory consumption of a tensor.

    Args:
        t (torch.Tensor): The input tensor.

    Returns:
        int: The memory consumption of the tensor in bytes.
    )untyped_storagenbytesmathceil_PYTORCH_MIN_ALLOCATE)r   	num_bytesmem_consumeds      r   get_num_bytesr'   s   s;     !!#**,I99Y)>>?BWWLr   c                 v    t               }t        d | D              }t        d |D              }||z   }||z  }|S )aG  
    Estimates the memory transfer time of input and output tensors.

    Args:
        flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
        flat_outs (List[torch.Tensor]): The flat list of outputs.

    Returns:
        float: The estimated memory transfer time in nanoseconds.
    c              3   h   K   | ]*  }t        |t        j                        st        |       , y wN
isinstancetorchTensorr'   .0r   s     r   	<genexpr>z$get_transfer_time.<locals>.<genexpr>   s'      jELL6Qa   22c              3   h   K   | ]*  }t        |t        j                        st        |       , y wr*   r+   r/   s     r   r1   z$get_transfer_time.<locals>.<genexpr>   s'      z!U\\/Jar2   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_times          r   get_transfer_timer<      sZ     -. "2 J  "+ K ,M!$88Mr   )Er"   osr-   torch._inductor.utilsr   r   torch.utils._ordered_setr   flop_counterr   opsatenfloat16bfloat16float32float64_FLOAT_TYPESintenvirongetr$   
lift_freshr   	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r.   r'   r<    r   r   <module>rw      s    	  F / ' yy~~	 

?CDIDq 
     	  				 
 	  	  	

  	  	  	  	  	  	  	  	   	! " 	# $ 	% & 			' ( 			) * 	+ , 	- . 	/ 0 	1 2 	3 4 	5 6 	7 8 	9 : 	; < 	= > 	

? "	H 

		
 +%E BU\\ c e r   