
    i|                     2    d dl Z d dlmZ dgZ G d d      Zy)    N)dtype	GPULimitsc                       e Zd ZdZdej
                  fdZdedefdZ	dedefdZ
	 ddededefd	Zdefd
ZdefdZy)r   a  Utility class that provides the theoretical limits of Nvidia GPU devices. The
    limits don't take into account thermal throttling (assume that the GPU run at its
    peak rated frequency). This is because user hardware configuration may influence
    power behavior.
    target_devicec                     t         j                  j                  |      | _        t	        | j                  j
                   | j                  j                         | _        y )N)torchcudaget_device_propertiesdevice_propertiesintmajorminorcompute_capability)selfr   s     S/var/www/html/engine/venv/lib/python3.12/site-packages/torch/cuda/_device_limits.py__init__zGPULimits.__init__   sP     "'!A!A-!P #&%%++,T-C-C-I-I,JK#
    	data_typereturnc           
      8   dddddddddd	}d}|t         j                  u rd| j                   }nF|t         j                  u rd| j                   }n$|t         j                  u rd	| j                   }nd
}||vrt        d| j                   d| d      ||   S )N   @          )	fp16_80fp32_80fp64_80fp16_90fp32_90fp64_90fp16_100fp32_100fp64_100 fp16_fp32_fp64_unknownNo data for sm_ and .)r   float16r   float32float64RuntimeErrorr   r   hardcoded_device_valuesdict_keys       r   #get_fma_per_cycle_per_sm_cuda_coresz-GPULimits.get_fma_per_cycle_per_sm_cuda_cores   s     #
 %t6678H%--'t6678H%--'t6678H H22!$"9"9!:%	{!L  'x00r   c                    dddddddddddddd}d	}|t         j                  u rd
| j                   }n|t         j                  u rd
| j                   }nh|t         j                  u rd| j                   }nF|t         j
                  u rd| j                   }n$|t         j                  u rd| j                   }nd}||vrt        d| j                   d| d      ||   S )Ni   i   i   r   i   r   i    )int8_80r   r   r   int8_90fp8_90r   r   r    int8_100fp8_100r!   r"   r$   r%   r&   int8_r'   r(   r)   r*   r+   )r   r,   r   bfloat16r-   int8r.   r/   r0   s       r   %get_fma_per_cycle_per_sm_tensor_coresz/GPULimits.get_fma_per_cycle_per_sm_tensor_coresA   s    !#
$ %t6678H%..(t6678H%--'t6678H%**$t6678H%--'t6678H H22!$"9"9!:%	{!L  'x00r   use_tensor_coresc                     | j                   j                  }| j                   j                  }d}|r| j                  |      }n| j	                  |      }||z  dz  |z  dz  }|S )Nr      g    eA)r   multi_processor_count
clock_rater=   r3   )r   r   r>   num_smsrB   fma_per_cycletflops_per_seconds          r   get_tflops_per_secondzGPULimits.get_tflops_per_secondj   sp     ((>>++66
 FFyQM DDYOM $m3a7*DsJ  r   c                     t        d| j                  j                  z  dz        }| j                  j                  dz  }||z  dz  }|S )Nr@        )r   r   memory_bus_widthmemory_clock_rate)r   bus_bytes_per_cyclemem_clock_rate_Hzbytes_per_seconds       r   get_memory_bandwidth_Bpsz"GPULimits.get_memory_bandwidth_Bps{   sT     "!d&<&<&M&M"MPQ"QR 22DDtK.1BBQFr   c                 x    | j                   j                  }d}||z  }|| j                   j                  z  dz  }|S )Nr   rI   )r   rA   rB   )r   rC   bytes_per_cycle_per_smbytes_per_cycle_per_devicerN   s        r   get_shared_memory_bandwidth_Bpsz)GPULimits.get_shared_memory_bandwidth_Bps   sM     ((>>!$%,/E%E"&)?)?)J)JJTQ 	  r   N)T)__name__
__module____qualname____doc__r   devicer   r   r   r3   r=   boolfloatrF   rO   rS    r   r   r   r      sy    	
ell 	
 1U 1s 1D'1u '1 '1T :>!!26!	!" #  	  	 r   )r   torch._Cr   __all__r   r[   r   r   <module>r^      s       -D  D r   