
    i
9                    Z   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ d dlmZ d ad ad ag aded<   g Zded<   d a d a!ej                   G d d             Z"g a#ded<   d a$d a%d a&d a'd a(d:dZ)e G d d             Z*d;dZ+ G d d      Z,i Z-ded<   e G d d             Z.e.j_                  dg d       e.j_                  d g d!       e.j_                  d"g d#       e.j_                  d$g d%       e.j_                  d&g d'       d<d(Z0d=d)Z1d>d*Z2	 	 	 	 	 	 d?d+Z3d@d,Z4d=d-Z5dAd.Z6dBd/Z7	 	 	 	 	 	 dCd0Z8	 	 	 	 	 	 	 	 dDd1Z9d:d2Z:dEd3Z;e	dFd4       Z<dGd5Z=dHd6Z>e.j_                  d7g d8       	 	 	 	 	 	 	 	 	 	 dId9Z?y)J    )annotationsN)	dataclass)	lru_cache)OptionalTYPE_CHECKINGUnion)config)get_benchmark_name)
OrderedSet)Callable)Config)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  &    e Zd ZU ded<   dZded<   y)CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        Q/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_inductor/metrics.pyr   r   +   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsc                     da dadat        j	                          t
        j	                          dadat        j	                          da	da
dadaday )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countcodegen_mix_order_reductionr   r   r   resetr*   @   sd     %&"%++- !01- "#r   c                  N    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   y	)
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r    r#   r$   r!   r&   N)r   r   r   __doc__r   r   r   r   r,   r,   \   s-    
  $''/22r   r,   c                 n    t        j                  t              D  cg c]  } | j                   c} S c c} w N)dataclassesfieldsr,   name)fields    r   get_metric_fieldsr4   k   s&    $/$6$67J$KL5EJJLLLs   2c                  2    e Zd ZdZddZddZedd       Zy)	CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    c                d    i | _         t               D ]  }t               |   | j                   |<    y r/   )cached_metricsr4   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__v   s3     ') 	<F*1)F*;D'	<r   c                z    i }t               D ]"  }t               |   | j                  |   z
  ||<   $ t        di |S )Nr   )r4   r9   r8   r,   )r:   delta_metricsr;   s      r   
get_deltaszCachedMetricsHelper.get_deltas{   sL    ') 	TF$+If$58K8KF8S$SM&!	T #3]33r   c                `    t               D ]!  }t               |xx   t        | |      z  cc<   # y r/   )r4   r9   getattr)deltar;   s     r   apply_deltasz CachedMetricsHelper.apply_deltas   s-    ') 	8FIf!77	8r   NreturnNone)rE   r,   )rB   r,   rE   rF   )r   r   r   r-   r<   r?   staticmethodrC   r   r   r   r6   r6   o   s%    <
4 8 8r   r6   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  j    e Zd ZU ded<   ded<   dZded<   	 	 	 	 ddZdd	Zdd
ZddZe	dd       Z
y)MetricTablestr
table_name	list[str]column_namesr   r   num_rows_addedc                F   | j                   t               vry  |       }t        | j                        t        |      k(  s(J t        | j                         dt        |              t	        | j                        t	        |j                               k(  s6J t	        | j                         dt	        |j                                       t               }|g| j                  D cg c]  }||   	 c}z   }t        d |D              sJ | j                  |       y c c}w )Nz v.s. c           	   3  \   K   | ]$  }t        |t        t        t        d       f       & y wr/   )
isinstancerK   floattype).0is     r   	<genexpr>z&MetricTable.add_row.<locals>.<genexpr>   s"     Hq:a#ud4j!9:Hs   *,)	rL   enabled_metric_tableslenrN   r   keysr
   all
_write_row)r:   row_fnrow_dictbncolumn_namerows         r   add_rowzMetricTable.add_row   s    ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQh{+QQHCHHHH Rs   'Dc                "    d| j                    dS )Nmetric_table_z.csv)rL   )r:   s    r   output_filenamezMetricTable.output_filename   s    t/t44r   c                    | j                         }t        |d      5 }t        j                  |d      }|j	                  dg| j
                  z          d d d        y # 1 sw Y   y xY w)Nw
lineterminator
model_name)re   opencsvwriterwriterowrN   )r:   filenamefdrn   s       r   write_headerzMetricTable.write_header   s^    '')(C  	@BZZ48FOO\NT->->>?	@ 	@ 	@s   7AA&c                   | j                         }| j                  dk(  r/t        j                  j	                  |      s| j                          | xj                  dz  c_        t        |      D ]&  \  }}t        |t              r|d}n|d}n|}|||<   ( t        |d      5 }t        j                  |d      }|j                  |       d d d        y # 1 sw Y   y xY w)Nr      z.6f arh   ri   )re   rO   ospathexistsrr   	enumeraterR   rS   rl   rm   rn   ro   )r:   ra   rp   idxorig_valnew_valrq   rn   s           r   r\   zMetricTable._write_row   s    '')!#BGGNN8,Dq &s^ 	MC(E*%cN!"CH	 (C  	!BZZ48FOOC 	! 	! 	!s   $)CCc                .    t        | |      }|t        | <   y r/   )rJ   rH   )r2   rN   tables      r   register_tablezMetricTable.register_table   s    D,/). &r   N)r]   z4Callable[[], dict[str, Optional[Union[str, float]]]]rE   rF   )rE   rK   rD   )ra   zlist[str | float | None]rE   rF   )r2   rK   rN   rM   rE   rF   )r   r   r   r   rO   rb   re   rr   r\   rG   r   r   r   r   rJ   rJ      sP    ONCJ	(5@!( / /r   rJ   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    ddl m} ddlm} |j	                  |       } ||      }t        j                  |j                  j                        S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rt   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    t        | j                               S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rY   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   -  s     $//122r   c                n    |dk(  ry t        j                  d|       }|sJ d       |j                  d      S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rt   researchgroup)r   r   ms      r   _parse_size_hintsr   4  s;    )#
		/1CDA###1771:r   c                l    | dvry t        j                  d|      }|sJ d       |j                  d      S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rt   r   )r   r   r   s      r   _parse_reduction_hintr   =  s>     CC
		9;MNA???1771:r   c                $    | j                  |      S r/   )count)r   patterns     r   _count_patternr   G  s     &&w//r   c                    | j                         d   }|j                  d      sJ |j                  d      }|j                  d      }||dz   | }|j                  d      }t	        |      S )Nr   def (z):rt   ,)r   
startswithindexsplitrY   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   K  sm    $//1!4Hv&&&s#InnT"G	A0HNN3Eu:r   c                .    | j                  d      }| |d S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   U  s!     $$V,I)*%%r   c                n    t        j                  | d|       }|rt        |j                  d            S y )Nz
 = ([\d]+)rt   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   ]  s3    
		^$K02GHA1771:r   c                j    t        j                  d|       }|rt        |j                  d            S 	 y)z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rt   N)r   r   rS   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   e  s5     			1>BAQWWQZ  	 r   c           
         	 ddl m}  ||      t        |      t        |      	t	        |      t              t              t        d      j                   	fd       y)z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rt   )"get_kernel_category_by_source_coder   c                      t        d      t        d      t        d      t        d      t              t        d      t        d      t        d      t               dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>  s    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
 r   N)	r   r   r   r   r   r   r   get_metric_tablerb   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   z  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                     t         j                         D ]f  \  } }| t               v s|j                         }t        j
                  j                  |      rt	        j                  |       |j                          h y)z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rH   itemsrX   re   rw   rx   ry   unlinkrr   )r2   r   rp   s      r   purge_old_log_filesr     sb     0557 !e(**,,.Hww~~h'		(# !r   c                 4    t        t        j                        S r/   )enabled_metric_tables_implr	   rX   r   r   r   rX   rX     s    %f&B&BCCr   c                    t               }| j                  d      D ]9  }|j                         }|s|t        v sJ d| d       |j	                  |       ; |S )Nr   zMetric table name z is not registered)r   r   striprH   add)
config_strenabledr2   s      r   r   r     sl    )|G  % zz|// 	
 &89	
/ 	D Nr   c                    | t               v S r/   )rX   r2   s    r   is_metric_table_enabledr     s    (***r   c                :    | t         v sJ d|  d       t         |    S )NzMetric table z is not defined)rH   r   s    r   r   r     s*    ++R}TF/-RR+#D))r   kernel_autotuner   r   triton_config
latency_msc                L     t        d      j                   fd       y )Nr   c                 $    t               dS )Nr   )rK   )r	   r   r   latencys   r   r   z,log_kernel_autotune_result.<locals>.<lambda>  s    && [!	
 r   )r   rb   )r   r   r	   r   s   ````r   log_kernel_autotune_resultr     s     &'//	
r   rD   )rE   rM   )r   rK   rE   rK   )r   rK   rE   r   )r   rK   r   rK   rE   Optional[str])r   rK   r   rK   rE   r  )r   rK   r   rK   rE   r   )r   rK   rE   rK   )r   rK   r   rK   rE   zOptional[int])r   rK   r   rK   rE   zOptional[float])r   rK   r   rK   r   rK   rE   rF   )rE   OrderedSet[str])r   rK   rE   r  )r2   rK   rE   bool)r2   rK   rE   rJ   )
r   rK   r   rK   r	   r   r   rS   rE   rF   )@
__future__r   rm   r0   r   rw   r   r   	functoolsr   typingr   r   r   torch._inductorr	   torch._inductor.utilsr
   torch.utils._ordered_setr   collections.abcr   %torch._inductor.runtime.triton_compatr   torch._inductor.schedulerr   r   r    r!   r   r   r   r#   r$   r   r   r%   r&   r'   r(   r)   r*   r,   r4   r6   rH   rJ   r   r   r   r   r   r   r   r   r   r   r   r   rX   r   r   r   r   r   r   r   <module>r     s   " 
   	 	 !  1 1 " 4 / (<;  !"       8:4 9     ! ! ! CE !#? D ,- )    $8 3 3 3M8 82 46 0 5 :/ :/ :/z         "   -	   6+ 3.10&*-*))#&)<?)	)X!D 
 
+*
   

#&
06
AF
	
r   