
    ie                    !   U d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Z
d dlZ
d dlmZ d dlmZmZmZ erd dlmZ e j(                  j+                  dd      dk(  Zd	Zd
ee   fdZd
ee   fdZd
ee   fdZd
ee   fdZd
ee   fdZd
efdZd
efdZe j(                  j+                  dd      dk(  Z d	Z!dZ"d	Z# e       rdndZ$dZ%e&e'd<    edddd      Z(ee'd<   d	Z)ee'd<    e       Z*ee   e'd<    e       Z+ee   e'd<    edd d!      Z,ee'd"<   dZ-ee'd#<    e       Z.ee   e'd$<    e       Z/ee   e'd%<    ed&'      Z0ee'd(<   d	Z1ee'd)<   i Z2e3e4e4f   e'd*<   dZ5ee&   e'd+<   d,Z6ed-   e'd.<   e j(                  j+                  d/d0      dk(  Z7ee'd1<   e j(                  j+                  d2d0      dk(  Z8ee'd3<   e j(                  j+                  d4d0      dk(  Z9ee'd5<    e        Z:ee'd6<   e j(                  j+                  d7d      dk(  Z;d	Z<dZ=e j(                  j+                  d8d      dk(  Z>e j(                  j+                  d9      dk(  Z?e j(                  j+                  d:      dk(  Z@e j(                  j+                  d;d      dk(  ZAe j(                  j+                  d< e       rd0nd      dk(  ZBdZCdZDdZEe j(                  j+                  d=d0      dk(  ZFe j(                  j+                  d>      dk(  ZGe j(                  j+                  d?d@      ZHedA   e'dB<   dZIdZJ e       ZKd	ZLdZMd	ZNdZOe
j                  j                  j                  e'dC<   dZSe
j                  j                  j                  e'dD<   dZTe
j                  j                  j                  e'dE<   dZVe
j                  j                  j                  e'dF<   dZWe
j                  j                  j                  e'dG<   dZXeee
j                  j                  j                  gdf      e'dH<   dZ\eee]dI   ge]dI   f      e'dJ<   dZ^eee]dI   ge]dI   f      e'dK<   dZ_d	Z`d	Zad	ZbdZci Zde3e4e3e4ef   f   e'dL<   i Zee3e4e3e4ef   f   e'dM<   dZfe j(                  j+                  dNd      dk(  Zgd	ZhdZid	dOdPddQZje3e4ef   e'dR<   dSZkedT   e'dU<   d	Zlg Zme]e	e4ee]dI   ge]dI   f   f      e'dV<   dZnee&   e'dW<   dZod	Zpd Zqe&e'dX<   dYZredZ   e'd[<   dZseee&ge&f      e'd\<   dYZted]   e'd^<   dZueee&ge&f      e'd_<   dYZved]   e'd`<   dZweee&ge&f      e'da<   dbZxd	Zyee'dc<   ddZzdeZ{dfZ| eddgdhi      Z}ee'dj<   e j(                  j+                  dk      dk(  Z~e j(                  j+                  dl      dk(  Ze j(                  j+                  dm      dk(  Ze j(                  j+                  dn      dk(  ZdoZee&   e'dp<   e j(                  j+                  dqd      dk(  Ze j(                  j+                  drd      dk(  Ze j(                  j+                  dsd0      dk(  Ze j(                  j+                  dt e       sdnd0      dk(  Zee'du<   g Ze]e4   e'dv<   d	Zee'dw<    edxdyd	!      Zee'dz<   g Ze]e&   e'd{<   e j(                  j+                  d|d}      j                         Ze j(                  j+                  d~d      j                         Ze j(                  j+                  dd      j                         Zed   e'd<   e j(                  j+                  dd      j                         Zed   e'd<   d	Zee'd<   d	Zee'd<   dZee'd<   dZee'd<   dZee'd<   dZee'd<   e j(                  j+                  dd0      dk(  Zee'd<   d	ZdZd	Ze j(                  j+                  d      dk(  Ze j(                  j+                  d      dk(  ZdZdZdZe j(                  j+                  d      dk(  Z e&e j(                  j+                  dd            Z ee j(                  j+                  dd            Ze j(                  j+                  d      dk(  Ze j(                  j+                  d      dk(  Z e&e j(                  j+                  dd            Ze j(                  j+                  dd      Ze j(                  j+                  dd      Ze j(                  j+                  dd0      dk(  Zde4d
efdZde4d
efdZde4d
efdZe j(                  j+                  dd      Ze
jZ                  j\                  sdnd0Ze j(                  j+                  de      dk(  Ze j(                  j+                  dd0      dk(  Ze j(                  j+                  dd      dk(  Ze j(                  j+                  d      dk(  ZdZdZdZdZee&   e'd<   d	Zd	ZdZe j(                  j+                  d      dk(  ZdZeeg df      e'd<   d	Ze j(                  j+                  d      dk(  Zee'd<   e j(                  j+                  d      dk(  Zee'd<   e j(                  j+                  dd      Ze j(                  j+                  d e       rd0nd      dk(  Zee'd<   dZee'd<   doZe j(                  j+                  dd      dk(  ZdPZdZdZdZee&   e'd<   dZd	ZdZd	Zd	Z e j                  dd      dk(  Z e j                  d      dk(  Z e&e j(                  j+                  dd             Ze j(                  j+                  dd0      dk(  ZdZd	ZdZd	Zd	Zd	ZdPZdPZdZdZdZd	Ze j(                  j+                  dd0      dk(  Ze j(                  j+                  dd0      dk(  Zde
j                  v xs de
j                  v Z e       xs eZe j(                  j+                  dd      dk(  ZdZee4   e'd<   dZee4   e'd<   d
e4fdǄZ e       Ze4e'd<   dZe&e'd<    eddd!      Zee'd<    edd	ϫ      Zee'd<   d	ZdeZddgZe]e	ed   e4f      e'd<   d	Zee'd<    G dք d׫      Z G d؄ d٫      Zd
efdڄZd
e&fdۄZ e       rdn e       Zee&   e'd<    eddd!      Zee'd<    ed      Ze&e'd<    e       Zee'd<    eddd	!      Zee'd<   e j(                  j+                  dd0      dk(  Zee'd<   ee4   e'd<    e       rk	 d dlmZ erM ej                  e j                  j                  ej                  de j                        d            Zn ej                  d      ZndZdoZe j(                  j+                  dd      dk(  Ze j(                  j+                  dd      dk(  Zd	Zd	Z	dZ
d	ZdZdZd	ZdZd	Zee'd<   e j(                  j+                  dd0      dk(  Zd	Zd	Zd	Zd	Ze j(                  j+                  dd      Zedk7  Zedk(  rdneZe j(                  j+                  dd      Zee4   e'd<   e j(                  j+                  d      dk(  Zd	Ze j(                  j+                  dd0      dk(  Zee'd<   d	Zee'd<   d	Zee'd<   d	Zee'd<   d	Z ee'd<   d	Z!ee'd<   d	Z"ee'd<   d	Z#ee'd<   d	Z$ee'd <   e j(                  j+                  dd0      dk(  Z%e j(                  j+                  dd0      dk(  Z&ee'd<   dZ'ee'd<   i Z(e3e4e3e4ef   f   e'd<    e&e j(                  j+                  dd            Z)e&e'd<   d	Z*ee'd	<    ed
d	ϫ      Z+ee'd<    eddϫ      Z,ee'd<   d
ee4   fdZ- eddϫ      Z.e4e'd<    G d d      Z/ G d d      Z0 G d d      Z1 G d d      Z2 G d d      Z3 G d d      Z4dZ5ed   e'd<   dZ6ed   e'd <   dZ7ed   e'd!<    G d" d#      Z8 G d$ d%      Z9g d&Z:e]e4   e'd'<   g d(Z;e]e4   e'd)<   g Z<e]ee
jz                  e
jz                  e
jz                  gdf      e'd*<    e j                  d+d      dk(  Z> G d, d-      Z? G d. d/      Z@erd d0lA  eej                  eC          y# eef$ r dZY w xY w(1      N)Callable)AnyLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_module)InductorChoicesTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      t        d      S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        P/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      t         j                  j                  d      dk(  ryt         j                  j                  d      dk(  ryy )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr      s7    	zz~~01S8	zz~~01S8r   c                      t        d      S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr"      r   r   c                      t        d      S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr%   #   s    IJJr   c                  :    t        dt               sd      S d       S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r	   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr(   '   s&    9K %) r   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r$t        j
                  j                  d      }|| k  S y)N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r	   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr2   .   s^    #$ /2::=zz~~FG3NN	''<<;
 666 r   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r&d}t        j
                  j                  |      }|| k  S y)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr,   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer1   s      r   prologue_fusion_enabledr7   =   s\    %&"&"**4zz~~=>#EE	<''<<WE888r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_ranki  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)r=   r?   r@   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rO   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrappercpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerb   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r@   tritonatenrx   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyrc   )rc   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinator)rc   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinatorbucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinatorr@   !runtime_estimations_mms_benchmarki,     g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r@   r?   r=   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMMTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitioncustom_should_partition_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passesuse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mm'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 2    t        |       xs t        |       S )N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &A*;D*AAr   c                 l    | t         j                  j                  j                  j	                  d      v S N,)r-   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r     s(    5??))??EEcJJJr   c                 l    | t         j                  j                  j                  j	                  d      v S r   )r-   r   r   autoheuristic_user   r   s    r   r   r     s(    5??));;AA#FFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   %TORCHINDUCTOR_EMULATE_PRECISION_CASTS'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  r    dt         j                  v rt         j                  d   } nd} | dv s
J d|         | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr   k  sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSEr?   r@   log_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   *    e Zd ZU dZeed<   dZeed<   y)_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r     s    K/9'9r   r   c                   "   e Zd ZU dZdZeed<   dZee   ed<   dZ	ee   ed<   dZ
ee   ed<   dZee   ed<   dZeeej"                  j$                  gee   f      ed	<   d
Zed   ed<   dZee   ed<   dZee   ed<   dZee   ed<   dZee   ed<   y)aten_distributed_optimizationszDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)r  	benchmarkcollective_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance)r  r  r  __doc__r	  r  r  r
  r   r  r  r  r  floatr  r   r-   fxNoder  r   r  r  r  r  r   r   r   r  r    s    N ',t+ ,0(4./ +/$. ,08C=/15x5 	 x%((--(5/1Q(RS  @L'";<K /3HUO215x5 )-huo, (,x}+r   r  c                  R    d} d}t         j                  j                  |      }| |k\  S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rr   z0pytorch/inductor:enable_parallel_compile_version)r-   r.   r/   )ENABLE_PARALLEL_COMPILE_VERSIONr6   r1   s      r   #parallel_compile_enabled_internallyr    s0     '(#@G##88AG*g55r   c                  &   ddl } | j                  t              }dt        j                  v r0t        t        j                  d         }|j                  d|       |S t        j                  dk(  rd}|j                  d       |S t               rt               sd}|j                  d       |S t        t        d	      rt        t        j                  d            nt        j                         }|sJ t        d
|      }|j                  d|       |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rr   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr  r   r   r  infosysplatformr	   r  hasattrlenr   	cpu_countmin)r"  logcompile_threadsr)  s       r   decide_compile_threadsr-    s      

H
%C&"**4bjj)HIJ4oF" ! 
	 56  
@B56  r./ $$Q'( 	
 yb),,o>r   r,  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )r@   quiesce_async_compile_timeuse_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotPALLAS_TARGET_TPU_debug_cpu_to_tpu_pallas!PALLAS_TAKE_FIRST_JAX_DEVICE_ONLY!pallas_take_first_jax_device_onlyc                      d } t               rGt        j                  j                  dd       }t        j                  j                  dd      }|d| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r9   )r	   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr`    sL    G{

':DAjjnn[#6$Ek]SGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                      e Zd ZU dZdZej                  j                  dd      dk(  Zej                  j                  dd      dk(  Z	dZ
ee   ed<    eej                  j                  d	d
            Zdej                  j                  dej                   dk(  rdnd      fZedef   ed<   ej                  j                  dd      dk(  Zej                  j                  dd      dk(  ZdZee   ed<   dZee   ed<    ed      Zee   ed<   dZed   ed<    eej                  j                  dd            Zej                  j                  dd      dk(  Zej                  j                  dd      dk(  Zej                  j                  dd      Z ej                  j                  dd      dk(  Z!d Z" eej                  j                  d!d            Z#ej                  j                  d"d      Z$ej                  j                  d#d      Z%d$Z&d Z'ej                  j                  d%d      dk(  Z(d Z)ej                  j                  d&d      dk(  Z*ej                  j                  d'd      dk(  Z+y)(cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr-   rs  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAY),r  r  r  r  threadsr   r   r   no_redundant_loopsdynamic_threadsrh  r   r  r  min_chunk_sizer%  r&  rm  tuplestrenable_kernel_profileweight_prepackrp  rq  r   rr  r  rv  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayr   r   r   rd  rd    s    G
 	

=sCsJ  jjnn%H#NRUUO!GXc]!(JERSN 	


u3<<8+CiOCtSy	  	

@#F#M 
 ZZ^^$FLPSSN
 37 (3-637!8C=7 "22L!MJM 	 wHI 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !
 	

=sCsJ 
  % 	

>DK  	

FLPSS  r   rd  c                      e Zd ZU dZej
                  j                  d      dk(  ZdZdZ	dZ
eeeeeedf   f         ed<   dZdZ e        Zd	Zd
Zee   ed<   dZdZ edd      Zeed<   dZeed<   dZdZdZdZdZ ej
                  j                  d e       sdnd      dk(  Z!eed<   dZ"ee   ed<   dZ#eed<   dZ$dZ%dZ&ee   ed<   dZ'eed<   dZ(eed<   dZ)eed<   dZ*dZ+ej
                  j                  dd      dk(  Z,ej
                  j                  dd      dk(  Z-dZ.e/d   ed<   ej
                  j                  dd      dk(  Z0ej
                  j                  dd      dk(  Z1dZ2 eej
                  j                  d d            Z3e/d!   ed"<   ej
                  j                  d#d      dk(  Z4d$Z5dZ6d%Z7eed&<   dZ8dZ9dZ:dZ;ee<   ed'<   dZ=ej
                  j                  d(d      dk(  Z>ej
                  j                  d)d      dk(  Z?ej
                  j                  d*d      dk(  Z@ej
                  j                  d+d      dk(  ZAdZB eej
                  j                  d,d-            ZC eej
                  j                  d.d/            ZDdZEej
                  j                  d0 e       rdnd      dk(  ZFd1ZGdZHee   ed2<   ej
                  j                  d3d      dk(  ZIy)4ry   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr<  r   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr   cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESrs  rt  rv  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rr   r*      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdrp  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STOREENABLE_EPILOGUE_SUBTILINGTORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32!TORCHINDUCTOR_MIX_ORDER_REDUCTIONrr   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE)Jr  r  r  r  r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r   r  r   r  r  slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr
   r  r  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr  r  r  r   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesrv  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorrp  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeenable_epilogue_subtilingskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_sizer   r   r   ry   ry   s  s   
  :;sBJ O %*! MQXeE#uS#X2F,G&HIP #' ).% ,5;$ +.' 9:&9 ! $  &9   37)46 #( "   N 	

4cRU	
 	 d   $Ix}# #d"   04htn3
 ).- "OT!$  M4 (,$'+$ 	

:C@CG  	

?EL  	 wHI  	

<cBcI  	

=sCsJ 
 $)  ),


3S9)L'*% 
 jjnn%DcJcQO   K OS M "
 15-
 37 (3-6 "
 	

5s;sB !
 !#

/JC PTW W "

/JC PTW WJJNN#:C@CGM 6;2 !


=tD  


<dC J 	

:9;CTWX	  *+&48"HSM8


NPST	 ,r   ry   c                      e Zd ZU dZdZej                  j                  dd      dk(  Zej                  j                  dd      dk(  Z	ej                  j                  dd      Z
ej                  j                  d	d      Zed
   ed<   ej                  j                  dd      ZdZdZdZeed<   dZeed<   dZdZeed<   dZee   ed<   dZeed<   i Zeeef   ed<   ej                  j                  dd      dk(  Zeed<   dZeed<   ej                  j                  dd      dk(  Zeed<    eej                  j                  dd            Z eed<   i Z!eee"f   ed<   dZ#eed<   dZ$eed <   ej                  j                  d!d      dk(  Z%eed"<   dZ&eed#<   dZ'ee   ed$<    e(        Z)eed%<   dZ*ee   ed&<   dZ+ee   ed'<   dZ,ee   ed(<   i Z-ee.j^                  j`                  e1e   f   ed)<   dZ2ee1e      ed*<   ej                  j                  d+d      dk(  Z3dZ4eed,<   dZ5ee   ed-<   dZ6eee1e   z     ed.<   dZ7ee   ed/<   y)0aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr*   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_path)8r  r  r  r  output_pathr   r   r   debug_compiledebug_symbolscompile_wrapper_opt_levelr  r   r  filtered_kernel_namesserialized_in_specserialized_out_specr  r  r  use_consts_asm_buildr  r  r   r  r  dictr  r  r  r  r  r   r  r   r  r  r  r  r  r	   r  r	  r
  r  r  r-   _ops
OpOverloadlistr  
enable_ltor  r  r  r  r   r   r   r  r    s    KJJNN#?ELMJJNN#?ELM !#

0$! EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$  GT'+htn+ !OT   "Hd38n! 	

H#NRUU (  "d!  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 	

BCHCO !$  %)T( 7;$hsm: $-;. +/$.
 .2HTN1 59"HSM8 EG4

 5 5tCy @AF*.NHT#Y'.  93?3FJ M4 ,08C=/ 48xd3i07,0HSM0r   r  c                       e Zd ZU dZeed<   y)aot_inductor_modeFcompile_standaloneN)r  r  r  r"  r  r  r   r   r   r!  r!  A  s      %$r   r!  c            
          e Zd ZU dZdZee   ed<   dZee   ed<   dZ	e
d   ed<   dZdZdZdZej                   j#                  ej$                  j'                  d	ej                   j)                  ej                   j+                  ej.                        d
                  ZdZee   ed<   g dZee   ed<   ej$                  j'                  dd      dk(  ZdZdZee   ed<   dZ eed<   ej$                  j'                  dd      dk(  Z!e"ed<   ej$                  j'                  d      Z#ee   ed<   ej$                  j'                  d      Z$ee   ed<   ej$                  j'                  dd      Z%eed<   ej$                  j'                  dd      dk(  Z&e"ed<   ej$                  j'                  dd      dk(  Z'e"ed<   ej$                  j'                  d d!      Z(eed"<   d#Z)e"ed$<   dZ*e"ed%<   dZ+e"ed&<   d#Z,e"ed'<   y)(cudaz9Settings for cuda backend, today this consists of cutlassNarchr1   -O1)-O0r&  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rr   r*   r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxrr   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen)-r  r  r  r  r%  r   r  r  r1   r*  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer-   __file__cutlass_dirr,  r  r-  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr/  r0  r2  r  r4  r6  r8  r:  r<  r>  r?  r@  rA  rB  r   r   r   r$  r$  J  s;   C
 D(3-
 "GXc]! EJw@AI O   M ''""


'GGLL8:ST	
K 48!8C=7 8D)49C 	

0#6#= $
  #Hhsm" *+!3*
 	

H#NRUU $  13

)1  02zz~~(0x}  (*zz~~3S(  	

DcJcQ "4  	

;SASH $   "zz~~+U  
 %)T( +0!4/ -2#T1 $(D'r   r$  c                   z   e Zd ZU g Zee   ed<   g dZeed      ed<   dZ	ed   ed<   dZ
dZdZdZdZd	Zee   ed
<   ej$                  j'                  d      Zej$                  j'                  dd      dk(  Zeed<   d	Zee   ed<   d	Zee   ed<   d	Zee   ed<   dZeed<   d	Zeee      ed<   dZeed<   dZeed<   y	)rocmr%  )gfx90agfx942gfx950ck_supported_archr(  )	r'  r&  r(  r)  z-Osz-Ozz-Ominz-Ofastz-Omaxr*  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r2  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_threshold)r  r  r  r%  r  r  r  rT  r   r*  is_debug
save_tempsrF  flush_denormalsprint_kernel_resource_usagerU  r   r   r   r   ck_dirr2  r  rX  r  rY  rZ  r[  r\  r]  r^  r   r   r   rP  rP    s%    D$s)FtG$@AB  	 wL 
 H J M O #(  $Ix}# ZZ^^23F 	

FLPSS $ 
 .2Xc]1 /3hsm2 48!8C=7 ',t+ )-L(49%,  s !##"r   rP  )rd  ry   halidepallascpu_backend)ry   rd  re  cuda_backendxpu_backendc                   J    e Zd ZU dZdZdZed   ed<   dZed   ed<   dZ	dZ
dZy	)
rd  hostz	host-cudaAnderson2021)rk  Li2018	Adams2019Mullapudi2016scheduler_cudarm  scheduler_cpuFN)r  r  r  
cpu_target
gpu_targetro  r   r  rp  assertsdebugscan_kernelsr   r   r   rd  rd     sQ    J J
 	 GRS  	 7QR 
 G E Lr   rd  c            	          e Zd ZU ej                  j                  dd      dk(  Zej                  j                  dd      dk(  ZdZe	e
   ed<   dZdZdZdZdZdZdZej                  j                  d	d      dk(  Zej                  j                  d
d      dk(  Zej                  j                  dd      Zej                  j                  dd      ZdZdZe	ee
gdf      ed<   ej                  j                  dd      dk(  Z eej                  j                  dej                  j                  dd                  Zeed<   y)traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r  r  r  r   r   r   enabledsave_real_tensorsrz  r   r  r  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr  r  r   r   r   rw  rw  <  s\   jjnn2C8C?G 

'FLPSS  $Ix}# I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 37J3%+./6ZZ^^,BCHCO &)


!2::>>2G#M	
&s r   rw  )
ztrace.upload_tarrj   rk   rl   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrg   rh   r   rm   _save_config_ignore)rw  zcuda.cutlass_dirr   r,  rh   rg   rj   rk   r   rm   rL  rA   rC   rG   rH   _cache_config_ignore_prefixexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   J    e Zd ZU dZeeeeeeef      f      e	d<   dZ
ee	d<   y)lookup_tableNtableTcheck_src_hash)r  r  r  r  r   r  r  r  r   r  r  r  r   r   r   r  r    s4    7;E8Dd4S>2234;  NDr   r  c                       e Zd ZU dZeed<   dZee   ed<   dZ	dZ
dZdZee   ed<   dZee   ed<   dZdZeed      ed<   dZd	Zeed
<    ej*                  d      dk(  Z ej*                  dd      ZdZdZy)test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)assertr+  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   )r  r  r  r  r  r  r  r   r  runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r   getenvforce_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r  r    s    27)47$(NHSM("'"'# 15404416.AEHW_%=>E L .2$d1 			@ASH # #,"))3R# ".3+r   r  )*(D  r   r%  collections.abcr   typingr   r   r   r   r   r-   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   torch._inductor.choicesr   r   r   inplace_paddingcan_inplace_pad_graph_inputr  r   r   r"   r%   r(   r2   r7   enable_auto_functionalized_v2rt  disable_progressverbose_progressworker_log_pathr:   r  r  rA   rB   rC   rD   rF   rG   rH   rI   rK   rL   rM   r  r  rN   rQ   rS   rU   rW   rX   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningrF  rf   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrg   r   custom_graph_passCustomGraphPassTyperh   ri   CustomPartitionerFnTyperj   rk   rl   r  graphGraphrm   r  rn   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionro   rp   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrw   r{    reorder_for_compute_comm_overlapr|   r}   reorder_for_peak_memoryreorder_for_peak_memory_debugr~   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   uppermax_autotune_gemm_backendsmax_autotune_conv_backendsr   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsr  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr1   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   fallback_random"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr   aggressive_fusionr   r   enabled_metric_tablesr   r   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsr   max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmr  split_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_argsjoint_graph_constant_foldingdebug_index_assertsemulate_precision_castsemulate_divison_rounding__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r  r  r-  r,  r/  r1  r2  r4  r6  libfb.pyr8  __package__get_dir_pathrG  rI  replacesepr7  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler=  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexrA  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenrD  rE  rF  rG  rH  rI  rJ  rK  rL  enable_linear_binary_foldingrO  rP  rQ  rT  rU  rW  rY  r`  rb  rd  ry   r  r!  r$  rP  rf  rg  rh  rd  rw  r  r  r  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingmodulesr  r   r   r   <module>rO     s   	 
 $ ? ?  ( ( V V 7**..!@#F#M# Cx~ CHTN Cx~ CKx~ K8D> d 
 
 JJNN7=D 
 	     8A{3 
 '.  - ?;1	  $) D ( )F(G x~ G ./ "8D>  )/MB)   " d ! )F(G x~ G 1V0W x~ W $*VW d W 05 & 4 57 !4S> 6 )-  ,  (1*  JJNN#>DKT K JJNN=sCsJ D  ::>>"<cBcI
D I
 *3_ d 4 >DK    zz~~:C@CGjjnn89S@JJNN=>#E   >DK JJNN4Y[ccR
       **..!@#F#M 

<=D HJzz~~HWCD 
    *+     TX 5??<<PP WTX EOO==QQ X TX u88PP W PT u88LL SPT 99MM T
 JN hx)=)=(>(DEF M 	 	;	<=:;	= 	 	 (	;	<=:;	= 	   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  #   	+ c3h  FQAB P $)  .  (	?@A>?A	
	*  )-  ,   %  /0 & / >D w9: CQU .3%*9M0N U4: 7=1 : 	 3HXseSj=Q4R  17 w}- 6QU .3%*9M0N U   */ !4 /   
 &,?<& d  JJNN@ASH 
 zz~~:;sB (NOSVV  JJNN#DEL  13  2 JJNNDcJcQ " JJNNQSVW
 / JJNNBCHCO   JJNN2y{CPST
   *, T#Y + -2 #T 1 $47 d  !# DI "  ZZ^^.0A%'   ZZ^^.%'  DF::>>2ID%' (? @ 
 DF::>>2ID%' (? @ 
 " T ! " T !   ! T  #  #! d ! JJNN.4; 4 
 "      JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  !JJNN=tD 
  %JJNN?F  
 JJNN<=D  JJNNJKsR ( $'JJNN<cB$   

'LbQ JJNN#DjQ  JJNN<cBcI 
BC BD BK K KGC GD G *I 
 !& 1 1Ss JJNN68JKsR  JJNN+KSQUXX  ZZ^^$FLPSS  **..!@ASH
       !(3- 
  &+ "  JJNNCDK !
 EI ".?*?!@A H   ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN29;CC  D   (,  + !#  JJNN<cBcI 
 $%    -/ ) /3 hsm 2          299=sCsJ  		78C? BJJNN#@!DE::>>"BCHCO  "&   %           "# &* #   $    JJNN:C@CG  JJNN<cBcI 
  1 11OUe>O>O5O [8$8  JJNNEsKsR #
 &* Xc] )(,  ,
C 
 67 S 7 &. s - !'7:!    T       I U8I+>+C%D E 
 ! D  : :
), ),X6T 6! !J *3:P:R# R $*:=$ D  #)# C  ">!? $ ? 39IL3 )4  JJNN>DK T 
 3- ;
 $3w33[00bff=zJ   4w33J?
    

<cBcI JJNN8#>#E        (- $  (          >DK # 
 $    ! zz~~5r: B&  , 3"  +-**.."D+ (3- 
 JJNNHISP 0    8#>#E$ E %* T )  %  $ (-  ,
 $ t # $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 W 37 )4 6 46 tCc3h/0 5RZZ^^,MuUV 3 V %  %!'&" $  +16+ !4 
	Xc] 	 %+1% c @ @FY Yxl1 l1d% %( (DF# F#T =BW89 A 7?g23 > "*WX ) 8K K\" T#Y * T#Y 2 UWhellELLI4OPQ V BIIH#NRUU +
   '4 '4T , ckk(+ ,}% $   s   #A)AC CACCAC