
    ,i%\                        d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlZd dlmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd d	lmZ d
efdZde de
jB                  fdZ" ejF                         dde fd       Z$ ejF                         de fd       Z%de fdZ& ejF                         de fd       Z' ejF                  d      d        Z(de fdZ) ed       G d d             Z* G d de      Z+y)    )BaseBackend	GPUTargetLanguage)irpassesllvmnvidia)knobs)
PTXASError)	dataclassN)AnyDictTupleOptional)
ModuleType)Pathtargetc                 >    dt         t        t        t        f   fd}|S )Nreturnc                     | j                   j                  }|j                   j                  }||k(  sJ d       |dk(  ryy)Nz%lhs and rhs bitwidth must be the same   )   r       )r   r      )scalarprimitive_bitwidth)lhs_typerhs_typelhs_bitwidthrhs_bitwidths       i/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/backends/nvidia/compiler.pycheck_dot_compatibilityz-min_dot_size.<locals>.check_dot_compatibility   sB    9999|+T-TT+1    )r   int)r   r"   s     r!   min_dot_sizer%      s!    uS#s]7K  #"r#   archr   c                 t    | dk\  rt         j                  j                  S t         j                  j                  S )Nd   )r
   r	   ptxas_blackwellptxas)r&   s    r!   	get_ptxasr+   "   s'    +/3;5<<''NELL<N<NNr#   c                     t         j                  j                  }||S t        j                  t        |       j                  dg      j                  d      }|S )Nz	--versionutf-8)r
   r	   mock_ptx_version
subprocesscheck_outputr+   pathdecode)r&   mock_verversions      r!   get_ptxas_versionr5   &   sL    ||,,H%%y';';[&IJQQRYZGNr#   c                    t        | t              sJ t        t        | j	                  d            \  }}|dk(  r|dk  rd|z   S d|z   dz
  S |dk(  rd|z   S |dk(  rd	|z   S |d
k\  rd}||d
z
  dz  z   |z   S t        d| z         )zK
    Get the highest PTX version supported by the current CUDA driver.
    .      P   r      F   
   ?      Z   z?Triton only support CUDA 10.0 or higher, but got CUDA version: )
isinstancestrmapr$   splitRuntimeError)cuda_versionmajorminorbase_ptxs       r!   ptx_get_versionrJ   /   s    
 lC(((sL..s34LE5{19::>!{Ez{Ez{52:++e33
X[gg
hhr#   c                 b    | j                   }| t        |      j                  }t        |      }|S N)ptx_versionr+   r4   rJ   )optionsr&   rM   rF   s       r!   get_ptx_version_from_optionsrO   G   s2    %%K ..%l3r#   c                 @    t        | |      }t        d|      }d| }|S )NV   z+ptx)rO   min)rN   r&   rM   llvm_ptx_versionfeaturess        r!   get_featuresrU   O   s0    .w=K 2{+&'(HOr#   c                     t        | d      5 }t        j                  |j                               j	                         cd d d        S # 1 sw Y   y xY w)Nrb)openhashlibsha256read	hexdigest)r1   fs     r!   	file_hashr^   ]   s>    	dD	 4Q~~affh'1134 4 4s   1AA
capabilityc                 "    | dk\  rdnd}d|  | S )Nr@   a sm_ )r_   suffixs     r!   sm_arch_from_capabilityrf   c   s!    "$S"FVH%%r#   T)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   d	Z	e
e   ed
<   d	Zeed<   ej                  j                  Ze
e   ed<   d	Ze
e   ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   ed<   dZee   ed<   dZeed<   dZee   ed<   d	Zeed<   d	Zeed<   dZeed<   dZ eed <   dZ!eed!<   d	Z"eed"<   d#Z#eed$<   d% Z$d& Z%y	)'CUDAOptions   	num_warpsr   num_ctas   
num_stagesr   	warp_sizeNmaxnregrM   ptx_optionsir_overrideTenable_fp_fusionenable_reflect_ftzFlaunch_cooperative_grid
launch_pdl)fp8e5fp8e4b15supported_fp8_dtypesrd   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r{   tf32x3ieeebf16x3bf16x6allowed_dot_input_precisionsmax_num_imprecise_acc_defaultextern_libsdebugcudabackend_namesanitize_overflowr&   rb   instrumentation_modec                    t        t              j                  dz  }| j                  i nt	        | j                        }|j                  dd       s-t        j                  j                  xs t        |dz        |d<   t        j                  | dt        |j                                      | j                  dkD  r| j                  | j                  dz
  z  dk(  sJ d       y )Nlib	libdevicezlibdevice.10.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictgetr
   r	   libdevice_pathrB   object__setattr__tupleitemsrk   )selfdefault_libdirr   s      r!   __post_init__zCUDAOptions.__post_init__   s    h..6 ,,4b$t?O?O:P{D1',||'B'B'mc.[lJlFmK$4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr#   c           	      ^   t        | j                        }t        d t        |d         D              |d<   dj	                  t        |j                               D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )Nc              3   <   K   | ]  \  }}|t        |      f  y wrL   )r^   ).0kvs      r!   	<genexpr>z#CUDAOptions.hash.<locals>.<genexpr>   s     (htq!!Yq\):(hs   r   _-r-   )
r   __dict__r   sortedjoinr   rY   rZ   encoder\   )r   	hash_dictnamevalkeys        r!   hashzCUDAOptions.hash   s    '	#((hviXeNfGg(h#h	- hh	@Q9RSID#4&#ST~~cjj12<<>> Ts   B)
)&__name__
__module____qualname__rk   r$   __annotations__rl   rn   ro   rp   r   rM   r
   r	   ptxas_optionsrq   rB   rr   rs   boolrt   ru   rv   ry   r   rz   r|   r   r   r   r   r   r   r   r&   r   r   r   rd   r#   r!   ri   ri   i   s    IsHcJIs "GXc]!K!&!;!;K#;!%K#%!d!##$)T)J'<%*<46%uSz6'--/] %*]*.!4.KE4L#"t"D# "#"0?r#   ri   c                        e Zd ZdZedefd       Zd ZdefdZ	deddf fdZ
defdZd	 Zd
 Zdeeef   fdZd Zed        Zed        Zd Zd Zd Zd Zd Z ej6                         d        Z xZS )CUDABackendNr   c                      | j                   dk(  S )Nr   )backend)r   s    r!   supports_targetzCUDABackend.supports_target   s    ~~''r#   c                     d}t        j                  ||      }|st        d|       t        |j	                  d            S )Nz	^sm(\d+)$z(TRITON_OVERRIDE_ARCH must have the form r   )re	fullmatch
ValueErrorr$   group)r   r&   patternmatchs       r!   _parse_archzCUDABackend._parse_arch   s@    Wd+GyQRR5;;q>""r#   r   c                 B    | j                  |j                        }d| S )Ncuda:)r   r&   )r   rN   r_   s      r!   get_target_namezCUDABackend.get_target_name   s#    %%gll3
zl##r#   c                 2    t         |   |       d| _        y )Ncubin)super__init__
binary_ext)r   r   	__class__s     r!   r   zCUDABackend.__init__   s     !r#   c                    d|v r|d   dk(  rd|d<   dt         j                  j                  xs d| j                  j                   i}|j                  t        j                  j                         D ci c]  }||v s||   |||    c}       t        | j                  |d               }|j                  dd      dkD  r|d	k  rt        d
| d      d|vrFt        t        j                        }|dk\  r|j                  d       t!        t#        |            |d<   d|vr
|d	k\  rd|d<   d|vrt         j$                  j&                  |d<   |d	k(  rdnd|d<   t        di |S c c}w )Nr   consanTr   r&   smrl   r   r@   zBnum_ctas > 1 requires NVIDIA SM90+ (Hopper). Current target is sm_zM. This configuration will fail. Please set num_ctas=1 or target an SM90+ GPU.ry   Y   fp8e4nvrz   )rx   rs   i   @r   r   rd   )r
   runtimeoverride_archr   r&   updateri   __dataclass_fields__keysr$   r   r   r   setry   addr   r   languagedefault_fp_fusion)r   optsargsr   r_   ry   s         r!   parse_optionszCUDABackend.parse_options   s   !T)d3I.Jh.V DM33NDKK<L<L;M7NO)I)I)N)N)PuATUY]T]aefgahatQQZuv))$v,78
88J"Q&:? !66@\ BNO Q Q "-#&{'G'G#H R$((3+08L1M+ND'(.d:R<J89T)',~~'G'GD#$9Cr9Iq,-"T""/ vs   4	E)>E)E)c                 H    |j                   |j                  |j                  fS rL   )rk   rl   shared)r   metadatas     r!   pack_metadatazCUDABackend.pack_metadata   s%    OO
 	
r#   c                     dd l mc mc m} t	        | j                  |j                              }|dk\  r|j                  n|j                  t        | j                        d}|S )Nr   r:   )convert_custom_typesr%   )triton.language.extra.cudar   extrar   r$   r   r&   convert_custom_float8_sm80convert_custom_float8_sm70r%   r   )r   rN   r   r_   codegen_fnss        r!   get_codegen_implementationz&CUDABackend.get_codegen_implementation   sV    11))',,78
 0:R/?D++TEdEd%

 r#   c                     ddl m} d|iS )Nr   )r   ztriton.language.extra.libdevice)r   r   )r   r   s     r!   get_module_mapzCUDABackend.get_module_map   s    819==r#   c                     t        j                  |       t        j                  r t        j                  j                  |       y y rL   )r	   load_dialectsr   instrumentation)r   ctxs     r!   r   zCUDABackend.load_dialects   s2    S!&&''55c: 'r#   c                    t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       |dz  dk  rt        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j                  j                  |       |j!                  | d       | S )Nr=   	   	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_symbol_dceadd_loop_unrollrun)modr   optr_   pms        r!   r   zCUDABackend.make_ttir   s    __S[[)
!!"%..r2aKK@@D''+#))"-b!$$R(##B'
sK 
r#   c                    |j                   H| j                  dt        j                  | j                        j                  |j                                t        j                  | j                        }|j                         }|dz  dk\  }t        j                  j                  |d| |j                  d|j                         t        j                  j                  |       t        j                  j                  ||       t         j                  j"                  j%                  |       t        j                  j'                  |       t        j                  j)                  |       t        j                  j+                  |       t        j                  j'                  |       t        j                  j-                  ||dk\         t         j                  j"                  j/                  |       t        j                  j1                  |       |dz  dv rFt        j                  j3                  |       t        j4                  j7                  |       t        j                  j9                  |       t        j4                  j7                  |       t        j                  j;                  |       t         j                  j<                  j?                  ||j@                  |       t        j                  jC                  ||j@                         t        j                  jE                  |       t        j                  jG                  ||j@                  |       n|dz  dk\  rt        j                  j3                  |       t        j4                  j7                  |       t        j                  j9                  |       t        j                  jI                  |       t        j                  jK                  |d       t         j                  j"                  jM                  |       t        j                  jC                  ||j@                         t        j                  jE                  |       t        j                  jO                  ||j@                         t        j                  jG                  ||j@                  |       t        j                  jQ                  |       t        j                  j;                  |       t        j                  jK                  |d	       t         j                  j"                  jS                  |       nt        j                  j9                  |       t        j4                  j7                  |       t        j                  j1                  |       t        j                  jU                  |       t        j                  j-                  ||dk\         t        j                  jW                  |       t         j                  j"                  jY                  |       |dz  d
k\  r)t         j                  j"                  j[                  |       t        j                  j'                  |       t         j                  j"                  j]                  |       t        j                  j_                  |       t        j                  ja                  |       t        j                  j1                  |       t        j4                  jc                  |       t         j                  j"                  je                  ||       t         j                  j"                  jg                  |       t        j4                  ji                  |       t        j4                  jk                  |       t        j4                  j7                  |       |jm                  | d       | jo                         |d<   | S )Nzttg.maxnregr=   r   r   r   r:   )r   r   FTr   
make_ttgirtensordesc_meta)8rp   set_attrr   builderr   get_int32_attrr   r   r   r   add_convert_to_ttgpuirrk   rl   ttgpuiradd_coalesceadd_f32_dot_tcr	   	ttnvgpuiradd_plan_ctaadd_remove_layout_conversionsadd_optimize_thread_localityadd_accelerate_matmuladd_optimize_dot_operands add_optimize_descriptor_encodingadd_loop_aware_cseadd_fuse_nested_loopsr   r   add_triton_licm add_combine_tensor_select_and_ifhopperadd_hopper_warpspecrn   add_assign_latenciesadd_schedule_loopsadd_pipelineadd_optimize_accumulator_initadd_hoist_tmem_allocadd_promote_lhs_to_tmemadd_warp_specializeadd_optimize_partition_warpsadd_remove_tmem_tokensadd_prefetchadd_coalesce_async_copyadd_optimize_tmem_layoutsadd_tma_loweringadd_interleave_tmemadd_reduce_data_duplicationadd_reorder_instructionsr   add_fence_insertionadd_lower_mmaadd_sccpr   r   get_tensordesc_metadata)r   r   r   r_   r   dump_enabledemuTF32s          r!   r   zCUDABackend.make_ttgir   s    ;;"LL

3;;(?(N(Ns{{([\__S[[)(#q(**2zl/CS]]TVX[XdXde##B'%%b'2,,R044R833B7,,R044R800Z25EF@@D&&r*v%NN004MM++B/KK''+MM++B/NN;;B?MM  44RVNN//CNNCNN--b1NN''CNNLI2#NN004MM++B/KK''+NN88<NN//E:MM##;;B?NN//CNNCNN--b1NN..r3>>BNN''CNNLINN77;NN;;B?NN//D9MM##::2>KK''+''+&&r*##B'00Z25EF..r299"=q MM##44R844R833B72226//3&&r*$$R(33B
C--b1r"b!''+
sL!&)&A&A&C"#
r#   c                    |}t        j                  |j                        }|j                          t        j
                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j                  j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j!                  |       t        j
                  j                  |       t        j"                  j%                  |       |j'                  |d       |j)                         |d<   |S )Ngluon_to_ttgirr   )r   r   r   r   r   gluonr   add_infer_coalesced_encodingsadd_resolve_auto_encodingsr	   r  r   r   r   r&  r   r  r  r  r   r'  )r   srcr   rN   r_   r   r   s          r!   r+  zCUDABackend.gluon_to_ttgir@  s    __S[[)
  $2226//3004&&r*r"&&r*&&r*77;
s$%&)&A&A&C"#
r#   c                    t        || j                  j                        }|}t        j                  |j
                        }|j                          t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j                  j                  j!                  |||       t        j                  j"                  j%                  |       t        j                  j"                  j'                  |       t(        j*                  j,                  dk(  rt        j                  j/                  |       t        j                  j1                  |       t        j                  j"                  j3                  ||       t4        j6                  r+t4        j6                  j9                  d||j
                         t        j                  j                  j;                  |||       t        j<                  j?                  |       t        j<                  jA                  |       t        j                  j"                  jC                  |       t        j                  j"                  jE                  |       t        j<                  j?                  |       t        j<                  jA                  |       t        j<                  jG                  |       t        j                  jI                  |       t(        j*                  jJ                  s9t(        j*                  jL                  st        jN                  jQ                  |       t4        j6                  r+t4        j6                  j9                  d||j
                         |jS                  |d       t(        j*                  jL                  rt(        j*                  jJ                  s`t        j                  |j
                        }|j                          t        jN                  jQ                  |       |jS                  |d       t        j                  |j
                        }|j                          t        jN                  jU                  |       |jS                  |d       tW        jX                          tW        j
                         }t(        j*                  jZ                  rt]        d      tW        j^                  ||      }	ta        |      }
tc        || j                  j                        }d}t        jd                          tW        jf                  |	||
|       |jh                  rt        jj                  |	       |jl                  rIt        jn                  |	      r4|jl                  D cg c]  \  }}|	 }}}tW        jp                  |	|       tW        jr                  |	tV        jt                         |jw                  d	      }|||d
<   |jw                  d      |d<   |jw                  d      |d<   |jw                  d      |d<   |jw                  d      |d<   |jw                  d      xs d|d<   |jw                  d      xs d|d<   ty        |	      }~	~|S c c}}w )Nr   ttgpuir_to_llvmirllvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variableszYAddress Sanitizer Error: Address sanitizer is currently only supported on the AMD backendnvptx64-nvidia-cudazttg.total-num-warpsrk   z
ttg.sharedr   zttg.tensor_memory_size	tmem_sizezttg.global_scratch_memory_sizeglobal_scratch_sizez#ttg.global_scratch_memory_alignmentglobal_scratch_alignzttg.profile_scratch_memory_sizer   profile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)=rO   r   r&   r   r   r   r   r   r  r  add_allocate_warp_groupsconvertadd_scf_to_cfr,  r   r	   add_allocate_shared_memory_nvr  add_allocate_tensor_memoryadd_check_matmul_two_ctar
   compilationr   add_concurrency_sanitizer"add_allocate_global_scratch_memoryadd_proxy_fence_insertionr   r   patchadd_to_llvmirr   r   r   add_nvgpu_to_llvmadd_warp_specialize_to_llvmr   add_nvvm_to_llvmdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scoper   add_di_local_variabler   init_targetsenable_asanrE   	to_modulerf   rU   set_short_ptrattach_datalayoutrt   set_nvvm_reflect_ftzr   has_extern_depslink_extern_libsoptimize_moduleOPTIMIZE_O3get_int_attrrB   )r   r/  r   rN   r_   rM   r   r   r   llvm_modprocrT   tripler   r1   pathstotal_num_warpsrets                     r!   r3  zCUDABackend.make_llirS  s   27DKK<L<LM__S[[)
77;//3$$R(  $;;B
KX::2>88<11X=NN44R899"=99"jI&&''--.A2s{{S++B
KH''+b!11"5;;B?''+b!$$R(''+  225;L;L;o;oMM&&r*&&''--.>CKKP
sK ??$$66__S[[1!**2.s9: -BOOMM//3FF3FG 	,,.((km m>>#w/&z2)9)9:&xx@%%''16#9#9(#C.5.A.ABltTTBEB!!(E2Xt'7'78 **+@A&$3H[! --l; # 0 01I J*-*:*:;[*\&'+.+;+;<a+b'(+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()(m
' Cs   #[c           	         t        || j                  j                        }d}t        |      }t	        || j                  j                        }dg}	t        j                  |||||	|j                  d      }
t        j                  d|
      }t        |      dk(  sJ |d   |d<   |dz   d	|dz   }t        j                  d
d| |
t        j                        }
t        j                  dd| |
t        j                        }
t        j                  j                  st        j                  dd|
      }
t        j                   j"                  rt%        d       t%        |
       |
S )Nr4  znvptx-mad-wide-optFz(.visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)r   r   r   r=   r7   z\.version \d+\.\d+z	.version )flagsz\.target sm_\d+z.target sm_z,\s*debug|debug,\s*rb   z // -----// NVPTX Dump //----- //)rO   r   r&   rf   rU   r   translate_to_asmrs   r   findalllensub	MULTILINEr
   r@  rJ  r	   
dump_nvptxprint)r   r/  r   r   r_   rM   r[  rZ  rT   r`  r^  namess               r!   make_ptxzCUDABackend.make_ptx  s=   238H8HI&&z2T[[%5%56%&##CxH\H\^cd

FL5zQ 8$b);r>*:;ff*i},EsRTR^R^_ff';zl)CSPRP\P\]  CC &&/S9C<<""45#J
r#   c           
      $   t        | j                  j                        j                  }t	        j
                  ddd      5 }t	        j
                  ddd      5 }|j                  |       |j                          |j                  dz   }g }	t        j                  j                  r|	dd	gz  }	n't        j                  j                  r|	d
gz  }	n|	dgz  }	|j                  rg ndg}
t        |      }t        j                  j                  rddgng }|j                   r|j                   j#                  d      ng }|g|	|
d||d| |j                  d|}	 t%        j&                  |dd|       t        j                  j(                  r7t+        |j                        5 }t-        |j/                                d d d        t0        j                  j3                  |j                        rt1        j4                  |j                         t0        j                  j3                  |j                        rt1        j4                  |j                         t+        |d      5 }|j/                         }d d d        t0        j                  j3                  |      rt1        j4                  |       d d d        d d d        S # 1 sw Y   xY w# t$        j6                  $ r}t+        |j                        5 }|j/                         }d d d        n# 1 sw Y   nxY wt0        j                  j3                  |j                        rt1        j4                  |j                         |j8                  dk(  rd}n2|j8                  dt:        j<                  z   k(  rd}nd|j8                   }| d ddj?                  |       d}t-        d| d| d       tA        |      d }~ww xY w# 1 sw Y   wxY w# 1 sw Y   HxY w# 1 sw Y   S xY w) NFwz.ptx)deletemodere   rz.logz.oz	-lineinfoz-suppress-debug-infoz-gz--fmad=falsez--opt-level0 z-vz--gpu-name=z-oT)check	close_fdsstderr   z!Internal Triton PTX codegen error   z`ptxas` raised SIGSEGVz`ptxas` failed with error code z
`ptxas` stderr:
z
Repro command: 
zC

================================================================
z

zy
================================================================
please share the reproducer above with Triton project.
rW   )!r+   r   r&   r1   tempfileNamedTemporaryFilewriteflushr   r
   r@  rI  r	   disable_ptxas_optrs   rf   rq   rD   r/   r   dump_ptxas_logrX   rg  r[   osexistsremoveCalledProcessError
returncodesignalSIGSEGVr   r   )r   r/  r   r   r_   r*   fsrcflogfbin
debug_infofmadr&   disable_optptx_extra_options	ptxas_cmdlog_fileelogerrorr]   r   s                        r!   
make_cubinzCUDABackend.make_cubin  s   $++**+00((CO G	 SW''u3vNG	 RVJJsOJJL99t#DJ  22{,BCC
//tf$
 {m+
--2N3CD*:6D 38,,2P2P=#.VXK ?Boo 5 5c :SU "%)+/2=@QU`ae`fSgimirirI$(ydS<<..dii /Hhmmo./ 77>>$)),IIdii(77>>$)),IIdii(: dD! !Q!ww~~d#		$OG	  G	 P O/ / 00 ($))_ *"--/C* * *77>>$)),IIdii(<<3&?E\\S6>>%994E=all^LE!7 #--0E 2++.88I+>*?rC       !''5(8! !IG	  G	  G	 P s   PDO8AK'KBKO8%O+6<O82PK	KO(%O#:L		O#LC
O##O((O8+O50O88P	=PPc                 ~     j                  j                        |t        j                  k(  r fd|d<    fd|d<   n|t        j                  k(  r
 fd|d<    fd|d<    fd|d	<    fd
|d<   t
        j                  j                  $t
        j                  j                   ||       y y )Nc                 ,    j                  | |      S rL   )r   r/  r   r_   rN   r   s     r!   <lambda>z(CUDABackend.add_stages.<locals>.<lambda>  s    4>>#xQXZd3e r#   r   c                 ,    j                  | |      S rL   )r   r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    DOOCSZ\f4g r#   ttgirc                 ,    j                  | |      S rL   )r+  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>  s    D4G4GXW^`j4k r#   c                 ,    j                  | |      S rL   )r3  r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>   s    t~~c8WV`/a r#   llirc                 T    j                  | |j                  j                        S rL   )ri  r   r&   r/  r   rN   r   s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>!  s#    dmmC7TXT_T_TdTd.e r#   ptxc                 T    j                  | |j                  j                        S rL   )r  r   r&   r  s     r!   r  z(CUDABackend.add_stages.<locals>.<lambda>"  s#    XwX\XcXcXhXh0i r#   r   )r   r&   r   TRITONGLUONr
   r   add_stages_inspection_hook)r   stagesrN   r   r_   s   ` ` @r!   
add_stageszCUDABackend.add_stages  s    %%gll3
x&eF6NgF7O'kF7Oaveuiw==33?MM44T67HV`a @r#   c                 v    t        | j                  j                        }| d| j                  j                   S )Nr   )r5   r   r&   )r   r4   s     r!   r   zCUDABackend.hash&  s2    #DKK$4$45!DKK,,-..r#   )r   r   r   r   staticmethodr   r   r   rB   r   r   r   r   r   r   r   r   r   r   r   r   r+  r3  ri  r  r  	functools	lru_cacher   __classcell__)r   s   @r!   r   r      s    O(	 ( (#$# $"y "T "#S #>
>S*_ 5 >;
    G GR&^@4JXb Y/ /r#   r   )r:   ),triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   triton.runtime.errorsr   dataclassesr   r  typingr   r   r   r   typesr   rY   r   rw  r  r}  r/   pathlibr   r%   r$   
NvidiaToolr+   r  r5   rJ   rO   rU   r^   rf   ri   r   rd   r#   r!   <module>r     s>   E E 8 8  , !  - -   	   	  # #OC OE,, O C   iS i i.  
 
 
 T4 4
& & $)? )? )?XS/+ S/r#   