
    ,iY                         d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
Zd Zd Z ed       G d d             Z G d de      Zy)    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                      y)N)   r   r    )lhs_typerhs_types     f/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/backends/amd/compiler.py<lambda>z"get_min_dot_size.<locals>.<lambda>   s        r   r   s    r   get_min_dot_sizer      s
     0/r   c                     t         j                  j                  | dk(  xs | dk(  xr |du S t         j                  j                  S )Ngfx942gfx950T)r
   r	   use_block_pingpong)archuse_async_copys     r   is_pingpong_schedule_enabledr#      sI    --5 HM!1!Ln6L X;@99;W;WXr   c                 t    t         j                  j                  | dk(  S t         j                  j                  S )Nr   )r
   r	   use_in_thread_transposer!   s    r   is_in_thread_transpose_enabledr'      s.    !&!B!B!JDHqPUPYPYPqPqqr   T)frozenc                   R   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   d	Z	e
ed
<   dZeed<   dZeed<   d	Zeed<   dZee   ed<   dZee   ed<   dZeed<   dZee   ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   d$ Zd% Zy	)&
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesr   num_ctasNextern_libsFdebugTsanitize_overflowr!   )fp8e4nvfp8e5fp8e5b16fp8e4b8supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r:   bf16x3bf16x6allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_name instrumentation_modenoneschedule_hintc                    t        | j                  dd       }|dk\  rdnd}t        j                  | d|       | j                  dkD  r| j                  | j                  dz
  z  dk(  sJ d	       | j                  d
k(  rI| j
                  dk7  r:t        j                  d| j
                   d       t        j                  | dd       t        t              j                  dz  }| j                  i nt        | j                        }dD ]  }t        || dz        ||<    t        j                  | dt        |j                                      y )N   
       @   	warp_sizer   r   znum_warps must be a power of 2r   zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.rB   lib)ocmlocklz.bcr1   )intr!   object__setattr__r,   rB   warningswarnr   __file__parentr1   dictstrtupleitems)self	gfx_majorrQ   default_libdirr1   rR   s         r   __post_init__zHIPOptions.__post_init__N   sG   		!B(	#r/Br	4i8~~!t~~!9K'LQR&R 	-,	-R II!

aMMuvz  wA  wA  vB  By  z tWa0h..6 ,,4b$t?O?O:P# 	AC">se3K#?@K	A4k6G6G6I0JKr   c           	          dj                  | j                  j                         D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )N_-utf-8)join__dict__r_   hashlibsha256encode	hexdigest)r`   namevalkeys       r   hashzHIPOptions.hasha   s]    hh9L9L9NOID#4&#OP~~cjj12<<>> Ps   A4
) __name__
__module____qualname__r,   rU   __annotations__r-   r/   r0   r1   r\   r2   boolr3   r!   r]   r8   r   r9   r;   r>   r?   r@   rA   rB   rC   rD   rF   rH   rJ   rc   rq   r   r   r   r*   r*      s    IsL#JHcKE4"t"D#
 (S%*R46%uSz6'--/K %*K!d!$)T) !#!E3N$$)*!3*L# "#"*  M3L&?r   r*   c                   J    e Zd ZdZdZedefd       Zdeddf fdZde	fdZ
defdZd	 Zd
 Zdee	ef   fdZd Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zd Z ej<                         d        Z xZ S )
HIPBackendNFr   c                      | j                   dk(  S )NrE   )backendr   s    r   supports_targetzHIPBackend.supports_targetj   s    ~~&&r   returnc                 j    t         |   |       t        |j                  t              sJ d| _        y )Nhsaco)super__init__
isinstancer!   r]   
binary_ext)r`   r   	__class__s     r   r   zHIPBackend.__init__n   s+     &++s+++!r   c                      d|j                    S )Nhip:r&   r`   optionss     r   get_target_namezHIPBackend.get_target_names   s    gll^$$r   c                    dt         j                  j                  xs | j                  j                  i}|j                  dd      dkD  rKt        j                  | j                  j                        s"t        d| j                  j                         | j                  j                  dk(  rBt        t        j                        }|j                  dh       t        t        |            |d<   d|vr%t        t        t        j                              |d<   | j                  j                  d	k(  rCt        t        j                         }|j                  d
dh       t        t        |            |d<   d|vrt         j"                  j$                  |d<   |j                  t        j&                  j)                         D ci c]  }||v s||   |||    c}       t        di |S c c}w )Nr!   r0   r   znum_ctas > 1 not supported on r   tf32r>   r8   r   r6   r7   r9   r?   r   )r
   runtimeoverride_archr   r!   getr	   supports_multi_cta_launch
ValueErrorsetr*   r>   updater^   sortedr8   r9   languagedefault_fp_fusion__dataclass_fields__keys)r`   optsargsr>   r9   ks         r   parse_optionszHIPBackend.parse_optionsv   s   33Gt{{7G7GH88J"Q&s/L/LT[[M]M]/^=dkk>N>N=OPQQ ;;x'+.z/V/V+W((//938@\9]3^D/0!-+0
8W8W1X+YD'(;;x'03J4`4`0a--44j)5LM8=fEf>g8hD45T)',~~'G'GD#$)H)H)M)M)OuASTX\S\aefgahatQQZuv!D!! vs   	G+G+G+c                 H    |j                   |j                  |j                  fS N)r,   r0   shared)r`   metadatas     r   pack_metadatazHIPBackend.pack_metadata   s%    OO
 	
r   c                 0    dt        | j                        iS )Nmin_dot_size)r   r   r   s     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s     0 =>>r   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r`   r   s     r   get_module_mapzHIPBackend.get_module_map   s    719==r   c                     t        j                  |       t        j                  r t        j                  j                  |       y y r   )r	   load_dialectsrx   instrumentation)r`   ctxs     r   r   zHIPBackend.load_dialects   s2    #%%&&44S9 &r   c                     dd l }d}t        | d      r| j                         |k  S t        | |j                        r-t        | d      r!| j                         j                         |k  S y)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s]    
3$==?j00c5<<(WS:K-L&&(--/:==r   c                 H    t        j                  |       }d| v r|ddggz  }|S )NSztt.pointer_rangerO   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     t        j                  | fi |}t        j                  j                  rt
        j                  |       r|dz  }|S )Nr   )r   get_tensor_specializationr
   r	   use_buffer_opsrx   r   )r   kwargsr   s      r   r   z$HIPBackend.get_tensor_specialization   s?    33CB6B99##
(@(@(E3JC
r   c                    t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j!                  |       |j#                  | d       | S )N	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   r   zHIPBackend.make_ttir   s    __S[[)
!!"%..r2<<R@''+#))"-b!##B'$$R(##B'
sK 
r   c                    t        j                  | j                        }|j                          t        j
                  j                  |d|j                   |j                  |j                  |j                         |j                  | d       t        j                  | j                        }|j                          d}t        j                  j                  |       t        j                  j                  ||       t        j                  j                  |       t        j                  j!                  |       t"        j                  j                  j%                  ||j                  |j&                  |j(                         t        j                  j                  |       t"        j                  j                  j+                  |       t"        j                  j                  j-                  ||j                         t"        j                  j                  j/                  |       t        j                  j1                  |       t        j2                  j5                  |       t        j
                  j7                  |       t        j2                  j5                  |       t8        j"                  j:                  }t=        |j                  |      }t"        j                  j                  j?                  ||j@                         t"        j                  j                  jC                  |||       |r4t"        j                  j                  jE                  ||j                         t        j2                  j5                  |       |jF                  jI                         dk7  rJ|jF                  jK                  d      D ],  }t"        j                  j                  jM                  ||       . t        j                  j                  |       t        j                  jO                  |       tQ        |j                        rHt"        j                  j                  jS                  |       t        j                  j                  |       t"        j                  j                  jU                  |       |rC|j@                  dkD  r4t"        j                  j                  jW                  ||j@                         t8        j"                  jX                  rt"        j                  j                  j[                  |       t        j2                  j5                  |       t"        j                  j                  j]                  ||j                  t8        j"                  j^                  t8        j"                  j`                         t"        j                  j                  jc                  |       t        j2                  j5                  |       t        j2                  je                  |       t        j2                  jg                  |       |j                  | d       | ji                         |d<   | S )	Nr   make_ttgir_earlyFrI   ,r   
make_ttgirtensordesc_meta)5r   r   r   r   r   r   add_convert_to_ttgpuirr!   r,   rQ   r0   r   ttgpuiradd_coalesceadd_f32_dot_tcadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulrA   rB   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   r"   r#   add_schedule_loopsr/   add_pipelineadd_coalesce_async_copyrJ   lowersplitinsert_instruction_sched_hintsadd_reduce_data_duplicationr'   add_in_thread_transposeadd_reorder_instructionsadd_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsuse_buffer_atomics%buffer_ops_analyze_small_tensor_rangeadd_fold_true_cmpir   r   get_tensordesc_metadata)r   r   r   r   emuTF32r"   r    hints           r   r   zHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s&'__S[[)
##B'%%b'244R833B7

00W\\7C_C_ahanano44R8

004

44RF

77;,,R0''+##B'''+119',,W

--b'2D2DE

''N<NOJJ66r7<<H''+  &&(F2--33C8 L

""AA"dKL44R82226)',,7JJ66r:NN88<

33B7'"4"4q"8JJ11"g6H6HI99##JJ88<MM++B/JJ88		,,		??	 	

--b1''+b!$$R(
sL!&)&A&A&C"#
r   c                 &   | }t        j                  |j                        }|j                          t        j
                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       |j                  |d       |j!                         |d<   |S )Ngluon_to_ttgirr   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   add_sccpr   add_loop_aware_cser   r    add_combine_tensor_select_and_ifr   r   )srcr   r   r   r   s        r   r   zHIPBackend.gluon_to_ttgir  s    __S[[)
  $//3r"&&r*&&r*77;
s$%&)&A&A&C"#
r   c                    | }t        j                  |j                        }|j                          t        j
                  j                  j                  ||j                         d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j                  |       t
        j                  j                  |       t        j
                  j                  j                  |       t         j"                  r+t         j"                  j%                  d||j                         d}t        j
                  j                  j'                  ||j                  |       t
        j(                  j+                  |       t
        j(                  j-                  |       t
        j                  j/                  |       t
        j                  j1                  |       t
        j(                  j+                  |       t
        j(                  j-                  |       t
        j(                  j3                  |       |j4                  j7                         dk7  r?t        j
                  j                  j9                  ||j                  |j:                         t         j"                  r+t         j"                  j%                  d||j                         t<        j>                  j@                  s9t<        j>                  jB                  st
        jD                  jG                  |       t        j
                  j                  jI                  ||       |jK                  |d       t<        j>                  jB                  rt<        j>                  j@                  s`t        j                  |j                        }|j                          t
        jD                  jG                  |       |jK                  |d       t        j                  |j                        }|j                          t
        jD                  jM                  |       |jK                  |d       tO        jP                          tO        j                         }tO        jR                  ||      }t	        jT                  |       d	}	t<        j>                  jV                  rd
}	tO        jX                  |t        jZ                  |j                  |	       t	        j\                  ||j                         t	        j^                  |d       t	        j`                  |dd       t	        j`                  |dd       t	        j`                  |dd       t	        j`                  |d|jb                  dk(         |je                         D 
cg c]  }
|
jg                         r|
 }}
|d   ji                  t        jj                         |d   jm                  dd|jn                  |jb                  z          d|j4                  jq                  d      v r|d   jm                  dd       |d   jm                  dd       |d   jm                  d|jr                   d|jr                          |jt                  rdnd}|d   jm                  d|       t<        j>                  jV                  r'|d   jw                  d
       |d   jy                          t	        jz                  |d          t<        j>                  jV                  r\t}        t~              j                  dz  }t        |d z        t        |d!z        t        |d"z        g}tO        j                  ||       ne|j                  rY|j                  D cg c]  \  }}t	        j                  ||      s|  }}}t        |      dkD  rtO        j                  ||       tO        j                  |tN        j                  |j                  d	g |j                         t	        j                  |j                        r<|d   j                  d#       |d   j                  d$       |d   j                  d%       t<        j                  j                  rt	        j                  |d          | j                  d&      |d'<   | j                  d(      xs d|d)<   | j                  d*      xs d+|d,<   t	        j                  |       t	        j                  |       t        |      S c c}
w c c}}w )-Nr   ttgpuir_to_llvmirTrI   llvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variablesrG   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rP   zamdgpu-flat-work-group-sizez1,zmemory-bound-attentionr   zamdgpu-sched-strategyziterative-ilpzuniform-work-group-sizetruezamdgpu-waves-per-euz, zpreserve-signr:   zdenormal-fp-math-f32rR   z
asanrtl.bczocml.bczockl.bczamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr   zttg.profile_scratch_memory_sizeprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)Pr   r   r   r   r	   r   r   add_update_async_wait_countr!   add_optimize_lds_usageconvertadd_scf_to_cfr   r   add_index_to_llvmiradd_allocate_shared_memoryrx   r   patchadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rJ   r   lower_instruction_sched_hintsr/   r
   compilationdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scopeadd_builtin_func_to_llvmirr   add_di_local_variabler   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrQ   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r   r-   rC   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rZ   r[   r]   link_extern_libsr1   need_extern_liblenoptimize_moduleOPTIMIZE_O3r?   has_architected_sgprsremove_fn_attrscalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moderb   pathsrn   paths                    r   r   zHIPBackend.make_llir  s   __S[[)


66r7<<H 

11"gllOT$$R(  $**2.

55b9%%&&,,-@"ckkR 	

((W\\9E''+b!''+**2.''+b!$$R(  &&(F2JJ<<RwOaOab %%&&,,-=r3;;O  225;L;L;o;oMM&&r*

55b)D
sK ??$$66__S[[1!**2.s9: -BOOMM//3FF3FG 	,,.>>#w/  *((&Ox):):GLL/Z 	Hgll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224PbB<M<M<OrPPA > >?A8Bw?P?PQXQbQb?b>c:de#w'<'<'B'B3'GGF6HA4f= 	A0W5I5I4J"WMaMaLb2cd+2+E+E6A1=A((F((2F##%
 	  Q(((!(^22U:NN\12NY./NY./E
 !!(E2  .5.A.AiltTSEXEXYacgEhTiEi5zA~%%h6Xt'7'7r2wOgOgh $$W\\2F!!"<=F!!"<=F!!"<=99**33CF; !--l;+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()$$X. 	  *8}A QJ js   e0ee*ec           	         t        j                  d|       }t        |      dk(  sJ |d   |d<   g }d|j                  v rdnd}t	        j
                  | j                  d            j                         }|d   d	z   |z   }t        j                  | t        j                  |j                  |||j                  |      }t        j                  | t        j                  |j                  |||j                  |       t        j                  | t        j                  |j                  |||j                  d
      }	t        j                  j                   rt#        d       t#        |	       |	S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rn   gfx11z-real-true16rG   rg   re   Fz!// -----// AMDGCN Dump //----- //)refindallr-  r!   rj   rk   rl   rm   r   translate_to_mirr	   r  r?   dump_sched_dagtranslate_to_asmr
   dump_amdgcnprint)
r   r   r   namesflagsfeaturesir_hashdump_file_idre   amdgcns
             r   make_amdgcnzHIPBackend.make_amdgcn  s/   
 

QSVW5zQ 8%,%<>"..G!45??AQx#~/!!#s'8'8',,RWY`YqYq".0C!2!2GLL(ESZSkSk(	*&&sC,=,=w||XW\^e^v^v',.99  56&Mr   c                 l   d}t         j                  j                  rd}t        j                  | |j
                  |      }t        j                         5 }t        j                         5 }t        |j                  d      5 }|j                  |       d d d        t        j                  |j                  |j                         d d d        t        |j                  d      5 }|j                         }	d d d        d d d        	S # 1 sw Y   txY w# 1 sw Y   NxY w# 1 sw Y   +xY w# 1 sw Y   	S xY w)NrG   r   wbrb)r
   r  r  r	   assemble_amdgcnr!   tempfileNamedTemporaryFileopenrn   write
link_hsacoread)
r   r   r   r:  r~   tmp_outtmp_infd_infd_outr   s
             r   
make_hsacozHIPBackend.make_hsaco  s    ((&O##CG((* 	$g,,. :&&++t, 'KK&'v{{GLL9: gllD) $Vkkm$	$ 
' ': :$ $	$ 
sT   D))D D2DD)"D3D)D
DD	D)D&	"D))D3c                 >    |t         j                  k(  r fd|d<    fd|d<   n|t         j                  k(  r	 fd|d<    fd|d<    fd|d	<    fd
|d<   t        j                  j
                  $t        j                  j                   ||d        y y )Nc                 *    j                  | |      S r   )r   r   r   r   r`   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#xQX3Y r   r   c                 *    j                  | |      S r   )r   ra  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DOOCSZ4[ r   ttgirc                 *    j                  | |      S r   )r   ra  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    D4G4GXW^4_ r   c                 *    j                  | |      S r   )r   ra  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    t~~c8W/U r   llirc                 *    j                  | |      S r   )rO  ra  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y r   rN  c                 *    j                  | |      S r   )r^  ra  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    Xw0W r   r~   )r   TRITONGLUONr
   r   add_stages_inspection_hook)r`   stagesr   r   s   ` ` r   
add_stageszHIPBackend.add_stages  s    x&YF6N[F7O'_F7OUvYxWw==33?MM44T67HVZ[ @r   c                     | j                    S r   r   )r`   s    r   rq   zHIPBackend.hash  s    ++r   )!rr   rs   rt   r   %supports_native_tensor_specializationstaticmethodr   r{   r   r]   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rO  r^  rm  	functools	lru_cacherq   __classcell__)r   s   @r   rx   rx   f   sS   O,1)'	 ' '"y "T "
%# %"S "2
?>S*_ 5 >
:
          < <|    Z Zx  .  
\ Y   r   rx   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rj   rT  rB  rq  rX   pathlibr   r   r#   r'   r*   rx   r   r   r   <module>r{     sz    E E 5 5  ! # #    	   0Y 0X
r $D? D? D?NI  I r   