
    *i`"                     "   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZeez   Zg dZed	gz   Zeez   Zed	gz   Zd
dgZdgez   dgz   ez   d	gz   Z e ee      h dz
        Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d  Z2d! Z3d5d"ee   fd#Z4d6d$ejj                  d%eeejl                  f   fd&Z7d$e8d%ejr                  fd'Z:d%e8fd(Z;d) Z<d7d*Z=d+ Z>d7d,Z?ej                  j                   e=         e?       -      ZBd.eCd/eCfd0ZDd1eejl                  ej                  j                  j$                  f   d%ejl                  fd2ZGd6d3eee8      fd4ZHy)8    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                  H    t         j                  j                  dd      dk(  S )NTRITON_INTERPRET01)osenvironget     b/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/_internal_testing.pyis_interpreterr%      s    ::>>,c2c99r#   c                  |    t               ry t        j                  j                  j                  j                         S N)r%   tritonruntimedriveractiveget_current_targetr"   r#   r$   r,   r,      s*    >>  ''::<<r#   c                  <    t               } | dS | j                  dk(  S )NFcudar,   backendtargets    r$   is_cudar3   $   s"    !FN5@&(@@r#   c                  b    t               xr$ t        j                  j                         d   dk\  S )Nr      r3   torchr.   get_device_capabilityr"   r#   r$   is_ampere_or_newerr9   )   &    9C99;A>!CCr#   c                  b    t               xr$ t        j                  j                         d   dk(  S )Nr   
   r6   r"   r#   r$   is_blackwellr=   -   &    9D99;A>"DDr#   c                  b    t               xr$ t        j                  j                         d   dk\  S Nr   	   r6   r"   r#   r$   is_hopper_or_newerrB   1   r:   r#   c                  b    t               xr$ t        j                  j                         d   dk(  S r@   r6   r"   r#   r$   	is_hopperrD   5   r:   r#   c                  b    t               xr$ t        j                  j                         d   dk(  S )Nr      r6   r"   r#   r$   is_sm12xrG   9   r>   r#   c                  <    t               } | dS | j                  dk(  S )NFhipr/   r1   s    r$   is_hiprJ   =   "    !FN5?%(??r#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrI   gfx90ar,   r0   archr1   s    r$   is_hip_cdna2rP   B   1    !FU&..E"9UfkkX>UUr#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrI   gfx942rN   r1   s    r$   is_hip_cdna3rT   G   rQ   r#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrI   gfx950rN   r1   s    r$   is_hip_cdna4rW   L   rQ   r#   c                  `    t               } | d uxr | j                  dk(  xr d| j                  v S )NrI   gfx11rN   r1   s    r$   is_hip_gfx11rZ   Q   1    !FT&..E"9Tg>TTr#   c                  `    t               } | d uxr | j                  dk(  xr d| j                  v S )NrI   gfx12rN   r1   s    r$   is_hip_gfx12r^   V   r[   r#   c                  `    t               } | d uxr | j                  dk(  xr d| j                  v S )NrI   gfx1250rN   r1   s    r$   is_hip_gfx1250ra   [   s1    !FV&..E"9Vi6;;>VVr#   c                  F    t               xs t               xs
 t               S r'   )rP   rT   rW   r"   r#   r$   is_hip_cdnarc   `   s    >=\^=|~=r#   c                      t               rdS dS )Ni  i   )rW   r"   r#   r$   get_hip_lds_sizere   d   s    !^6..r#   c                  <    t               } | dS | j                  dk(  S )NFxpur/   r1   s    r$   is_xpurh   h   rK   r#   c                  H    t               } | dS t        | j                        S )N )r,   strrO   r1   s    r$   get_archrl   m   s"    !F25S%55r#   rsc                 F   t        | t              r| f} |t        d      }|t        t        z   v rt        j                  t        t
        |            }||j                  nt        ||j                        }||j                  nt        ||j                        }t        t
        |      }|j                  ||| |      }d||dk(  <   |S |r)d|v r%|j                  dd| t
        j                        }|S |t        v r"|j                  dd|       j                  |      S |d	k(  rV|j                  dd|       j                  d
      j                  d      t        j                   d      z  j                  d
      S |dv r|j                  dd|       dkD  S t#        d|       )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
       )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrm   lowhighr}   rq   xs           r$   numpy_randomr   r   s|   
 %		zb!J,,Y/0;eiiCUYY,? Luyyc$		.BI&JJsD%uJ5!q&		x9,JJr2uBGGJ4	l	"yyAu%,,Y77	j	 		!Q&--i8==hG"))T^J__eefopp	/	/yyAu%++^I;788r#   r   returnc                    | j                   j                  }|t        v r_|j                  d      }| j	                  t        t        |            }t        t        j                  ||      t        t        |            S |r3d|v r/t        t        j                  | |      t        t        |            S |dk(  r*|dk(  r%t        j                  | |      j                         S t        j                  | |      S )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicers   r   r   )rq   namer{   lstripr   r~   r|   r
   r7   tensortlr   )r   r   dst_typetsigned_type_namex_signeds         r$   	to_tritonr      s     	
AK88C=88GB(89:5<<@'"a.QQH,u||Af=wr8?TUU	>h*4<<&1::<<||Af--r#   c                 <    t        j                  t        |    d       S r'   )r   	str_to_tyr   r   s    r$   str_to_triton_dtyper      s    <<215t<<r#   c                 $   t        | t        j                  j                        r| j                  S t        | t
        j                        r0t        j                  dt        |             }|j                  d      S t        dt        |              )Nz^torch\.(\w+)$rr   znot a triton or torch dtype: )rx   r(   languagerq   r   r7   rematchrk   group	TypeErrortype)rq   ms     r$   torch_dtype_namer      sh    %../zz	E5;;	'HH&E
3wwqz7U}EFFr#   c                    t        | t              rX| j                  j                         j	                         j                  t        t        t        | j                                    S t        | t        j                        rf| j                  t        j                  u r,| j                         j                         j	                         S | j                         j	                         S t        d|        )Nz Not a triton-compatible tensor: )rx   r	   basecpunumpyr   r~   r|   r   rq   r7   Tensorr   float
ValueErrorr   s    r$   to_numpyr      s    !]#vvzz|!!#**727G7P+QRR	Au||	$77enn$557==?((**uuw}};A3?@@r#   c                 R   t               ryt               syt        j                  j                  j
                  }| rdnd}t        t        t        |j                  d                  }t        |      dk(  sJ |       t        j                  j                         d   dk\  xr ||k\  S )	NTF)rF   r   )rF      .   r   rA   )r%   r3   r   nvidiaptxasversiontuplemapry   splitlenr7   r.   r8   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tuples       r$   supports_tmar      s    9<<%%--L",w's3(:(:3(?@A!"a';);;'::++-a0A5`:LP`:``r#   c                  v    t               ryt               syt        j                  j	                         d   dk\  S )NTFr   rA   )r%   r3   r7   r.   r8   r"   r#   r$   supports_wsr      s0    9::++-a0A55r#   c                 
    | ryy)NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r"   )r   s    r$   tma_skip_msgr      s    f]r#   )reasonsizealignc                 N    t        j                  | t         j                  d      S )Nr.   )rq   r   )r7   emptyr   )r   r   _s      r$   default_alloc_fnr      s    ;;t5::f==r#   r   c                 z    t        | t        j                  j                  j                        r| j
                  S | S r'   )rx   r(   r)   jitr	   r   )r   s    r$   unwrap_tensorr      s*    !V^^''556vvHr#   skipped_attrc                 X  	 ddl m | 
t               } t        j                         j
                  j                         D ci c]1  \  }}t        |j                        r|j                  k7  r|| vr||3 c}}g j                  	fd}	fd}||fS c c}}w )Nr   r   c                     j                         D ]  \  } }t        | |j                         j                                |j                  j                         D ]W  }|j                  t        j                  v rj                  |j                  d       =j                  |j                         Y  d_        S )NF)raisingT)itemssetattrcopyresetknob_descriptorsvalueskeyr   r    delenvappendpropagate_env)r   knobsetknobenv_to_unsetr   	knobs_mapmonkeypatchs      r$   fresh_functionz)_fresh_knobs_impl.<locals>.fresh_function   s    &__. 	2MD'E4!5!5!7800779 288rzz)&&txx&? ''1	2	2 #r#   c                      j                         D ]  \  } }t        | |        j                          D ]&  }|t        j                  v st        j                  |= ( _        y r'   )r   r   undor   r    r   )r   r   kr   r   r   r   prev_propagate_envs      r$   reset_functionz)_fresh_knobs_impl.<locals>.reset_function  sf    &__. 	*MD'E4)	* 	 	"ABJJJJqM	" 1r#   )
r(   r   setpytestMonkeyPatch__dict__r   rx   
base_knobsr   )
r   r   r   r   r   r   r   r   r   r   s
        @@@@@r$   _fresh_knobs_implr      s    u$$&K #^^113D'gu//0W@P@P5PUYamUm 	gI L,,
	1 	1 >))Gs   	6B&)NNNr'   )F)Ir   r   r   r|   r7   r(   triton.languager   r   r   typingr   r   r   r   numpy.randomr   triton.runtime.jitr	   r
   r   rz   r{   integral_dtypesr   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedr   
tma_dtypesr%   r,   r3   r9   r=   rB   rD   rG   rJ   rP   rT   rW   rZ   r^   ra   rc   re   rh   rl   r   ndarrayr   r   rk   rq   r   r   r   r   r   r   markskipifrequires_tmary   r   r)   r   r   r   r"   r#   r$   <module>r      s   	 	      ' '  $ U U0
5{*0)ZL8 	<	', &6 x*$y0<?:,NC,-0NNO
:=A
DEDDE@
V
V
V
U
U
W
>/@
6
9x'< 9<. .u]ELL=X7Y .&=3 =288 =Gs GA	a6^ {{!!ln"4\^!L>3 >s >U5<<););)I)IIJ u|| +*HSX$6 +*r#   