
    -i1                       d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZ d d
lmZ ddlmZ  ddlm!Z"  e
d      Z#e G d d             Z$ G d d      Z% G d d      Z& ed       G d d             Z'd Z(d Z)d Z*d Z+d Z,d Z- ej\                  e,ej^                  g      Z0 ej\                  e,ejb                  g      Z2 ej\                  e-ejf                  g      Z4 G d  d!      Z5 G d" d#      Z6 e7       Z8 G d$ d%      Z9d>d&Z:d>d'Z;d>d(Z< G d) d*      Z= G d+ d,e=      Z> G d- d.e=      Z?d>d/Z@d>d0ZAd1 ZBd2 ZCd3 ZD e6       ZE eeE      ZFd4 ZGd5 ZH G d6 d7      ZI G d8 d9ej                        ZK G d: d;      ZL G d< d=ee#         ZMy)?    )annotationsN)TupleListDictCallableTypeVar)	dataclass)TritonSemantic)KernelInterface)TensorDescriptor   )InterpreterError)partial   )interpreter)irTc                  t    e Zd ZU dZded<   ded<    ej                  e      Zded<   d	 Z	d
 Z
d Zd Zd Zy)TensorHandlez
        data: numpy array
        dtype: triton type, either pointer_type or scalar_type.
        we don't store block_type here because the shape information is already available in the data field
        attr: a dictionary of attributes
    znp.arraydataztl.dtypedtype)default_factoryr   attrc                    t        | j                  | j                        sIt        d| j                  j                  dz   d| j                  j
                   d| j                         y )Nznumpy data itemsize (   z) bits) exceeds dtype primitive_bitwidth (z bits) for triton type )_validate_np_data_sizer   r   
ValueErroritemsizeprimitive_bitwidthselfs    d/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/runtime/interpreter.py__post_init__zTensorHandle.__post_init__&   sj    %dii<4TYY5G5G!5K4L M!!%!>!> ??VW[WaWaVbd e e =    c                H    t        | j                  j                               S N)boolr   allr    s    r"   __bool__zTensorHandle.__bool__+   s    DIIMMO$$r$   c                h    | j                   }t        |d      r|j                  }t        |d      r|S )N
element_ty)r   hasattrr+   )r!   r   s     r"   get_element_tyzTensorHandle.get_element_ty.   s1    

e\*$$E e\*r$   c                ^    t        | j                  j                         | j                        S r&   )r   r   copyr   r    s    r"   clonezTensorHandle.clone4   s    DIINN,djj99r$   c                "    || j                   |<   y r&   )r   )r!   keyvalues      r"   set_attrzTensorHandle.set_attr7   s    		#r$   N)__name__
__module____qualname____doc____annotations__dataclassesfielddictr   r#   r)   r-   r0   r4    r$   r"   r   r      sD     NO"""48D$8e
%:r$   r   c                      e Zd Zd Zd Zy)BlockPointerHandlec                X    || _         || _        || _        || _        || _        || _        y r&   )baseshapestridesoffsetsblock_shapeorder)r!   rA   rB   rC   rD   rE   rF   s          r"   __init__zBlockPointerHandle.__init__=   s-    	
&
r$   c                f   | j                   j                         }|j                  dz  }t        j                  | j                   j
                  | j                        }t        j                  | j                  t              }t        t        | j                              D ]  }dgt        | j                        z  }| j                  |   ||<   | j                  |   j
                  t        j                  | j                  |         z   j                  |      }|||z  | j                  |   j
                  z  j                  t        j                         z   }||v s||| j"                  |   j
                  k  z  |dk\  z  } t%        || j                   j&                  j(                        }||fS )Nr   r   r   r   )rA   r-   r   npbroadcast_tor   rE   onesr'   rangelenrD   arangereshaperC   astypeuint64rB   r   r   scalar)	r!   boundary_checkdtype_ttn_bytesptrsmasksdim
bcast_dimsoffs	            r"   materialize_pointersz'BlockPointerHandle.materialize_pointersE   sb   99++---2tyy~~t/?/?@((5T--./ 	JCs4#3#344J"..s3JsO<<$))BIId6F6Fs6K,LLUUV`aC7S=4<<+<+A+AAII"))TTDn$tzz#';';!;<qI	J D$))//"8"89U{r$   N)r5   r6   r7   rG   r\   r=   r$   r"   r?   r?   ;   s    r$   r?   c                  &    e Zd Z	 	 ddZd ZddZy)TensorDescHandlec                j    || _         t        |      | _        || _        || _        || _        || _        y r&   )rA   rN   ndimrB   rC   rE   padding)r!   rA   rB   rC   rE   ra   s         r"   rG   zTensorDescHandle.__init__W   s1    	J	
&r$   c                b   | j                   j                  j                         dz  dk(  sJ d       t        | j                        | j
                  k(  sJ t        | j                        | j
                  k(  sJ | j
                  dk\  sJ d       | j                   j                  j                  }|j                  dz  }| j                  d d D ].  }|j                  j                         |z  }|dz  dk(  r)J d        | j                  d   j                  j                         dk(  sJ d	       y )
N   r   zbase must be 16-byte alignedr   z"descriptor cannot be 0 dimensionalr   zstride must be 16-byte alignedzlast dim must be contiguous)
rA   r   itemrN   rC   r`   rE   r   r+   r   )r!   	scalar_tyr   stridebyte_strides        r"   validatezTensorDescHandle.validate`   s   yy~~""$r)Q.N0NN.4<< DII---4##$		111yyA~CCC~IIOO..	//14ll3B' 	KF ++**,x7K#q(J*JJ(	K ||B$$))+q0O2OO0r$   c                   t        |      | j                  k(  sJ | j                  j                  j                  }|j
                  dz  }|d   j                  |z  dz  dk(  sJ d       t        j                  | j                  j                  | j                        }t        j                  | j                  t              }t        t        | j                              D ]  }dgt        | j                        z  }| j                  |   ||<   ||   j                  t        j                  | j                  |         z   j                  |      }|||z  | j                  |   j                  z  j!                  t        j"                        z   }|d|k  z  || j$                  |   j                  k  z  } |j                  t        j"                  k(  sJ t'        || j                  j                  j(                        }||fS )Nr   rd   rc   r   z*block offset start must be 16-byte alignedrI   r   )rN   r`   rA   r   r+   r   r   rJ   rK   rE   rL   r'   rM   rO   rP   rC   rQ   rR   rB   r   rS   )	r!   rD   rf   r   rW   rX   rY   rZ   r[   s	            r"   r\   z%TensorDescHandle.materialize_pointersm   s   7|tyy(((IIOO..	//14  8+r1Q6d8dd6tyy~~t/?/?@((5T--./ 	FCs4#3#344J"..s3JsO3<$$ryy1A1A#1F'GGPPQ[\C8c>DLL,=,B,BBJJ299UUDQ#X&#

30D0D*DEE	F zzRYY&&&D$))//"8"89U{r$   N)rA   r   rB   List[TensorHandle]rC   rk   rE   	List[int])rD   rk   )r5   r6   r7   rG   ri   r\   r=   r$   r"   r^   r^   U   s    'Pr$   r^   T)frozenc                      e Zd ZU dZded<   dZded<   dZded<   dZd	ed
<   dZded<   dZ	ded<   dZ
d	ed<   dZded<   dZded<   dZd	ed<   y)InterpreterOptionsNr<   extern_libsFr'   debugTsanitize_overflowstrarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15z
Tuple[str]supported_fp8_dtypesr=   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r|   tf32x3ieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r5   r6   r7   rp   r9   rq   rr   rt   rz   r{   r}   r   r   r   r=   r$   r"   ro   ro      sl    KE4"t"D#'^*^46%z6'--/I *I)*!3*%L#%r$   ro   c                    t        |t        j                        ry| j                  dz  }|j                  }|dk  rd}||kD  ryy)NTr   F)
isinstancetlpointer_typer   r   )np_arraytl_dtypenp_dtype_bitwidthtl_dtype_bitwidths       r"   r   r      sO    (BOO, ))A- 33 1,,r$   c                   | t         j                  k(  rt         j                  S | t         j                  k(  rt         j                  S | t         j
                  k(  rt         j                  S | t         j                  k(  rt         j                  S | S r&   )	rJ   uint8int8uint16int16uint32int32rR   int64rI   s    r"   _get_signed_np_dtyper      s[    ww		xx		xx		xxLr$   c                `   t        | t        j                        r#t        j                  t        j
                        S i t        j                  t        j                  t              t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                  t        j                  t        j                        t        j                   t        j                  t        j                         t        j"                  t        j                  t        j"                        t        j
                  t        j                  t        j
                        t        j$                  t        j                  t        j                        t        j&                  t        j                  t        j                        t        j(                  t        j                  t        j                        t        j*                  t        j                  t        j                        t        j,                  t        j                  t        j                        t        j.                  t        j                  t        j                        i}t        | t        j0                        rVt        | j2                  t        j                        r#t        j                  t        j
                        S || j2                     S ||    S r&   )r   r   r   rJ   r   rR   int1r'   float16float32float64r   r   r   r   r   r   r   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer+   )tt_dtypenp_typess     r"   _get_np_dtyper      s~   (BOO,xx		""
$


BHHRZZ( 	

BHHRZZ( 	

BHHRZZ(	
 	"''" 	"((288$ 	"((288$ 			288BII& 	"((288$ 			288BII& 	"((288$ 			288BII& 	RXXbii(  	RXXbhh'!" 	*#$ 	rxx)%& 	rxx)'( 	*)H, (BMM*h))2??;88BII&&++,,Hr$   c                   t        t        d|j                         }t        t        d|j                         }t        j                  | j	                         |      }||j                  dz
  z	  dz  }|j                  |j
                  z
  dz
  }|j                  |j
                  z
  dz
  }	|d|j
                  z  dz
  z  }
|j                  }|j                  }||j
                  z	  d|z  dz
  z  j                  t        j                        }|dk(  }t        j                  |      rt        j                  |t        j                        }t        |j
                        D ]  }|
|z	  dz  }|j
                  |z
  ||dk(  <   ! |
dk(  }d||   z
  ||<   ||z
  |||z  <   |
|   ||   z  d|j
                  z  dz
  z  |
|<   t        j                  dt        j                  ||z
  |z   d|	z  dz
              }|j                  |      }|j                  |      }|j                  |j                  kD  r|
|j
                  |j
                  z
  z	  d|j
                  z  dz
  z  }|t        j                  j                   k(  r*|
d|j
                  |j
                  z
  dz
  z  z  }||dkD  z   }|j                  |      }n>|
j                  |      |j
                  |j
                  z
  z  d|j
                  z  dz
  z  }|dk(  }t        j                  |      r||j
                  z	  d|z  dz
  z  j                  t        j                        }|dk7  }||z  }t        j                  |t        j                        }d|z
  ||   |z
  z
  ||<   ||   ||   z	  d|j
                  ||   z
  z  z  ||<   ||j                  dz
  z  ||j
                  z  z  |z  }|j#                  | j$                        S )NuintrI   r   r   )getattrrJ   r   
frombuffertobytesfp_mantissa_widthexponent_biasrQ   r   any
zeros_likerM   maximumminimum_irROUNDING_MODERTNErP   rB   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputs                             r"   _convert_floatr      s+   rT+*H*H)I#JK tL,K,K+L%MNemmo5EFI+881<=ED&99K<Y<YY\]](;;l>\>\\_``[%B%B BaGHK**J,,Kk;;;FZAZ^_@_`hhikiqiqrH!mO	vvo
 --	:{445 	HA%*d2I&1&C&Ca&GGIN#	H "-!1$%(@$@!=G+=U'/9:(3O(DP_H`(`+///14(6O$ jjBJJ:0E0SWX\qWquvVv$wxO%,,-?@O++01K%%(G(GG)k.K.KlNlNl.lm,000A57C--222!Q;+H+H<KiKi+ilm+m%noG!3w{!C/667IJ)001CD+==@]@]]_#$(F(F#F!"KM &*O	vvo
 +"?"??QJ^E^bcDcdllmomumuv"*a-),CCirxx8"#k/h6OR\6\!]o/A//RV[\kVl/l,0053IIJ/L?+l==AB<999;=OPF>>%++&&r$   c                ,    t        j                  |       S r&   )matherfxs    r"   _erfr   
  s    88A;r$   c                6    t        |       t        |      z  dz	  S )N@   )r   )abs     r"   
_umulhi_64r     s     FSVO""r$   )otypesc                      e Zd Zed        Zy)ExtraFunctionsc                x    t        j                  |j                  j                  | j                  ||      |      S r&   )r   tensorbuildercreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding	_semantics       r"   _convert_custom_typesz$ExtraFunctions._convert_custom_types  s.    yy**::5<<Qefhnoor$   N)r5   r6   r7   staticmethodr   r=   r$   r"   r   r     s    p pr$   r   c                  t   e Zd Zej                  j
                  ej                  j
                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  iZ
ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                  ej                  j                   ej                  j                   ej                  j"                  ej                  j"                  ej                  j$                  ej                  j$                  ej                  j&                  ej                  j&                  ej                  j(                  ej                  j(                  ej                  j*                  ej                  j*                  ej                  j,                  ej                  j,                  i
ZddZd Zd Zd Zd Zd Zd Zd Zd	 Z d
 Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4d Z5d Z6d  Z7d! Z8d" Z9d# Z:d$ Z;d% Z<d& Z=d' Z>d( Z?d) Z@d* ZAd+ ZBd, ZCd- ZDd. ZEd/ ZFd0 ZGd1 ZHd2 ZId3 ZJd4 ZKd5 ZLd6 ZMd7 ZNd8 ZOd9 ZPd: ZQd; ZRd< ZSd= ZTd> ZUd? ZVd@ ZWdA ZXdB ZYdC ZZdD Z[dE Z\dF Z]dG Z^dH Z_dI Z`dJ ZadK ZbdL ZcdM ZddN ZedO ZfdP ZgdQ ZhdR ZidS ZjdT ZkdU ZldV ZmdW ZndX ZodY ZpdZ Zqd[ Zrd\ Zsd] Ztd^ Zud_ Zvd` Zwda Zxdb Zydc Zzdd Z{de Z|eKZ}eKZ~df Zdg Zdh Zdi Zdj Zdk Zdl Zdm Zdn Zdo Zdp Zdq Zdr Zds Zdt Zdu Zdv Zdw Zdx Zdy Zdz Zd{ Zd| Zd} Zd~ Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z	 d	 	 	 	 	 ddZddZddZddZ	 	 ddZd Zy)InterpreterBuilderc                    d | _         t               | _        i | _        t        j
                  | j                  d<   d | j                  d<   y )Nconvert_custom_typesc                     y)N)r   r   r   r=   )lhsTyperhsTypes     r"   <lambda>z-InterpreterBuilder.__init__.<locals>.<lambda>;  s    r$   min_dot_size)rt   ro   optionscodegen_fnsr   r   r    s    r"   rG   zInterpreterBuilder.__init__6  sB    	)+3A3W3W/0+M(r$   c                    || j                   d   k  st        d      || j                   d   k  st        d      || j                   d   k  st        d      |||f| _        y )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dimr   grid_idxr!   r   yzs       r"   set_grid_idxzInterpreterBuilder.set_grid_idx=  sf    4==##/004==##/004==##/00Aq	r$   c                    |||f| _         y r&   )r   )r!   nxnynzs       r"   set_grid_dimzInterpreterBuilder.set_grid_dimF  s    Rr$   c                "    t         j                  S r&   )r   r   r    s    r"   get_half_tyzInterpreterBuilder.get_half_tyK      zzr$   c                "    t         j                  S r&   )r   r   r    s    r"   get_bf16_tyzInterpreterBuilder.get_bf16_tyN      {{r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_float_tyzInterpreterBuilder.get_float_tyQ  r   r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_double_tyz InterpreterBuilder.get_double_tyT  r   r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_int1_tyzInterpreterBuilder.get_int1_tyW      wwr$   c                "    t         j                  S r&   )r   r   r    s    r"   get_int8_tyzInterpreterBuilder.get_int8_tyZ  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_uint8_tyzInterpreterBuilder.get_uint8_ty]      xxr$   c                "    t         j                  S r&   )r   r   r    s    r"   get_int16_tyzInterpreterBuilder.get_int16_ty`  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_uint16_tyz InterpreterBuilder.get_uint16_tyc      yyr$   c                "    t         j                  S r&   )r   r   r    s    r"   get_int32_tyzInterpreterBuilder.get_int32_tyf  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_uint32_tyz InterpreterBuilder.get_uint32_tyi  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_int64_tyzInterpreterBuilder.get_int64_tyl  r  r$   c                "    t         j                  S r&   )r   rR   r    s    r"   get_uint64_tyz InterpreterBuilder.get_uint64_tyo  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_fp8e4nv_tyz!InterpreterBuilder.get_fp8e4nv_tyr      }}r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_fp8e4b15_tyz"InterpreterBuilder.get_fp8e4b15_tyu      ~~r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_fp8e4b8_tyz!InterpreterBuilder.get_fp8e4b8_tyx  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_fp8e5_tyzInterpreterBuilder.get_fp8e5_ty{  r  r$   c                "    t         j                  S r&   )r   r   r    s    r"   get_fp8e5b16_tyz"InterpreterBuilder.get_fp8e5b16_ty~  r   r$   c                .    t        j                  ||      S r&   )r   r   )r!   elt_ty
addr_spaces      r"   
get_ptr_tyzInterpreterBuilder.get_ptr_ty  s    vz22r$   c                .    t        j                  ||      S r&   )r   r   )r!   r   rB   s      r"   get_block_tyzInterpreterBuilder.get_block_ty  s    }}UE**r$   c                ~    t        t        j                  |gt        j                        t        j
                        S NrI   )r   rJ   arraybool_r   r   r!   r3   s     r"   get_int1zInterpreterBuilder.get_int1  s$    BHHeWBHH=rwwGGr$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   	get_uint8zInterpreterBuilder.get_uint8  $    BHHeWBHH=rxxHHr$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   get_int8zInterpreterBuilder.get_int8  s$    BHHeWBGG<bggFFr$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   
get_uint16zInterpreterBuilder.get_uint16  $    BHHeWBII>		JJr$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   	get_int16zInterpreterBuilder.get_int16  r5  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   
get_uint32zInterpreterBuilder.get_uint32  r:  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   	get_int32zInterpreterBuilder.get_int32  r5  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  rR   r   r1  s     r"   
get_uint64zInterpreterBuilder.get_uint64  r:  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   	get_int64zInterpreterBuilder.get_int64  r5  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   get_fp16zInterpreterBuilder.get_fp16  $    BHHeWBJJ?LLr$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   get_fp32zInterpreterBuilder.get_fp32  rG  r$   c                ~    t        t        j                  |gt        j                        t        j                        S r.  )r   rJ   r/  r   r   r1  s     r"   get_fp64zInterpreterBuilder.get_fp64  rG  r$   c                X    t        t        j                  dgt        |            |      S Nr   rI   )r   rJ   r/  r   )r!   types     r"   get_null_valuez!InterpreterBuilder.get_null_value  s!    BHHaSd0CDdKKr$   c                    | j                   t        d      t        t        j                  | j                   |   gt        j
                        t        j
                        S )Nzgrid_idx is NonerI   )r   r   r   rJ   r/  r   r   r!   axiss     r"   create_get_program_idz(InterpreterBuilder.create_get_program_id  sD    == /00BHHdmmD&9%:"((KRXXVVr$   c                    t        t        j                  | j                  |   gt        j                        t
        j                        S r.  )r   rJ   r/  r   r   r   rQ  s     r"   create_get_num_programsz*InterpreterBuilder.create_get_num_programs  s.    BHHdmmD&9%:"((KRXXVVr$   c                    t        t        j                  |j                  t              t
        j                        }d }| j                  ||||||      S r.  )r   rJ   	ones_liker   r'   r   r   create_masked_load)r!   ptr_0_1is_volatilemaskothers          r"   create_loadzInterpreterBuilder.create_load  sA    BLL>H&&sD%RMMr$   c                    t        t        j                  |j                  t              t
        j                        }| j                  |||d d       S r.  )r   rJ   rW  r   r'   r   r   create_masked_store)r!   rY  valrZ  r[  r]  s         r"   create_storezInterpreterBuilder.create_store  s:    BLL>H''S$dCCr$   c                   |j                         }t        |      }|+t        t        j                  |j
                  |      |      }t        j                  |j
                  |j
                  |j
                  |      }	t        |	|      S r.  )r-   r   r   rJ   r   r   _interpreterload)
r!   rW   r]  r^  cache_modifiereviction_policyr\  rU   dtype_nprets
             r"   rX  z%InterpreterBuilder.create_masked_load  si    &&( *= tyy!I8TE		499ejj(KC**r$   c                l    t        j                  |j                  |j                  |j                        S r&   )re  storer   )r!   rW   r3   r]  rg  rh  s         r"   ra  z&InterpreterBuilder.create_masked_store  s#    !!$))UZZCCr$   c                   |j                   j                  }|j                  }|t        j                  k(  r|t        j                  k(  s&|t        j                  k(  rY|t        j                  k(  rFt        |j                  ||d       j                  t        |            }t        ||j                        S t        |j                  j                  t        |            |j                        S r&   )r   rS   r   r   r   r   r   viewr   r   rQ   )r!   srcdst_typesrc_element_typedst_element_typer   s         r"   	cast_implzInterpreterBuilder.cast_impl  s    99++#??+0@BJJ0N

*/?2;;/N!#((,<>NPTUZZ[hiq[rsDhoo66h0G H(//ZZr$   c                &    | j                  ||      S r&   rs  r!   ro  rp  s      r"   r   zInterpreterBuilder.<lambda>      $..h2O r$   c                &    | j                  ||      S r&   ru  rv  s      r"   r   zInterpreterBuilder.<lambda>  rw  r$   c                &    | j                  ||      S r&   ru  rv  s      r"   r   zInterpreterBuilder.<lambda>  rw  r$   c                &    | j                  ||      S r&   ru  rv  s      r"   r   zInterpreterBuilder.<lambda>  rw  r$   c                &    | j                  ||      S r&   ru  rv  s      r"   r   zInterpreterBuilder.<lambda>  s    sH0M r$   c                &    | j                  ||      S r&   ru  rv  s      r"   r   zInterpreterBuilder.<lambda>  rw  r$   c                &    | j                  ||      S r&   ru  )r!   ro  rp  	is_signeds       r"   r   zInterpreterBuilder.<lambda>  s    T^^CQY=Z r$   c                    |j                   j                  }|j                  }t        |j                  |||      j	                  t        |            }t        ||j                        S r&   )r   rS   r   r   rn  r   r   )r!   ro  rp  r   rq  rr  r   s          r"   r   z"InterpreterBuilder.create_fp_to_fp  sU    99++#??chh(8:JMZ__`mnv`wxD(//22r$   c                r    t        |j                  j                  t        |            |j                        S r&   )r   r   rn  r   rS   rv  s      r"   create_bitcastz!InterpreterBuilder.create_bitcast  s%    CHHMM-*ABHOOTTr$   c                     ||j                   |j                         }|j                  j                  }t        ||      s|j	                  t        |            }t        ||      S r&   r   r   rS   r   rQ   r   r   )r!   lhsrhsopr   r   s         r"   	binary_opzInterpreterBuilder.binary_op  sO    CHHchh'99##%fh7]]=#:;FFH--r$   c                D    | j                  ||t        j                        S r&   r  rJ   addr!   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    S"&&)I r$   c                D    | j                  ||t        j                        S r&   r  rJ   multiplyr  s      r"   r   zInterpreterBuilder.<lambda>      S"++)N r$   c                D    | j                  ||t        j                        S r&   r  rJ   divider  s      r"   r   zInterpreterBuilder.<lambda>  s    S")))L r$   c                D    | j                  ||t        j                        S r&   r  rJ   fmodr  s      r"   r   zInterpreterBuilder.<lambda>      S"'')J r$   c                D    | j                  ||t        j                        S r&   r  rJ   subtractr  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      sC(M r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    S"))1T r$   c                &    | j                  ||      S r&   create_idivr  s      r"   r   zInterpreterBuilder.<lambda>      )9)9#s)C r$   c                &    | j                  ||      S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  s    sC(H r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   )r  rJ   
left_shiftr  s      r"   r   zInterpreterBuilder.<lambda>   s    sC(O r$   c                D    | j                  ||t        j                        S r&   )r  rJ   right_shiftr  s      r"   r   zInterpreterBuilder.<lambda>  s    S"..)Q r$   c                D    | j                  ||t        j                        S r&   r  rJ   r   r  s      r"   r   zInterpreterBuilder.<lambda>      $..c2::*N r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      T^^Cbjj-Q r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RZZ,P r$   c                D    | j                  ||t        j                        S r&   r  rJ   r   r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>	  r  r$   c                D    | j                  ||t        j                        S r&   r  rJ   
less_equalr  s      r"   r   zInterpreterBuilder.<lambda>
      DNN3R]],S r$   c                D    | j                  ||t        j                        S r&   r  rJ   lessr  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RWW,M r$   c                D    | j                  ||t        j                        S r&   r  rJ   greater_equalr  s      r"   r   zInterpreterBuilder.<lambda>      DNN3REUEU,V r$   c                D    | j                  ||t        j                        S r&   r  rJ   greaterr  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  rJ   equalr  s      r"   r   zInterpreterBuilder.<lambda>  s    4>>#sBHH+M r$   c                D    | j                  ||t        j                        S r&   r  rJ   	not_equalr  s      r"   r   zInterpreterBuilder.<lambda>  s    4>>#sBLL+Q r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3RXX,N r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>      DNN3R\\,R r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   r  r  s      r"   r   zInterpreterBuilder.<lambda>  r  r$   c                D    | j                  ||t        j                        S r&   )r  rJ   bitwise_andr  s      r"   r   zInterpreterBuilder.<lambda>       sC(P r$   c                D    | j                  ||t        j                        S r&   )r  rJ   bitwise_xorr  s      r"   r   zInterpreterBuilder.<lambda>!  r  r$   c                D    | j                  ||t        j                        S r&   )r  rJ   
bitwise_orr  s      r"   r   zInterpreterBuilder.<lambda>"  s    t~~c3'N r$   c                    t        |j                  t        j                  |j                  |j                        z
  |j                  z  |j                  j
                        S r&   )r   r   rJ   r  r   rS   r  s      r"   r  zInterpreterBuilder.create_idiv&  sC     SXX#(((CCPRUR[R[RbRbccr$   c                @   t        |j                  j                        }t        |j                  j                        }|j                  j                  |      |_        |j                  j                  |      |_        | j	                  ||t
        j                        S r&   )r   r   r   rQ   r  rJ   r  )r!   r  r  	lhs_dtype	rhs_dtypes        r"   create_ashrzInterpreterBuilder.create_ashr,  se    (8	(8	88??9-88??9-~~c377r$   c                R   |j                   j                  }|t        j                  k(  s|t        j                  k(  r>t        t        |j                   |j                         |j                  j                        S t        t        d|j                  dz  dz         }|j                   j                  |      }|j                   j                  |      }t        j                  ||      |j                  dz  z	  }t        |j                  |      |j                  j                        S )Nr   r   r   )r   r   rJ   r   rR   r   np_umulhi_u64rS   r   r   rQ   r  )r!   r  r  r   compute_dtypelhs_datarhs_dataret_datas           r"   create_umulhiz InterpreterBuilder.create_umulhi4  s    BHH 2chh A399CSCSTT#B$u~~/AA/E.F(GHMxx}5Hxx}5H{{8X65>>A;MNH 6		8H8HIIr$   c                     ||j                   |j                   |j                         }|j                  j                  }t        ||      s|j	                  t        |            }t        ||      S r&   r  )r!   r  r  r^  r  r   r   s          r"   
ternary_opzInterpreterBuilder.ternary_op@  sU    CHHchh

3;;%%%fh7]]=#:;FFH--r$   c                F    | j                  |||t        j                        S r&   )r  rJ   clip)r!   arglohipropagate_nanss        r"   r   zInterpreterBuilder.<lambda>I  s    doocSUWY[][b[b>c r$   c                F    | j                  |||t        j                        S r&   )r  rJ   where)r!   condr  r  s       r"   r   zInterpreterBuilder.<lambda>J  s    sCQSQYQY1Z r$   c                    t        |j                  |j                  z  |j                  z   |j                  j                        S r&   r   r   r   rS   r   s       r"   
create_fmazInterpreterBuilder.create_fmaL  s,    AFFQVVOaff4aggnnEEr$   c                b    t         ||j                        |j                  j                        S r&   r  )r!   r  r  s      r"   unary_opzInterpreterBuilder.unary_opP  s!    BsxxL#))*:*:;;r$   c                2   |j                   }|j                  dz
  }t        t        d|j                         }|j                  j                  |      }d|z  dz
  }||z  j                  t        |            }t        ||j                   j                        S )Nr   r   )	r   r   r   rJ   r   rn  r   r   rS   )r!   r  rU   mask_bitwidthnp_uint_dtyper   r]  rj  s           r"   create_fabszInterpreterBuilder.create_fabsS  s    99 33a7d8+F+F*G$HIxx}}]+]"a'd{  x!89C!1!122r$   c                B    | j                  |t        j                        S r&   )r   rJ   cosr!   r  s     r"   r   zInterpreterBuilder.<lambda>]      4==bff#= r$   c                B    | j                  |t        j                        S r&   )r   rJ   expr  s     r"   r   zInterpreterBuilder.<lambda>^  r  r$   c                B    | j                  |t        j                        S r&   )r   rJ   exp2r  s     r"   r   zInterpreterBuilder.<lambda>_      DMM#rww$? r$   c                B    | j                  |t        j                        S r&   )r   rJ   absr  s     r"   r   zInterpreterBuilder.<lambda>`  s    DMM#rvv$> r$   c                B    | j                  |t        j                        S r&   )r   rJ   floorr  s     r"   r   zInterpreterBuilder.<lambda>a  s    T]]3%A r$   c                B    | j                  |t        j                        S r&   )r   rJ   ceilr  s     r"   r   zInterpreterBuilder.<lambda>b  r  r$   c                B    | j                  |t        j                        S r&   )r   rJ   logr  s     r"   r   zInterpreterBuilder.<lambda>c  r  r$   c                B    | j                  |t        j                        S r&   )r   rJ   log2r  s     r"   r   zInterpreterBuilder.<lambda>d  r  r$   c                B    | j                  |t        j                        S r&   r   rJ   sqrtr  s     r"   r   zInterpreterBuilder.<lambda>e  s    DMM#rww,G r$   c                B    | j                  |t        j                        S r&   r  r  s     r"   r   zInterpreterBuilder.<lambda>f  r  r$   c                B    | j                  |t        j                        S r&   )r   rJ   sinr  s     r"   r   zInterpreterBuilder.<lambda>g  r  r$   c                    |j                   j                  t        j                  k(  rt	        |j                         nt        |j                         }t        ||j                  j                        S r&   )r   r   rJ   r   np_erf_fp32np_erf_fp64r   rS   )r!   r  rj  s      r"   
create_erfzInterpreterBuilder.create_erfi  sH    '*xx~~'Ck#((#UXU]U]I^C!1!122r$   c                    t        dt        j                  |j                        z  |j                  j
                        S Nr   )r   rJ   r  r   r   rS   r  s     r"   create_rsqrtzInterpreterBuilder.create_rsqrtm  s+    A 113993C3CDDr$   c                t    t        |j                  j                  |      |j                  j                        S r&   )r   r   rP   r   rS   )r!   r  rB   allow_reorders       r"   r   zInterpreterBuilder.<lambda>q  s*    \#((JZJZ[`Jacfclclcscs=t r$   c                ~    t        t        j                  |j                  |      |j                  j
                        S r&   )r   rJ   	transposer   r   rS   )r!   r  perms      r"   create_transzInterpreterBuilder.create_transs  s(    BLL48#)):J:JKKr$   c                   |j                   }|j                   }|j                  j                  dk(  r|j                  j                         s3|j                  j                  dk(  r|j                  j                         rt	        ||j                  t
        j                  d       j                  t        j                        }t	        ||j                  t
        j                  d       j                  t        j                        }t        t        j                  |||j                   j                        |j                   z   |j                  j                        S )Nr   rI   )r   r   r   is_floatingr   r   r   rn  rJ   r   matmulrS   )r!   r   r   dinput_precisionmax_num_imprecise_acca_datab_datas           r"   
create_dotzInterpreterBuilder.create_dotv  s    GG&&!+0C0C0EGG&&!+0C0C0E#FAGGRZZFKKBJJWF#FAGGRZZFKKBJJWFBIIffAFFLLIAFFRTUT[T[TbTbccr$   c                ~    t        t        j                  ||t        j                        t        j                        S r.  )r   rJ   rO   r   r   )r!   ret_tystartstops       r"   create_make_rangez$InterpreterBuilder.create_make_range  s$    BIIeTBBHHMMr$   c                Z   |=t        t        j                  |j                  t              t
        j                        }t        j                  |j                  |j                  j                        }t        j                  |j                  |j                  t        j                  |j                              }t        j                  ||d|f|      d   }|dxx   t        j                  |j                        j                         z  cc<   t        |t
        j                        S )NrI   r   )binsrM   weights)r   rJ   rW  r   r'   r   r   r   r  r   	histogramlogical_notsumr   )r!   r   r:  r]  dummy_weightsr<  s         r"   create_histogramz#InterpreterBuilder.create_histogram  s    <TYYd CRWWMD TYYdiiooF xx		499bmmDII.FGLLDD	=YZ[\	!tyy15577Irxx00r$   c                    t        t        j                  |j                  |j                  |      |j                  j
                        S )NrR  )r   rJ   take_along_axisr   r   rS   )r!   ro  indicesrR  s       r"   create_gatherz InterpreterBuilder.create_gather  s3    B..sxxDQSVS\S\ScScddr$   c                    |j                         }|j                  }t        d|dz        }t        |j                  ||j                  j                  t        j                        z  z   |j                        S )Nr   r   )	r-   r   maxr   r   rQ   rJ   rR   r   )r!   rY  offsetrU   element_bitwidthelement_bytewidths         r"   create_addptrz InterpreterBuilder.create_addptr  se    %%'#66#3q#89CHH'86;;;M;Mbii;X'XXZ]ZcZcddr$   c                   |j                  |      \  }}|j                         }	t        |	      }
|d }n|t        j                  j
                  k(  r,t        t        j                  |j                  |
      |	      }na|t        j                  j                  k(  r6t        t        j                  |j                  t        d      |
      |	      }nt        d|       | j                  ||||||      S )NrI   nanzunsupported padding option )r\   r-   r   r   PADDING_OPTIONPAD_ZEROr   rJ   r   r   PAD_NAN	full_likefloatr   rX  )r!   rY  rT   padding_optionrg  rh  r\  rW   rX   rU   ri  r^  s               r"   create_tensor_pointer_loadz-InterpreterBuilder.create_tensor_pointer_load  s    ..~>e&&( *!Es11::: tyy!I8TEs11999 diiuX!VX`aE:>:JKLL&&tUE>?\ghhr$   c                T    |j                  |      \  }}| j                  |||||      S r&   r\   ra  )r!   rY  r3   rT   rg  rh  rW   rX   s           r"   create_tensor_pointer_storez.InterpreterBuilder.create_tensor_pointer_store  s/    ..~>e''eUNO\\r$   c                ~    t        t        j                  |j                  |      |j                  j
                        S r&   )r   rJ   expand_dimsr   r   rS   )r!   r  rR  s      r"   create_expand_dimsz%InterpreterBuilder.create_expand_dims  s(    BNN388T:CII<L<LMMr$   c                ~    t        t        j                  |j                  |      |j                  j
                        S r&   )r   rJ   rK   r   r   rS   )r!   r  rB   s      r"   create_broadcastz#InterpreterBuilder.create_broadcast  s(    BOOCHHe<cii>N>NOOr$   c                    t        t        j                  |j                  |j                  g      |j                  j
                        S r&   )r   rJ   concatenater   r   rS   r  s      r"   
create_catzInterpreterBuilder.create_cat  s/    BNNCHHchh+?@#))BRBRSSr$   c                    t        t        j                  |j                  |j                  gd      |j                  j
                        S )Nrd   rB  )r   rJ   stackr   r   rS   r  s      r"   create_joinzInterpreterBuilder.create_join  s1    BHHchh%9CSYYEUEUVVr$   c                    t        |j                  d   |j                  j                        t        |j                  d   |j                  j                        fS )N).r   ).r   r  )r!   rb  s     r"   create_splitzInterpreterBuilder.create_split  sE    SXXf-syy/?/?@,sxxX^O_adajajaqaqBrssr$   c           	        |j                   }t        |j                  t        j                        rVt        t        j                  ||j                  d   t        |j                              |j                  j                        S t        t        j                  ||j                  t        |j                              |j                  j                        S rM  )rB   r   r   r   r   r   rJ   fullr   r   rS   )r!   r5  r  rB   s       r"   create_splatzInterpreterBuilder.create_splat  s    cii/sxx{-PSPYPYBZ []`]f]f]m]mnnsxx}SYY?W XZ]ZcZcZjZjkkr$   c           	         t        t        j                  d|j                  d   t	        |j
                              |j
                  j                        S )Nr   r   rI   )r   rJ   rf  r   r   r   rS   r  s     r"   create_unsplatz!InterpreterBuilder.create_unsplat  s:    BGGE388A;mCII>VWY\YbYbYiYijjr$   c                    || j                   vrt        d|       | j                   |   }t        t        j                  |j
                  |j
                  |j
                  |      |j                  j                        S )Nunsupported semantic )ir_sem_to_interpreter_semr   r   re  
atomic_casr   r   rS   )r!   rY  cmprb  semscopes         r"   create_atomic_casz$InterpreterBuilder.create_atomic_cas  sk    d4444SE:;;,,S1L33CHHchhRUVX[XaXaXhXhiir$   c           	     X   || j                   vrt        d|       || j                  vrt        d|       | j                   |   }| j                  |   }t        t	        j
                  ||j                  |j                  |j                  |      |j                  j                        S )Nzunsupported rmwOp rl  )	ir_rmw_op_to_interpreter_rmw_opr   rm  r   re  
atomic_rmwr   r   rS   )r!   rmwOprY  rb  r]  rp  rq  s          r"   create_atomic_rmwz$InterpreterBuilder.create_atomic_rmw  s    <<<1%9::d4444SE:;;44U;,,S1L33E388SXXtyyZ]^`c`i`i`p`pqqr$   c                    t        d      )Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r!   libNamelibPathsymbolargListretTypeisPures          r"   create_extern_elementwisez,InterpreterBuilder.create_extern_elementwise  s    !"XYYr$   c                    t        d      )Nz,inline_asm not supported in interpreter modery  )r!   	inlineAsmconstraintsvaluesrN  r  packs          r"   create_inline_asmz$InterpreterBuilder.create_inline_asm  s    !"PQQr$   c                *   d| j                   d    d| j                   d    d| j                   d    d}|r|d| z  }|rt        j                  dd	 i
       |D ]  }t        |d|j                   z           |rt        j                  d 
       y y )N(r   z, r   r   ) r(   c                    d| dS )N0x02xr=   r   s    r"   r   z1InterpreterBuilder.create_print.<locals>.<lambda>  s    b3L r$   )	formatter)r   rJ   set_printoptionsprintr   )r!   prefixhexr  isSignedmsgr3   s          r"   create_printzInterpreterBuilder.create_print  s    
 $--"#2dmmA&6%7r$--:J9K1MQvh<C52H*IJ 	*E#!EJJ<(()	*$/ r$   c                    |sJ |        y r&   r=   )r!   	conditionmessages      r"   create_assertz InterpreterBuilder.create_assert  s    &WI&yr$   c                    |sJ d       y )NzAssume failedr=   )r!   r  s     r"   create_assumez InterpreterBuilder.create_assume  s    )/)yr$   c                     y r&   r=   r    s    r"   create_barrierz!InterpreterBuilder.create_barrier  s    r$   c                f    |D cg c]  }|j                          }}t        ||||||      S c c}w r&   )r0   r?   )	r!   rA   rB   rC   rD   rE   rF   rH  new_offsetss	            r"   create_make_block_ptrz(InterpreterBuilder.create_make_block_ptr  s6    4;<&v||~<<!$w[RWXX =s   .c                   t        |j                        t        |      k7  rt        d      |j                  D cg c]  }|j                          }}t	        |j
                  |j                  |j                  ||j                  |j                        }t        t        |            D ]1  }|j                  |   xj                  ||   j                  z  c_        3 |S c c}w )Nz len(ptr.offsets) != len(offsets))rN   rD   r   r0   r?   rA   rB   rC   rE   rF   rM   r   )r!   rY  rD   rH  r  rj  r   s          r"   create_advancez!InterpreterBuilder.create_advance  s    s{{s7|+?@@47KK@&v||~@@ 399ckk;PSP_P_adajajks7|$ 	3AKKN71:??2	3
	 As   C c                D    t        |||||      }|j                          |S r&   )r^   ri   )r!   rA   rB   rC   tensor_shaper~  ra   descs           r"   create_make_tensor_descriptorz0InterpreterBuilder.create_make_tensor_descriptor  s"    eWlGLr$   c                   t        |t              sJ |j                  |      \  }}|j                         }t	        |      }|j
                  }	|	t        j                  j                  k(  r,t        t        j                  |j                  |      |      }
na|	t        j                  j                  k(  r6t        t        j                  |j                  t        d      |      |      }
nt!        d|	       | j#                  |||
||d      S )NrI   rM  zunsupported padding F)rg  rh  r\  )r   r^   r\   r-   r   ra   r   rN  rO  r   rJ   r   r   rP  rQ  rR  r   rX  )r!   r  rD  rg  rh  rW   r]  rU   ri  ra   r^  s              r"   create_descriptor_loadz)InterpreterBuilder.create_descriptor_load  s    $ 0111..w7
d&&( *,,c((111 tyy!I8TE**222 diiuX!VX`aE3G9=>>&&tT57FTY ' [ 	[r$   c                T    |j                  |      \  }}| j                  |||d d       S r&   rV  )r!   r  r3   rD  rW   r]  s         r"   create_descriptor_storez*InterpreterBuilder.create_descriptor_store"  s/    ..w7
d''eT4FFr$   c                   |j                   j                  j                  }t        |      }t	        j
                  |j                  j                  d   |j                  d   g|      }d }d }	t        |j                        D ]F  \  }
}t        |t        j                        |g}| j                  ||||	      j                  ||
d d f<   H t        ||      S )Nr   rd   rI   )rA   r   r+   r   rJ   zerosr   rB   rE   	enumerater   r   r   r  )r!   r  	x_offsetsy_offsetrN  r   np_dtyperesultrg  rh  r   x_offsetrD  s                r"   create_descriptor_gatherz+InterpreterBuilder.create_descriptor_gather&  s    		** '9>>//2D4D4DR4HIQYZ$Y^^4 	lKAx#Hbhh7BG66tWnVefkkF1a4L	l FE**r$   c                    t        |j                        D ]W  \  }}t        |j                  |   |j                        }t        |t        j
                        |g}| j                  |||       Y y r&   )r  r   r   r   r   r   r  )	r!   r  r3   r  r  r   r  slicerD  s	            r"   create_descriptor_scatterz,InterpreterBuilder.create_descriptor_scatter1  s]    $Y^^4 	?KAx A<E#Hbhh7BG((ug>	?r$   c                &   t        |      }d|j                  v r,t        t        j                  dd|      |j
                        S |t        j                  k(  r,t        t        j                  dd|      |j
                        S t        d|       )Nr   r   rd   rI   Tzunsupported type )r   namer   rJ   rf  rS   r0  	TypeError)r!   rN  np_types      r"   get_all_ones_valuez%InterpreterBuilder.get_all_ones_value8  ss    %GLL 2W =t{{KK 4w ?MM/v677r$   NreturnNone)zero)rA   r   rB   rk   rC   rk   r  rl   r~  r'   ra   rs   )r  r^   rD  rk   )r  r^   r3   r   rD  rk   )r  r^   r  r   r  r   )r  r^   r3   r   r  r   r  r   )r5   r6   r7   r   MEM_SEMANTICACQUIREre  RELEASERELAXEDACQUIRE_RELEASErm  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGrt  rG   r   r   r   r   r  r  r  r
  r  r  r  r  r  r  r  r  r  r"  r$  r&  r*  r,  r2  r4  r7  r9  r<  r>  r@  rB  rD  rF  rI  rK  rO  rS  rU  r_  rc  rX  ra  rs  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r  r  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intr  r  r  r  create_clampfcreate_selectr  r   r  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr!  r$  create_reshaper*  r3  r8  r@  rE  rK  rT  rW  rZ  r\  r_  rb  rd  rg  rj  rr  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r=   r$   r"   r   r   !  s     ,";";"C"C  ,";";"C"C  ,";";"C"C((,*C*C*S*S	! 	<..22L//44<..22L//44<..22L//44<..22,--00<..22L//44'#N"%
3+HIGKIKIKIMMMLW
WN
D+D[ POOOOOOOMMOOZO3U. JKNKLKJKNKMJTCKCKJKJKHJMJOJQKNLNLQOPNNLNLQOPNSNMNVNPNSNMNVNPNMMQMMNPNSNVNNNRNMNPNSNVNNNRNPJPJNI&&d8	J. dMZMF<3 >J=J?K>KAL?K=J?KG?K=J3E uNLdN1$e
ei]NPTWtlkjrZR0'*Y
 `f4=JNY\[ G	+?,8?8r$   r   c                  (    e Zd ZdZddZddZddZy)_LangPatchScopez2Tracks patched attributes so they can be restored.c                    g | _         y r&   )_changesr    s    r"   rG   z_LangPatchScope.__init__H  s	    :<r$   c                |    t        ||t              }| j                  j                  |||f       t	        |||       y r&   )r   _MISSINGr  appendsetattr)r!   objr  r3   originals        r"   r4   z_LangPatchScope.set_attrK  s5    3h/c423T5!r$   c                    | j                   rN| j                   j                         \  }}}|t        u rt        ||       nt	        |||       | j                   rMy y r&   )r  popr  delattrr   )r!   r!  r  r"  s       r"   restorez_LangPatchScope.restoreP  sI    mm"&--"3"3"5Cx8#T"T8, mmr$   Nr  )r!  objectr  rs   r3   r'  r  r  )r5   r6   r7   r8   rG   r4   r&  r=   r$   r"   r  r  E  s    <="
-r$   r  c                R    t        |      |dfd
}|j                  | ||       y )N)memberc           
     v     | |i |j                         D ci c]  \  }}|dk7  r|| c}}diS c c}}w )Nr   items)r)  argskwargskvsemantics        r"   r   z_patch_attr.<locals>.<lambda>[  s[     :kMS\\^AVEIQDEDT BCA AV:k bj:k AVs   5)r
   r4   )r!  r  r)  r   rq  
new_memberr1  s         @r"   _patch_attrr3  Y  s*    g&H&, lJ 
NN3j)r$   c                    t        j                  |       D ]4  \  }}t        j                  j	                  |      s&t        | ||||       6 y r&   )inspect
getmembersr   core
is_builtinr3  )pkgr   rq  r  r)  s        r"   _patch_builtinr:  b  sB    **3/ ;f77f%T67E:;r$   c                    d d }|j                  | dd        |j                  | dfd       |j                  | dd        |j                  | d	d
        |j                  | dt        |             y )Nc                f    | j                   j                  }|j                  dk(  rt        |      S dS )Nr   T)r   r   sizer'   )r!   r   s     r"   	_get_boolz%_patch_lang_tensor.<locals>._get_boolj  s,    {{ "YY!^tDz55r$   c                   t        t        j                  | j                  j                        | j                  j
                        }| j                  j                         sJ t        | j                  j                        }|d   |d   c|d<   |d<   t        j                  j                  | j
                  |      }t        j                  j                  ||      S )Nrd   )r   rJ   r(  r   r   r   rN  is_blocklistrB   r   r7  r   r   )r!   r   rE   res_tys       r"   _get_transposez*_patch_lang_tensor.<locals>._get_transposep  s    bll4;;+;+;<dkk>O>OPyy!!###499??++6r?KO(BR##DJJ<ww~~ff--r$   	__index__c                @    t        | j                  j                        S r&   )r   r   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>x  s    S9I9I5J r$   r)   c                     |       S r&   r=   )r!   r>  s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>y  s    IdO r$   __repr__c                @    t        | j                  j                        S r&   )reprr   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>z  s    D9I9I4J r$   __str__c                @    t        | j                  j                        S r&   )rs   r   r   r    s    r"   r   z$_patch_lang_tensor.<locals>.<lambda>{  s    3t{{7G7G3H r$   r   )r4   property)r   rq  rD  r>  s      @r"   _patch_lang_tensorrN  h  sh    6. 
NN6;(JK	NN6:'CD	NN6:'JK	NN69&HI	NN63 89r$   c                  *    e Zd Zd Zd Zd Zd Zd Zy)ReduceScanOpInterfacec                     || _         || _        y r&   )rR  
combine_fn)r!   rR  rR  s      r"   rG   zReduceScanOpInterface.__init__  s    	$r$   c                H    | |t        |      k\  rt        d| d|       y y )Nzaxis z out of bounds for shape )rN   r   )r!   rB   rR  s      r"   
check_axisz ReduceScanOpInterface.check_axis  s4    E
 2uTF*CE7KLL !3r$   c                    |D ]c  }t        |t        j                  j                        st	        dt        |             | j                  |j                  | j                         e y )Nzinput must be a tensor, got )	r   r   r7  r   r   rN  rT  rB   rR  )r!   r   r  s      r"   check_tensorz"ReduceScanOpInterface.check_tensor  sP     	2Cc277>>2 #?S	{!KLLOOCIItyy1	2r$   c                Z   t        |      }t        |d      rG|j                  r;|j                  |      }t	        j
                  |t        |j                              }nt        j                  |g|      }|}t        j                  j                  t        ||j                        |      S )NrB   rI   )r   r,   rB   rQ   r   r   rB  rJ   r/  r7  r   r   rS   )r!   rj  r   r  ret_types        r"   	to_tensorzReduceScanOpInterface.to_tensor  sy     '3 SYY**X&C}}UDO<H((C51CHww~~l3=xHHr$   c                    t        |t              s| j                  |f      d   S | j                  |       | j	                  |      }t        |t
        t        f      rt        |      S |fS Nr   )r   tupleapplyrV  
apply_implrB  )r!   r   rj  s      r"   r]  zReduceScanOpInterface.apply  s\    %'::ui(++% ooe$'dE];uSzH#Hr$   N)r5   r6   r7   rG   rT  rV  rY  r]  r=   r$   r"   rP  rP    s    %M2IIr$   rP  c                  >     e Zd Z fdZd Zd ZddZd Zd Z xZ	S )	ReduceOpsc                4    t         |   ||       || _        y r&   )superrG   	keep_dims)r!   rR  rR  rc  	__class__s       r"   rG   zReduceOps.__init__  s    z*"r$   c                    g }|D ]e  }||j                  |       d}|j                  | j                  |j                  j                  j	                         |j
                               g t        |      |fS r[  )r  rY  r   r   flattenr   r\  )r!   r   rR  rj  r   s        r"   unravelzReduceOps.unravel  sn     	SD

4 

4>>$++*:*:*B*B*DdjjQR	S Sz4r$   c                     j                   } j                   j                         \  }g }g }d   j                  j                  j                  }|d| ||dz   d  z   }D ]k  }|j                  |j                  j                         |j                  t        j                  ||j                  j                  j                               m t        |d   j                        D ]Y  }	t        j                  |	|      d| |dz   d  z   t         fdt        |      D              }
|   dk(  rGt        t        |            D ]/  }|
|   j                  j                  j                         ||   <   1 t         fdt        |      D              }  j                   j"                  g ||
 }t%        |t              s|fn|}t        t        |            D ][  }t%        ||   t&        j(                  j*                        r'||   j                  j                  j                         n||   ||   <   ] \ g }t        |      D ]  \  }	} j,                  rI|t        j.                  ||      }nBt        t        |            D ]  }t        j.                  |d      } n||j                         }|j                   j1                  ||	   j                                |S )Nr   r   rI   c              3  h   K   | ])  \  }}j                  |   |   j                         + y wr&   rY  r   ).0iir.  r   input_indexr!   s      r"   	<genexpr>z+ReduceOps.generic_reduce.<locals>.<genexpr>  s/     sTYTVXYq~uRy Os   /2c              3  h   K   | ])  \  }}j                  |   |   j                         + y wr&   rj  )rk  oior   output_indexr!   s      r"   rn  z+ReduceOps.generic_reduce.<locals>.<genexpr>  s/     !wW\WY[\$..<%)//"R!wro  )rR  rg  r   r   rB   r  rJ   r  r   rM   r=  unravel_indexr\  r  rN   re   rR  fnr   r   r7  r   rc  rY  rY  )r!   r   original_axisrR  
input_dataoutput_datainput_shapeoutput_shaper  r   input_tuplej	acc_tuplecombine_fn_retrj  r   _rm  rs  s   ``               @@r"   generic_reducezReduceOps.generic_reduce  s   		ll5$))4t
Ahoo**00"1T*[-CC 	TCcjjoo.rxxCJJOO<Q<QRS	T z!}))* 	HA**1k:K&q.TAXY1GGLs]fgq]rssK4 A%s;/0 UA3>q>3H3H3M3M3R3R3TKN<0U "!w`iju`v!ww	!3!3!3!MY!M!M6@QV6W^.]k	s;/0 HAV`!!bggnnW69Q<3F3F3K3K3P3P3R;DQ<  N<0H	H"  - 	=GAt~~ ,>>$5D"3{#34 7!~~dA67 &yy{JJt~~dE!HNN;<	= 
r$   c                   t        |t              r|d   n|}d }d }|rM| j                   ||j                  j                  | j
                  | j                        |j                        }|rQ| j                   ||j                  j                  | j
                  | j                        t        j                        }||||fS ||S ||S t        d      )Nr   rR  keepdimsz-val_reduce_op and idx_reduce_op are both None)r   r\  rY  r   r   rR  rc  r   r   r   r   )r!   r   val_reduce_opidx_reduce_oprb  idxs         r"   min_maxzReduceOps.min_max  s    &ue4a%..u||/@/@tyy[_[i[i!jlqlwlwxC..u||/@/@tyy[_[i[i!jlnltltuC?s8O_J_JLMMr$   c                    | j                  t        j                  |j                  j                  | j
                  | j                        |j                        S )Nr  )rY  rJ   r>  r   r   rR  rc  r   r!   r   s     r"   r>  zReduceOps.sum  s<    ~~bffU\\%6%6TYYQUQ_Q_`bgbmbmnnr$   c                2   | j                   t        j                  j                  k(  r3| j	                  |d   t
        j                  t
        j                        S | j                   t        j                  j                  k(  r3| j	                  |d   t
        j                  t
        j                        S | j                   t        j                  j                  k(  r%| j	                  |d   t
        j                  d       S | j                   t        j                  j                  k(  r%| j	                  |d   t
        j                  d       S | j                   t        j                  j                  k(  r| j!                  |d         S | j#                  |      S )Nr   )r  r  )rR  r   standard_argmin_combine_tie_break_leftr  rJ   minargmin_argmax_combine_tie_break_leftrG  argmax_elementwise_maxnanmax_elementwise_minnanmin_sum_combiner>  r  r  s     r"   r^  zReduceOps.apply_impl  s   ??bkkHHH<<abii<XX__ J JJ<<abii<XX__ < <<<<a		QU<VV__ < <<<<a		QU<VV__ 8 8888E!H%% &&u--r$   r&   )
r5   r6   r7   rG   rg  r  r  r>  r^  __classcell__rd  s   @r"   r`  r`    s$    # )VN$o.r$   r`  c                  6     e Zd Z fdZd Zd Zd Zd Z xZS )ScanOpsc                4    t         |   ||       || _        y r&   )rb  rG   reverse)r!   rR  rR  r  rd  s       r"   rG   zScanOps.__init__  s    z*r$   c                    | j                  t        j                  |j                  j                  | j
                        |j                        gS NrB  rI   )rY  rJ   cumsumr   r   rR  r   r  s     r"   r  zScanOps.cumsum  s8    ryy):):KSXS^S^_``r$   c                    | j                  t        j                  |j                  j                  | j
                        |j                        gS r  )rY  rJ   cumprodr   r   rR  r   r  s     r"   r  zScanOps.cumprod
  s8    rzz%,,*;*;$))LTYT_T_`aar$   c           	         g }g }d   j                   j                  j                  }D ]k  }|j                  |j                   j                         |j                  t	        j
                  ||j                   j                  j                               m t        |d   j                        D ]|  }t	        j                  ||      t         fdt        |      D              } j                     dk(  rGt        t        |            D ]/  }||   j                   j                  j                         ||   <   1 t         fdt        t                    D              t         fdt        |      D              }	  j                  j                   g |	| }
t#        |
t              s|
fn|
}	t        t        |            D ][  }t#        |	|   t$        j&                  j(                        r'|	|   j                   j                  j                         n|	|   ||   <   ]  g }t        |      D ]3  \  }}|j                   j+                  ||   j                               5 |S )Nr   rI   c              3  h   K   | ])  \  }}j                  |   |   j                         + y wr&   rj  )rk  rl  r.  indexr   r!   s      r"   rn  z'ScanOps.generic_scan.<locals>.<genexpr>  s,     fur1%%)//Bfro  c              3  V   K   | ]   }|j                   k(  r|   d z
  n|    " yw)r   NrB  )rk  r   r  r!   s     r"   rn  z'ScanOps.generic_scan.<locals>.<genexpr>  s-     "kTU1		>58a<uQx#O"ks   &)c              3  h   K   | ])  \  }}j                  |   |   j                         + y wr&   rj  )rk  rq  rr  r   
prev_indexr!   s      r"   rn  z'ScanOps.generic_scan.<locals>.<genexpr>  s/     !uUZUWYZ$..:b	"P!uro  )r   r   rB   r  rJ   r  r   rM   r=  rt  r\  r  rR  rN   re   rR  ru  r   r   r7  r   rY  )r!   r   rw  rx  rB   r  r   r   r|  r}  r~  rj  r  r  s   ``          @@r"   generic_scanzScanOps.generic_scan  s!   
a$$** 	MCcjjoo.rxxSZZ__5J5JKL	M z!}))* 	HA$$Q.EfPYZdPeffDTYY1$s;/0 GA,0GNN,?,?,D,D,FKN5)G #"kY^_bch_iYj"kk
!!u^ghs^t!uu	!3!3!3!FY!F!F6@QV6W^.]k	s;/0 HAOY!!bggnnP6IaL,?,?,D,D,I,I,K;DQ<  N5)H	H"  - 	=GAtJJt~~dE!HNN;<	=
r$   c           	        g }| j                   rf|D ]`  }|j                  | j                  t        j                  |j
                  j                  | j                        |j                               b n|}| j                  t        j                  j                  k(  r| j                  |d         }nM| j                  t        j                  j                  k(  r| j                  |d         }n| j!                  |      }| j                   rK|D ]F  }t        j                  |j
                  j                  | j                        |j
                  _        H |S )NrB  r   )r  r  rY  rJ   flipr   r   rR  r   rR  r   r  r  r  _prod_combiner  r  )r!   r   	new_inputr  rj  s        r"   r^  zScanOps.apply_impl+  s    	<< f  

dii0XZ]ZcZc!def I??bkk666++il+C__ 9 99,,y|,C ##I.C<< K"$''#**//		"J

K
r$   )	r5   r6   r7   rG   r  r  r  r^  r  r  s   @r"   r  r    s    ab<r$   r  c                    dd}dd}| j                  t        d|       | j                  t        d|       | j                  t        j                  d|       | j                  t        j                  d|       y )Nc                :    t        |||      j                  |       S r&   )r`  r]  )r   rR  rR  rc  r.  s        r"   _new_reducez'_patch_reduce_scan.<locals>._new_reduceC  s    z95;;EBBr$   c                :    t        |||      j                  |       S r&   )r  r]  )r   rR  rR  r  r.  s        r"   	_new_scanz%_patch_reduce_scan.<locals>._new_scanF  s    tZ177>>r$   reduceassociative_scan)F)r4   r   r7  )rq  r  r  s      r"   _patch_reduce_scanr  ?  sY    C? 
NN2x-	NN2)95	NN277Hk2	NN277.	:r$   c                   d }dd}dd}d }|j                  | d|       |j                  | d|       |j                  | d|       |j                  | dt               |j                  | j                  d	|       |j                  | d
t        |d             |j                  | dt        |d             |j                  | dt        |d             t	        |       y )Nc                >   | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j	                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   d	k(  r|j                         S | j                   d
k(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j                         S | j                   dk(  r|j!                         S | j                   dk(  r|j#                         S t%        d|  d      )Nvoidr   r   r   r   r   r   r   r   rR   ru   rw   ry   fp16bf16fp32fp64zfail to convert z to ir type)r  get_void_tyr  r
  r  r  r  r  r  r  r  r$  r  r  r   r   r  r  r   )r!   r   s     r"   
_new_to_irz$_patch_lang_core.<locals>._new_to_irQ  s   99&&((YY& &&((YY& &&((YY'!''))YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY("((**YY'!''))YY)#))++YY*$**,,YY& &&((YY& &&((YY& ''))YY& ((**+D6=>>r$   c                :    |d}|d| }}n| |}}t        |||      S )Nr   r   )rM   )arg1arg2stepr.  r6  ends         r"   
_new_rangez$_patch_lang_core.<locals>._new_rangey  s2    <D<D3Et3EUC&&r$   c                    | sJ |       y r&   r=   )r  r  s     r"   _new_static_assertz,_patch_lang_core.<locals>._new_static_assert  s    Str$   c                   t        | t        j                        s| S t        |t        t        f      s|gn|}|D cg c]*  }t        |t        j
                        r|j                  n|, }}t        |      t        dt        | j                              k7  rt        d|       | j                  j                  ||       | S c c}w )Nr   z$len(values) != len(input.shape) for )r   r   r   rB  r\  	constexprr3   rN   rG  rB   r   r   r4   )r   r  r  r0  s       r"   	_set_attrz#_patch_lang_core.<locals>._set_attr  s    %+L!+FT5M!B&IOPAZ2<<8!''a?PPv;#aU[[!122CD6JKKdF+	 Qs   /CrM   static_rangestatic_assertstatic_printto_irmultiple_ofztt.divisibility)r  max_contiguousztt.contiguitymax_constancyztt.constancy)NN) )r4   r  r   r   r  )langrq  r  r  r  r  s         r"   _patch_lang_corer  O  s    $?P'
 
NN4*-	NN44	NN4*<=	NN4/	NN4::w
3	NN4	@Q(RS	NN4)79?+ST	NN4').*QRur$   c                J   t               }| j                  j                         D cg c]6  \  }}t        j                  |      s|t
        t
        j                  fv s5|8 }}}t        |      dk\  sJ d       |D ]t  }t        |t        |       t        |j                  t        |       |t
        k(  rt        |j                  t        |       t        |j                  |       t        ||       v t        t
        j                  j                  t        |       |S c c}}w )Nr   z:triton.language must be visible from within jit'd function)r  __globals__r,  r5  ismoduler   r7  rN   r:  interpreter_builderr   r   rN  r  tensor_descriptor_base)ru  rq  r  r3   langsr  s         r"   _patch_langr    s    E#%>>#7#7#9pxq%W=M=Me=TY^cegigngnboYoUpEpu:?XXX? &t0%8t{{$7?2:499&95A4;;.u%& 277113FNL qs   DDDc                X    t        | d      r t        |       | S  t        |       |      S )N_fields)r,   rN  )r  contentss     r"   _tuple_creater    s.     $+3	#:949hS	S	(@SSr$   c                   t        | t              rt        j                  t        j
                  j                  j                  |       d       }t        j                  }d| cxk  rdk  rn nt        j                  }nkd| cxk  rdk  rn nt        j                  }nLd| cxk  rdk  rn nt        j                  }n-d| cxk  rdk  rn nt        j                  }nt        d|        t        t        j                  | g|      |      }t        j                   ||      S t#        | d	      rt        j                  t        j
                  j                  j                  |       d       }t        t        j                  | j%                         gt        j                        |      }t        j                   ||      S t        | t&              rt)        | t+        t,        |             S t        | t.              r| j0                  D cg c]  }t-        |       }}| j0                  d
   dk(  sJ t        j2                  d      |d
<   t5        t7                     }|j9                  t-        | j:                        | j<                  D cg c]  }t-        |       c}|| j>                  D cg c]  }t        j2                  |       c}| j@                        S | S c c}w c c}w c c}w )Ni   l        l        l         l            l            zUnsupported integer value rI   data_ptrrd   r   )rA   rB   rC   rE   rS  )!r   r   r   	str_to_tytritonruntimejitmangle_typerJ   r   r   r   rR   r   r   r/  r   r,   r  r\  r  map_implicit_cvtr   rC   r  r
   r   make_tensor_descriptorrA   rB   rE   ra   )r  tyr   r   srC   r1  r   s           r"   r  r    s3   #s\\&..,,88=tDS 5 HHEc!E!IIEs"U"HHEc!E!IIE9#?@@bhhuE:B?yy$$sJ\\&..,,88=tDbhh'7ryyI2Nyy$$	C	S#mS"9::	C)	*-0[[9=#99{{2!###ll1o!"4"67..M#((4KPSPYPY5Z1mA6F5ZdkEH__<V@A =?LLO <Vfifqfq / s 	s J :
 6[<Vs   KK
)K$c                z    t        | t        j                  j                  j                        r| j
                  S | S r&   )r   r  r  r  TensorWrapperrA   )ts    r"   _unwrap_tensorr    s*    !V^^''556vvHr$   c                    t        |t        j                  j                  j                        r4t        j                  j                  j	                  | |j
                        S | S r&   )r   r  r  r  r  r   )r  original_tensors     r"   _rewrap_tensorr    sD    /6>>#5#5#C#CD~~!!//?3H3HIIHr$   c                  (    e Zd Zg fdZd Zd Zd Zy)GridExecutorc                   ddl m} || _        || _        || _        || _        |j                  j                         D ci c]  \  }}| ||       }}}|D cg c]  }|j                  |      dk(  s| c}| _	        y c c}}w c c}w )Nr   )_normalize_tyr  )
r  r  ru  	arg_namesgridpre_run_hooksr9   r,  get
constexprs)	r!   ru  r  r  r  r  r  r  r9   s	            r"   rG   zGridExecutor.__init__  s    &"	*CECUCUC[C[C]^xtR4r!22^^,5bD9L9LT9RVa9a4b _bs    BB4Bc                   	 i 		fd|D cg c]
  } |       }}i }|j                         D ]  \  }} |      ||<    ||fS c c}w )Nc                   t        | t              rt        | t        |             S t        | t              rGt	         | j
                        | j                  | j                  | j                  | j                        S t        | d      s| S t        |       }|j                         j                         vr1|j                         }|j                         |j                         <   |j                         j                            }|j                  dd      }|j!                  ||j#                         |j%                         |j'                                t)        ||       }|S )Nr  r   cpu)device)r  )r   r\  r  r  r   rA   rB   rC   rE   ra   r,   r  untyped_storager  r  	new_emptyset_storage_offsetr=  rg   r  )r  unwrapped_argstoragecpu_arg_to_cpustoragess       r"   r  z,GridExecutor._init_args_hst.<locals>._to_cpu  s-   #u%$S#gs*;<<C!12'CHH%IIKKOOKK  S*-
*3/M,,.779I'779/6{{}))+,}<<>GGIJG#--a->GLL-">">"@-BTBTBVXeXlXlXno$WcBGNr$   r+  )
r!   args_devr.  r  args_hst
kwargs_hstr2   r3   r  r  s
           @@r"   _init_args_hstzGridExecutor._init_args_hst  sc    	2 -55SGCL55 
 ,,. 	-JC%enJsO	-## 6s   A	c                   
 i 
fd
t        ||      D ]  \  }} 
||        |j                         D ]  \  }}||   }	 
||	        j                         D ]  \  }}|j                  |        y )Nc                   t        | d      rVt        |       t        |      }} | j                         |j                         f| j                         j                         <   y t	        | t
              rt        | |      D ]  \  } } | |        y t	        | t              r | j                  |j                         y y )Nr  )	r,   r  r  r  r   r\  zipr   rA   )arg_devarg_hst	_from_cpur  s     r"   r  z1GridExecutor._restore_args_dev.<locals>._from_cpu  s    w
+#1'#:N7<SBIBYBYB[]d]t]t]vAw002;;=>GU+*-gw*? 0&Wggw/0G%56',,5 7r$   )r  r,  r  copy_)r!   r  r  r.  r  r  r  r2   	kwarg_dev	kwarg_hstr  r  s             @@r"   _restore_args_devzGridExecutor._restore_args_dev  s    		6 !$Hh 7 	(GWgw'	( %lln 	,NC"3Ii+	, #+//"3 	#WgMM'"	#r$   c                   t        j                  | j                        }|j                         D ci c]  \  }}||j                  v s|| }}}| j                  ||      \  }}| j                  D ]
  } ||i |  t        | j                        }		 t        j                  | j                  g|i |}
|
j                         D ci c]!  \  }}||| j                  v r|n
t        |      # }
}}t        | j                        r| j                  |
      n| j                  }t        |      dk  sJ d       |ddt        |      z
  z  z   }t        j                  |  	 t!        |d         D ]Q  }t!        |d         D ]>  }t!        |d         D ]+  }t        j#                  |||        | j                  di |
 - @ S 	 |	j3                          | j5                  ||||       y c c}}w c c}}w # t$        $ r?}t&        j(                  j*                  j,                  r t/        t1        |            |d }~ww xY w# |	j3                          w xY w)N   z#grid must have at most 3 dimensionsri  r   r   r   r=   )r5  getfullargspecru  r,  r-  r	  r  r  getcallargsr  r  callabler  rN   r  r   rM   r   	Exceptionr  knobscompilationfront_end_debuggingr   rJ  r&  r  )r!   r  r.  argspecr/  r0  r  r  hookpatch_scoper-  r  r  r  r   r   r   es                     r"   __call__zGridExecutor.__call__1  s&    ((1#)<<>G41aQ',,5F!Q$GG#228VD*&& 	*D()j)	* "$''*	" &&twwHHZHDbfblblbnoU^UY[^D!8#mC>PPoDo&.tyy&9499T?tyyDt9>H#HH>%1s4y=11D,,d3	7tAw ,A"47^ ,!&tAw ,A/<<Q1E#DGGOdO,,, !x6:F? H p  7<<++??&tAw/Q67
 !sI   G+
G+6I &G12A0I #A"G7 1I 7	H? :H::H??I IN)r5   r6   r7   rG   r	  r  r!  r=   r$   r"   r  r    s    :< c"$H#2$Gr$   r  c                      e Zd Zd Zy)ASTTransformerc           	        g }|j                   D ]  }|| j                  |      gz  } t        |      dkD  rt        d      t	        j
                  t	        j                  t	        j                  dt	        j                               dt	        j                               |j                  t	        j                  d      gg 	      |_	        |S )
Nr   z&Multiple assignments are not supportedinterpreter_semantic)idctxrY  )r3   r   r'  F)r3   )funcr-  keywords)targetsvisitrN   r   astCall	AttributeNameLoadr3   Constant)r!   nodenamestargets       r"   visit_AssignzASTTransformer.visit_AssignZ  s    ll 	*Fdjj())E	*u:>EFF XXSXX1GSXXZ%X_j#&88:/6:jj#,,UZB[5\gik
 r$   N)r5   r6   r7   r5  r=   r$   r"   r#  r#  X  s    r$   r#  c                  D    e Zd Z e       Zd Zd Zd Zd Zd Z	d Z
d Zy)	FunctionRewriterc                <    || _         || _        d| _        d| _        y )Nr  r   )ru  r.  filenamedef_file_lineno)r!   ru  r.  s      r"   rG   zFunctionRewriter.__init__k  s    $%r$   c                J   	 t        j                  | j                        \  }}| j	                         \  | _        | _        | j                  |      | _        | j                  |      }| j                  |      }| j                  |      S # t        $ r | j                  cY S w xY wr&   )r5  getsourcelinesru  r  _get_jit_fn_file_liner9  r:  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r!   linesr  ro  transformed_asts        r"   rewrite_astzFunctionRewriter.rewrite_astr  s    	--dgg6HE1 /3.H.H.J+t+../""5)--c2%%o66  	77N	s   "B
 
B"!B"c                B    ddl m}m}  | || j                              S )Nr   )get_jit_fn_file_lineJITFunction)r  rG  rH  ru  )r!   rG  rH  s      r"   r=  z&FunctionRewriter._get_jit_fn_file_line  s    :#K$899r$   c                z    d}t        |      D ]*  \  }}|j                         j                  d      s&|dz   }, |S )Nr   zdef r   )r  strip
startswith)r!   rC  r?  r   lines        r"   r>  zFunctionRewriter._find_def  sD    
 ' 	#GAtzz|&&v.U
	# r$   c                r    || j                   dz
  d  }dj                  |      }t        j                  |      S )Nr   r  )r?  jointextwrapdedent)r!   rC  ro  s      r"   r@  z FunctionRewriter._prepare_source  s4    doo)*+ggens##r$   c                    t        j                  |      }| j                  j                  |      }t        j                  |       | j
                  dz
  }t        j                  ||       |S r#  )r,  parseast_transformerr+  fix_missing_locationsr:  increment_lineno)r!   ro  
parsed_astrD  
inc_linenos        r"   rA  zFunctionRewriter._transform_ast  s[     YYs^
..44Z@!!/2))A-
_j9r$   c                   t        || j                  d      }i | j                  }| j                  j                  }t               j                         D ]  \  }}||vs|||<    t        |||       || j                  j                     S )Nexec)r9  mode)	compiler9  r.  ru  r  globalsr,  rY  r5   )r!   rD  compiled_codelocal_namespace
fn_globalsr2   r3   s          r"   rB  z"FunctionRewriter._compile_and_exec  s    $--fU)T[[/WW((
!)//+ 	(JC*$"'
3	( 	]J8tww//00r$   N)r5   r6   r7   r#  rS  rG   rE  r=  r>  r@  rA  rB  r=   r$   r"   r7  r7  h  s-    $&O&7(:$
	1r$   r7  c                  L    e Zd ZU i Zded<   d
dZd Zd Zd Ze	d        Z d Z
y	)InterpretedFunctionzDict[Callable, Callable]rewritten_fnc                    || _         t        |fi || _        || _        g | _        t        j                  |      }|j                  j                         D cg c]  }|j                   c}| _
        y c c}w r&   )ru  r7  rewriterr.  r  r5  	signature
parametersr  r  r  )r!   ru  r.  re  r0  s        r"   rG   zInterpretedFunction.__init__  sa    (6v6%%b)	*3*>*>*E*E*GHQ!&&HHs   A4c               x    |ry | j                         } t        || j                  || j                        |i |S r&   )rewriter  r  r  )r!   r  warmupr-  r.  ru  s         r"   runzInterpretedFunction.run  s;    \\^I|Bd6H6HI4ZSYZZr$   c                T    t        |      sJ | j                  j                  |       y r&   )r  r  r  )r!   r  s     r"   add_pre_run_hookz$InterpretedFunction.add_pre_run_hook  s"    ~~!!$'r$   c                    | j                   | j                  vr1| j                  j                         | j                  | j                   <   | j                  | j                      S r&   )ru  rb  rd  rE  r    s    r"   rh  zInterpretedFunction.rewrite  sJ    77$+++)-)B)B)DDdgg&  ))r$   c                .    | j                   j                  S r&   )ru  r5   r    s    r"   r5   zInterpretedFunction.__name__  s    wwr$   c                    t        | j                         | j                         }	  ||i |S # t        $ r}t	        t        |            |d }~ww xY wr&   )r  ru  rh  r  r   rJ  )r!   r-  r.  ru  r   s        r"   r!  zInterpretedFunction.__call__  sO    DGG\\^	3t&v&& 	3"47+2	3s   / 	AAANr  )r5   r6   r7   rb  r9   rG   rj  rl  rh  rM  r!  r=   r$   r"   ra  ra    s<    -/L*/I[(*
    3r$   ra  )rq  r  )N
__future__r   r,  rO  r5  typingr   r   r   r   r   r   numpyrJ   r  triton.languagelanguager   r:   r	   triton.language.semanticr
   triton.runtime.jitr   triton.tools.tensor_descriptorr   errorsr   	functoolsr   _C.libtritonr   re  r   r   r   r   r?   r^   ro   r   r   r   r   r   r   	vectorizer   r  r   r   rR   r  r   r   r'  r  r  r3  r:  rN  rP  r`  r  r  r  r  r  r  r  r%  r  r  r  NodeTransformerr#  r7  ra  r=   r$   r"   <module>r}     s   " 
   7 7      ! 3 . ; $  6 $CL   @ 4( (V $
& 
& 
& 	@='@
# bll45bll45Z<p p^8 ^8B 8- -(*;:.I ID].% ].@;# ;|; K\TD )* %&9: mG mG`S((  B1 B1J'3/!, '3r$   