
    -i|              	         d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  ddl!m!Z! ddlm"Z" d	dl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) d dl*m+Z+m,Z, dZ-dZ. ed      Z/ G d dej`                        Z1d6dZ2 G d d      Z3d7dZ4 G d dee/         Z5d Z6d Z7d Z8 G d d       Z9e G d! d"             Z:d# Z;d$ Z< G d% d&e9e5e/         Z=ed8d'       Z>edddddddd(	 	 	 	 	 	 	 	 	 	 	 	 	 d9d)       Z>	 d:dddddddd(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d;d*Z> G d+ d,      Z? G d- d.      Z@d/ ZAd0 ZB G d1 d2e9      ZC G d3 d4e9      ZDd5 ZEy)<    )annotationsdivisionN)defaultdict)	dataclass)cached_property)	CallableGenericIterableOptionalTypeVaroverloadDictAnyTuple)BaseBackend)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictis_namedtuple)get_cache_key)get_cache_invalidating_env_varsnative_specialize_implztriton.languagez"triton.experimental.gluon.languageTc                  ~     e Zd ZdZd fdZed        Zd Zd ZddZ	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    c                    t         |           || _        t        j                  |j                  d            | _        || _        || _        h d| _	        t        t        ddh| _        i | _        d| _        y )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr2   r7   r8   src	__class__s        \/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/runtime/jit.pyr1   zDependenciesFinder.__init__.   st    	nnSZZ%89 "*
& 	"
" TV*/'    c                6    | j                   j                         S N)r6   	hexdigestr?   s    rB   retzDependenciesFinder.retY   s    {{$$&&rC   c                    t        j                  |j                        ryt        |dd      }|j	                  t
              S )NT
__module__ )inspect	isbuiltinfuncr,   
startswithr;   )r?   noderN   modules       rB   _is_triton_builtinz%DependenciesFinder._is_triton_builtin]   s6    TYY'|R0  //rC   c                >   t        |t              sJ | j                  j                         |j                  j                         z  D ]_  }|\  }}| j                  |   \  }}|j                  |   \  }}||k7  s2t	        d| d| d| j
                   d|j                   d| d       | j                  j                  |j                         |j                  }|t        t        |dd            z  }| j                  j                  |j                  d	             y )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr#   )r-   JITCallabler=   keysRuntimeErrorr2   __name__update	cache_keystrr,   r6   r5   )r?   rN   kvar_name_v1v2func_keys           rB   _update_hashzDependenciesFinder._update_hashc   s/   $,,, &&++-0E0E0J0J0LL 	AKHa))!,EB))!,EBRx"&xjB4?OPTPYPY{Zmnrn{n{m|  }T  UW  TX  XO  P 	 	$$T%:%:;>>Cj%8998??734rC   c                    ddl m} |t        |      t        u ry t	        |dd      r#|j
                  D ]  }| j                  |        y t	        |dd      ry t	        |dd      dk(  ry t        |t              r| j                  |       y t        |      r*t        |t              st        ||      st        d	|       | j                  ry |/t        j                  |      |f| j                  |t!        |      f<   y )
Nr   	constexpr__triton_aggregate__F__triton_builtin__rJ   rK   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corerf   typer   r,   
hash_attrsrecord_referencer-   rV   rc   callablerX   r>   r.   deepcopyr=   id)r?   valvar_dictr2   rf   attrs         rB   rl   z#DependenciesFinder.record_referenceu   s    - ;$s)z13.6 ,%%d+,3,e4 3b)-NNc;'c"C=C!6z#y?Y!B3%HII **;?==;Mx:XD!!4H"67rC   c                >    t        |j                        t        j                  u r|j                  S |j                   j
                  v ry  fd} ||j                        \  }}|j                   j                  v r|S  j                  |||j                         |S )Nc                    j                   j                  | d       }||j                   fS j                  j                  | d       }||j                  fS y)NNN)r7   getr8   )r2   rp   r?   s     rB   name_lookupz2DependenciesFinder.visit_Name.<locals>.name_lookup   sZ    ,,""4.CDLL((..$$T40CDNN**rC   )rj   ctxastStorero   local_namesr9   rl   )r?   rP   rw   rp   rq   s   `    rB   
visit_NamezDependenciesFinder.visit_Name   s    >SYY&77N77d&&&	 $DGG,X77d444Jc8TWW5
rC   c                ^    |j                   D cg c]  }| j                  |       c}S c c}w rE   )eltsvisit)r?   rP   elts      rB   visit_TuplezDependenciesFinder.visit_Tuple   s$     ,0995C

3555s   *c                f   | j                  |j                        }t        |t        j                        r6| j                  |j                        }t        |t        j                        r6t        |dd      }||| j                  v ry t        ||j                        }| j                  |       |S )NrY   rK   )	r   valuer-   ry   	Attributer,   r<   rr   rl   )r?   rP   lhslhs_namerH   s        rB   visit_Attributez"DependenciesFinder.visit_Attribute   s    jj$cmm,**SYY'C cmm,3
B/;(d&<&<<c499%c"
rC   c                    |j                   j                   D ch c]  }|j                   c}| _        | j                  |       y c c}w rE   )argsargr{   generic_visit)r?   rP   r   s      rB   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s4    /3yy~~>CGG>4  ?s   Ac                p     fd}t        j                  |j                  |j                  |j                  r|j                  gng |j
                        D ]  } j                  |         ||j                         |j                   j                  |j                          ||j                         y )Nc                    	 j                   rJ d_         | D ]  }|j                  |        	 d_         y # d_         w xY w)NTF)r>   r   )defaultsexprr?   s     rB   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sS    8::::26/$ )D'

4() 38/%/s   < < 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsr   kw_defaultskwargr   )r?   rP   r   r   s   `   rB   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuv 	CJJsO	 	t''(::!JJtzz"t}}%rC   c                    | j                  |      }t        |t              r| xj                  t	        |      z  c_        y | j                  j                  |       y rE   )r   r-   r(   r{   setadd)r?   rP   targets      rB   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sE     D!fd#F+  (rC   c                    t        |j                        dk7  rt        d      | j                  |j                  d          | j	                  |       y )Nr   z2Simultaneous multiple assignment is not supported.r   )r%   targets	TypeErrorr   r   r?   rP   s     rB   visit_AssignzDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 rC   c                \    | j                  |j                         | j                  |       y rE   r   r   r   r   s     rB   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 rC   c                \    | j                  |j                         | j                  |       y rE   r   r   s     rB   	visit_ForzDependenciesFinder.visit_For  r   rC   )returnNoneru   )rY   rJ   __qualname____doc__r1   propertyrH   rR   rc   rl   r|   r   r   r   r   r   r   r   r   __classcell__rA   s   @rB   r!   r!   "   s`    	)0V ' '05$%N06
	!
&@)!!!rC   r!   c                    dd l mc m} t        | t              r| j                         } | j                  d      r7| j                  d      } t        |       } | j                  d      sJ d| dd  z   S | j                  d      rdt        | d d       z   S | j                  d      rdt        | dd        z   S | j                  d      rt        | j                  d            S t        | |j                        rdt        | j                         S t        | |j                        r| j                  } n(t        | t              r| j                  } nt	        |       } t!        j"                  | j%                  d	d
      |       S )Nr   zconst const**kr   ztl._trK   )triton.language.corelanguagecorer-   r\   striprO   removeprefix_normalize_tyendswithpointer_type
element_tydtyper2   rj   rY   r   rv   replace)tyr   s     rB   r   r     s?   ''"cXXZ==")Br"B==%%%"QR&= ;;sr#2w///==r!"v...== !788	B))	*=/011	B

	#WW	B	[[W%))"**T2*>CCrC   c                      e Zd ZdZ	 	 ddZed        Zedd       Zedd       Zed        Z	ed        Z
ed        Zed	        Zy
)KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.c                <    || _         || _        || _        || _        y rE   )num_paramdo_not_specializedo_not_specialize_on_alignment)r?   r   paramr   r   s        rB   r1   zKernelParam.__init__.  s     !2.L+rC   c                .    | j                   j                  S rE   )r   r2   rG   s    rB   r2   zKernelParam.name5  s    {{rC   c                    | j                   j                  r1| j                   j                  t        j                  j                  k(  ryt        | j                   j                        S )NrK   )r   
annotationrL   	Parameteremptyr   rG   s    rB   r   zKernelParam.annotation9  sD    {{%%)?)?7CTCTCZCZ)ZT[[3344rC   c                    | j                   }|j                  d      r|dd  }n|j                  d      r|dd  }|t        t        j                               v r| j                   S y)Nr   r   r   r   rK   )r   rO   r   r   values)r?   as     rB   annotation_typezKernelParam.annotation_type?  s]    OO<<!"A\\#!"A.55788??"rC   c                    d| j                   v S Nrf   )r   rG   s    rB   is_constexprzKernelParam.is_constexprJ  s    doo--rC   c                r    | j                   ryd| j                  v xs | j                  j                  d      S )NFr   r   )r   r   rO   rG   s    rB   is_constzKernelParam.is_constN  s1    $//)MT__-G-G-MMrC   c                .    | j                   j                  S rE   )r   defaultrG   s    rB   r   zKernelParam.defaultT  s    {{"""rC   c                d    | j                   j                  t        j                  j                  k7  S rE   )r   r   rL   r   r   rG   s    rB   has_defaultzKernelParam.has_defaultX  s#    {{""g&7&7&=&===rC   N)r   r$   r   zinspect.Parameterr   boolr   r   r   r\   )rY   rJ   r   r   r1   r   r2   r   r   r   r   r   r   r    rC   rB   r   r   +  s    LM15M     5 5
   . . N N
 # # > >rC   r   c                6    d}d}t        t        | |||      d   S )NFTr   )r   r   )r   
specializer   aligns       rB   mangle_typer   ]  s%    HE!+sHj%PQRSSrC   c                  ,    e Zd ZU ded<   d Zd ZddZy)KernelInterfacer   runc               \     | j                   t        t        j                  |      |dd|S )NTgridwarmup)r   map
MockTensor
wrap_dtype)r?   r   r   kwargss       rB   r   zKernelInterface.warmupf  s*    txxZ5J5JD1QT$\U[\\rC   c                   t        d      )Nzrun not implemented)NotImplementedError)r?   r   r   r   r   s        rB   r   zKernelInterface.runi  s    !"788rC   c                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 .     j                   | dd|S )NFr   )r   )r   r   r   r?   s     rB   <lambda>z-KernelInterface.__getitem__.<locals>.<lambda>r  s    xtxx$T%'YRX'Y rC   r   )r?   r   s   ``rB   __getitem__zKernelInterface.__getitem__l  s     ZYrC   N)r   r   )rY   rJ   r   __annotations__r   r   r   r   rC   rB   r   r   c  s    	
F]9ZrC   r   c           
     F   |j                         D ci c]S  \  }}||j                  j                  dk(  rt        |      n(|j                  j                  dk(  rd|j                  in|U }}}dd l}| ||j                         D cg c]  }t        |       c}t        |j                               |j                         D cg c]  }t        |       c}t        |j                               |j                  d}	|j                  |	      }
|
S c c}}w c c}w c c}w )Nr   rf   r   )r2   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionskey)itemsrA   rY   r\   r   jsonrW   r(   r   __dict__dumps)r2   r   	constantsattrsr   r   r   r   xobjserialized_objs              rB   serialize_specialization_datar  v  s     $//+ C 	5??33w>SZ&+oo&>&>+&Mekk"SX	YI  9QZQ_Q_Qa?bAQ?bY %**,0OQa0O_cdidpdpdr_s##CC
 ZZ_N @c0Os   ADDDc                    t        | j                        t        |      k(  sJ g }t        | j                  j                         |      D ]  \  }}|j                  r|j                  d| d       )|j                  rdnd}|j                  rdnd}|j                  rdnd}d| d| d| d| d	}	|j                  rt        |j                  t              r"|j                  dk(  s|j                  dd	 d
v rd}|r#|j                  d|j                   d|	 d       |j                  d|j                   d       |j                  |	        	 d }
ddj                  t        t        |
| j                  j                                     dgz          ddj                  | j                  j                         D cg c]
  }d| d|  c}       ddj                  |       d}| j                  j                         D ci c];  \  }}|j                   t"        j$                  j&                  urd| |j                   = }}}t(        }||d<   ||d<   t*        |d<   t-        ||       |d   S c c}w c c}}w )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(backend, , u1Nr   )fpbfFz("z",) + z[1:]z", None)c                x    | d   j                   t        j                  j                  u r| d   S | d    d| d    S )Nr   r   z	=default_)r   rL   r   r   )r  s    rB   r   z0create_function_from_signature.<locals>.<lambda>  sA    AaDLLG,=,=,C,CCAaD AaD6QZ[\]^[_Z`Ia rC   z
def dynamic_func(z	**optionsz):
    params = {'z': z}
    specialization = [,z-]
    return params, specialization, options
default_specialize_implbackendrV   dynamic_func)r%   
parametersziprW   r   appendr   r   r   r   r-   r\   joinr(   r   r   r   rL   r   r   r   rV   exec)sigkparamsr  specializationr2   kpr   r   r   rH   r   	func_bodyr   func_namespacer  s                  rB   create_function_from_signaturer"    s    s~~#g,...N++-w7 0b??!!N4&":;!#v'H$&$8$8fJ!@@GfE-dV2hZr*RPUwVWXC!!b00#6))T1R5G5G5K|5[%*
"))Br/A/A.B&T*RS #))Br/A/A.B(*KL%%/'0, bC))DS#..*>*>*@!ABk]RST U		3>>;N;N;PQ4QtfCv.QRS Txx/0 1I >>//1D%== 1 1 7 77 4&5==(N  -O(7N$% 'N9$/N=! 	N# .))+ Rs    I5A I:c                8    | j                    d| j                   S )N.)rJ   r   fns    rB   get_full_namer'    s    mm_Aboo.//rC   c                  l    e Zd Zd Zd Zedd       Zd Zd Zed        Z	d Z
d Zd	 Z eee
      Zy)rV   c                   || _         t        j                  |      | _        	 t        j                  |      \  | _        | _        t        |      | _	        t        j                         | _        t        j                  dj                  | j                              }|t!        j"                  d|t         j$                        j'                         d  }|| _        d | _        i | _        |j.                  | _        |j0                  | _        |j2                  | _        |j4                  | _        |j6                  | _        y # t        $ r}t        d      |d }~ww xY w)Nz1@jit functions should be defined in a Python filerK   z^def\s+\w+\s*\()r&  rL   r   getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorr'  _fn_name	threadingRLock
_hash_locktextwrapdedentr  research	MULTILINEstart_srchashr=   r   rY   r   __globals__rJ   )r?   r&  er@   s       rB   r1   zJITCallable.__init__  s    **2.	Y6=6L6LR6P3DL$3 &b)#//+ oobggdll34")).R\\BHHJKL		 TV zzOO>>--7  	YPQWXX	Ys   "D= =	EEEc                n    | j                   t        j                  | j                        j                  z  S rE   )r;  rL   getclosurevarsr&  r8   rG   s    rB   get_capture_scopezJITCallable.get_capture_scope  s(    '"8"8"A"K"KKKrC   c                   | j                   5  | j                  | j                  cd d d        S d| j                   | _        t        j                  | j
                        j                  }t        | j                  | j                  || j                        }|j                  | j                                |j                  t        | j                        z   | _        t        t!        |j"                  j%                                     | _        ddlm} | xj                  t        | j"                  j%                         D cg c]  \  \  }}\  }}t+        ||      r||f c}}}      z  c_        t-        j.                  | j                  j1                  d            j3                         | _        d d d        | j                  S c c}}}w # 1 sw Y   | j                  S xY w)Nz
recursion:)r2   r7   r8   r@   r   re   r#   )r2  r:  r/  rL   r>  r&  r8   r!   r;  r@   r   parserH   r\   r,  dictsortedr=   r   r   rf   r-   r3   r4   r5   rF   )r?   r8   dependencies_finderrf   r2   r_   rp   s          rB   r[   zJITCallable.cache_key  s    __ 	Nyy$yy	N 	N
 %T]]O4DI..tww7AAI"4$--QUQaQamv9=#C%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!6II9=9N9N9T9T9V= ="5)4Xc1!+C!;  $Sk = > >I  tyy'7'7'@AKKMDI#	N$ yy	=	N$ yys$   GDG G
%AG
GG%c                ,    t        | j                        S rE   )r:  r[   rG   s    rB   __hash__zJITCallable.__hash__  s    DNN##rC   c                   t        j                  | j                        }t        |t         j                        sJ t        |j                        dk(  sJ t        |j                  d   t         j                        sJ |S )Nr   r   )ry   rA  r9  r-   Moduler%   bodyFunctionDef)r?   trees     rB   rA  zJITCallable.parse  s_    yy#$

+++499~"""$))A,888rC   c                    ddl m}  ||       S )Nr   )constexpr_type)r   rM  )r?   rM  s     rB   rj   zJITCallable.type  s    7d##rC   c                     d| _         || _        y)a"  
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

        Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
        N)r:  r9  )r?   new_srcs     rB   _unsafe_update_srczJITCallable._unsafe_update_src  s     		rC   c                    t        d      )NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrG   s    rB   _set_srczJITCallable._set_src!  s     ( ) 	)rC   c                    | j                   S rE   )r9  rG   s    rB   _get_srczJITCallable._get_src&  s    yyrC   )fgetfsetNr   )rY   rJ   r   r1   r?  r   r[   rF  rA  rj   rP  rS  rU  r@   r   rC   rB   rV   rV     s]     (DL  ,$ $ $)
 x
0CrC   rV   c                  ,    e Zd ZU ded<   ded<   ded<   y)JitFunctionInfor   rQ   r\   r2   JITFunctionjit_functionN)rY   rJ   r   r   r   rC   rB   rY  rY  ,  s    
IrC   rY  c                    t        |      t        |      f}| j                  |d       }||S fdt         |            t        |      z   }|| |<   |S )Nc                F   t        | t              r| D cg c]
  } |       c}S t        |       r$| D cg c]
  } |       }} | j                  | S t        | t              rt	        fd| D              S t        | t
              r| j                  S | S c c}w c c}w )Nc              3  .   K   | ]  } |        y wrE   r   ).0r   replace_callabless     rB   	<genexpr>z?compute_cache_key.<locals>.replace_callables.<locals>.<genexpr>A  s     ?C*3/?s   )r-   r(   r   rA   tuplerV   r[   )r  r   resultsr`  s      rB   r`  z,compute_cache_key.<locals>.replace_callables:  s    c4 69:s%c*::39<=#(-=G= 3=='**U#?3???[)== 
 ;=s
   BB)rb  r\   rv   )kernel_key_cacher  r   r   r[   r`  s        @rB   compute_cache_keyre  3  se     #g,
/C $$S$/I
 %n56WEI%SrC   c                ~    t        | t              s| S t        |       D ]  \  }}t        |      | |<    t	        |       S rE   )r-   r(   	enumerateconvert_to_tuple_if_listrb  )iteminested_values      rB   rh  rh  K  sD    dD! %T? 9<*<8Q9 ;rC   c                  l     e Zd Zd Z	 	 ddZd Zd Zd Zd Zd Z		 	 d fd	Z
d	 Zd
 Zd Zd Z xZS )rZ  c                     y)NFr   rG   s    rB   is_gluonzJITFunction.is_gluonY  s    rC   c	                   |sy | j                   j                  }	| j                   j                  }
dj                  t	        | j
                  |d         D cg c]  \  }}|j                   d|  c}}      }|	 d|j                   d|j                   d|j                   d|j                   d|j                   d	| d
}t        | j                         }t        ||||d   ||      }||||j                  |j                  |j                  |j                  |j                  |j                  |||d} |||t        |
|	|       d|i||d      S c c}}w )Nr  r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r	  r   )r   devicer  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr&  compileis_manual_warmupalready_compiled)r&  r   rJ   r  r  paramsr2   rr  rs  rt  ru  rv  r'  r  rw  rY  )r?   hookr   r   rq  r  r   rx  rz  r2   rQ   r   r   	arg_reprsr{  	full_namery  r   s                     rB   
_call_hookzJITFunction._call_hook\  s    ww####IIc$++WZ[\W]F^_%**Rt4_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  a{  |C  |[  |[  {\  \^  _h  ^i  ij  k!$''*	;IyR[]def]gipruv #" **((!,, ' 8 8'.'F'F"..#6"
 vtT2C*6*&"
 	
+ `s   E
c                T    t        |      sJ | j                  j                  |       y)z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)rm   pre_run_hooksr  )r?   r  s     rB   add_pre_run_hookzJITFunction.add_pre_run_hook  s$    
 ~~!!$'rC   c                    ddl m}m}m}m} t
        j                  j                         } ||      }|| _        || _        || _        t        | j                  | j                  |      }i i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelr|  	ASTSourcemake_backend)compilerr  r|  r  r  r   activeget_current_targetr"  r   r  )r?   r  r|  r  r  r   r  binders           rB   create_binderzJITFunction.create_binder  sd     	PO113v&,"/WU2vw..rC   c           
        |j                  |      }| j                  D cg c]  }|j                   }}|D cg c]  }|d   	 }}t        ||      D 	
ci c]  \  }	}
|	|

 }}	}
d|vsJ d       d|vsJ d       d|vsJ d       |D ]#  }	|	|j                  vs|	|vst        d|	z         t        |d	       }|D ci c]&  }|t        t        |j                               |      ( }}|D cg c]  }|d
   	 }}t        |d       }|D 	ci c]  }	|	|j                  t        ||	              }}	||||fS c c}w c c}w c c}
}	w c c}w c c}w c c}	w )Nr   device_typez=device_type option is deprecated; current target will be usedrq  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    |dk(  S r   r   )r_   rp   s     rB   r   z(JITFunction._pack_args.<locals>.<lambda>  s    3+;M rC   r   c                "    t        |t              S rE   )r-   r\   )r_   r  s     rB   r   z(JITFunction._pack_args.<locals>.<lambda>  s    Z35G rC   )parse_optionsr  r2   r  r   KeyErrorr   r   r(   r   
parse_attr)r?   r  r   
bound_argsr  r   r  sigkeyssigvalsr]   vr   
constexprspathattrvalsr  s                   rB   
_pack_argszJITFunction._pack_args  s   ''/#';;/a166//!/0A1Q400(+GW(=>fq!QT>	>F*k,kk*v%a'aa%v%a'aa% 	YA(((Qg-=SVWWXX	Y #7,MN
[efSWd-d:3D3D3F.GNNf
f"01QAaD11h(GHPUV1G&&'81'EFFVV	:u44% 00> g1Vs"   D;E E +E1E#Ec                  |j                  d| j                        xs t        j                  j                  |d<   t        j                  j
                  |d<   t        j                  j                         }t        j                  j                  |      }| j                  D ]
  } ||i |  | j                  |   \  }}	}
}} ||i |\  }}}t        |	||      }|j                  |d       }|4| j                  |||||      \  }}}}| j                  |||||||      }|y t               }| j                   j#                         D ]6  \  \  }}\  }}|j                  ||      x}|k7  s$t%        d| d| d|        |s|J t'        |      r ||      }t)        |      }|d   }|dkD  r|d   nd}|dkD  r|d   nd}t+        |d	      r|j-                         } |j.                  ||g|j1                          } |j2                  |||||j4                  |j6                  |t        j                  j8                  t        j                  j:                  g	|j1                           |S )
Ndebuginstrumentation_moderT   z1 has changed since we compiled this kernel, from z to r   r   r   result)rv   r  r   runtimecompilationr  r   r  get_current_deviceget_current_streamr  device_cachesre  r  _do_compileobjectr=   r   rX   rm   r%   hasattrr  launch_metadatar   r   functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) r?   r   r   r   r   rq  r  r  kernel_cacherd  r   r  r  r  r  r   r   kernelr   r  r  not_presentr2   r_   rp   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s                                    rB   r   zJITFunction.run  s    **Wdjj9PU]]=P=Pw).):):)O)O%& 11311&9 && 	"D$!&!	" CGBTBTU[B\?& /5d.Ef.E+
NG 0.'J!!#t, >48OOGVU_aoDK5M1GY
E %%c9fj'SXZ`aF~ h.2.C.C.I.I.K 	q*IT1*\&**4==#E"&tf,]^a]bbfgmfnoq q	q
 ###~J'D	I!WF )AT!W1F )AT!W1Fvx(4f44T6XJDUDUDWXOFJJvvvvvH^H^`o}}668V8VnYcYjYjYlnrC   c                T    | j                   | j                  S | j                  |      S rE   )_reprr/  )r?   r_   s     rB   r{  zJITFunction.repr  s"     $

 2t}}E

1ErC   c	           	        |r|ng }|r|ng }t         |   |       |j                  | _        || _        || _        || _        || _        || _        g | _	        t        | j                  j                  j                               D ]T  \  }	}
|	|v xs |
j                  |v }|	|v xs |
j                  |v }| j                  j                  t!        |	|
||             V t#        | j$                        | _        d | _        || _        || _        | j                  D cg c]  }|j                   c}| _        | j                  D cg c]  }|j0                  s|j2                   c}| _        g | _        y c c}w c c}w rE   )r0   r1   rJ   rQ   versionr   r   r  r  r  rg  r   r  r   r2   r  r   r   r  r  r  r  rU   	arg_namesr   r   r  r  )r?   r&  r  r   r   r  rU   r{  r  rj  r   dnsdns_oaprA   s                 rB   r1   zJITFunction.__init__  sW   1B-Ki)Goq&mm!2.L+
.!$..";";"B"B"DE 	CHAu((KEJJ:K,KC88hEJJJh<hFKK{1eS&AB	C )););< 
  +/++6Q!&&6*.++HQ155H  	 7Hs   E2E7E7c           	        dd l }dd lm} t        j                  j                         }|j                  |      }|d   | j                  k7  rt        d|d    d| j                         t        t        |d         }|d   }t        ||      D 	ci c]\  \  }}	||j                  j                  |	      r|j                  |	      n)t        |	t              rd|	v r|j!                  |	d         n|	^ }
}}	t        t        |d         }|d	   }t        t        ||            }|d
   j#                         D 	ci c]  \  }}	|t%        |	       }}}	|d   j#                         D 	ci c]#  \  }}	|t        |	t&              rt        |	      n|	% }}}	|d   }| j(                  |   \  }}}}}|j+                  |      }| j-                  ||||
||d      S c c}	}w c c}	}w c c}	}w )Nr   r2   zSpecialization data is for z but trying to preload for r   r   rf   r   r   r   r   r   T)r   )r   triton.languager   r   r  r  loadsr/  rX   r   rb  r  r   is_dtyper-   rB  rf   r   rh  r(   r  r  r  )r?   ry  r   tlrq  deserialized_objr   r   r   r   r  r   r   r  r   r   r_   r  s                     rB   preloadzJITFunction.preload  s   $113::&9:F#t}}4-.>v.F-GGbcgcpcpbqrt tE#3O#DE(9
 "-?	
 U !xx007BHHUO0:5$0GK[`L`BLL{+,fkl

 
  0 >?
%l3
SZ01 M]]hLiLoLoLqrjc5S2599r	r /y9??A
U E4!8ueC
 
 u%"0081a!''0   
 	
'
 s
s   A!G8G'(G!c           
     B     j                      \  }}	} j                  t        j                  j                  g      ry  j                         t        j                  j                         }
|
Ct               t        |	      } fd} f	d}|
j                  |||      }|S  j                  j                        }|<    j                  t        j                  j                  g       |S )Nc                 B    j                  j                         S )N)r   r   	_env_vars)r|  r   )env_varsr   r?   r@   r   s   rB   async_compilez.JITFunction._do_compile.<locals>.async_compileG  s!    ||C@P@P\d|eerC   c           
     r   	 | <   j                  t        j                  j                  g	       y rE   )r  r   r  jit_post_compile_hook)
r  r  r  rq  r  r   r   r?   r   r   s
    rB   finalize_compilez1JITFunction._do_compile.<locals>.finalize_compileJ  s9    $*S! C CS)U[]gip!&1rC   )r   r   )r  r  r   r  jit_cache_hookr  r   active_moderv   r   r   submitr|  r   r  )r?   r   r   rq  r  r   r  r   r_   r  
async_moder[   r  r  r  r  r  r@   r   s   ````````       @@@@rB   r  zJITFunction._do_compile:  s    .2.@.@.H+a!??5==77iQ[]dglfmouvnnT9j%@#//335
!68H%c7GXFIf f1 1
  &&y-AQRF 	 \\#fg>N>N\OF &LOOEMM??iQWYcelotnu"$rC   c                    t        d      )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rX   r?   r   r   s      rB   __call__zJITFunction.__call__W  s    WXXrC   c                P    d| j                    d| j                  j                   dS )NzJITFunction(:r	  )rQ   r&  r   rG   s    rB   __repr__zJITFunction.__repr__Z  s&    dkk]!DGG,@,@+ACCrC   )r   zbool | None)NNNNNNN)rY   rJ   r   rn  r  r  r  r  r   r{  r1   r  r  r  r  r   r   s   @rB   rZ  rZ  W  sZ    ,
 
,
\(/503jF mq;?" H%
N:YDrC   rZ  c                     y rE   r   r%  s    rB   jitr  c  s    rC   r  r{  r  r   r   r  rU   c                     y rE   r   r  s          rB   r  r  h  s     rC   c               @    dfd}|  ||       S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    c           
         t        |       sJ t        j                  j                  rddlm}  ||       S t        |       S )Nr   )InterpretedFunction)r  r   r   r  rU   r{  r  )rm   r   r  	interpretinterpreterr  rZ  )	r&  r  r  r   r   r  rU   r{  r  s	     rB   	decoratorzjit.<locals>.decorator  sj    ||==""8&r7N_Fdlq08tUdf f "3/M! /	 	rC   r&  r   r   zJITFunction[T]r   )	r&  r  r{  r  r   r   r  rU   r  s	    ``````` rB   r  r  v  s&    : & 
~} rC   c                  N    e Zd ZdZed        ZddZd Zed        Zed        Z	y)	r   zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                l    | j                   j                  dk(  r| j                  dk(  rt        |       S | S )Nr   torch)rA   rY   rJ   r   )r   s    rB   r   zMockTensor.wrap_dtype  s.    ==!!W,71Jc?"
rC   Nc                *    |dg}|| _         || _        y )Nr   )r   shape)r?   r   r  s      rB   r1   zMockTensor.__init__  s    =CE

rC   c                    dg}| j                   dd  D ]  }|j                  |d   |z          t        t        |            S )Nr   r   )r  r  rb  reversed)r?   stridessizes      rB   stridezMockTensor.stride  sG    #JJqrN 	/DNN72;-.	/Xg&''rC   c                      yNr   r   r   rC   rB   data_ptrzMockTensor.data_ptr      rC   c                      yr  r   r   rC   rB   	ptr_rangezMockTensor.ptr_range  r  rC   rE   )
rY   rJ   r   r   staticmethodr   r1   r  r  r  r   rC   rB   r   r     sM    
  
(    rC   r   c                  J    e Zd Zd Zd Zd ZddZd Zd Zd Z	d Z
d	 Zd
 Zy)TensorWrapperc                    || _         || _        |j                  | _        |j                  | _        | j                  j                  | _        y rE   )r   basedatarq  r  )r?   r  r   s      rB   r1   zTensorWrapper.__init__  s5    
	II	kkYY__
rC   c                6    | j                   j                         S rE   )r  r  rG   s    rB   r  zTensorWrapper.data_ptr  s    yy!!##rC   c                4     | j                   j                  | S rE   )r  r  )r?   r   s     rB   r  zTensorWrapper.stride  s    tyy&&rC   c                <    d| j                    d| j                   dS )NzTensorWrapper[rp  r	  )r   r  rG   s    rB   __str__zTensorWrapper.__str__  s    

|2dii[::rC   c                6    | j                   j                         S rE   )r  element_sizerG   s    rB   r  zTensorWrapper.element_size  s    yy%%''rC   c                ^    t        | j                  j                         | j                        S rE   )r  r  cpur   rG   s    rB   r  zTensorWrapper.cpu  s    TYY]]_djj99rC   c                N    | j                   j                  |j                          y rE   )r  copy_)r?   others     rB   r  zTensorWrapper.copy_  s    		

#rC   c                ^    t        | j                  j                         | j                        S rE   )r  r  cloner   rG   s    rB   r  zTensorWrapper.clone  s    TYY__.

;;rC   c                `    t        | j                  j                  |      | j                        S rE   )r  r  tor   )r?   rq  s     rB   r
  zTensorWrapper.to  s     TYY\\&14::>>rC   c                `    t        | j                  j                  |      | j                        S rE   )r  r  	new_emptyr   )r?   sizess     rB   r  zTensorWrapper.new_empty  s"    TYY007DDrC   Nr   )rY   rJ   r   r1   r  r  r  r  r  r  r  r
  r  r   rC   rB   r  r    s5    %$';(:$<?ErC   r  c                    t        | t              r;|| j                  j                  k(  r| j                  S t        | j                  |      S t	        | d      rt        | |      S t        dt        |        d      )Nr  zCannot reinterpret a r$  )r-   r  r  r   r  r   rj   )tensorr   s     rB   reinterpretr    sk    &-(FKK%%%;; !e44		$VU++/V~Q?@@rC   c                L   | }t        |t              s|j                  }t        |t              s|j                  j                  j                  }|j
                  }t        |j                        D ].  \  }}|j                         j                  d      s&||z  } ||fS  ||fS )Nzdef )
r-   rV   r&  __code__co_filenamer,  rg  r+  r   rO   )r&  base_fn	file_name
begin_lineidxlines         rB   get_jit_fn_file_liner    s    G+.** +.

##//I--J w/ 	T::<""6*#Jj  	 j  rC   c                  (    e Zd Zd Zed        Zd Zy)BoundConstexprFunctionc                     || _         || _        y rE   )__self____func__)r?   instancer&  s      rB   r1   zBoundConstexprFunction.__init__  s     rC   c                .    | j                   j                  S rE   )r  r[   rG   s    rB   r[   z BoundConstexprFunction.cache_key  s    }}&&&rC   c                B     | j                   | j                  g|i |S rE   )r  r  r  s      rB   r  zBoundConstexprFunction.__call__#  s!    t}}T]]<T<V<<rC   N)rY   rJ   r   r1   r   r[   r  r   rC   rB   r  r    s      ' '=rC   r  c                  0     e Zd Z fdZd ZdddZ xZS )ConstexprFunctionc                $    t         |   |       y rE   )r0   r1   )r?   r&  rA   s     rB   r1   zConstexprFunction.__init__)  s    rC   c                "    |t        ||       S | S rE   )r  )r?   r  objclasss      rB   __get__zConstexprFunction.__get__,  s    ?)#t44rC   N)	_semanticc                  ddl m}m} |D cg c]
  } ||       }}|j                         D ci c]  \  }}| ||       }}} | j                  |i |}	||	S t
        j                  j                  r|	S  ||	      S c c}w c c}}w )Nr   )_unwrap_if_constexprrf   )r   r*  rf   r   r&  r   r  r  )
r?   r(  r   r   r*  rf   r  r]   r  ress
             rB   r  zConstexprFunction.__call__2  s    H156A$Q'66;A<<>J!Q!)!,,JJ dggt&v&J ==""J~ 7Js
   BB)rY   rJ   r   r1   r'  r  r   r   s   @rB   r#  r#  '  s     )-  rC   r#  c                    t        |       S )z
    Wraps an arbitrary Python function so that it can be called at
    compile-time on constexpr arguments in a Triton function and
    returns a constexpr result.
    )r#  r%  s    rB   constexpr_functionr-  E  s     R  rC   r   )Fr  )r{  Optional[Callable]r  r.  r   Optional[Iterable[int | str]]r   r/  r  Optional[bool]rU   r0  r   zCallable[[T], JITFunction[T]]rE   )r&  zOptional[T]r{  r.  r  r.  r   r/  r   r/  r  r0  rU   r0  r   zKernelInterface[T])F
__future__r   r   ry   r.   r3   rL   r   r0  r5  r3  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   triton.backendsr   typesr   rK   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r   r;   r:   r   NodeVisitorr!   r   r   r   r   r  r"  r'  rV   rY  re  rh  rZ  r  r   r  r  r  r  r#  r-  r   rC   rB   <module>r<     s0   , 
      	  # ! % ] ] ] '     ` `   W!3CLg! g!^D4/> />dTZgaj Z&"9*x0b1 b1J   0	DD+q1 DDX 
 
 
 #*.7;DH #
 
 (	

 5
 %B
 
 
 #
 

 4 #*.7;DH #44 	4
 (4 54 %B4 4 4 4x B"E "EJA!$=[ = <!rC   