
    -in                         d dl mZmZmZmZmZ d dlZd dlmZ ddl	m
Z ddlmZmZmZmZmZmZ d dlmZmZ d dlmZmZ  ed	      Zefd
edeg ef   fdZd ZddZde_          G d d      Z! G d dee         Z"y)    )SequenceListTypeVarTupleCallableN)TritonSemantic   )_core)
AutoLayoutDistributedLayoutDistributedLinearLayoutSliceLayoutSharedLayoutCoalescedLayout)GluonOpBuildercompute_tmem_reg_layout)flatten_values_to_irunflatten_ir_valuesTensorTycondmsg_fnc                 "    | s | |             y N )r   r   categorys      v/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/triton/experimental/gluon/language/_semantic.py_checkr      s    vx       c                 J    t        | t              xr t        d | D              S )Nc              3   <   K   | ]  }t        |t                y wr   
isinstanceint).0is     r   	<genexpr>z_is_int_list.<locals>.<genexpr>   s     .Qaz!S/A.Q   )r"   r   allvalues    r   _is_int_listr+      s    eX&Q3.Q5.Q+QQr   c                 @   t        t        t              d        t        dv fd       t        t        t              fd       t        dk\  xr dz
  z  dk(  d        t	              t        t        d	 D              fd
       t              }t        |dk(  d        |g }dk(  }|rdn|r |D ]  }t        t        |      |k(  d         t        | ||      t        d ufd       |rd   }	j                  sWt        j                  d   d|	dz  gk(  fd       j                  j                  d|	dz  g       ddgj                  d<   S j                  d   d|	dz  gk7  r| j                  dt        j                        z  }
t        |
dz  kD  fd       j                  }dD ]B  }t        |      }t        |      D ]&  \  }}|d|	dz  gk(  s||   |d   c|d<   ||<   c c S  D J d|        S )Nc                       y)Nzinstr_variant must be a stringr   r   r   r   <lambda>z*_compute_tmem_reg_layout.<locals>.<lambda>       r   )32x32b16x64b16x128b16x256b16x32bx232x32b_splitnc                      d  S )Nzunknown instr_variant: r   )instr_variants   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    ,]O< r   c                       dt               S )Nz!num_warps must be an int but got type	num_warpss   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    1RSWXaSbRe/f r      r	   r   c                       y)Nz)num_warps must be a power of two and >= 4r   r   r   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>   r/   r   c              3   <   K   | ]  }t        |t                y wr   r!   )r$   dims     r   r&   z+_compute_tmem_reg_layout.<locals>.<genexpr>   s     5z#s#5r'   c                      d  S )Nz#shape entries must be ints but got r   shapes   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>   s    Adejdk?l r      c                       y)Nzexpected a 2D tensorr   r   r   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>   r/   r   r5   r0   c                       y)Nzcga_layout basis rank mismatchr   r   r   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>(   r/   r   c                      d  d d S )NzTMEM layout 'z' unsupported for shape z and num_warps r   )atom_variantr<   rC   s   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>3   s    ]<.0H_h^ij r   c                      d  S )NzJsplitn with 1 register requires the last lane basis to be [0, N / 2]. Got r   )
layout_objs   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>;   s    ghrgst r       c                      dd z   d  dS )NzETo be able to `tmem.load` into `tl.split` you need to have more than rL    z-bit registers, as you need to use the instruction 32x32b.x1 twice. You can always load into instr_variant="32x32b" and then convert_layout to this layout otherwise.r   )bitwidths   r   r.   z*_compute_tmem_reg_layout.<locals>.<lambda>B   s&     3h'q
 3]3] r   )
lane_bases
warp_basesz6splitn requires at least one basis of [0, N / 2]. Got )r   r"   strr#   listr(   lenr   	reg_basesrP   appendprimitive_bitwidthgetattr	enumerate)
element_tyrC   layoutr<   r7   
cga_layoutranksplitnbasisNnum_regrU   	bases_strbasesr%   rH   rO   rK   s    ` ``          @@@r   _compute_tmem_reg_layoutrd      sj   
:mS)+ST
=cc<>
:i%'fg
9>@yIM:q@BuvKE
35u557lmu:D
41945
o-F%8=L 	QE3u:%'OP	Q )J :T!jl !H## :((,AF;tv  ''AF4)*AJ!!"%$ # !!"%!Q!V4!44HZ1122G".( +]^ #,,I9 *	
I6 )% 0 *HAuAF+27(IbM/	"uQx))** \RSYRZ[[5r   Tc                   $    e Zd ZdefdZd Zd Zy)GluonCallerContextr<   c                     || _         y r   r;   )selfr<   s     r   __init__zGluonCallerContext.__init__W   s	    "r   c                      d| j                    S )N_NWr;   rh   s    r   manglezGluonCallerContext.mangleZ   s    T^^$%%r   c                 Z    |j                  d|j                  | j                               y )Nzttg.num-warps)set_attrget_int32_attrr<   )rh   fnbuilders      r   initialize_calleez$GluonCallerContext.initialize_callee]   s    
OW%;%;DNN%KLr   N)__name__
__module____qualname__r#   ri   rm   rs   r   r   r   rf   rf   U   s    ## #&Mr   rf   c            
       p    e Zd ZU ej                  ZeZeed<   defdZd Z	d Z
dee   dee   fdZded	ed
efdZdeded
ef fdZded
eeef   f fdZdedee   d
ef fdZdedee   d
efdZdeded
ef fdZ fdZdedee   def fdZd Zd Zd>dZd Zd Zd  Zd! Zd" Z d# Z!d$ Z"d% Z#d& Z$d' Z%d( Z&d) Z'd* Z(e)d+        Z*d,e+e   d	ed-ed
eed.f   fd/Z,d,e+e   d	ed
eed.f   fd0Z-ded1ed2ed
efd3Z.dededed
efd4Z/d5ed6ed	ed
efd7Z0d5ed
efd8Z1d9e+e   d:e+e   fd;Z2d< Z3d= Z4 xZ5S )?GluonSemanticrr   c                     || _         y r   )rr   )rh   rr   s     r   ri   zGluonSemantic.__init__g   s	    r   c                     |g k(  r|}n0t        j                  ||| j                  j                  |            }| j	                  ||      S r   )ttgldistributed_typerr   get_gluon_layout_from_tensortensor)rh   handle	scalar_tyrC   tys        r   _wrap_handle_infer_layoutz'GluonSemantic._wrap_handle_infer_layoutj   sC    B;B&&y%9b9bci9jkB{{62&&r   c                 x    | j                  |j                  |j                  j                  |j                        S r   )r   r   r:   scalarrC   )rh   r~   s     r   _wrap_tensor_infer_layoutz'GluonSemantic._wrap_tensor_infer_layoutq   s,    --fmmV[[=O=OQWQ]Q]^^r   	lhs_shape	rhs_shapec                 X   t        |      t        |      k7  rt        d| d|       g }t        |      D ]q  \  }}||   }|dk(  r|j                  |       "|dk(  s||k(  r|j                  |       >t        dt	        |      z   dz   t	        |      z   dz   t	        |      z          |S )N!Cannot broadcast, rank mismatch: , r	   z?Cannot make_shape_compatible: incompatible dimensions at index : z and )rT   
ValueErrorrY   rV   rR   )rh   r   r   	ret_shaper%   leftrights          r   _broadcast_shapeszGluonSemantic._broadcast_shapest   s    y>S^+@2i[YZZ	 + 	aGAtaLEqy  '1*%4-  &  "-/21v"68<"=?B4y"IKR"SUXY^U_"` a a	a r   inputaxisreturnc                    j                   D cg c]  }t        j                  |       }}|j                  d       dk  rt	        j                         z  t        t        j                  t        j                        fd       j                  j                  t        t        t        t        t        f      fd       t        t        t        t        f      xs j                  k(  fd       | j                  j                  j                         }| j#                  |j                  j$                  |      S c c}w )Nr	   r   c                  "    d j                   S Nz=expected expand_dims input to be a distributed_type but got: r9   r   s   r   r.   z+GluonSemantic.expand_dims.<locals>.<lambda>       VW\WaWaVde r   c                      d  S )Nz;expected expand_dims input to have a SliceLayout, but got: r   r[   s   r   r.   z+GluonSemantic.expand_dims.<locals>.<lambda>   s    TU[T\] r   c                  (    d  dj                    S )Nz7expected expand_dims input layout to be sliced in axis z	 but got r@   )r   r[   s   r   r.   z+GluonSemantic.expand_dims.<locals>.<lambda>   s    MdVS\]c]g]g\hi r   )rC   r{   _unwrap_if_constexprinsertrT   r   r"   r:   r|   r[   r   r   r   r@   rr   create_expand_dimsr   r   r   )rh   r   r   x	dst_shaper   r[   s    ``   @r   expand_dimszGluonSemantic.expand_dims   s   ;@;;GaT..q1G	Gq!!8C$$Dz%**d&;&;<e	g""z&;
O"LM]	_v
O<=StASi	k 00tD--fejj6G6GSS! Hs   Eabc                     | j                  ||      \  }}t        |j                  g k7  d        t        |   ||      }| j                  |      S )Nc                       y)NzCannot join scalars in gluonr   r   r   r   r.   z$GluonSemantic.join.<locals>.<lambda>   r/   r   )broadcast_impl_valuer   rC   superjoinr   )rh   r   r   r*   	__class__s       r   r   zGluonSemantic.join   sM    ((A.1qww"}DEQ"--e44r   c                 l    t         |   |      \  }}| j                  |      | j                  |      fS r   )r   splitr   )rh   r   lhsrhsr   s       r   r   zGluonSemantic.split   s7    7=#S--c2D4R4RSV4WWWr   dimsc                 F    t         |   ||      }| j                  |      S r   )r   permuter   )rh   r   r   r*   r   s       r   r   zGluonSemantic.permute   s$    t,--e44r   rC   c                    t        t        j                  t        j                        fd       j                  j                         t        t              t              k(  fd       k(  rS t              D ]0  \  }}|   |k7  s|dk7  st        d|    d| d| d d 
       t        j                  j                  j                  j                  j                        }| j                  j                  j                  |j                  | j                              }| j                  ||      S )	Nc                  "    d j                   S r   r9   r   s   r   r.   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>   r   r   c                      d d  S )Nr   r   r   )rC   	src_shapes   r   r.   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>   s    7XYbXccefkel5m r   r	   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension r   r   )r   r"   r:   r{   r|   get_block_shapesrT   rY   r   r   r[   rr   create_broadcastr   to_irr~   )rh   r   rC   r%   itemret_tyr   r   s    ``    @r   broadcast_impl_shapez"GluonSemantic.broadcast_impl_shape   s-   z%**d&;&;<e	gJJ//1	s9~U+-mnIL + 	@GAtQx4DAI #VW\]^W_V` aCCG& I%%&Cr)Bug"? @ @	@
 &&uzz'8'8%ARARS..u||V\\$,,=WX{{66**r   r   r   c                 P  	 |j                   |j                   	j                         r	j                         st        
|   ||      S t	        t        t        j                        fd       t	        t        	t        j                        	fd       j                         }	j                         }| j                  ||      }t        j                  t              }t        	j                  t              }|r|s| j                  |	j                        }n_|r|s| j                  |j                        }n>j                  	j                  k7  r%t        dj                   d	j                         | j                  ||      }| j                  ||      }||fS )Nc                      d S )Nz@expected broadcast left input to be a distributed_type but got: r   )lhs_tys   r   r.   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>   s    YZ`Ycd r   c                      d S )NzAexpected broadcast right input to be a distributed_type but got: r   )rhs_tys   r   r.   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>   s    Z[aZde r   zLayout mismatch in broadcast: z vs )r:   is_blockr   r   r   r"   r{   r|   r   r   r[   r   set_auto_layoutr   r   )rh   r   r   r   r   r   is_lhs_autois_rhs_autor   r   r   s           @@r   r   z"GluonSemantic.broadcast_impl_value   sX    (97/S99z&$"7"78d	fz&$"7"78e	g ++-	++-	**9i@	 
; 
;{&&sFMM:C&&sFMM:C]]fmm+=fmm_DQWQ^Q^P_`aa''Y7''Y7Cxr   c                     ||z
  g}|
t               }t        j                  t        j                  ||      }t        |   |||      S )N)r   )r   r{   r|   int32r   arange)rh   startendr[   rC   r   r   s         r   r   zGluonSemantic.arange   sG    u>\F&&tzz5&Aw~eS~88r   r   can_reorderc                 d    t        | d        t        | 	  |||      }| j                  |      S )Nc                       y)Nz%can_reorder is not supported in gluonr   r   r   r   r.   z'GluonSemantic.reshape.<locals>.<lambda>   r/   r   )r   r   reshaper   )rh   r   r   r   r*   r   s        r   r   zGluonSemantic.reshape   s3    ; OPy+>--e44r   c                    t        |      dk(  r|S t        j                  |j                  ||      }| j                  j                  |j                  | j                        |j                        }t        j                  ||      S )Nr   )	rT   r{   r|   dtyperr   create_splatr   r   r~   )rh   r*   rC   r[   r   r   s         r   splatzGluonSemantic.splat   sb    u:?L&&u{{E6B**6<<+Eu||T{{66**r   c                 d    | j                  ||      }|
t               }| j                  |||      S r   )make_scalarr   r   )rh   rC   r*   r   r[   r   s         r   fullzGluonSemantic.full   s3    !!%/>\Fzz&%00r   c                    |j                   t        t        t        j                        fd       t        t        t        j
                        fd       t        j                  j                  j                        }|j                  | j                        }|r| j                  j                  ||j                        s_t        dj                   d d| j                  j                  j                         d| j                  j                               | j                  j                  ||j                        }t        j                   ||      S )Nc                      d S )Nz@expected convert_layout input to be a distributed_type but got: r   )r   s   r   r.   z.GluonSemantic.convert_layout.<locals>.<lambda>   s    YZ\Y_` r   c                      d  S Nz4expected 'layout' to be a DistributedLayout but got r   r   s   r   r.   z.GluonSemantic.convert_layout.<locals>.<lambda>       MfXV r   zlayout conversion from z to z) is not trivial.
The linear layouts are:

)r:   r   r"   r{   r|   r   rZ   rC   r   rr   is_convert_layout_trivialr   	TypeErrorr[   to_linear_layoutcreate_convert_layoutr~   )rh   r*   r[   assert_trivialr   	ret_ty_irr   r   s     `    @r   convert_layoutzGluonSemantic.convert_layout   s   ZZz"d334`	bz&$"8"89V	X&&r}}bhhGLL.	$,,"H"HTYT`T`"a5bii[VH M88<8M8MbiiY[YaYa8b7cce#44VRXXFGI J J 33Iu||L{{66**r   c                 "   t        t        t        j                        fd       t        t	              fd       t        t        t        j
                        fd       t        j                        }|@| j                  j                  |j                  | j                        |j                        }n4| j                  j                  |j                  | j                              }t        j                  |      S )Nc                      d  S )Nz,expected 'element_ty' to be a dtype but got r   )rZ   s   r   r.   z/GluonSemantic.allocate_shared.<locals>.<lambda>   s    =ijtiu;v r   c                      d  S Nz1all elements of 'shape' must be integers but got r   rB   s   r   r.   z/GluonSemantic.allocate_shared.<locals>.<lambda>       ._`e_f,g r   c                      d  S Nz/expected 'layout' to be a SharedLayout but got r   r   s   r   r.   z/GluonSemantic.allocate_shared.<locals>.<lambda>       HQ r   )r   r"   r{   r   r+   r   shared_memory_descriptor_typerr   create_local_allocr   r   shared_memory_descriptor)rh   rZ   rC   r[   r*   r   r   s    ```   r   allocate_sharedzGluonSemantic.allocate_shared   s    z*djj13vw|E"$ghz&$"3"34Q	S//
E65Q\\44RXXdll5KU\\ZF\\44RXXdll5KLF,,VZPUVVr   c                 R   t        t        t        j                        fd       t        j                  |j
                  |j                        }| j                  j                  |j                  | j                        |j                        }t        j                  ||      S )Nc                      d  S r   r   r   s   r   r.   z+GluonSemantic.shared_load.<locals>.<lambda>  r   r   )r   r"   r{   r   r|   r   rC   rr   create_local_loadr   r   r~   )rh   mem_descr[   r   r   s     `  r   shared_loadzGluonSemantic.shared_load  sr    z&$"8"89V	X&&x~~x~~vN//T\\0JHOO\{{66**r   c                 R   t        t        t        j                        fd       t        j                  j                  k(  fd       t        j
                  j
                  k(  fd       | j                  j                  j                  j                         y )Nc                       dt                S )Nz+expected 'value' to be a tensor, but got a r9   r)   s   r   r.   z,GluonSemantic.shared_store.<locals>.<lambda>  s    9deijoepdq7r r   c                  >    dj                    d j                    dS )Nzsource shape z and destination shape  must matchrB   r   r*   s   r   r.   z,GluonSemantic.shared_store.<locals>.<lambda>  !    u{{m3J8>>JZZef r   c                  >    dj                    d j                    dS )Nzsource dtype z and destination dtype r   r   r   s   r   r.   z,GluonSemantic.shared_store.<locals>.<lambda>  r   r   )	r   r"   r{   r~   rC   r   rr   create_local_storer   )rh   r   r*   s    ``r   shared_storezGluonSemantic.shared_store  sk    z%-/rsu{{hnn,f	hu{{hnn,f	h''Fr   c                    t        |t        j                        st        dt	        |             t        |t        j
                        st        dt	        |             |j                  |j                  k7  r&t        d|j                   d|j                   d      |j                  |j                  k7  r&t        d|j                   d|j                   d      |j                  |j                  t        |j                         d  k7  r%t        d	|j                   d
|j                         |j                  j                  | j                        }|j                  j                  | j                        }| j                  j                  ||t        |j                        |j                  j                         S )NzIbank_conflicts expects the register layout to be a distributed_type, got zTbank_conflicts expects the shared layout to be a shared_memory_descriptor_type, got zregister shape z and shared shape r   z$mismatched dtypes between register (z) and shared (z	) layoutsz,bank_conflicts NYI for subslices. Got shape z and alloc_shape )r"   r{   r|   r   r:   r   rC   r   rZ   alloc_shaperT   r[   _to_irrr   get_shared_bank_conflictsrS   rW   )rh   distr_ty	shared_tyreg_attrshared_attrs        r   bank_conflictszGluonSemantic.bank_conflicts  s   (D$9$9:[\`ai\j[kln n )T%G%GHfgklugvfwx  >>Y__,x~~.>>PQZQ`Q`Paalmnn8#6#666x7J7J6K>ZcZnZnYooxy  ??i33S5I4I4JKK>y>OO`ajavav`wx  ??))$,,7&&--dll;||55hTRZR`R`Ma6>6I6I6\6\^ 	^r   c                    t        t        t        t        f      fd       t        |t              st	        |      }t        j                        t        t        t        f      rt        j                        S t        j                  | j                  j                  j                  | j                        |            S )Nc                       dt                S )Nz2Expected a DistributedLayout or SharedLayout, got r9   r   s   r   r.   z0GluonSemantic.to_linear_layout.<locals>.<lambda>1  s    KDQWL>Z r   )r   r"   r   r   rS   r{   r   r   r   	constexprrr   r   r   )rh   r[   rC   s    ` r   r   zGluonSemantic.to_linear_layout/  s    z&#4l"CDZ	\ %&KE**62fz+BCD>>&))~~dll;;FMM$,,<WY^_``r   c                 N    | j                   j                  |j                         y r   )rr   create_local_deallocr   )rh   r   s     r   shared_dealloczGluonSemantic.shared_dealloc=  s    ))(//:r   c                    j                   }t        t        t              fd       t        t        |j                  t
              fd       | j                  j                  j                  | j                        j                        }t        j                  |j                  |j                        }| j                  ||      S )Nc                      d  S )Nz9set_auto_layout must set to a distributed layout but got r   r   s   r   r.   z/GluonSemantic.set_auto_layout.<locals>.<lambda>C  s    RSYRZ[ r   c                  6    d j                   j                   S )Nz4set_auto_layout input must have auto layout but got )r:   r[   r)   s   r   r.   z/GluonSemantic.set_auto_layout.<locals>.<lambda>E  s    MejjN_N_M`a r   )r:   r   r"   r   r[   r   rr   create_set_auto_layoutr   r   r{   r|   rZ   rC   r~   )rh   r*   r[   src_tyr   res_tys    ``   r   r   zGluonSemantic.set_auto_layout@  s    z&"34[	]z&--4a	c44V]]4<<5PRWR^R^_&&v'8'8&,,O{{66**r   c                 H   t        t        t              fd       t        t        t              fd       t        t        t              fd       dg|j                  z  }|<   t	        |j
                        }|<   |j                  }t        j                  |j                  |||j                  j                        }| j                  }	|	j                  |j                  |	      |j                  |      }
t        j                   |
fi |j"                  S )Nc                      d  S )Nz&expected 'start' to be an int but got r   )r   s   r   r.   z-GluonSemantic.memdesc_slice.<locals>.<lambda>K  s    1WX]W^/_ r   c                      d  S )Nz'expected 'length' to be an int but got r   )lengths   r   r.   z-GluonSemantic.memdesc_slice.<locals>.<lambda>L  s    2YZ`Ya0b r   c                      d  S )Nz$expected 'dim' to be an int but got r   r   s   r   r.   z-GluonSemantic.memdesc_slice.<locals>.<lambda>M  s    /STWSX-Y r   r   )r   r"   r#   r]   rS   rC   r[   r{   r   r   r:   r   rr   create_memdesc_subslicer   r   r   __dict__)rh   r   r   r  r@   offsetsrC   r[   r   rr   r   s     ```      r   memdesc_slicezGluonSemantic.memdesc_sliceJ  s    z%%'_`z&#&(bcz#s#%YZ#%X^^$c
//vx}}OhOhi,,00'1BHOOU\],,VCr{{CCr   c                    | j                        t        j                  t        j                  k(  fd       |j
                  dd  }| j                        j                  |j                  }t        j                  |j                  |||      }| j                  }|j                  |j                  |      |j                        }t        j                  |fi |j                  S )Nc                  "    d j                    S )Nz%expected 'index' to be int32 but got r9   indexs   r   r.   z-GluonSemantic.memdesc_index.<locals>.<lambda>Z  s    3XY^YcYcXd1e r   r	   )	to_tensorr   r:   r{   r   rC   r   r[   r   r   rr   create_memdesc_indexr   r   r  )rh   r   r  rC   r[   r   rr   r   s     `     r   memdesc_indexzGluonSemantic.memdesc_indexX  s    u%uzzTZZ')efqr"u%,,//vuU,,--bhhw.?RWX,,VCr{{CCr   c                 j   t        t              fd       t        t              t        j                        k(  fd       D cg c]  }j                  |    }}j                  j
                  }|d t        |      j                  z
   }|D cg c]   }|t        |      j                  z
  d  |   " c}z  }| j                  j                  j                        }| j                  j                  |      }t        j                  |j                  |||      S c c}w c c}w )Nc                      d  S )Nz1all elements of 'order' must be integers but got r   )orders   r   r.   z-GluonSemantic.memdesc_trans.<locals>.<lambda>d  r   r   c                  <    d j                    dt               dS )Nzsource rank (z) and order length (z) must match)r]   rT   )r   r$  s   r   r.   z-GluonSemantic.memdesc_trans.<locals>.<lambda>g  s     mHMM?2Fs5zlR^_ r   rZ   rC   r   r[   )r   r+   rT   rC   r:   r   r]   rr   create_memdesc_transr   get_gluon_layout_from_memdescr{   r   r   )	rh   r   r$  r%   rC   r   new_alloc_shaper   r[   s	    ``      r   memdesc_transzGluonSemantic.memdesc_transc  s   |E"$ghJ#hnn--_	a -22q"22mm//%&Gs;'7(--'GHW\]RSKK(88==(H(IJ1M]]228??EJ;;FC,,VV[9HQWY 	Y 3 ^s   D+%D0c                    t        t              fd       t        t        j                        t        j                  j                        k(  fd       | j
                  j                  j                        }| j
                  j                  |      }j                  j                  }t        |      j                  z
  }|d | t              z   }t        j                  |j                   ||      S )Nc                      d  S r   r   rB   s   r   r.   z/GluonSemantic.memdesc_reshape.<locals>.<lambda>t  r   r   c                  (    d j                    d S )Nz)memdesc_reshape total elements mismatch: z -> rB   )r   rC   s   r   r.   z/GluonSemantic.memdesc_reshape.<locals>.<lambda>w  s    @ 'tE74 r   r&  )r   r+   mathprodrC   rr   create_memdesc_reshaper   r(  r:   r   rT   r]   rS   r{   r   r   )rh   r   rC   r   r[   r   
prefix_lenr)  s    ``     r   memdesc_reshapezGluonSemantic.memdesc_reshapes  s    |E"$ghIIe		(.. 995	
 44X__eL;;FCmm//%5
%kz2T%[@,,~~'
 	
r   c                    t        t        t        j                        fd       t        t	              fd       t        t        t        j
                        fd       t        j                        }| j                  j                  |j                  | j                        |j                        }t        j                  |fi |j                  S )Nc                      d  S )Nz'expected 'dtype' to be a dtype but got r   r   s   r   r.   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  s    8_`e_f6g r   c                      d  S r   r   rB   s   r   r.   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  r   r   c                      d  S r   r   r   s   r   r.   z3GluonSemantic.memdesc_reinterpret.<locals>.<lambda>  r   r   )r   r"   r{   r   r+   r   r   rr   create_memdesc_reinterpretr   r   r   r  )rh   r   r   rC   r[   r   r   s     ```  r   memdesc_reinterpretz!GluonSemantic.memdesc_reinterpret  s    z%,.gh|E"$ghz&$"3"34Q	S//ufeL88$,,9OQYQ`Q`a,,VCr{{CCr   c                 ^    |rt        j                  |||      }n|}| j                  ||      S r   )r{   r|   r~   )rh   r   r   r   r[   r  s         r   wrap_tensorzGluonSemantic.wrap_tensor  s/    **9iHFF{{1f%%r   c                    | D ]3  t        t        j                  t        j                        fd       5 | D cg c]  }|j                  j
                   c}d   t        t        fddd  D              fd       y c c}w )Nc                  "    d j                   S Nz#expected distributed_type but got: r9   )r   s   r   r.   z2GluonSemantic._check_same_layout.<locals>.<lambda>  s    HklmlrlrkuFv r   r   c              3   (   K   | ]	  }|k(    y wr   r   )r$   ll0s     r   r&   z3GluonSemantic._check_same_layout.<locals>.<genexpr>  s     0q170s   r	   c                      d  S )Nz3Expected inputs to have matching layouts, but got: r   )layoutss   r   r.   z2GluonSemantic._check_same_layout.<locals>.<lambda>  s    LWIV r   )r   r"   r:   r{   r|   r[   r(   )xsr   r@  rB  s    `@@r   _check_same_layoutz GluonSemantic._check_same_layout  su     	xA:affd&;&;<>vw	x*,-Q166==-QZs0GABK00V	X .s    B
inputsreverse.c                     d   j                   j                  t              }| |cxk  r|k  sn J d| d| d       |dk  r||z  }D ]"  }|j                   j                  k(  rJ d         j                  j	                  D cg c]  }|j
                   c}||       |       j                         sJ t         fdt        t                    D              S c c}w )Nr   z
scan axis z must be < inputs rank ()z(all scan inputs must have the same shapec              3      K   | ]=  }j                  j                  |      |   j                  j                         ? y wr   r   
get_resultr:   r   )r$   r%   rE  scan_oprh   rC   s     r   r&   z1GluonSemantic.associative_scan.<locals>.<genexpr>  sB      ) **7+=+=a+@&)..BWBWY^_)   AA)	r:   rC   rT   rr   create_scanr   verifytuplerange)	rh   rE  r   region_builder_fnrF  r]   trL  rC   s	   ``     @@r   associative_scanzGluonSemantic.associative_scan  s    q	$$5zu#t#Wz$7OPTvUV%WW#!8DLD 	UA66<<5(T*TT(	U ,,**f+EAHH+EtWU'"~~ )3v;') ) 	)	 ,Fs   C.c                 h   	
 t         fdD              dd   j                  j                  
t        
      t	        dcxk  xr k  nc fd        j                         t        
      D cg c]  \  }}|k7  s| c}}	t        
fdD              sJ d        j                  j                  D cg c]  }|j                   c}       |       j                         sJ t        	 fdt        t                    D              S c c}}w c c}w )Nc              3   p   K   | ]-  }j                  ||j                  j                  gd        / yw)F)r   N)r   numelr*   )r$   rS  rh   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  s+     _ST4<<AGGMM?<N_s   36r   c                      d d  S )Nz/expected reduction axis to be in the range [0, z
) but got r   r   r]   s   r   r.   z)GluonSemantic.reduction.<locals>.<lambda>  s    +Z[_Z``jkojp)q r   c              3   P   K   | ]  }|j                   j                  k(    y wr   )r:   rC   )r$   rS  rC   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  s     9Q166<<5(9s   #&z-all reduction inputs must have the same shapec              3      K   | ]=  }j                  j                  |      |   j                  j                         ? y wr   rJ  )r$   r%   rE  	reduce_opr   rh   s     r   r&   z*GluonSemantic.reduction.<locals>.<genexpr>  sB      ) **9+?+?+BF1INNDYDY[de)rM  )rP  r:   rC   rT   r   rD  rY   r(   rr   create_reducer   rO  rQ  )rh   rE  r   rR  r%   srS  r]   r\  r   rC   s   ```    @@@@r   	reductionzGluonSemantic.reduction  s   <_X^__FDq	$$5zqD4!qr'#,U#3A41aqDyQA	9&99j;jj9LL..&/IQ/I4P	)$!!! )3v;') ) 	) B 0Js   D)D)D/num_binsmaskc                     t        t        |j                        dk(  d        t        |j                  j	                         d        t        |d ud        |P| j                  ||      \  }}t        |j                  j                  j                         d        |j                  }|j                  | j                        }| j                  j                  |j                  |||      }| j                  |t        j                  |g|      S )Nr	   c                       y)Nz histogram only supports 1D inputr   r   r   r   r.   z)GluonSemantic.histogram.<locals>.<lambda>  r/   r   c                       y)Nz%histogram only supports integer inputr   r   r   r   r.   z)GluonSemantic.histogram.<locals>.<lambda>  r/   r   c                       y)Nz'histogram requires a destination layoutr   r   r   r   r.   z)GluonSemantic.histogram.<locals>.<lambda>  r/   r   c                       y)Nz"Mask must have boolean scalar typer   r   r   r   r.   z)GluonSemantic.histogram.<locals>.<lambda>  r/   r   )r   rT   rC   r   is_intr   r:   r   is_boolr   r   rr   create_histogramr:  r{   r   )rh   r   r`  ra  r[   layout_attrr   s          r   	histogramzGluonSemantic.histogram  s    s5;;1$&PQu{{!!#%TUvT!#TU33D%@KD%499##++-/[\;;DmmDLL1..u||Xt[Y

XJGGr   c           	         t        |d ud        t        |d        t        t        |j                        dk(  d        t        j                  |j
                  j                  |j                  d   |j                  d   z   g|      }| j                  | j                  j                  |j                  |j                  |j                  | j                              |      S )Nc                       y)Nz!cat requires a destination layoutr   r   r   r   r.   z#GluonSemantic.cat.<locals>.<lambda>  r/   r   c                       y)Nz;current implementation of `cat` always may reorder elementsr   r   r   r   r.   z#GluonSemantic.cat.<locals>.<lambda>  r/   r   r	   c                       y)Nzcat requires a rank-1 inputr   r   r   r   r.   z#GluonSemantic.cat.<locals>.<lambda>  r/   r   r   )r   rT   rC   r{   r|   r:   r   r~   rr   
create_catr   r   )rh   r   r   r   r[   ret_types         r   catzGluonSemantic.cat  s    vT!#NO{abs399~"$IJ((399Q<#))TU,;V:WY_`{{4<<223::szz8>>Z^ZfZfKghjrssr   srcr  c                    t        t        j                  t        j                        fd       t        t        j                  t        j                        fd       t        j                  j
                  j                         fd       t        j                  j                        t        t        j                  j                        k(  d        t         cxk  xr k  nc fd       dk  rz  t              D ]H  }|k(  r	t        j                  j                  |   j                  j                  |   k(  fd       J | j                  j                  j                  j                        }| j                  |j                  j
                  j                  j                  j                  j                        S )Nc                  "    d j                   S r=  r9   )rs  s   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  s    FijmjrjriuDv r   c                  "    d j                   S r=  r9   r  s   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  s    <UZZNK r   c                  6    d j                   j                  S )Nz&expected integer scalar type but got: )r:   r   r  s   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  s    5[\a\f\f\m\m[p3q r   c                       y)Nz0source and index tensors must have the same rankr   r   r   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  r/   r   c                      d  d dS )Nzgather axis z must be < source rank (rH  r   rY  s   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  s    |D6AYZ^Y__`-a r   r   c                      d  dS )Nz
index dim z( must match the corresponding source dimr   )r   s   r   r.   z&GluonSemantic.gather.<locals>.<lambda>  s    *TF*RS r   )r   r"   r:   r{   r|   r   rg  rT   rC   rQ  rr   create_gatherr   r:  r[   )rh   rs  r  r   dgatherr]   s    ```  @r   r}  zGluonSemantic.gather  sU   z#((D$9$9:<vwz%**d&;&;<K	Muzz  '')+qr388>>"s5::##$,.hiu#t#%ab!8DLDt 	ADy

  #sxx~~a'88S	 ++CJJdK9I9I5::K\K\]]r   c                     | j                   j                  |j                  |j                  | j                         |      }t	        |j
                  j                        }||xx   dz  cc<   | j                  |||      S )NrD   )rr   create_fp4_to_fpr   r   rS   r:   rC   r   )rh   rs  	elem_typer   resultrC   s         r   	fp4_to_fpzGluonSemantic.fp4_to_fp  sa    ..szz9??4<<;XZ^_SXX^^$dq--fiGGr   worker_num_warpsworker_num_regsc           	         |D ]2  \  }t        t        t        t        j                  f      fd       4 t	        |      dk\  sJ d       |d   \  }}t	        |      dz
  }|dd  }	|t	        |      k(  sJ d| dt	        |       d       |t	        |      k(  sJ d| dt	        |       d       | j
                  }
|
j                         }|
j                         }|
j                  |       |j                  ||i 	      }g }|t        |      }|
j                  |       |D cg c]  }|j                          }}|	D cg c]  \  }}t        |       }}}t        |g       }|
j                  |       |
j                  |||      }|j!                         j#                  |       |j%                  |       |
j'                  |j)                         g        |
j+                  |      }|D cg c]  }|j                          }}d}t-        |	      D ]  \  }\  }t/        ||   
      }|
j'                  |j1                  |      |      }||   }t3        t	        |            D cg c]  }|j5                  ||z          }}t7        |D cg c]  }|j8                   c}      }|j                  ||i |       |
j;                          |t	        |      z  } |
j=                  |j?                                t3        t	        |            D cg c]  }|jA                  |       }}|y t        t7        ||D cg c]  }|j8                   c}            S c c}w c c}}w c c}w c c}w c c}w c c}w c c}w )Nc                       dt                S )Nz9function arguments must be a tuple of arguments, but got r9   )argss   r   r.   z/GluonSemantic.warp_specialize.<locals>.<lambda>  s    VW[\`WaVbc r   r	   z8expected at least one function for the default partitionr   zwarp specialize got z partitions but z warp countsz register counts)kwargsr;   )r  caller_context)!r   r"   rP  r{   rT   rr   get_insertion_point	new_blockset_insertion_point_to_startcall_JitFunctionr   create_warp_yieldget_typesumrestore_insertion_pointcreate_warp_specializeget_default_region	push_backset_requested_registerscreate_block_with_parentget_partition_op_holder!create_warp_specialize_partitionsrY   rf   
get_regionrQ  get_argumentr   r:   create_warp_returnset_insertion_point_afterget_operationrK  )rh   functions_and_argsr  r  	generator_default_partitiondefault_argsnum_partitionsworkersrr   	insert_ptdefault_blockdefault_resultsmlir_resultsrresult_typesr  worker_args	mlir_argsws_oppartitions_oparg	arg_typesarg_itr%   funcr  blockj
block_argss                    `             r   warp_specializezGluonSemantic.warp_specialize  s   ) 	eGAt:dUDJJ$78ce	e %&!+g-gg+*<Q*?'</014$QR("
 
 	f!.!11A#FVBWAXXde	f 
 "
 
 	i!.!11A#oBVAWWgh	i 
 ,,//1	  ))+,,];#445F]_4`&/@L!!,/.:;

;; BIIga+D1IIR(	''	2..|YHXY  ",,];%%o6 	(()F)F)H"MAA.Q/89S\\^9	9(1 	%OA|d/:J1:MNN44]5M5Ma5PR[\E#AIBGIBWXQ%,,VaZ8XJX,Zd9Ss#((9STJ&&tZSa&b&&(c)n$F	% 	))%*=*=*?@5:3|;L5MN((+NN"(7X17XYZZ; < J : Y9S O 8Ys*   (M"M'3M--M2M7M<Nc                 h    t        j                  | j                  j                  j                        S r   )r{   r  rr   optionsnum_ctasrl   s    r   r  zGluonSemantic.num_ctas6  s!    ~~dll22;;<<r   c                 
   |j                   Et        |j                   t              sJ t        j                  |j                   j
                        S t        j                  | j                  j                  j
                        S r   )r  r"   rf   r{   r  r<   rr   r  )rh   r  s     r   r<   zGluonSemantic.num_warps9  s_    ##/i668JKKK>>)":":"D"DEE~~dll22<<==r   )F)6rt   ru   rv   r{   r~   langr   __annotations__ri   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r  r   r  r   r  r!  r*  r2  r8  r:  staticmethodrD  r   rT  r_  rk  rr  r}  r  r  r  r<   __classcell__)r   s   @r   rx   rx   a   s   [[FD '_49 c  T T T T&5h 58 5 5Xx XE(H*<$= X5X 5U3Z 5H 5+( +5: +( +  x H :95X 5$s) 5$ 5
+1+
W+G^4a;+D	DY 
,D& X X)x'9 ) )"&)+03+?)*) 2 )# )UZ[ceh[hUi )(
Hx 
H3 
Hh 
HS[ 
Htx th tT th t^( ^8 ^3 ^8 ^,HX H8 H:[HSM :[dlmpdq :[x=>r   rx   r   )#typingr   r   r   r   r   r.  triton.language.semanticr    r
   r{   _layoutsr   r   r   r   r   r   triton._C.libtriton.gluon_irr   r   triton.compiler.code_generatorr   r   r   r   r  rR   r   r+   rd   __triton_builtin__rf   rx   r   r   r   <module>r     s    ; ;  3  x x P T: <F ! !xC0 !
R:z /3  +	M 	M\>N8, \>r   