
    ip=                        d dl mZ d dlmZ ddlmZ ddlmZ ddlm	Z	  e	       rd dl
Z
 ej                  e      Z	 	 dd	e
j                  d
e
j                  de
j                  dz  dede
j                  f
dZde
j"                  j$                  de
j                  de
j                  de
j                  de
j                  f
dZ	 	 	 d d	e
j                  d
e
j                  de
j                  dz  de
j                  dz  dede
j                  fdZde
j"                  j$                  de
j                  de
j                  de
j                  de
j                  f
dZ G d de      Z e       Zde
j                  de
j                  fdZ	 d!ddddee
j"                  j$                     dz  dededee
j"                  j$                     fdZy)"    )Callable)wraps   )logging)GeneralInterface)is_torch_availableNFinputweightbiasis_transposedreturnc                     |r5t        j                  | j                  d      |      j                  d      }n4t        j                  || j                  d            j                  d      }|||z   }|S )a  Batched linear layer supporting optional bias and transposed weights.

    Args:
        input (`torch.Tensor`):
            Input tensor of shape (batch_size, input_dim).
        weight (`torch.Tensor`):
            Weight tensor of shape (batch_size, output_dim, input_dim) if transposed is `False`,
            else of shape (batch_size, input_dim, output_dim).
        bias (`torch.Tensor`, *optional*):
            Bias tensor of shape (batch_size, output_dim). Default is `None`.
        is_transposed (`bool`, *optional*, defaults to `False`):
            Whether the weight tensor is transposed.
    Returns:
        `torch.Tensor`: Output tensor of shape (batch_size, output_dim).
       )torchbmm	unsqueezesqueeze)r	   r
   r   r   outs        g/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/integrations/moe.py_batched_linearr   D   se    * ii*F3;;A> ii 34<<R@DjJ    selfhidden_statestop_k_indextop_k_weightsc                    |j                   }|j                  d      }|j                  d      }|j                  d      }t        j                  ||      j	                  d      j                  d|      j                  d      }|j                         t        j                  d|j                         k(  rt        j                  |      S |j                  d      }	|j                  d      }
|
| j                  k  }|
j                  d| j                  dz
        }||   }| j                  |   }| j                  |   }| j                  r| j                  |   nd }| j                  r| j                   |   nd }t#        |||| j$                        }| j'                  |      }t#        |||| j$                        }|	j(                  |j(                  k7  r|	j+                  d|      }	||	j	                  d      z  }||j	                  d      j-                  |j.                        z  }|j1                  |||      j                  d      }|j-                  |j.                        S )Nr   r   devicer   g        r   dim)r   sizer   aranger   expandreshapesumtensor
zeros_likenum_expertsclampgate_up_proj	down_projhas_biasgate_up_proj_biasdown_proj_biasr   r   _apply_gateshapegathertodtypeview)r   r   r   r   r   	num_top_k
num_tokens
hidden_dim	token_idxsample_weights
expert_ids
valid_maskexpert_ids_clampedselected_hidden_statesselected_gate_upselected_downselected_gate_up_biasselected_down_biasgate_up_out	gated_outout_per_samplefinal_hidden_statess                         r   batched_mm_experts_forwardrH   f   sM    !!F  $I##A&J##B'J Z7AA!DKKBPYZbbcefIell3}7K7KLL.."**2.N$$R(J d...J#))!T-=-=-AB +95 (();<NN#56MJN--D223EF]aDHMM,,-?@W[ " 02GW[WiWiK
   -I %="4DDVDVN
 1777'..q2DE#n&>&>r&BBN#j&:&:2&>&A&A.BVBV&WWN )--j)ZPTTYZT[!!-"5"566r   offsc                     |rt        j                  | ||      }n(t        j                  | |j                  dd      |      }|||z   }|S )a*  Grouped linear layer supporting optional bias and transposed weights.

    Args:
        input (`torch.Tensor`):
            Input tensor of shape (S, input_dim).
        weight (`torch.Tensor`):
            Weight tensor of shape (num_experts, output_dim, input_dim) if transposed is `False`,
            else of shape (num_experts, input_dim, output_dim).
        bias (`torch.Tensor`, *optional*):
            Bias tensor of shape (num_experts, output_dim). Default is `None`.
        offs (`torch.Tensor`, *optional*):
            Offsets tensor indicating the boundaries of each group in the input tensor.
        is_transposed (`bool`, *optional*, defaults to `False`):
            Whether the weight tensor is transposed.
    Returns:
        `torch.Tensor`: Output tensor of shape (S, output_dim).
    )rI   r   )r   _grouped_mm	transpose)r	   r
   r   rI   r   r   s         r   _grouped_linearrN      sR    0 vD9 v'7'7B'?dKDjJr   c                    t        t        d      st        d      |j                  }|j	                  d      }|j	                  d      }|j	                  d      }t        j
                  ||      j                  d      j                  d|      j                  d      }|j                  d      }	|j                  d      }
||   }t        j                  |
      }t        j                  |      }|
|   }|	|   }||   }| j                  }| j                  }| j                  r| j                  |   nd }| j                  r| j                  |   nd }|j                  dk(  r|j!                         n|j#                         }t        j$                  || j&                  d| j&                  dz
        }t        j(                  |dt        j*                  	      }t-        ||||| j.                  
      }| j1                  |      }t-        ||||| j.                  
      }||j                  d      z  }||   }|j3                  |||      j5                  d      }|j7                  |j8                        S )NrL   zmtorch._grouped_mm is not available. Please make sure you are using a PyTorch version that includes it (2.9+).r   r   r   r   cpu)binsminmax)r"   r5   r    r!   )hasattrr   ImportErrorr   r#   r$   r   r%   r&   argsortr,   r-   r.   r/   r0   typefloatinthistcr*   cumsumint32rN   r   r1   r6   r'   r4   r5   )r   r   r   r   r   r7   r8   r9   r:   r;   r<   r?   perminv_permexpert_ids_gsample_weights_gselected_hidden_states_gr@   rA   rB   rC   histc_inputnum_tokens_per_expertoffsetsrD   rE   out_per_sample_grF   rG   s                                r   grouped_mm_experts_forwardrf      sa    5-({
 	
 !!F  $I##A&J##B'J Z7AA!DKKBPYZbbcefI"**2.N$$R(J +95 ==$D}}T"Hd#L%d+5d; ((NNMDHMMD22<@W[>Bmm,,\:QU
 +1++*>,$$&LDTDTDVK!KK$:J:JPQW[WgWgjkWklll0au{{KG " "24I7bfbtbtK
   -I '="4gTM_M_
 (*:*D*DR*HH &h/N )--j)ZPTTYZT[!!-"5"566r   c                   :     e Zd ZdZeedZdededef fdZ	 xZ
S )ExpertsInterfacez9Interface for registering custom experts implementations.)
batched_mm
grouped_mmexperts_implementationdefaultr   c                     |t         j                  d       n|dk7  r|| vrt        d| d      t        |   ||      S )zfReturn the requested `experts_implementation`. Also strictly check its validity, and raise if invalid.a
  You tried to access the `ExpertsInterface` with a `config._experts_implementation` set to `None`. This is expected if you use an Expert Module as a standalone Module. If this is not the case, something went wrong with the dispatch of `config._experts_implementation`eager`zL` is not a valid experts implementation registered in the `ExpertsInterface`)loggerwarning_onceKeyErrorsuperget)r   rk   rl   	__class__s      r   get_interfacezExpertsInterface.get_interface  s`    !)N
 $w.3IQU3U*++wx  w{17;;r   )__name__
__module____qualname____doc__rH   rf   _global_mappingstrr   rv   __classcell__)ru   s   @r   rh   rh     s4    C 10O
<C <( <x < <r   rh   rD   c                 V    |j                  dd      \  }}| j                  |      |z  S )a  
    Default gating mechanism: splits the gate_up_out into gate and up parts,
    applies the activation function to the gate part, and multiplies it with the up part.
    Args:
        gate_up_out (`torch.Tensor`):
            The output tensor from the gate and up projection of shape (S, 2 * intermediate_dim).
    Returns:
        `torch.Tensor`: The gated output tensor of shape (S, intermediate_dim).
    r   r   r!   )chunkact_fn)r   rD   gateups       r   _default_apply_gater   .  s1        +HD";;tr!!r   )r   r.   experts_classr.   c                    dt         t        j                  j                     dt         t        j                  j                     ffd}|  ||       S |S )aV  Decorator to modify experts class to support different experts implementations.

    Args:
        experts_class (`type[torch.nn.Module]`, *optional*):
            The experts class to modify. If not provided, returns a decorator that can be applied to the class.
        is_transposed (`bool`, *optional*, defaults to `False`):
            Whether the expert weights are stored in transposed format.
        has_bias (`bool`, *optional*, defaults to `False`):
            Whether the expert layers include bias terms.

    Returns:
        `type[torch.nn.Module]`: The modified experts class.
    r   r   c                     | j                   | j                  t              fd       }t              fd       }t        | d      st        | _        || _         || _        | S )Nc                 J     | |g|i | || _         | _        | _        y N)configr.   r   )r   r   argskwargsr.   r   original_inits       r   __init__z=use_experts_implementation.<locals>.wrapper.<locals>.__init__Q  s-    $888 DK$DM!.Dr   c                 p    t         j                  | j                  j                        } || g|i |S r   )ALL_EXPERTS_FUNCTIONSrv   r   _experts_implementation)r   r   r   experts_forwardoriginal_forwards       r   forwardz<use_experts_implementation.<locals>.wrapper.<locals>.forwardX  s:    3AA335EO #49$9&99r   r1   )r   r   r   rT   r   r1   )r   r   r   r   r   r.   r   s      @@r   wrapperz+use_experts_implementation.<locals>.wrapperM  su    %..(00	}		/ 
	/ 
	 	: 
!	: }m4(;M%!) 'r   )rW   r   nnModule)r   r   r.   r   s    `` r   use_experts_implementationr   <  sH    "tEHHOO4 ehhoo9N 0  }%%Nr   )NF)NNFr   )collections.abcr   	functoolsr   utilsr   utils.genericr   utils.import_utilsr   r   
get_loggerrw   rp   Tensorboolr   r   r   rH   rN   rf   rh   r   r   rW   r    r   r   <module>r      s   %   , 3 			H	%Z !%	<<LL ,,
 	
 \\D:7
((//:7<<:7 :7 <<	:7
 \\:7@ !% $#<<#LL# ,,
# ,,
	#
 # \\#LH7
((//H7<<H7 H7 <<	H7
 \\H7V<' <. )* "5<< "ELL " 37,QVin,(4/,JN,bf,	%((//,r   