
    i                        d dl mZ d dlZd dlmZ d dlmZ ddlmZmZ ddl	m
Z
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&  ejN                  e(      Z) G d de       Z* G d de      Z+ G d de      Z, G d de"      Z- G d de!      Z. G d de      Z/ G d de      Z0 G d  d!e      Z1 G d" d#ee-      Z2g d$Z3y)%    )CallableN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                        e Zd Z fdZ xZS )
MistralMLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     u/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/mistral/modular_mistral.pyr&   zMistralMLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r&   __classcell__r/   s   @r0   r!   r!   $   s    Y Yr1   r!   c                       e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
ee   deej                  ej                  dz  f   fdZ xZS )MistralAttentionr.   	layer_idxc                 p   t         |   ||       t        |dd       xs |j                  |j                  z  | _        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  |j                  | j
                  z  d      | _
        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  | j
                  z  |j                  d      | _        y )Nhead_dimFr#   )r%   r&   getattrr(   num_attention_headsr;   r   r'   q_projnum_key_value_headsk_projv_projo_projr-   r.   r9   r/   s      r0   r&   zMistralAttention.__init__-   s    +
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr1   Nhidden_statesposition_embeddingsattention_maskpast_key_valuescache_positionkwargsreturnc           
      D   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  t%        | j                  dd       d|\  }} |j&                  g |d j)                         }| j+                  |      }||fS )Nr   r   )sincosrH   g        sliding_window)dropoutscalingrO   )shaper;   r>   view	transposer@   rA   r   updater9   r   get_interfacer.   _attn_implementationr   trainingattention_dropoutrQ   r<   reshape
contiguousrB   )r-   rD   rE   rF   rG   rH   rI   input_shapehidden_shapequery_states
key_statesvalue_statesrN   rM   cache_kwargsattention_interfaceattn_outputattn_weightss                     r0   forwardzMistralAttention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r1   )NN)r2   r3   r4   r   intr&   torchTensortupler   
LongTensorr   r   re   r5   r6   s   @r0   r8   r8   ,   s    l} l l )-26*)||*) #5<<#=>*) t+	*)
 *) ((4/*) -.*) 
u||U\\D00	1*)r1   r8   c                   (     e Zd Zdedef fdZ xZS )MistralDecoderLayerr.   r9   c                 j    t         |   ||       t        ||      | _        t	        |      | _        y )N)r.   r9   )r%   r&   r8   	self_attnr!   mlprC   s      r0   r&   zMistralDecoderLayer.__init__c   s,    +)9Mf%r1   )r2   r3   r4   r   rf   r&   r5   r6   s   @r0   rl   rl   b   s    &} & & &r1   rl   c                       e Zd ZeedZy)MistralPreTrainedModel)rD   
attentionsN)r2   r3   r4   rl   r8   _can_record_outputs r1   r0   rq   rq   i   s    ,&r1   rq   c                       e Zd Zee	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  de
dz  dej                  dz  d	ee   d
efd              Zy)MistralModelN	input_idsrF   position_idsrG   inputs_embeds	use_cacherH   rI   rJ   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}| j                  ||      }| j                  d | j                  j                   D ]  } ||f||||||d|} | j!                  |      }t#        ||r|	      S d 	      S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r.   r   r   )device)r.   input_embedsrF   rH   rG   rx   )rx   )rF   rx   rG   rz   rH   rE   )last_hidden_staterG   )
ValueErrorembed_tokensr   r.   get_seq_lengthrg   arangerR   r|   	unsqueezerO   r	   r
   
rotary_emblayersnum_hidden_layersnormr   )r-   rw   rF   rx   rG   ry   rz   rH   rI   past_seen_tokensmask_functioncausal_maskrD   rE   decoder_layers                  r0   re   zMistralModel.forwardq   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &"oom,oW![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r1   )NNNNNNN)r2   r3   r4   r   r   rg   rj   rh   r   FloatTensorboolr   r   r   re   rt   r1   r0   rv   rv   p   s     .2.204(,26!%269
##d*9
 t+9
 &&-	9

 9
 ((4/9
 $;9
 ((4/9
 +,9
 
!9
  9
r1   rv   c                       e Zd Zy)MistralForCausalLMNr2   r3   r4   rt   r1   r0   r   r          r1   r   c                       e Zd Zy)MistralForTokenClassificationNr   rt   r1   r0   r   r      r   r1   r   c                       e Zd Zy) MistralForSequenceClassificationNr   rt   r1   r0   r   r      r   r1   r   c                       e Zd Zy)MistralForQuestionAnsweringNr   rt   r1   r0   r   r      s    r1   r   )r   r   rv   rq   r   r   )4collections.abcr   rg   r   transformers.utils.genericr   cache_utilsr   r   masking_utilsr	   r
   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr   
get_loggerr2   loggerr!   r8   rl   rq   rv   r   r   r   r   __all__rt   r1   r0   <module>r      s    $   9 . R B 8 5 & @ @   1 
		H	%Y Y3)~ 3)l&+ &1 <
: <
~	) 		$? 		'E 	 \"=?U [r1   