
    i$                        d dl mZ d dlZd dlmZ d dlmZ ddlmZmZ ddl	m
Z
 ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z, ddl-m.Z.  ej^                  e0      Z1 G d de'      Z2 G d de      Z3 G d de!      Z4 ejj                   e              ejj                  d      k\  r G d dejl                        Z7n  e
d       G d  dejp                               Z7 G d! d"e"      Z9 G d# d$e(      Z: G d% d&e,      Z; G d' d(e#      Z< G d) d*e%      Z= G d+ d,e&      Z> G d- d.e$      Z?g d/Z@y)0    )CallableN)version)nn   )CacheDynamicCache)use_kernel_forward_from_hub)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)check_model_inputs)get_torch_version   )Gemma2RotaryEmbedding)
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     q/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/qwen2/modular_qwen2.pyr*   zQwen2MLP.__init__(   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r*   __classcell__r4   s   @r5   r%   r%   '   s    Y Yr6   r%   c                       e Zd Zy)Qwen2RotaryEmbeddingNr7   r8   r9    r6   r5   r=   r=   /       r6   r=   c                       e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
ee   deej                  ej                  dz  f   fdZ xZS )Qwen2Attentionr3   	layer_idxc                    t        |d      r|j                  |   nd | _        t        |   ||       t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j                  |j                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _        t        j                  |j                  | j                  z  |j                  d      | _        | j                  dk(  r|j                  | _        y d | _        y )Nlayer_typesTr'   Fsliding_attention)hasattrrE   
layer_typer)   r*   r   r+   r,   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projsliding_windowr2   r3   rC   r4   s      r5   r*   zQwen2Attention.__init__4   s   ;B6=;Y&,,Y7_c+ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejk7;J]7]f33cgr6   Nhidden_statesposition_embeddingsattention_maskpast_key_valuescache_positionkwargsreturnc                 .   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  | j$                  d|\  }} |j&                  g |d j)                         }| j+                  |      }||fS )Nr"   r   )sincosrV   g        )dropoutscalingrP   )shaperJ   rK   view	transposerM   rN   r   updaterC   r   get_interfacer3   _attn_implementationr    trainingattention_dropoutr^   rP   reshape
contiguousrO   )r2   rR   rS   rT   rU   rV   rW   input_shapehidden_shapequery_states
key_statesvalue_statesr\   r[   cache_kwargsattention_interfaceattn_outputattn_weightss                     r5   forwardzQwen2Attention.forward=   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL..
%
 
%
!\ *k));;;;FFHkk+.L((r6   )NN)r7   r8   r9   r#   intr*   torchTensortupler   
LongTensorr   r   rr   r:   r;   s   @r5   rB   rB   3   s    h{ hs h )-26*)||*) #5<<#=>*) t+	*)
 *) ((4/*) -.*) 
u||U\\D00	1*)r6   rB   z2.3.0c                   *     e Zd Zddeddf fdZ xZS )Qwen2RMSNormepsrX   Nc                 *    t         |   ||d       y )NT)normalized_shaperz   elementwise_affine)r)   r*   r2   r,   rz   r4   s      r5   r*   zQwen2RMSNorm.__init__m   s    GksW[\r6   gư>)r7   r8   r9   floatr*   r:   r;   s   @r5   ry   ry   l   s    	]U 	]d 	] 	]r6   ry   RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	ry   rz   rX   Nc                     t         |           t        j                  t	        j
                  |            | _        || _        y)zC
            Qwen2RMSNorm is equivalent to T5LayerNorm
            N)r)   r*   r   	Parameterrt   onesweightvariance_epsilonr~   s      r5   r*   zQwen2RMSNorm.__init__t   s1     G,,uzz+'>?DK$'D!r6   rR   c                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )Nr   rZ   T)keepdim)	dtypetort   float32powmeanrsqrtr   r   )r2   rR   input_dtypevariances       r5   rr   zQwen2RMSNorm.forward|   sy    '--K),,U]];M$((+00T0BH)EKK4CXCX8X,YYM;;!1!1+!>>>r6   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)rv   r   r_   r   )r2   s    r5   
extra_reprzQwen2RMSNorm.extra_repr   s*    DKK--./vd6K6K5LMMr6   r   )
r7   r8   r9   r   r*   rt   ru   rr   r   r:   r;   s   @r5   ry   ry   r   s7    	(U 	(d 	(	? 	?%,, 	?	Nr6   c                   (     e Zd Zdedef fdZ xZS )Qwen2DecoderLayerr3   rC   c                 P    t         |   ||       |j                  |   | _        y )N)r3   rC   )r)   r*   rE   attention_typerQ   s      r5   r*   zQwen2DecoderLayer.__init__   s(    )<$00;r6   )r7   r8   r9   r#   rs   r*   r:   r;   s   @r5   r   r      s    <{ <s < <r6   r   c                       e Zd Zy)Qwen2PreTrainedModelNr>   r?   r6   r5   r   r      r@   r6   r   c                       e Zd Zdef fdZee	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  de
dz  dej                  dz  d	edz  d
ej                  dz  dee   defd              Z xZS )
Qwen2Modelr3   c                 ^    t         |   |       d| j                  j                  v | _        y )NrF   )r)   r*   r3   rE   has_sliding_layersr1   s     r5   r*   zQwen2Model.__init__   s'     "59P9P"Pr6   N	input_idsrT   position_idsrU   inputs_embeds	use_cacherV   rW   rX   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }t        |x}
t              s:| j                  |||||d}dt        di |i}
| j                  rt        di ||
d<   |}| j                  ||      }| j                   d | j                  j"                   D ]  } ||f|
|j$                     |||||d	|}! | j'                  |      }t)        ||r|
      S d 
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r3   r   r"   )device)r3   input_embedsrT   rV   rU   r   full_attentionrF   )rT   rS   r   rU   r   rV   )last_hidden_staterU   r?   )
ValueErrorembed_tokensr   r3   get_seq_lengthrt   aranger_   r   	unsqueeze
isinstancedictr
   r   r   
rotary_emblayersnum_hidden_layersr   normr   )r2   r   rT   r   rU   r   r   rV   rW   past_seen_tokenscausal_mask_mappingmask_kwargsrR   rS   decoder_layers                  r5   rr   zQwen2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L ?-F ++ -"0"0#2 ,K !"4"C{"C# &&;\;k_j;k#$78%"oom\J![[)H4;;+H+HI 
	M)	2=3O3OP$7) /#-	 	M
	 		-0&+/8O
 	
>B
 	
r6   )NNNNNNN)r7   r8   r9   r#   r*   r   r   rt   rw   ru   r   FloatTensorboolr   r   r   rr   r:   r;   s   @r5   r   r      s    Q{ Q  .2.204(,26!%26C
##d*C
 t+C
 &&-	C

 C
 ((4/C
 $;C
 ((4/C
 +,C
 
!C
  C
r6   r   c                       e Zd Zy)Qwen2ForCausalLMNr>   r?   r6   r5   r   r      r@   r6   r   c                       e Zd Zy)Qwen2ForSequenceClassificationNr>   r?   r6   r5   r   r      r@   r6   r   c                       e Zd Zy)Qwen2ForTokenClassificationNr>   r?   r6   r5   r   r      r@   r6   r   c                       e Zd Zy)Qwen2ForQuestionAnsweringNr>   r?   r6   r5   r   r      r@   r6   r   )r   r   r   ry   r   r   r   )Acollections.abcr   rt   	packagingr   r   cache_utilsr   r   integrationsr	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.import_utilsr   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r    mistral.modeling_mistralr!   configuration_qwen2r#   
get_loggerr7   loggerr%   r=   rB   parser   ry   Moduler   r   r   r   r   r   r   __all__r?   r6   r5   <module>r      sM   $    . 7 R B 6 & @ @ / 3 :   4 , 
		H	%Yx Y	0 	4)^ 4)n 7=="$%w)??]rzz ] !+Nryy N ,N(<) <	/ 	J
 J
Z	' 		%C 		"= 		 9 	r6   