
    i$                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$  ejJ                  e&      Z' G d dejP                        Z) G d de      Z* G d de      Z+ G d de       Z, G d de      Z- G d de      Z. G d d e      Z/g d!Z0y)"zPyTorch Starcoder2 model.    )CallableN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging   )MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   h     e Zd Zdef fdZdeej                     dz  dej                  fdZ xZ	S )Starcoder2MLPconfigc                 P   t         |           |j                  }t        j                  ||j
                  |j                        | _        t        j                  |j
                  ||j                        | _        t        |j                     | _        |j                  | _        y )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr   
hidden_actactresidual_dropout)selfr   	embed_dim	__class__s      {/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr$   zStarcoder2MLP.__init__5   su    &&	IIi)A)AX	ii 8 8)&//Z&++, & 7 7    hidden_statesNreturnc                     | j                  |      }| j                  |      }| j                  |      }t        j                  j                  || j                  | j                        }|S )Nptraining)r)   r,   r*   r   
functionaldropoutr-   r8   )r.   r3   s     r1   forwardzStarcoder2MLP.forward=   sZ    		-0/M2--mt?T?T_c_l_l-mr2   )
__name__
__module____qualname__r   r$   tupletorchFloatTensorr;   __classcell__r0   s   @r1   r   r   4   s9    8/ 8U5+<+<%=%D IZIZ r2   r   c                   :    e Zd Zddededz  f fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
ee   deej                  ej                  dz  eej                     dz  f   fdZ xZS )Starcoder2AttentionNr   	layer_idxc                    t         |   ||       |j                  | _        t        j                  |j
                  |j                  | j                  z  |j                        | _	        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr   rF   r!   )r#   r$   r-   r   r&   r%   num_attention_headshead_dimr(   q_projnum_key_value_headsk_projv_projo_projr.   r   rF   r0   s      r1   r$   zStarcoder2Attention.__init__F   s    )< & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur2   r3   position_embeddingsattention_maskpast_key_valuescache_positionkwargsr4   c           
         |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  t%        | j                  dd       d|\  }} |j&                  g |d j)                         }| j+                  |      }t,        j.                  j1                  || j2                  | j                        }||fS )	Nr   r   )sincosrT   g        sliding_window)r:   scalingrZ   r6   )shaperJ   rK   view	transposerM   rN   r   updaterF   r   get_interfacer   _attn_implementationr   r8   attention_dropoutr[   getattrreshape
contiguousrO   r   r9   r:   r-   )r.   r3   rQ   rR   rS   rT   rU   input_shapehidden_shapequery_states
key_statesvalue_statesrY   rX   cache_kwargsattention_interfaceattn_outputattn_weightss                     r1   r;   zStarcoder2Attention.forwardN   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.mm++4004== , 
 L((r2   )N)NN)r<   r=   r>   r   intr$   r@   Tensorr?   r   
LongTensorr   r   r;   rB   rC   s   @r1   rE   rE   E   s    v/ vC$J v )-26.)||.) #5<<#=>.) t+	.)
 .) ((4/.) -..) 
u||U\\D0%2E2LL	M.)r2   rE   c                   (     e Zd Zdedef fdZ xZS )Starcoder2DecoderLayerr   rF   c                 *   t         |   ||       t        ||      | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        y )NrH   eps)r#   r$   rE   	self_attnr   mlpr   	LayerNormr%   norm_epsiloninput_layernormpost_attention_layernormrP   s      r1   r$   zStarcoder2DecoderLayer.__init__   sj    +,FiP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r2   )r<   r=   r>   r   ro   r$   rB   rC   s   @r1   rs   rs      s     b/ bC b br2   rs   c                       e Zd Zdef fdZe	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  de	dz  dej                  dz  d	edz  d
ej                  dz  dee   deez  fd       Z xZS )Starcoder2Modelr   c           	      :   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        j                  |j                  |j                        | _        |j                  | _        y c c}w )Nru   )r#   r$   r   
ModuleListrangenum_hidden_layersrs   layersry   r%   rz   normembedding_dropoutrP   s      r1   r$   zStarcoder2Model.__init__   su     mmHMfNfNfHgh9#FI6h
 LL!3!39L9LM	!'!9!9 is   BN	input_idsrR   position_idsrS   inputs_embeds	use_cacherT   rU   r4   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}t        j                  j                  || j                   | j"                        }| j%                  ||      }| j&                  d | j                  j(                   D ]  } ||f||||||d	|} | j+                  |      }t-        ||r|
      S d 
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r   r   r   )device)r   input_embedsrR   rT   rS   r   r6   )r   )rR   r   rS   r   rT   rQ   )last_hidden_staterS   )
ValueErrorembed_tokensr	   r   get_seq_lengthr@   aranger\   r   	unsqueezerZ   r
   r   r   r9   r:   r   r8   
rotary_embr   r   r   r   )r.   r   rR   r   rS   r   r   rT   rU   past_seen_tokensmask_functioncausal_maskr3   rQ   decoder_layers                  r1   r;   zStarcoder2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &--T33dmm . 
 #oom,oW![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r2   )NNNNNNN)r<   r=   r>   r   r$   r   r@   rq   rp   r   rA   boolr   r   r?   r   r;   rB   rC   s   @r1   r~   r~      s    :/ :  .2.204(,26!%26>
##d*>
 t+>
 &&-	>

 >
 ((4/>
 $;>
 ((4/>
 +,>
 
(	(>
 >
r2   r~   c                       e Zd Zy)Starcoder2ForCausalLMNr<   r=   r>    r2   r1   r   r          r2   r   c                       e Zd Zy)#Starcoder2ForSequenceClassificationNr   r   r2   r1   r   r      r   r2   r   c                       e Zd Zy) Starcoder2ForTokenClassificationNr   r   r2   r1   r   r      r   r2   r   )r   r~   Starcoder2PreTrainedModelr   r   )1__doc__collections.abcr   r@   r   transformers.utils.genericr   activationsr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   mistral.modeling_mistralr   r   r   r   r   r   r   r   configuration_starcoder2r   
get_loggerr<   loggerModuler   rE   rs   r~   r   r   r   __all__r   r2   r1   <module>r      s   &   $   9 ! . R B 7 5 & 0	 	 	 7 
		H	%BII "7)* 7)tb0 bH
l H
V	. 		*J 		'D 	r2   