
    i                     T    d dl mZmZmZ d dlmZ ddlmZ ddiZ G d de      Z	dgZ
y	)
    )	Tokenizerdecodersnormalizers)BPE   )TokenizersBackendtokenizer_fileztokenizer.jsonc                        e Zd ZdZeZdZddgZeZ		 	 	 	 	 	 	 dde
ee
ef   z  dz  de
ee
   z  dz  de
d	e
d
e
de
de
f fdZ xZS )Siglip2TokenizerzN
    Gemma tokenizer + SigLIP2 training default: lowercase normalization.
    left	input_idsattention_maskNvocabmerges	unk_token	bos_token	eos_token	pad_token
mask_tokenc                    |9t        |      dt        |      dt        |      dt        |      dt        |      di}|| _        |xs g | _        t        t	        | j                  | j                  dt        |      d d            | _        t        j                  t        j                  dd	      t        j                         t        j                         g      | j
                  _        t        j                  d	d      | j
                  _        t        
| <  d|||||d
| t!        | d      rJt#        | j$                  t&              r0| j$                  j)                  d| j*                  j,                         t/        | dd       }	|	F|	j                  9t        j                  t        j0                         |	j                  g      |	_        y y y )Nr         r      T)r   r   fuse_unkr   dropoutbyte_fallbacku   ▁ )r   r   r   r   r   init_kwargstokenizer_class
_tokenizer )str_vocab_mergesr   r   r    r   SequenceReplaceByteFallbackFusedecoderr   
normalizersuper__init__hasattr
isinstancer   dict
setdefault	__class____name__getattr	Lowercase)selfr   r   r   r   r   r   r   kwargsbackendr1   s             z/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/siglip2/tokenization_siglip2.pyr,   zSiglip2Tokenizer.__init__(   s    =IIIIJE |#kk||i."	
 #+"3"3eS)8+@+@+BHMMOT#
 &1%8%8e%D" 	
!	
 	
 4'Jt7G7G,N''(94>>;R;RS$d37#5#5#A!,!5!5{7L7L7NPWPbPb6c!dG $B    )NNz<unk>z<bos>z<eos>z<pad>z<mask>)r2   
__module____qualname____doc__VOCAB_FILES_NAMESvocab_files_namespadding_sidemodel_input_namesr   modelr"   r/   intlistr,   __classcell__)r1   s   @r8   r   r      s     *L$&67E .2)-    "4eT#s(^#d*4e d3i$&4e 	4e
 4e 4e 4e 4e 4er9   r   N)
tokenizersr   r   r   tokenizers.modelsr   tokenization_utils_tokenizersr   r=   r   __all__r!   r9   r8   <module>rI      s;   * 8 7 ! > &'78 >e( >eB 
r9   