
    ei                    r    d dl mZ 	 d dlmZ d dlZd dlZd dlm	Z	  G d de	      Z
y# e$ r	 d dlmZ Y 'w xY w)    )annotations)SelfN)InputModulec                       e Zd ZU dZded<   dd fdZddZedd       Zej                  dd       ZddZ
ddd	Zedd
       ZddddZe	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z xZS )	CLIPModelTboolsave_in_rootc                    t         |           ||}t        j                  j	                  |      | _        t        j                  j	                  |      | _        y N)super__init__transformersr   from_pretrainedmodelCLIPProcessor	processor)self
model_nameprocessor_name	__class__s      p/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/sentence_transformers/models/CLIPModel.pyr   zCLIPModel.__init__   sI    !'N!++;;JG
%33CCNS    c                     y)NzCLIPModel() r   s    r   __repr__zCLIPModel.__repr__   s    r   c                B    | j                   j                  j                  S r   r   	tokenizermodel_max_lengthr   s    r   max_seq_lengthzCLIPModel.max_seq_length   s    ~~''888r   c                :    || j                   j                  _        y r   r   )r   values     r   r!   zCLIPModel.max_seq_length!   s    49  1r   c           
        g }g }d|v r=| j                   j                  |d         }| j                   j                  |d         }d|v r| j                   j                  |j	                  d      |j	                  dd       |j	                  dd       |j	                  dd       |j	                  dd       	      }| j                   j                  |d         }g }t        |      }t        |      }t        |d
         D ]?  \  }	}
|
dk(  r|j                  t        |             &|j                  t        |             A t        j                  |      j                         |d<   |S )Npixel_values)r%      	input_idsattention_maskposition_idsoutput_attentionsoutput_hidden_states)r'   r(   r)   r*   r+   image_text_infor   sentence_embedding)r   vision_modelvisual_projection
text_modelgettext_projectioniter	enumerateappendnexttorchstackfloat)r   featuresimage_embedstext_embedsvision_outputstext_outputsr-   image_featurestext_featuresidx
input_types              r   forwardzCLIPModel.forward%   sU   X%!ZZ44(>BZ4[N::77q8IJL("::00",,{3'||,<dC%\\.$?"*,,/BD"I%-\\2H$%O 1 L **44\!_EKl+[)(2C)DE 	?OCQ"))$~*>?"))$}*=>		? */5G)H)N)N)P%&r   c                   ddl m} g }g }g }t        |      D ]V  \  }}t        ||      r#|j	                  |       |j	                  d       5|j	                  |       |j	                  d       X i }	t        |      r| j                  j                  ||dd      }	t        |      r,| j                  j                  |d      }
|
j                  |	d<   ||	d	<   t        |	      S )
Nr   )Imager&   Tpt)padding
truncationreturn_tensors)rI   r%   r,   )	PIL.ImagerE   r4   
isinstancer5   lenr   r   image_processorr%   dict)r   textsrG   rE   imagestexts_valuesr,   rA   dataencodingr?   s              r   tokenizezCLIPModel.tokenizeE   s    #"5) 	*IC$&d#&&q)##D)&&q)	* |~~//gZ^os/tHv;!^^;;FSW;XN'5'B'BH^$&5"#H~r   c                    | j                   S r   )r   r   s    r   r   zCLIPModel.tokenizer`   s    ~~r   safe_serializationc               t    | j                   j                  ||       | j                  j                  |       y )NrV   )r   save_pretrainedr   )r   output_pathrW   argskwargss        r   savezCLIPModel.saved   s,    

"";CU"V&&{3r   c                @    | j                  ||||||      } | |      S )N)model_name_or_path	subfoldertokencache_folderrevisionlocal_files_only)load_dir_path)	clsr_   r`   ra   rb   rc   rd   r\   
local_paths	            r   loadzCLIPModel.loadh   s7     &&1%- ' 

 :r   )zopenai/clip-vit-base-patch32N)r   strreturnNone)rj   ri   )rj   int)r#   rl   rj   rk   )r:   dict[str, torch.Tensor]rj   rm   )T)rG   z
str | boolrj   rm   )rj   ztransformers.CLIPProcessor)rZ   ri   rW   r   rj   rk   ) NNNF)r_   ri   r`   ri   ra   zbool | str | Nonerb   
str | Nonerc   ro   rd   r   rj   r   )__name__
__module____qualname__r	   __annotations__r   r   propertyr!   setterrC   rT   r   r]   classmethodrh   __classcell__)r   s   @r   r   r      s    L$T 9 9 : :@6   HL 4  #'#'#!&  !	
 !   
 r   r   )
__future__r   typingr   ImportErrortyping_extensionsr7   r   #sentence_transformers.models.Routerr   r   r   r   r   <module>r}      s=    "'   ;m m  '&'s   ( 66