
    iL                        d dl Z d dlmZmZmZ d dlZd dlZd dlm	Z	m
Z
mZmZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZ d
dlmZm Z  d
dl!m"Z"m#Z#m$Z$ ddlm%Z%  e       rd dl&Z& ejN                  e(      Z)d Z*d Z+d Z,	 d!dejZ                  deej\                     de/fdZ0d"dedejZ                  dej\                  fdZ1 G d de      Z2 G d de      Z3 G d de      Z4 G d d e      Z5y)#    N)ListOptionalUnion)CLIPTextModelCLIPTokenizerT5EncoderModelT5TokenizerFast   )
FrozenDict)VaeImageProcessoris_valid_imageis_valid_image_imagelist)FluxLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKL)USE_PEFT_BACKENDis_ftfy_availableloggingscale_lora_layersunscale_lora_layers   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )FluxModularPipelinec                     t        j                  |       } t        j                  t        j                  |             } | j	                         S N)ftfyfix_texthtmlunescapestriptexts    s/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/modular_pipelines/flux/encoders.pybasic_cleanr)   '   s3    ==D==t,-D::<    c                 T    t        j                  dd|       } | j                         } | S )Nz\s+ )resubr%   r&   s    r(   whitespace_cleanr/   -   s$    66&#t$D::<DKr*   c                 .    t        t        |             } | S r    )r/   r)   r&   s    r(   prompt_cleanr1   3   s    K-.DKr*   encoder_output	generatorsample_modec                     t        | d      r |dk(  r| j                  j                  |      S t        | d      r|dk(  r| j                  j                         S t        | d      r| j                  S t        d      )Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr6   r7   moder9   AttributeError)r2   r3   r4   s      r(   retrieve_latentsr=   9   st     ~}-+2I))00;;		/K84K))..00		+%%%RSSr*   vaeimagec                    t        |t              rat        |j                  d         D cg c](  }t	        | j                  |||dz          ||   |      * }}t        j                  |d      }nt	        | j                  |      ||      }|| j                  j                  z
  | j                  j                  z  }|S c c}w )Nr   r   )r3   r4   )dim)
isinstancelistrangeshaper=   encodetorchcatconfigshift_factorscaling_factor)r>   r?   r3   r4   iimage_latentss         r(   encode_vae_imagerN   F   s    )T" 5;;q>*
 SZZa!a%(89Yq\_jk
 
 		-Q7(E):i]hi"SZZ%<%<<

@Y@YYM
s   -Cc                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zed        Z ej"                         ded	efd
       Zy)FluxProcessImagesInputStepfluxreturnc                      y)NzImage Preprocess step. selfs    r(   descriptionz&FluxProcessImagesInputStep.descriptionX   s    'r*   c           	      B    t        dt        t        ddd      d      gS )Nimage_processor   )vae_scale_factorvae_latent_channelsfrom_configrI   default_creation_methodr   r   r   rU   s    r(   expected_componentsz.FluxProcessImagesInputStep.expected_components\   s-     !!!rRT"UV(5	
 	
r*   c                 V    t        d      t        d      t        d      t        d      gS )Nresized_imager?   heightwidth)r   rU   s    r(   inputsz!FluxProcessImagesInputStep.inputsg   s'    ?+Z-@*XBVXbcjXkllr*   c                     t        d      gS Nprocessed_image)namer   rU   s    r(   intermediate_outputsz/FluxProcessImagesInputStep.intermediate_outputsk       !2344r*   c                     | | |dz  z  dk7  rt        d|dz   d|        | ||dz  z  dk7  rt        d|dz   d|       y y )Nr   r   zHeight must be divisible by z but is zWidth must be divisible by )
ValueErrorrd   re   r[   s      r(   check_inputsz'FluxProcessImagesInputStep.check_inputso   s    &,<q,@"AQ"F;<Lq<P;QQYZ`Yabcc*:Q*>!?1!D:;Ka;O:PPXY^X_`aa "Er*   
componentsstatec                 *   | j                  |      }|j                  |j                  t        d      |j                  s|j                  }| j	                  |j
                  |j                  |j                         |j
                  xs |j                  }|j                  xs |j                  }n(|j                  d   j                  \  }}|j                  }|j                  j                  |||      |_        | j                  ||       ||fS )Nz;`resized_image` and `image` cannot be None at the same timerp   r   )r?   rd   re   )get_block_staterc   r?   ro   rq   rd   re   r[   default_heightdefault_widthsizerY   
preprocessri   set_block_state)rV   rr   rs   block_stater?   rd   re   s          r(   __call__z#FluxProcessImagesInputStep.__call__w   s
   **51$$,1B1B1JZ[[$$,%%E"))1B1BU_UpUp   !''D:+D+DF%%A)A)AE'55a8==ME6--E&0&@&@&K&KRW`fns&K&t#UK05  r*   N)__name__
__module____qualname__
model_namepropertystrrW   r   r   ra   r   rf   r   rl   staticmethodrq   rG   no_gradr   r   r|   rT   r*   r(   rP   rP   U   s    J(S ( ( 
T-%8 
 
 mZ( m m 5d;&7 5 5 b b U]]_!#6 !} ! !r*   rP   c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Z ej                         dedefd	       Zy
)!FluxKontextProcessImagesInputStepzflux-kontextrR   c                      	 y)NzImage preprocess step for Flux Kontext. The preprocessed image goes to the VAE.
Kontext works as a T2I model, too, in case no input image is provided.rT   rU   s    r(   rW   z-FluxKontextProcessImagesInputStep.description   s    U	
r*   c                 @    t        dt        t        ddi      d      gS )NrY   r[   rZ   r]   r^   r`   rU   s    r(   ra   z5FluxKontextProcessImagesInputStep.expected_components   s-     !!!#5r":;(5	
 	
r*   c                 <    t        d      t        dt        d      gS )Nr?   _auto_resizeT)	type_hintdefault)r   boolrU   s    r(   rf   z(FluxKontextProcessImagesInputStep.inputs   s    7#Z$X\%]^^r*   c                     t        d      gS rh   rk   rU   s    r(   rl   z6FluxKontextProcessImagesInputStep.intermediate_outputs   rm   r*   rr   rs   c                 d   ddl m} | j                  |      }|j                  }|d |_        n|j
                  j                  j                  }t        |      st        dt        |             t        |      r|g}|d   }|j
                  j                  |      \  }}	|	|z  |j                  }
|
rt        fd|D              \  }}	}|	|z  |z  }	||z  |z  }|j
                  j                  |||	      }|j
                  j!                  |||	      |_        | j#                  ||       ||fS )Nr
   )PREFERRED_KONTEXT_RESOLUTIONSz/Images must be image or list of images but are r   c              3   L   K   | ]  \  }}t        ||z  z
        ||f  y wr    )abs).0whaspect_ratios      r(   	<genexpr>z=FluxKontextProcessImagesInputStep.__call__.<locals>.<genexpr>   s/      3:>!QSA-.153s   !$)$pipelines.flux.pipeline_flux_kontextr   ru   r?   ri   rY   rI   r[   r   ro   typer   get_default_height_widthr   minresizery   rz   )rV   rr   rs   r   r{   imagesmultiple_ofimgimage_heightimage_widthr   _r   s               @r(   r|   z*FluxKontextProcessImagesInputStep.__call__   sI   Y**51"">*.K' %44;;LLK+F3 #RSWX^S_R`!abbf% )C(2(B(B([([\_(`%L+&5L&33L/2 3B_3 0,; &4{BK';6DL//66v|[YF*4*D*D*O*OPVXdfq*rK'UK05  r*   N)r}   r~   r   r   r   r   rW   r   r   ra   r   rf   r   rl   rG   r   r   r   r|   rT   r*   r(   r   r      s    J
S 
 
 
T-%8 
 
 _Z( _ _ 5d;&7 5 5 U]]_!!#6 !!} !! !!r*   r   c                        e Zd ZdZ	 ddededef fdZedefd       Zedee	   fd       Z
edee   fd	       Zedee   fd
       Z ej                          dededefd       Z xZS )FluxVaeEncoderDynamicSteprQ   
input_nameoutput_namer4   c                 L    || _         || _        || _        t        |           y)a9  Initialize a VAE encoder step for converting images to latent representations.

        Both the input and output names are configurable so this block can be configured to process to different image
        inputs (e.g., "processed_image" -> "image_latents", "processed_control_image" -> "control_image_latents").

        Args:
            input_name (str, optional): Name of the input image tensor. Defaults to "processed_image".
                Examples: "processed_image" or "processed_control_image"
            output_name (str, optional): Name of the output latent tensor. Defaults to "image_latents".
                Examples: "image_latents" or "control_image_latents"
            sample_mode (str, optional): Sampling mode to be used.

        Examples:
            # Basic usage with default settings (includes image processor): # FluxImageVaeEncoderDynamicStep()

            # Custom input/output names for control image: # FluxImageVaeEncoderDynamicStep(
                input_name="processed_control_image", output_name="control_image_latents"
            )
        N)_image_input_name_image_latents_output_namer4   super__init__)rV   r   r   r4   	__class__s       r(   r   z"FluxVaeEncoderDynamicStep.__init__   s(    , ",*5'&r*   rR   c                 <    d| j                    d| j                   dS )Nz'Dynamic VAE Encoder step that converts z into latent representations z.
)r   r   rU   s    r(   rW   z%FluxVaeEncoderDynamicStep.description   s>    89O9O8PPmnr  oN  oN  nO  OR  S  	Sr*   c                 (    t        dt              g}|S )Nr>   )r   r   )rV   rr   s     r(   ra   z-FluxVaeEncoderDynamicStep.expected_components   s    #E=9:
r*   c                 F    t        | j                        t        d      g}|S )Nr3   )r   r   )rV   rf   s     r(   rf   z FluxVaeEncoderDynamicStep.inputs   s!    T334j6MNr*   c                 P    t        | j                  t        j                  d      gS )Nz,The latents representing the reference image)r   rW   )r   r   rG   TensorrU   s    r(   rl   z.FluxVaeEncoderDynamicStep.intermediate_outputs   s)     //,,J
 	
r*   rr   rs   c                    | j                  |      }t        || j                        }|t        || j                  d        ny|j
                  }|j                  j                  }|j                  ||      }t        ||j                  |j                  | j                        }t        || j                  |       | j                  ||       ||fS )N)devicedtype)r?   r>   r3   r4   )ru   getattrr   setattrr   _execution_devicer>   r   torN   r3   r4   rz   )rV   rr   rs   r{   r?   r   r   rM   s           r(   r|   z"FluxVaeEncoderDynamicStep.__call__  s    **51T%;%;<=K!@!@$G11FNN((EHHF%H8E -;;P;P^b^n^nM K!@!@-PUK05  r*   )ri   rM   r7   )r}   r~   r   r   r   r   r   rW   r   r   ra   r   rf   r   rl   rG   r   r   r   r|   __classcell__)r   s   @r(   r   r      s    J ks@Cdg6 SS S S T-%8   Z(   
d;&7 
 
 U]]_!#6 !} !Q^ ! !r*   r   c                      e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zed        Zedeeee   f   d	ed
ej&                  fd       Zedeeee   f   d
ej&                  fd       Ze	 	 	 	 	 ddeeee   f   deeee   f   d
eej&                     deej.                     deej.                     d	edee   fd       Z ej4                         dededefd       Zy)FluxTextEncoderSteprQ   rR   c                      y)NzMText Encoder step that generate text_embeddings to guide the image generationrT   rU   s    r(   rW   zFluxTextEncoderStep.description!  s    ^r*   c                 ~    t        dt              t        dt              t        dt              t        dt              gS )Ntext_encoder	tokenizertext_encoder_2tokenizer_2)r   r   r   r   r	   rU   s    r(   ra   z'FluxTextEncoderStep.expected_components%  s7     .-8+}5*N;-9	
 	
r*   c                 f    t        d      t        d      t        dt        dd      t        d      gS )Npromptprompt_2max_sequence_length   F)r   r   requiredjoint_attention_kwargs)r   intrU   s    r(   rf   zFluxTextEncoderStep.inputs.  s7     x z",SSXY/0	
 	
r*   c                 v    t        ddt        j                  d      t        ddt        j                  d      gS )Nprompt_embedsdenoiser_input_fieldsz2text embeddings used to guide the image generation)kwargs_typer   rW   pooled_prompt_embedsz9pooled text embeddings used to guide the image generation)r   rG   r   rU   s    r(   rl   z(FluxTextEncoderStep.intermediate_outputs7  s@     3,,P	 &3,,W	
 	
r*   c                     | j                   | j                  fD ]=  }|t        |t              rt        |t              r(t        dt        |              y )Nz@`prompt` or `prompt_2` has to be of type `str` or `list` but is )r   r   rB   r   rC   ro   r   )r{   r   s     r(   rq   z FluxTextEncoderStep.check_inputsH  sY    "));+?+?@ 	tF!:fc+B:V\^bKc #cdhiodpcq!rss	tr*   r   r   r   c           	         | j                   j                  }t        |t              r|gn|}t        | t              r| j                  || j                        }| j                  |d|dddd      }|j                  }| j                  |dd      j                  }|j                  d   |j                  d   k\  rXt        j                  ||      sB| j                  j                  |d d |d	z
  df         }t        j                  d
| d|        | j                  |j                  |      d      d   }	|	j                  ||      }	|	S )N
max_lengthTFpt)paddingr   
truncationreturn_lengthreturn_overflowing_tokensreturn_tensorslongestr   r   r   zXThe following part of your input was truncated because `max_sequence_length` is set to  	 tokens: output_hidden_statesr   r   r   )r   r   rB   r   r   maybe_convert_promptr   	input_idsrE   rG   equalbatch_decodeloggerwarningr   )
rr   r   r   r   r   text_inputstext_input_idsuntruncated_idsremoved_textr   s
             r(   _get_t5_prompt_embedsz)FluxTextEncoderStep._get_t5_prompt_embedsN  s`    ))//'4&&j"=>44VZ=S=STF ,, *&+ - 
 %..$00[_0`jj  $(<(<R(@@UcetIu%11>>qRehiRilnRnOn?opLNN'(	,A
 #11.2C2CF2Kbg1hijk%((uV(Dr*   c           	         t        |t              r|gn|}t        | t              r| j                  || j                        }| j	                  |d| j                  j
                  dddd      }|j                  }| j                  j
                  }| j	                  |dd      j                  }|j                  d   |j                  d   k\  rXt        j                  ||      sB| j                  j                  |d d |d	z
  df         }t        j                  d
| d|        | j                  |j                  |      d      }|j                  }|j                  | j                  j                   |      }|S )Nr   TFr   )r   r   r   r   r   r   r   r   r   r   z\The following part of your input was truncated because CLIP can only handle sequences up to r   r   r   )rB   r   r   r   r   model_max_lengthr   rE   rG   r   r   r   r   r   r   pooler_outputr   )	rr   r   r   r   r   tokenizer_max_lengthr   r   r   s	            r(   _get_clip_prompt_embedsz+FluxTextEncoderStep._get_clip_prompt_embedso  s   '4&&j"=>44VZ=Q=QRF ** !++<<&+ + 
 %..)33DD$..vyY].^hh  $(<(<R(@@UcetIu%//<<_QPdghPhkmPmMm=noLNN()<.B #//0A0A&0I`e/f &33%((z/F/F/L/LU[(\r*   Nr   r   r   
lora_scalec                    |xs | j                   }|gt        | t              rW|| _        | j                  t
        rt        | j                  |       | j                  t
        rt        | j                  |       t        |t              r|gn|}|L|xs |}t        |t              r|gn|}t        j                  | ||      }t        j                  | |||      }| j                  ,t        | t              rt
        rt        | j                  |       | j                  ,t        | t              rt
        rt        | j                  |       ||fS )N)r   r   )r   r   r   )r   rB   r   _lora_scaler   r   r   r   r   r   r   r   r   )rr   r   r   r   r   r   r   r   s           r(   encode_promptz!FluxTextEncoderStep.encode_prompt  sJ    7:77 !j=P&Q%/J" &&27G!*"9"9:F((49I!*";";ZH'4&& )6H%/#%>zHH $7#N#N $O $ 
 0EE$7	 F M "".*&9:?O#J$;$;ZH$$0*&9:?O#J$=$=zJ222r*   rr   rs   c           
         | j                  |      }| j                  |       |j                  |_        |j                  |j                  j                  dd       nd |_        | j                  ||j                  d d d |j                  |j                  |j                        \  |_
        |_        | j                  ||       ||fS )Nscale)r   r   r   r   r   r   r   )ru   rq   r   r   r   gettext_encoder_lora_scaler   r   r   r   r   rz   )rV   rr   rs   r{   s       r(   r|   zFluxTextEncoderStep.__call__  s     **51+&'99
 11= ..227DA 	+
 GKFXFX%%!%%% + ? ?":: GY 	G
C!;#C 	UK05  r*   )NNNr   N)r}   r~   r   r   r   r   rW   r   r   ra   r   rf   r   rl   r   rq   r   r   rG   r   r   r   r   FloatTensorfloatr   r   r   r   r|   rT   r*   r(   r   r     s   J_S _ _ 
T-%8 
 
 
Z( 
 
 
d;&7 
 
  t t
 !#tCy.1HKUZUaUa @ E#tCy.4I SXS_S_  B 
 *.59<@#&&*43c49n%43 T#Y'43 &	43
   1 1243 'u'8'8943 !43 UO43 43l U]]_!#6 !} !Q^ ! !r*   r   )Nr7   )r7   )6r#   typingr   r   r   regexr-   rG   transformersr   r   r   r	   configuration_utilsr   rY   r   r   r   loadersr   r   modelsr   utilsr   r   r   r   r   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r   r!   
get_loggerr}   r   r)   r/   r1   r   	Generatorr   r=   rN   rP   r   r   r   rT   r*   r(   <module>r     s     ( (   V V - Z Z G # i i C K K 1  
		H	% ck
TLL
T-5eoo-F
T\_
T-   7!!6 7!t?!(= ?!DJ! 5 J!ZE!/ E!r*   