
    i                     f   d dl mZmZmZmZ d dlZd dlZd dlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZm Z  ddl!m"Z"m#Z#m$Z$m%Z% ddlm&Z&  ejN                  e(      Z)dejT                  dejT                  fdZ+	 	 	 	 	 d:dee,ee,   f   de,de-de-deej\                     f
dZ/	 	 	 	 	 d;dee,ee,   f   deejT                     de,de-deej\                     f
dZ0	 	 	 	 	 	 d<dee,ee,   f   deeejT                  eejb                  jb                     ejb                  jb                  f      de,de,de-deej\                     fdZ2	 d=dejT                  deejf                     d e,fd!Z4	 	 d>dejT                  d"edejf                  dej\                  d#ejj                  d$e-d e,fd%Z6 G d& d'e      Z7 G d( d)e7      Z8 G d* d+e      Z9 G d, d-e      Z: G d. d/e:      Z; G d0 d1e      Z< G d2 d3e      Z= G d4 d5e=      Z> G d6 d7e      Z? G d8 d9e      Z@y)?    )DictListOptionalUnionN)"Qwen2_5_VLForConditionalGenerationQwen2TokenizerQwen2VLProcessor   )
FrozenDict)ClassifierFreeGuidance)InpaintProcessorVaeImageProcessoris_valid_imageis_valid_image_imagelist)AutoencoderKLQwenImageQwenImageControlNetModelQwenImageMultiControlNetModel)calculate_dimensions)logging)unwrap_module   )ModularPipelineBlocksPipelineState)ComponentSpec
ConfigSpec
InputParamOutputParam   )QwenImageModularPipelinehidden_statesmaskc                     |j                         }|j                  d      }| |   }t        j                  ||j	                         d      }|S )Nr   dimr   )boolsumtorchsplittolist)r    r!   	bool_maskvalid_lengthsselectedsplit_results         x/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/modular_pipelines/qwenimage/encoders.py_extract_masked_hiddenr/   $   sH    		IMMaM(MY'H;;x)=)=)?QGL    promptprompt_template_encode prompt_template_encode_start_idxtokenizer_max_lengthdevicec                 .   t        |t              r|gn|}|}|}|D 	cg c]  }	|j                  |	       }
}	 ||
||z   ddd      j                  |      } | |j                  |j
                  d      }|j                  d   }t        ||j
                        }|D 	cg c]  }	|	|d  	 }}	|D 	cg c]A  }	t        j                  |	j                  d      t        j                  |	j                        C }}	t        |D 	cg c]  }	|	j                  d       c}	      }t        j                  |D cg c]J  }t        j                  ||j!                  ||j                  d      z
  |j                  d            g      L c}      }t        j                  |D cg c]:  }t        j                  ||j!                  ||j                  d      z
        g      < c}      }|j                  |	      }||fS c c}	w c c}	w c c}	w c c}	w c c}w c c}w )
NTpt)
max_lengthpadding
truncationreturn_tensors)	input_idsattention_maskoutput_hidden_statesr   dtyper5   r   r5   )
isinstancestrformattor<   r=   r    r/   r'   onessizelongr5   maxstackcat	new_zeros)text_encoder	tokenizerr1   r2   r3   r4   r5   templatedrop_idxetxt
txt_tokensencoder_hidden_statesr    split_hidden_statesattn_mask_listmax_seq_lenuprompt_embedsencoder_attention_masks                       r.   get_qwen_prompt_embedsr\   ,   s    $FC0fXfF%H/H'-
.!8??1
.C
.,x7RVgkbj  )&&!00!
 *77;M0
@Y@YZ1DEA1XY<EEXklSTejj%**QXXNlNl*=>Qqvvay>?KKKRefQAq{{;#:AFF1IFG	HfM #[[GUV!Aq{{;#:;<	=V "$$F$3M0001 / Fl>f 	Ws%   G9G>1AHH4AH?Himagec                 R   t        |t              r|gn|}|}|}|D 	cg c]  }	|j                  |	       }
}	 ||
|dd      j                  |      } | |j                  |j
                  |j                  |j                  d      }|j                  d   }t        ||j
                        }|D 	cg c]  }	|	|d  	 }}	|D 	cg c]A  }	t        j                  |	j                  d      t        j                  |	j                        C }}	t        |D 	cg c]  }	|	j                  d       c}	      }t        j                   |D cg c]J  }t        j"                  ||j%                  ||j                  d      z
  |j                  d            g      L c}      }t        j                   |D cg c]:  }t        j"                  ||j%                  ||j                  d      z
        g      < c}      }|j                  |	      }||fS c c}	w c c}	w c c}	w c c}	w c c}w c c}w )
NTr7   textimagesr9   r;   r<   r=   pixel_valuesimage_grid_thwr>   r?   r   r@   r   rB   )rC   rD   rE   rF   r<   r=   rc   rd   r    r/   r'   rG   rH   rI   r5   rJ   rK   rL   rM   )rN   	processorr1   r]   r2   r3   r5   rP   rQ   rR   rS   model_inputsoutputsr    rV   rW   rX   rY   rZ   r[   s                       r.   get_qwen_prompt_embeds_editrh   T   s    $FC0fXfF%H/H'-
.!8??1
.C
.	
 	bj  ((#22!..#22!G ))"-M0@[@[\1DEA1XY<EEXklSTejj%**QXXNlNl*=>Qqvvay>?KKKRefQAq{{;#:AFF1IFG	HfM #[[GUV!Aq{{;#:;<	=V "$$F$3M000? /& Fl>f 	Ws%   H1HAHHAH/?H$img_template_encodec                    t        |t              r|gn|}t        |t              r-d}t        |      D ]  \  }	}
||j	                  |	dz         z  } n||j	                  d      }nd}|}|}|D cg c]  }|j	                  ||z          }} |||dd      j                  |      } | |j                  |j                  |j                  |j                  d      }|j                  d   }t        ||j                        }|D cg c]  }||d  	 }}|D cg c]A  }t        j                  |j                  d      t        j                  |j                   	      C }}t#        |D cg c]  }|j                  d       c}      }t        j$                  |D cg c]J  }t        j&                  ||j)                  ||j                  d      z
  |j                  d            g      L c}      }t        j$                  |D cg c]:  }t        j&                  ||j)                  ||j                  d      z
        g      < c}      }|j                  |
      }||fS c c}w c c}w c c}w c c}w c c}w c c}w )N r   Tr7   r_   rb   r?   r   r@   rB   )rC   rD   list	enumeraterE   rF   r<   r=   rc   rd   r    r/   r'   rG   rH   rI   r5   rJ   rK   rL   rM   )rN   re   r1   r]   r2   ri   r3   r5   base_img_promptiimgrP   rQ   rR   rS   rf   rg   r    rV   rW   rX   rY   rZ   r[   s                           r.    get_qwen_prompt_embeds_edit_plusrq      sM    $FC0fXfF%& 	AFAs299!a%@@O	A		-44Q7%H/H9?
@A8???Q./
@C
@	
 	bj  ((#22!..#22!G ))"-M0@[@[\1DEA1XY<EEXklSTejj%**QXXNlNl*=>Qqvvay>?KKKRefQAq{{;#:AFF1IFG	HfM #[[GUV!Aq{{;#:;<	=V "$$F$3M000; A$ Fl>f 	Ws&   1I!I&AI+*I0AI5?I:encoder_output	generatorsample_modec                     t        | d      r |dk(  r| j                  j                  |      S t        | d      r|dk(  r| j                  j                         S t        | d      r| j                  S t        d      )Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrrv   rw   modery   AttributeError)rr   rs   rt   s      r.   retrieve_latentsr}      st     ~}-+2I))00;;		/K84K))..00		+%%%RSSr0   vaerA   latent_channelsc                    t        | t        j                        st        dt	        |        d      | j                         dk(  r| j                  d      } n0| j                         dk7  rt        d| j                          d      | j                  ||      } t        |t              rat        | j                  d         D cg c](  }t        |j                  | ||d	z          ||   |
      * }}t        j                  |d      }nt        |j                  |       ||
      }t        j                  |j                  j                         j#                  d	|d	d	d	      j                  |j$                  |j&                        }	t        j                  |j                  j(                        j#                  d	|d	d	d	      j                  |j$                  |j&                        }
||	z
  |
z  }|S c c}w )Nz#Expected image to be a tensor, got .   r      z Expected image dims 4 or 5, got )r5   rA   r   r   )rs   rt   r#   )rC   r'   Tensor
ValueErrortyper$   	unsqueezerF   rl   rangeshaper}   encoderL   tensorconfiglatents_meanviewr5   rA   latents_std)r]   r~   rs   r5   rA   r   rt   ro   image_latentsr   r   s              r.   encode_vae_imager      s    eU\\*>tE{m1MNN yy{a"		;EIIK=JKKHHF%H0E)T" 5;;q>*
 SZZa!a%(89Yq\_jk
 
 		-Q7(E):i]hiSZZ,,-	a!Q	*	M  -"5"5	6  	SZZ++,	a!Q	*	M  -"5"5	6 
 #\1[@M'
s   -G3c                        e Zd ZdZddedef fdZedefd       Zedee	   fd       Z
edee   fd       Zedee   fd	       Z ej                          d
edefd       Z xZS )QwenImageEditResizeDynamicStep	qwenimage
input_nameoutput_namec                     t        |t              rt        |t              s#t        dt        |       dt        |             || _        || _        t        |           y)a  Create a configurable step for resizing images to the target area (1024 * 1024) while maintaining the aspect ratio.

        This block resizes an input image tensor and exposes the resized result under configurable input and output
        names. Use this when you need to wire the resize step to different image fields (e.g., "image",
        "control_image")

        Args:
            input_name (str, optional): Name of the image field to read from the
                pipeline state. Defaults to "image".
            output_name (str, optional): Name of the resized image field to write
                back to the pipeline state. Defaults to "resized_image".
        3input_name and output_name must be strings but are  and N)rC   rD   r   r   _image_input_name_resized_image_output_namesuper__init__selfr   r   	__class__s      r.   r   z'QwenImageEditResizeDynamicStep.__init__   sa     *c**[#2NEd:FVEWW\]abm]n\op  ",*5'r0   returnc                 "    d| j                    dS )Nz"Image Resize step that resize the zE to the target area (1024 * 1024) while maintaining the aspect ratio.)r   r   s    r.   descriptionz*QwenImageEditResizeDynamicStep.description  s#    3D4J4J3K  LQ  R  	Rr0   c                 @    t        dt        t        ddi      d      gS )Nimage_resize_processorvae_scale_factor   from_configr   default_creation_methodr   r   r   r   s    r.   expected_componentsz2QwenImageEditResizeDynamicStep.expected_components  s-     (!!#5r":;(5	
 	
r0   c                 R    t        | j                  dt        j                  d      gS )NTzThe image to resizenamerequired	type_hintr   )r   r   r'   r   r   s    r.   inputsz%QwenImageEditResizeDynamicStep.inputs  s*     ++dell`u
 	
r0   c                 r    t        | j                  t        t        j                  j                     d      gS )NzThe resized imagesr   r   r   )r   r   r   PILImager   s    r.   intermediate_outputsz3QwenImageEditResizeDynamicStep.intermediate_outputs#  s1     44SYY__@Ucw
 	
r0   
componentsstatec                    | j                  |      }t        || j                        }t        |      st	        dt        |             t        |      r|g}|d   j                  \  }}t        d||z        \  }}}	|D 
cg c]   }
|j                  j                  |
||      " }}
t        || j                  |       | j                  ||       ||fS c c}
w )N/Images must be image or list of images but are r      )heightwidth)get_block_stategetattrr   r   r   r   r   rH   r   r   resizesetattrr   set_block_state)r   r   r   block_statera   image_widthimage_heightcalculated_widthcalculated_height_r]   resized_imagess               r.   __call__z'QwenImageEditResizeDynamicStep.__call__+  s    **51d&<&<='/NtTZ|n]^^&!XF$*1INN!\1EkS^amSm1n.+Q  
 --44UCT\l4m
 

 	T<<nMUK05  
s   %C)r]   resized_image__name__
__module____qualname__
model_namerD   r   propertyr   r   r   r   r   r   r   r   r'   no_gradr   r   r   __classcell__r   s   @r.   r   r      s    J3 s * RS R R 
T-%8 
 
 
Z( 
 
 
d;&7 
 
 U]]_!#; !M ! !r0   r   c                        e Zd ZdZ	 	 	 ddededef fdZedee   f fd       Z	 e
j                         ded	efd
       Z xZS )"QwenImageEditPlusResizeDynamicStepr   r   r   vae_image_output_namec                     t        |t              rt        |t              s#t        dt        |       dt        |             d| _        || _        || _        || _        t        | %          y)a  Create a configurable step for resizing images to the target area (1024 * 1024) while maintaining the aspect ratio.

        This block resizes an input image or a list input images and exposes the resized result under configurable
        input and output names. Use this when you need to wire the resize step to different image fields (e.g.,
        "image", "control_image")

        Args:
            input_name (str, optional): Name of the image field to read from the
                pipeline state. Defaults to "image".
            output_name (str, optional): Name of the resized image field to write
                back to the pipeline state. Defaults to "resized_image".
            vae_image_output_name (str, optional): Name of the image field
                to write back to the pipeline state. This is used by the VAE encoder step later on. QwenImage Edit Plus
                processes the input image(s) differently for the VL and the VAE.
        r   r   i @ N)
rC   rD   r   r   condition_image_sizer   r   _vae_image_output_namer   r   )r   r   r   r   r   s       r.   r   z+QwenImageEditPlusResizeDynamicStep.__init__G  sq    * *c**[#2NEd:FVEWW\]abm]n\op  %.!!+*5'&;#r0   r   c                     t         |   t        | j                  t        t
        j                  j                     d      gz   S )NzIThe images to be processed which will be further used by the VAE encoder.r   )r   r   r   r   r   r   r   )r   r   s    r.   r   z7QwenImageEditPlusResizeDynamicStep.intermediate_outputsf  s>    w+00syy/g/
 
 	
r0   r   r   c                    | j                  |      }t        || j                        }t        |      st	        dt        |             t        |t        j                        s7t        |t        j                  j                        rt        |t              s|g}g }g }|D ]k  }|j                  \  }}	t        | j                  ||	z        \  }
}}|j                  |j                   j#                  |||
             |j                  |       m t%        || j&                  |       t%        || j(                  |       | j+                  ||       ||fS )Nr   )r   r   r   r   r   r   rC   r'   r   r   r   rl   rH   r   r   appendr   r   r   r   r   r   )r   r   r   r   ra   condition_images
vae_imagesrp   r   r   condition_widthcondition_heightr   s                r.   r   z+QwenImageEditPlusResizeDynamicStep.__call__p  s:   **51d&<&<='/NtTZ|n]^^ 65<<06399??3vt,XF 
 	#C(+%K3G));+E40O-q ##J$E$E$L$LSRbds$tuc"	# 	T<<>NOT88*EUK05  r0   )r]   r   	vae_image)r   r   r   r   rD   r   r   r   r   r   r'   r   r   r   r   r   r   s   @r.   r   r   D  s    J "*%0	   #	> 
d;&7 
 
 U]]_!#; !M ! !r0   r   c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zedee   fd       Zed        Z ej&                         d	ed
efd       Zy)QwenImageTextEncoderStepr   r   c                      y)NzMText Encoder step that generate text_embeddings to guide the image generation r   s    r.   r   z$QwenImageTextEncoderStep.description  s    ^r0   c           
          t        dt        d      t        dt        d      t        dt        t	        ddi      d	
      gS )NrN   zThe text encoder to use)r   rO   zThe tokenizer to useguiderguidance_scale      @r   r   )r   r   r   r   r   r   s    r.   r   z,QwenImageTextEncoderStep.expected_components  sH     .*LZst+~CYZ&!#3S"9:(5		
 		
r0   c                 N    t        dd      t        dd      t        dd      gS )Nr2   <|im_start|>system
Describe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
r   defaultr3   "   r4      r   r   s    r.   expected_configsz)QwenImageTextEncoderStep.expected_configs  s7     - w >K2DA
 	
r0   c                 p    t        ddt        d      t        dt        d      t        dt        d	d
      gS )Nr1   TThe prompt to encoder   negative_promptThe negative prompt to encoder   max_sequence_lengthzThe max sequence length to user   )r   r   r   r   )r   rD   intr   s    r.   r   zQwenImageTextEncoderStep.inputs  s>     HtsPfg-Jij*cGgqu
 	
r0   c           	          t        ddt        j                  d      t        ddt        j                  d      t        ddt        j                  d      t        d	dt        j                  d
      gS NrZ   denoiser_input_fieldszThe prompt embeddings)r   kwargs_typer   r   prompt_embeds_maskzThe encoder attention masknegative_prompt_embedszThe negative prompt embeddingsnegative_prompt_embeds_maskz#The negative prompt embeddings maskr   r'   r   r   s    r.   r   z-QwenImageTextEncoderStep.intermediate_outputs  t     $3,,3	 )3,,8	 -3,,<	 23,,A	'
 	
r0   c                    t        | t              s't        | t              st        dt	        |              |7t        |t              s't        |t              st        dt	        |             ||dkD  rt        d|       y y )N2`prompt` has to be of type `str` or `list` but is ;`negative_prompt` has to be of type `str` or `list` but is r   z9`max_sequence_length` cannot be greater than 1024 but is rC   rD   rl   r   r   )r1   r   r   s      r.   check_inputsz%QwenImageTextEncoderStep.check_inputs  s    &#&z&$/GQRVW]R^Q_`aa '45Z[_`o[pZqrss*/BT/IXYlXmnoo 0J*r0   r   r   c           	         | j                  |      }|j                  }| j                  |j                  |j                  |j
                         t        |j                  |j                  |j                  |j                  j                  |j                  j                  |j                  j                  |      \  |_        |_        |j                  d d d |j
                  f   |_        |j                  d d d |j
                  f   |_        d |_        d |_        |j"                  r|j                  xs d}t        |j                  |j                  ||j                  j                  |j                  j                  |j                  j                  |      \  |_        |_        |j                  d d d |j
                  f   |_        |j                   d d d |j
                  f   |_        | j%                  ||       ||fS )N)r1   r2   r3   r4   r5   rk   )r   _execution_devicer  r1   r   r   r\   rN   rO   r   r2   r3   r4   rZ   r   r   r  requires_unconditional_embedsr   r   r   r   r   r5   r   s         r.   r   z!QwenImageTextEncoderStep.__call__  s   **51--+,,k.I.I;KjKjkDZ##  %%#-#4#4#K#K-7->->-_-_!+!2!2!G!GE
A!;#A %0$=$=aAb;CbCbAb>b$c!)4)G)GKl[MlMlKlHl)m&-1*26/33)99?ROZp''$$&'1'8'8'O'O1;1B1B1c1c%/%6%6%K%K[WK.0W 2=1S1S4[44442K. 7B6]6]4[44447K3 	UK05  r0   Nr   r   r   r   r   rD   r   r   r   r   r   r   r   r   r   r   staticmethodr  r'   r   r   r   r   r   r0   r.   r   r     s    J_S _ _ 

T-%8 

 

 
$z"2 
 
 
Z( 
 
 
d;&7 
 
8 p p U]]_(!#; (!M (! (!r0   r   c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zedee   fd       Zed        Z ej&                         d	ed
efd       Zy)QwenImageEditTextEncoderStepr   r   c                      y)NzxText Encoder step that processes both prompt and image together to generate text embeddings for guiding image generationr   r   s    r.   r   z(QwenImageEditTextEncoderStep.description  s     Jr0   c           
      |    t        dt              t        dt              t        dt        t	        ddi      d      gS )NrN   re   r   r   r   r   r   )r   r   r	   r   r   r   s    r.   r   z0QwenImageEditTextEncoderStep.expected_components  sC     .*LM+'78&!#3S"9:(5		
 		
r0   c                 6    t        dd      t        dd      gS )Nr2     <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>
<|im_start|>assistant
r   r3   @   r   r   s    r.   r   z-QwenImageEditTextEncoderStep.expected_configs&  s,     - M >K
 	
r0   c                     t        ddt        d      t        dt        d      t        ddt        j                  d	      gS )
Nr1   Tr   r   r   r   r   r   z?The image prompt to encode, should be resized using resize step)r   rD   r'   r   r   s    r.   r   z#QwenImageEditTextEncoderStep.inputs0  sC     HtsPfg-Jij$,,]		
 		
r0   c           	          t        ddt        j                  d      t        ddt        j                  d      t        ddt        j                  d      t        d	dt        j                  d
      gS r   r  r   s    r.   r   z1QwenImageEditTextEncoderStep.intermediate_outputs=  r  r0   c                     t        | t              s't        | t              st        dt	        |              |9t        |t              s(t        |t              st        dt	        |             y y y )Nr  r  r  )r1   r   s     r.   r  z)QwenImageEditTextEncoderStep.check_inputsZ  sv    &#&z&$/GQRVW]R^Q_`aa '45Z[_`o[pZqrss 6 5 (r0   r   r   c           	         | j                  |      }| j                  |j                  |j                         |j                  }t        |j                  |j                  |j                  |j                  |j                  j                  |j                  j                  |      \  |_        |_        d |_        d |_        |j                   ru|j                  xs d}t        |j                  |j                  ||j                  |j                  j                  |j                  j                  |      \  |_        |_        | j#                  ||       ||fS )N)r1   r]   r2   r3   r5    )r   r  r1   r   r
  rh   rN   re   r   r   r2   r3   rZ   r   r   r  r  r   r  s         r.   r   z%QwenImageEditTextEncoderStep.__call__f  s1   **51+,,k.I.IJ--D_##  %%++#-#4#4#K#K-7->->-_-_E
A!;#A .2*26/33)99@SOZu''$$&!//'1'8'8'O'O1;1B1B1c1c[WK.0W 	UK05  r0   Nr  r   r0   r.   r  r    s    JJS J J 

T-%8 

 

 
$z"2 
 
 

Z( 

 

 
d;&7 
 
8 	t 	t U]]_ !#;  !M  !  !r0   r  c                   d    e Zd ZdZedee   fd       Z ej                         de
defd       Zy) QwenImageEditPlusTextEncoderStepr   r   c                 N    t        dd      t        dd      t        dd      gS )Nr2     <|im_start|>system
Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
r   ri   7Picture {}: <|vision_start|><|image_pad|><|vision_end|>r3   r  r   r   s    r.   r   z1QwenImageEditPlusTextEncoderStep.expected_configs  s<     - b *Q >K

 
	
r0   r   r   c           
         | j                  |      }| j                  |j                  |j                         |j                  }t        |j                  |j                  |j                  |j                  |j                  j                  |j                  j                  |j                  j                  |      \  |_        |_        d |_        d |_        |j"                  r|j                  xs d}t        |j                  |j                  ||j                  |j                  j                  |j                  j                  |j                  j                  |      \  |_        |_        | j%                  ||       ||fS )N)r1   r]   r2   ri   r3   r5   r  )r   r  r1   r   r
  rq   rN   re   r   r   r2   ri   r3   rZ   r   r   r  r  r   r  s         r.   r   z)QwenImageEditPlusTextEncoderStep.__call__  sQ   **51+,,k.I.IJ--Dd##  %%++#-#4#4#K#K * 1 1 E E-7->->-_-_	E
A!;#A .2*26/33)99@SO0++((*%33+5+<+<+S+S(2(9(9(M(M5?5F5F5g5g!	 XK.0W 	UK05  r0   N)r   r   r   r   r   r   r   r   r'   r   r   r   r   r   r0   r.   r  r    sP    J
$z"2 
 
 U]]_$!#; $!M $! $!r0   r  c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zed        Z ej"                         ded	efd
       Zy)&QwenImageInpaintProcessImagesInputStepr   r   c                      y)NzImage Preprocess step for inpainting task. This processes the image and mask inputs together. Images can be resized first using QwenImageEditResizeDynamicStep.r   r   s    r.   r   z2QwenImageInpaintProcessImagesInputStep.description  s     qr0   c                 @    t        dt        t        ddi      d      gS )Nimage_mask_processorr   r   r   r   )r   r   r   r   s    r.   r   z:QwenImageInpaintProcessImagesInputStep.expected_components  s-     & !#5r":;(5	
 	
r0   c                     t        dd      t        d      t        d      t        d      t        d      t        d      gS )	N
mask_imageTr   r   r]   r   r   padding_mask_cropr   r   s    r.   r   z-QwenImageInpaintProcessImagesInputStep.inputs  sA     |d3'wx w*+
 	
r0   c                 T    t        d      t        d      t        dt        d      gS )Nprocessed_imager   processed_mask_imagemask_overlay_kwargsz=The kwargs for the postprocess step to apply the mask overlayr   )r   r   r   s    r.   r   z;QwenImageInpaintProcessImagesInputStep.intermediate_outputs  s1     ./34*[
 	
r0   c                     | | |dz  z  dk7  rt        d|dz   d|        | ||dz  z  dk7  rt        d|dz   d|       y y Nr   r   zHeight must be divisible by z but is zWidth must be divisible by r   r   r   r   s      r.   r  z3QwenImageInpaintProcessImagesInputStep.check_inputs      &,<q,@"AQ"F;<Lq<P;QQYZ`Yabcc*:Q*>!?1!D:;Ka;O:PPXY^X_`aa "Er0   r   r   c                 r   | j                  |      }|j                  |j                  t        d      |j                  s|j                  }| j	                  |j
                  |j                  |j                         |j
                  xs |j                  }|j                  xs |j                  }n(|j                  d   j                  \  }}|j                  }|j                  j                  ||j                  |||j                        \  |_        |_        |_        | j%                  ||       ||fS )N7resized_image and image cannot be None at the same timer3  r   )r]   r!   r   r   r)  )r   r   r]   r   r  r   r   r   default_heightdefault_widthrH   r%  
preprocessr'  r)  r,  r.  r/  r   r   r   r   r   r]   r   r   s          r.   r   z/QwenImageInpaintProcessImagesInputStep.__call__  s4   **51$$,1B1B1JVWW$$,%%E"))1B1BU_UpUp   !''D:+D+DF%%A)A)AE'55a8==ME6--E ++66 ++"-"?"? 7  	g#[%E{Gf 	UK05  r0   Nr   r   r   r   r   rD   r   r   r   r   r   r   r   r   r  r  r'   r   r   r   r   r   r0   r.   r"  r"    s    JqS q q 
T-%8 
 
 
Z( 
 
 	
d;&7 	
 	
 b b U]]_!#; !M ! !r0   r"  c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zed        Z ej"                         ded	efd
       Zy)QwenImageProcessImagesInputStepr   r   c                      y)NzXImage Preprocess step. Images can be resized first using QwenImageEditResizeDynamicStep.r   r   s    r.   r   z+QwenImageProcessImagesInputStep.description  s    ir0   c                 @    t        dt        t        ddi      d      gS )Nimage_processorr   r   r   r   r   r   s    r.   r   z3QwenImageProcessImagesInputStep.expected_components  s-     !!!#5r":;(5	
 	
r0   c                 V    t        d      t        d      t        d      t        d      gS )Nr   r]   r   r   r*  r   s    r.   r   z&QwenImageProcessImagesInputStep.inputs&  s'    ?+Z-@*XBVXbcjXkllr0   c                     t        d      gS )Nr,  r-  )r   r   s    r.   r   z4QwenImageProcessImagesInputStep.intermediate_outputs*  s     ./
 	
r0   c                     | | |dz  z  dk7  rt        d|dz   d|        | ||dz  z  dk7  rt        d|dz   d|       y y r1  r2  r3  s      r.   r  z,QwenImageProcessImagesInputStep.check_inputs0  r4  r0   r   r   c                 *   | j                  |      }|j                  |j                  t        d      |j                  s|j                  }| j	                  |j
                  |j                  |j                         |j
                  xs |j                  }|j                  xs |j                  }n(|j                  d   j                  \  }}|j                  }|j                  j                  |||      |_        | j                  ||       ||fS )Nr6  r3  r   r]   r   r   )r   r   r]   r   r  r   r   r   r7  r8  rH   r@  r9  r,  r   r:  s          r.   r   z(QwenImageProcessImagesInputStep.__call__8  s   **51$$,1B1B1JVWW$$,%%E"))1B1BU_UpUp   !''D:+D+DF%%A)A)AE'55a8==ME6--E&0&@&@&K&K 'L '
# 	UK05  r0   Nr;  r   r0   r.   r=  r=    s    JjS j j 
T-%8 
 
 mZ( m m 
d;&7 
 

 b b U]]_!#; !M ! !r0   r=  c                   ~    e Zd ZdZdZedefd       Zedee	   fd       Z
 ej                         dedefd       Zy	)
'QwenImageEditPlusProcessImagesInputStepzqwenimage-edit-plusr   r   c                      y)NzImage Preprocess step for QwenImage Edit Plus. Unlike QwenImage Edit, QwenImage Edit Plus doesn't use the same resized image for further preprocessing.r   r   s    r.   r   z3QwenImageEditPlusProcessImagesInputStep.descriptionX  s     ir0   c                 V    t        d      t        d      t        d      t        d      gS )Nr   r]   r   r   r*  r   s    r.   r   z.QwenImageEditPlusProcessImagesInputStep.inputs\  s'    ;'G)<j>RT^_fTghhr0   r   r   c                 p   | j                  |      }|j                  |j                  t        d      |j                  |j                  }| j	                  |j
                  |j                  |j                         |j
                  xs |j                  }|j                  xs |j                  }|j                  j                  |||      |_        nK|j                  d   j                  \  }}|j                  }|j                  j                  |||      |_        | j                  ||       ||fS )Nz7`vae_image` and `image` cannot be None at the same timer3  rE  r   )r   r   r]   r   r  r   r   r   r7  r8  r@  r9  r,  rH   r   r:  s          r.   r   z0QwenImageEditPlusProcessImagesInputStep.__call__`  s7   **51  ([->->-FVWW  (%%E"))1B1BU_UpUp   !''D:+D+DF%%A)A)AE*4*D*D*O*OF% +P +K' (11!499ME6))E*4*D*D*O*OF% +P +K' 	UK05  r0   N)r   r   r   r   vae_image_sizer   rD   r   r   r   r   r'   r   r   r   r   r   r0   r.   rG  rG  T  sv    &J NiS i i iZ( i i U]]_!#; !M ! !r0   rG  c                        e Zd ZdZ	 	 ddedef fdZedefd       Zedee	   fd       Z
edee   fd       Zedee   fd	       Z ej                          d
ededefd       Z xZS )QwenImageVaeEncoderDynamicStepr   r   r   c                 >    || _         || _        t        |           y)a  Initialize a VAE encoder step for converting images to latent representations.

        Both the input and output names are configurable so this block can be configured to process to different image
        inputs (e.g., "processed_image" -> "image_latents", "processed_control_image" -> "control_image_latents").

        Args:
            input_name (str, optional): Name of the input image tensor. Defaults to "processed_image".
                Examples: "processed_image" or "processed_control_image"
            output_name (str, optional): Name of the output latent tensor. Defaults to "image_latents".
                Examples: "image_latents" or "control_image_latents"

        Examples:
            # Basic usage with default settings (includes image processor) QwenImageVaeEncoderDynamicStep()

            # Custom input/output names for control image QwenImageVaeEncoderDynamicStep(
                input_name="processed_control_image", output_name="control_image_latents"
            )
        N)r   _image_latents_output_namer   r   r   s      r.   r   z'QwenImageVaeEncoderDynamicStep.__init__  s     . ",*5'r0   r   c                 <    d| j                    d| j                   dS )Nz'Dynamic VAE Encoder step that converts z into latent representations z.
)r   rO  r   s    r.   r   z*QwenImageVaeEncoderDynamicStep.description  s>    89O9O8PPmnr  oN  oN  nO  OR  S  	Sr0   c                 (    t        dt              g}|S )Nr~   )r   r   r   r   s     r.   r   z2QwenImageVaeEncoderDynamicStep.expected_components  s     %!78

 r0   c                 J    t        | j                  d      t        d      g}|S )NTr(  rs   )r   r   r   r   s     r.   r   z%QwenImageVaeEncoderDynamicStep.inputs  s*     t--={#
 r0   c                 P    t        | j                  t        j                  d      gS )Nz,The latents representing the reference imager   r   )r   rO  r'   r   r   s    r.   r   z3QwenImageVaeEncoderDynamicStep.intermediate_outputs  s)     //,,J
 	
r0   r   r   c                 L   | j                  |      }|j                  }|j                  j                  }t	        || j
                        }t        ||j                  |j                  |||j                        }t        || j                  |       | j                  ||       ||fS )N)r]   r~   rs   r5   rA   r   )r   r
  r~   rA   r   r   r   rs   num_channels_latentsr   rO  r   )r   r   r   r   r5   rA   r]   r   s           r.   r   z'QwenImageVaeEncoderDynamicStep.__call__  s    **51--$$T%;%;< )!++&;;
 	T<<mLUK05  r0   )r,  r   r   r   s   @r.   rM  rM  }  s    J ,* 6 SS S S T-%8   Z(   
d;&7 
 
 U]]_!#; !M !Vc ! !r0   rM  c                       e Zd ZdZedefd       Zedee   fd       Z	edee
   fd       Zedee   fd       Zed        Z ej"                         ded	edefd
       Zy)!QwenImageControlNetVaeEncoderStepr   r   c                      y)NzbVAE Encoder step that converts `control_image` into latent representations control_image_latents.
r   r   s    r.   r   z-QwenImageControlNetVaeEncoderStep.description  s    tr0   c           
          t        dt              t        dt              t        dt        t	        ddi      d      g}|S )Nr~   
controlnetcontrol_image_processorr   r   r   r   )r   r   r   r   r   rR  s     r.   r   z5QwenImageControlNetVaeEncoderStep.expected_components  sH     %!78,(@A)!!#5r":;(5		

 r0   c                 ^    t        dd      t        d      t        d      t        d      g}|S )Ncontrol_imageTr(  r   r   rs   r*  rT  s     r.   r   z(QwenImageControlNetVaeEncoderStep.inputs  s4     6x w{#	
 r0   c                 <    t        dt        j                  d      gS )Ncontrol_image_latentsz*The latents representing the control imagerV  r  r   s    r.   r   z6QwenImageControlNetVaeEncoderStep.intermediate_outputs  s#     ',,H
 	
r0   c                     | | |dz  z  dk7  rt        d|dz   d|        | ||dz  z  dk7  rt        d|dz   d|       y y r1  r2  r3  s      r.   r  z.QwenImageControlNetVaeEncoderStep.check_inputs  r4  r0   r   r   c           
      .   | j                  |      }| j                  |j                  |j                  |j                         |j
                  }|j                  j                  }|j                  xs |j                  }|j                  xs |j                  }t        |j                        }t        |t              r,t        |j                  t              s|j                  g|_        t        |t              rg |_        |j                  D ]k  }	|j"                  j%                  |	||      }	t'        |	|j                  |j(                  |||j*                  d      }
|j                   j-                  |
       m nt        |t.              r^|j"                  j%                  |j                  ||      }t'        ||j                  |j(                  |||j*                  d      |_        nt1        dt3        |             | j5                  ||       ||fS )NrE  rw   )r]   r~   rs   r5   rA   r   rt   z[Expected controlnet to be a QwenImageControlNetModel or QwenImageMultiControlNetModel, got )r   r  r   r   r   r
  r~   rA   r7  r8  r   r]  rC   r   r`  rl   rb  r^  r9  r   rs   rX  r   r   r   r   r   )r   r   r   r   r5   rA   r   r   r]  control_image_control_image_latents_r`  s               r.   r   z*QwenImageControlNetVaeEncoderStep.__call__  s   **51+,,k.?.?A\A\]--$$##@z'@'@!!=Z%=%=":#8#89
j"?@T_TmTmosIt)4)B)B(CK%j"?@02K-"-";"; Q!+!C!C!N!N(! "O " *:(")33!$.$C$C (*& 11889OP!Q$ 
$<=&>>II!// J M
 1A#NN%// * ? ?$1K- mnrs}n~m  A  	UK05  r0   Nr;  r   r0   r.   rZ  rZ    s    JuS u u T-%8   Z(   
d;&7 
 
 b b U]]_:!#; :!M :!Vc :! :!r0   rZ  )Nr   r   r   N)NNr  r  N)NNr  r  r  N)Nrw   )r   rx   )Atypingr   r   r   r   r   r'   transformersr   r   r	   configuration_utilsr   guidersr   r@  r   r   r   r   modelsr   r   r   +pipelines.qwenimage.pipeline_qwenimage_editr   utilsr   utils.torch_utilsr   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r   r   
get_loggerr   loggerr   r/   rD   r   r5   r\   rh   r   rq   	Generatorr}   rA   r   r   r   r   r  r  r"  r=  rG  rM  rZ  r   r0   r.   <module>rt     s   / . 
  ] ] - - l l e e O  . C W W 6 
		H	%%,, ell  %) #A,. $%)%1 #tCy.!%1  	%1
 '*%1 %1 U\\"%1V %)$( #W,.%),1 #tCy.!,1 ELL!	,1
  ,1 '*,1 U\\",1d %)SW #lX,.%)41 #tCy.!41 E%,,SYY__(=syyNOP	41
  41 41 '*41 U\\"41r ck
TLL
T-5eoo-F
T\_
T( (<<(	( ( LL	(
 ;;( ( (VM!%: M!`J!)G J!Z~!4 ~!Bu!#8 u!p6!'C 6!rN!-B N!b=!&; =!@&!.M &!RQ!%: Q!hl!(= l!r0   