
    iL                        d dl mZmZ d dlZddlmZ ddlmZmZ ddl	m
Z
mZmZ dd	lmZmZ 	 dd
edej                   dededej                   f
dZdej                   dedeeef   fdZ G d de      Z G d de      Z G d de      Zy)    )ListTupleN   )QwenImageMultiControlNetModel   )ModularPipelineBlocksPipelineState)ComponentSpec
InputParamOutputParam   )QwenImageModularPipelineQwenImagePachifier
input_nameinput_tensor
batch_sizenum_images_per_promptreturnc           	         t        |t        j                        st        d|  d      |j                  d   dk(  r||z  }n6|j                  d   |k(  r|}n!t        d|  d| d|j                  d          |j                  |d      }|S )a(  Repeat tensor elements to match the final batch size.

    This function expands a tensor's batch dimension to match the final batch size (batch_size * num_images_per_prompt)
    by repeating each element along dimension 0.

    The input tensor must have batch size 1 or batch_size. The function will:
    - If batch size is 1: repeat each element (batch_size * num_images_per_prompt) times
    - If batch size equals batch_size: repeat each element num_images_per_prompt times

    Args:
        input_name (str): Name of the input tensor (used for error messages)
        input_tensor (torch.Tensor): The tensor to repeat. Must have batch size 1 or batch_size.
        batch_size (int): The base batch size (number of prompts)
        num_images_per_prompt (int, optional): Number of images to generate per prompt. Defaults to 1.

    Returns:
        torch.Tensor: The repeated tensor with final batch size (batch_size * num_images_per_prompt)

    Raises:
        ValueError: If input_tensor is not a torch.Tensor or has invalid batch size

    Examples:
        tensor = torch.tensor([[1, 2, 3]]) # shape: [1, 3] repeated = repeat_tensor_to_batch_size("image", tensor,
        batch_size=2, num_images_per_prompt=2) repeated # tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) - shape:
        [4, 3]

        tensor = torch.tensor([[1, 2, 3], [4, 5, 6]]) # shape: [2, 3] repeated = repeat_tensor_to_batch_size("image",
        tensor, batch_size=2, num_images_per_prompt=2) repeated # tensor([[1, 2, 3], [1, 2, 3], [4, 5, 6], [4, 5, 6]])
        - shape: [4, 3]
    `z` must be a tensorr   r   z!` must have have batch size 1 or z
, but got )dim)
isinstancetorchTensor
ValueErrorshaperepeat_interleave)r   r   r   r   	repeat_bys        v/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/modular_pipelines/qwenimage/inputs.pyrepeat_tensor_to_batch_sizer       s    J lELL11ZL(:;<< !!!66				A	*	,)	
|<ZL
S_SeSefgShRij
 	

  11)1CL    latentsvae_scale_factorc                     | j                   dk7  r'| j                   dk7  rt        d| j                          | j                  dd \  }}||z  }||z  }||fS )a  Calculate image dimensions from latent tensor dimensions.

    This function converts latent space dimensions to image space dimensions by multiplying the latent height and width
    by the VAE scale factor.

    Args:
        latents (torch.Tensor): The latent tensor. Must have 4 or 5 dimensions.
            Expected shapes: [batch, channels, height, width] or [batch, channels, frames, height, width]
        vae_scale_factor (int): The scale factor used by the VAE to compress images.
            Typically 8 for most VAEs (image is 8x larger than latents in each dimension)

    Returns:
        Tuple[int, int]: The calculated image dimensions as (height, width)

    Raises:
        ValueError: If latents tensor doesn't have 4 or 5 dimensions

          z6unpacked latents must have 4 or 5 dimensions, but got N)ndimr   r   )r"   r#   latent_heightlatent_widthheightwidths         r    calculate_dimension_from_latentsr-   Q   sk    ( ||qW\\Q.QRYR^R^Q_`aa")--"4M<--F++E5=r!   c                       e Zd ZdZedefd       Zedee   fd       Z	edee   fd       Z
ed        Zdededefd	Zy
)QwenImageTextInputsStep	qwenimager   c                     d}d}||z   S )NzText input processing step that standardizes text embeddings for the pipeline.
This step:
  1. Determines `batch_size` and `dtype` based on `prompt_embeds`
  2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt)z

This block should be placed after all encoder steps to process the text embeddings before they are used in subsequent pipeline steps. )selfsummary_sectionplacement_sections      r   descriptionz#QwenImageTextInputsStep.descriptions   s"    p 	 h!222r!   c                     t        dd      t        ddd      t        ddd      t        d	d
      t        dd
      gS )Nr   r   namedefaultprompt_embedsTdenoiser_input_fields)r9   requiredkwargs_typeprompt_embeds_masknegative_prompt_embeds)r9   r>   negative_prompt_embeds_maskr   r3   s    r   inputszQwenImageTextInputsStep.inputs   sL     3Q?OdH_`04Mde4BYZ9G^_
 	
r!   c                 ^    t        dt        d      t        dt        j                  d      gS )Nr   zdNumber of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt)	type_hintr6   dtypez@Data type of model tensor inputs (determined by `prompt_embeds`))r   intr   rG   rC   s    r   intermediate_outputsz,QwenImageTextInputsStep.intermediate_outputs   s9      C
 ++^
 	
r!   c                 F   ||t        d      ||t        d      |j                  d   | j                  d   k7  rt        d      |*|j                  d   | j                  d   k7  rt        d      |+|j                  d   | j                  d   k7  rt        d      y y )NzS`negative_prompt_embeds_mask` is required when `negative_prompt_embeds` is not NonezJcannot pass `negative_prompt_embeds_mask` without `negative_prompt_embeds`r   zE`prompt_embeds_mask` must have the same batch size as `prompt_embeds`zI`negative_prompt_embeds` must have the same batch size as `prompt_embeds`zN`negative_prompt_embeds_mask` must have the same batch size as `prompt_embeds`)r   r   r;   r?   r@   rA   s       r   check_inputsz$QwenImageTextInputsStep.check_inputs   s     "-2M2Urss!).I.Uijj##A&-*=*=a*@@dee#/4J4P4PQR4SWdWjWjklWm4mhii (38S8Y8YZ[8\`m`s`stu`v8vmnn 9w3r!   
componentsstatec                    | j                  |      }| j                  |j                  |j                  |j                  |j
                         |j                  j                  d   |_        |j                  j                  |_        |j                  j                  \  }}}|j                  j                  d|j                  d      |_        |j                  j                  |j                  |j                  z  |d      |_        |j                  j                  d|j                  d      |_        |j                  j                  |j                  |j                  z  |      |_        |j                  |j                  j                  \  }}}|j                  j                  d|j                  d      |_        |j                  j                  |j                  |j                  z  |d      |_        |j
                  j                  d|j                  d      |_        |j
                  j                  |j                  |j                  z  |      |_        | j                  ||       ||fS )NrK   r   r   )get_block_staterL   r;   r?   r@   rA   r   r   rG   repeatr   viewset_block_state)r3   rM   rN   block_state_seq_lens         r   __call__z QwenImageTextInputsStep.__call__   s2   **51%33*==#.#E#E(3(O(O	 	 	
 "-!:!:!@!@!C'55;;#11777A$/$=$=$D$DQHiHikl$m!$/$=$=$B$B""[%F%FFQS%
! *5)G)G)N)NqR]RsRsuv)w&)4)G)G)L)L""[%F%FF*
& --9'>>DDMAw1<1S1S1Z1Z;44a2K. 2=1S1S1X1X&&)J)JJGUW2K. 7B6]6]6d6d;44a7K3 7B6]6]6b6b&&)J)JJG7K3 	UK05  r!   N)__name__
__module____qualname__
model_namepropertystrr6   r   r   rD   rI   staticmethodrL   r   r	   rX   r2   r!   r   r/   r/   p   s    J3S 3 3 
Z( 
 
 
d3i 
 
 o o.+!#; +!M +!Vc +!r!   r/   c                        e Zd ZdZdgg fdee   dee   f fdZedefd       Zedee	   fd       Z
edee   fd	       Zedee   fd
       ZdededefdZ xZS )QwenImageInputsDynamicStepr0   image_latentsimage_latent_inputsadditional_batch_inputsc                     t        |t              s|g}t        |t              s|g}|| _        || _        t        |           y)a  Initialize a configurable step that standardizes the inputs for the denoising step. It:
"

        This step handles multiple common tasks to prepare inputs for the denoising step:
        1. For encoded image latents, use it update height/width if None, patchifies, and expands batch size
        2. For additional_batch_inputs: Only expands batch dimensions to match final batch size

        This is a dynamic block that allows you to configure which inputs to process.

        Args:
            image_latent_inputs (List[str], optional): Names of image latent tensors to process.
                These will be used to determine height/width, patchified, and batch-expanded. Can be a single string or
                list of strings. Defaults to ["image_latents"]. Examples: ["image_latents"], ["control_image_latents"]
            additional_batch_inputs (List[str], optional):
                Names of additional conditional input tensors to expand batch size. These tensors will only have their
                batch dimensions adjusted to match the final batch size. Can be a single string or list of strings.
                Defaults to []. Examples: ["processed_mask_image"]

        Examples:
            # Configure to process image_latents (default behavior) QwenImageInputsDynamicStep()

            # Configure to process multiple image latent inputs
            QwenImageInputsDynamicStep(image_latent_inputs=["image_latents", "control_image_latents"])

            # Configure to process image latents and additional batch inputs QwenImageInputsDynamicStep(
                image_latent_inputs=["image_latents"], additional_batch_inputs=["processed_mask_image"]
            )
        N)r   list_image_latent_inputs_additional_batch_inputssuper__init__)r3   rc   rd   	__class__s      r   rj   z#QwenImageInputsDynamicStep.__init__   sI    @ -t4#6"7148'>&?#$7!(?%r!   r   c                     d}d}| j                   s| j                  r>d}| j                   r|d| j                    z  }| j                  r|d| j                   z  }d}||z   |z   S )NzInput processing step that:
  1. For image latent inputs: Updates height/width if None, patchifies latents, and expands batch size
  2. For additional batch inputs: Expands batch dimensions to match final batch size z

Configured inputs:z
  - Image latent inputs: z
  - Additional batch inputs: zN

This block should be placed after the encoder steps and the text input step.)rg   rh   )r3   r4   inputs_infor5   s       r   r6   z&QwenImageInputsDynamicStep.description  s    c 	 $$(E(E2K((!<T=V=V<WXX,,!@A^A^@_`` o,/@@@r!   c                    t        dd      t        dd      t        d      t        d	      g}| j                  D ]  }|j                  t        |              | j                  D ]  }|j                  t        |              |S )
Nr   r   r8   r   Tr9   r=   r+   r9   r,   )r   rg   appendrh   )r3   rD   image_latent_input_namer   s       r   rD   z!QwenImageInputsDynamicStep.inputs#  s     3Q?L48H%G$	
 (,'@'@ 	D#MM**ABC	D 77 	7JMM**56	7 r!   c                 J    t        dt        d      t        dt        d      gS )Nimage_heightzThe height of the image latents)r9   rF   r6   image_widthzThe width of the image latents)r   rH   rC   s    r   rI   z/QwenImageInputsDynamicStep.intermediate_outputs6  s(     ^sHij]cGgh
 	
r!   c                 (    t        dt        d      gS )N	pachifierfrom_config)default_creation_method)r
   r   rC   s    r   expected_componentsz.QwenImageInputsDynamicStep.expected_components=  s     +'9S`a
 	
r!   rM   rN   c                    | j                  |      }| j                  D ]  }t        ||      }|t        ||j                        \  }}|j
                  xs ||_        |j                  xs ||_        t        |d      s||_        t        |d      s||_	        |j                  j                  |      }t        |||j                  |j                        }t        |||        | j                   D ]A  }t        ||      }	|	t        ||	|j                  |j                        }	t        |||	       C | j#                  ||       ||fS )Nru   rv   r   r   r   r   )rQ   rg   getattrr-   r#   r+   r,   hasattrru   rv   rx   pack_latentsr    r   r   setattrrh   rT   )
r3   rM   rN   rU   rs   image_latent_tensorr+   r,   r   r   s
             r   rX   z#QwenImageInputsDynamicStep.__call__C  sq   **51 (,'@'@ 	O#")+7N"O"* ==PR\RmRmnMFE!,!3!3!=vK + 1 1 :UK;7+1(;6*/' #-"6"6"C"CDW"X #>20&1&G&G&11	# K!8:MN5	O: 77 	;J";
;L# 7%)&1&G&G&11	L K\:	; 	UK05  r!   )rY   rZ   r[   r\   r   r^   rj   r]   r6   r   rD   r   rI   r
   r{   r   r	   rX   __classcell__)rk   s   @r   ra   ra      s    J +:):-/'!#Y' "&c'R AS A A, Z(  $ 
d;&7 
 
 
T-%8 
 

1!#; 1!M 1!Vc 1!r!   ra   c                   ~    e Zd ZdZedefd       Zedee   fd       Z	 e
j                         dededefd       Zy)	QwenImageControlNetInputsStepr0   r   c                      y)Nz\prepare the `control_image_latents` for controlnet. Insert after all the other inputs steps.r2   rC   s    r   r6   z)QwenImageControlNetInputsStep.descriptionz  s    mr!   c                 z    t        dd      t        dd      t        dd      t        d	      t        d
	      gS )Ncontrol_image_latentsTrp   r   r   r   r8   r+   rq   r,   rB   rC   s    r   rD   z$QwenImageControlNetInputsStep.inputs~  s>     3dCL483Q?H%G$
 	
r!   rM   rN   c                 r   | j                  |      }t        |j                  t              rg }t	        |j
                        D ]  \  }}t        ||j                        \  }}|j                  xs ||_        |j                  xs ||_	        |j                  j                  |      }t        d| d||j                  |j                        }|j                  |        ||_        nt        |j
                  |j                        \  }}|j                  xs ||_        |j                  xs ||_	        |j                  j                  |j
                        |_        t        d|j
                  |j                  |j                        |_        |j
                  |_        | j!                  ||       ||fS )Nzcontrol_image_latents[]r}   r   )rQ   r   
controlnetr   	enumerater   r-   r#   r+   r,   rx   r   r    r   r   rr   rT   )	r3   rM   rN   rU   r   icontrol_image_latents_r+   r,   s	            r   rX   z&QwenImageControlNetInputsStep.__call__  s   **51j++-JK$&!-6{7X7X-Y E)) @AWYcYtYt u%0%7%7%A6"$/$5$5$>! *4)=)=)J)JKa)b& *E!7s!<!7*5*K*K*55	*& &,,-CD#E& 1FK- =11:3N3NMFE "-!3!3!=vK + 1 1 :UK 1;0D0D0Q0QR]RsRs0tK- 1L2(>>&1&G&G&11	1K- 1<0Q0QK-UK05  r!   N)rY   rZ   r[   r\   r]   r^   r6   r   r   rD   r   no_gradr   r	   rX   r2   r!   r   r   r   w  sv    JnS n n 
Z( 
 
 U]]_2!#; 2!M 2!Vc 2! 2!r!   r   )r   )typingr   r   r   modelsr   modular_pipeliner   r	   modular_pipeline_utilsr
   r   r   r   r   r^   r   rH   r    r-   r/   ra   r   r2   r!   r   <module>r      s      3 C K K J "#	55,,5 5 	5
 \\5pell c V[\_ad\dVe >m!3 m!`T!!6 T!nD!$9 D!r!   