
    ilA                        d dl mZ ddlmZmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZmZmZmZ dd	lmZmZmZmZmZmZmZ  ej<                  e      Z  G d
 de      Z! G d de      Z" G d de      Z# G d de      Z$ G d de      Z% G d de      Z& G d de      Z' G d de      Z( G d de      Z) G d de      Z* G d de      Z+ G d  d!e      Z, G d" d#e      Z- G d$ d%e      Z. G d& d'e      Z/ ed(efd)efd*efd+efd,efd-efg      Z0 ed.efd/e"fd0e#fd)efd1 ed2g3      fd*efd+efd4e	fd,efd-efg
      Z1 ed.efd5efd/e%fd0e&fd)efd1 ed6g3      fd*efd+efd7e
fd,efd-efg      Z2 ed(efd/e(fd0e)fd,e*fd-efg      Z3 ed(efd)efd*efd+efd,efd-efg      Z4 ed.efd0e#fd)efd*efd+efd,efd-efg      Z5 ed(efd0e)fd,e.fd-efg      Z6e0e1e2e3d8e4e5e6d9d:Z7y;)<   )logging   )AutoPipelineBlocksSequentialPipelineBlocks)InsertableDict   )WanAdditionalInputsStepWanPrepareFirstFrameLatentsStep#WanPrepareFirstLastFrameLatentsStepWanPrepareLatentsStepWanSetTimestepsStepWanTextInputStep)WanImageVaeDecoderStep)Wan22DenoiseStepWan22Image2VideoDenoiseStepWanDenoiseStepWanFLF2VDenoiseStepWanImage2VideoDenoiseStep)!WanFirstLastFrameImageEncoderStep$WanFirstLastFrameVaeImageEncoderStepWanImageCropResizeStepWanImageEncoderStepWanImageResizeStepWanTextEncoderStepWanVaeImageEncoderStepc                   0    e Zd ZeeeegZg dZe	d        Z
y)WanCoreDenoiseStepinputset_timestepsprepare_latentsdenoisec                      	 y)Na`  denoise block that takes encoded conditions and runs the denoising process.
This is a sequential pipeline blocks:
 - `WanTextInputStep` is used to adjust the batch size of the model inputs
 - `WanSetTimestepsStep` is used to set the timesteps
 - `WanPrepareLatentsStep` is used to prepare the latents
 - `WanDenoiseStep` is used to denoise the latents
 selfs    x/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/modular_pipelines/wan/modular_blocks.pydescriptionzWanCoreDenoiseStep.description;   s    E	
    N)__name__
__module____qualname__r   r   r   r   block_classesblock_namespropertyr(   r$   r)   r'   r   r   2   s/    	M KK
 
r)   r   c                   0    e Zd ZdZeegZddgZed        Z	y)WanImage2VideoImageEncoderStepwanimage_resizeimage_encoderc                      y)NzjImage2Video Image Encoder step that resize the image and encode the image to generate the image embeddingsr$   r%   s    r'   r(   z*WanImage2VideoImageEncoderStep.descriptionN   s    {r)   N)
r*   r+   r,   
model_namer   r   r-   r.   r/   r(   r$   r)   r'   r1   r1   I   s0    J')<=M!?3K| |r)   r1   c                   0    e Zd ZdZeegZddgZed        Z	y)!WanImage2VideoVaeImageEncoderStepr2   r3   vae_image_encoderc                      y)NzvImage2Video Vae Image Encoder step that resize the image and encode the first frame image to its latent representationr$   r%   s    r'   r(   z-WanImage2VideoVaeImageEncoderStep.descriptionY   s     Hr)   N)
r*   r+   r,   r6   r   r   r-   r.   r/   r(   r$   r)   r'   r8   r8   T   s1    J')?@M!#67KH Hr)   r8   c                   D    e Zd Ze edg      eeeegZ	g dZ
ed        Zy)WanImage2VideoCoreDenoiseStepfirst_frame_latentsimage_latent_inputsr   additional_inputsr    r!   prepare_first_frame_latentsr"   c                      	 y)Na2  denoise block that takes encoded text and image latent conditions and runs the denoising process.
This is a sequential pipeline blocks:
 - `WanTextInputStep` is used to adjust the batch size of the model inputs
 - `WanAdditionalInputsStep` is used to adjust the batch size of the latent conditions
 - `WanSetTimestepsStep` is used to set the timesteps
 - `WanPrepareLatentsStep` is used to prepare the latents
 - `WanPrepareFirstFrameLatentsStep` is used to prepare the first frame latent conditions
 - `WanImage2VideoDenoiseStep` is used to denoise the latents
r$   r%   s    r'   r(   z)WanImage2VideoCoreDenoiseStep.descriptionq       P		
r)   N)r*   r+   r,   r   r	   r   r   r
   r   r-   r.   r/   r(   r$   r)   r'   r<   r<   _   s?    5J4KL'!MK 

 

r)   r<   c                   2    e Zd ZdZeeegZg dZe	d        Z
y)WanFLF2VImageEncoderStepr2   )r3   last_image_resizer4   c                      y)Nz{FLF2V Image Encoder step that resize and encode and encode the first and last frame images to generate the image embeddingsr$   r%   s    r'   r(   z$WanFLF2VImageEncoderStep.description   s     Mr)   N)r*   r+   r,   r6   r   r   r   r-   r.   r/   r(   r$   r)   r'   rF   rF      s.    J')?AbcMHKM Mr)   rF   c                   2    e Zd ZdZeeegZg dZe	d        Z
y)WanFLF2VVaeImageEncoderStepr2   )r3   rG   r9   c                      y)NzFLF2V Vae Image Encoder step that resize and encode and encode the first and last frame images to generate the latent conditionsr$   r%   s    r'   r(   z'WanFLF2VVaeImageEncoderStep.description   s     Rr)   N)r*   r+   r,   r6   r   r   r   r-   r.   r/   r(   r$   r)   r'   rJ   rJ      s.    J')?AefMLKR Rr)   rJ   c                   D    e Zd Ze edg      eeeegZ	g dZ
ed        Zy)WanFLF2VCoreDenoiseStepfirst_last_frame_latentsr>   )r   rA   r    r!    prepare_first_last_frame_latentsr"   c                      	 y)Na*  denoise block that takes encoded text and image latent conditions and runs the denoising process.
This is a sequential pipeline blocks:
 - `WanTextInputStep` is used to adjust the batch size of the model inputs
 - `WanAdditionalInputsStep` is used to adjust the batch size of the latent conditions
 - `WanSetTimestepsStep` is used to set the timesteps
 - `WanPrepareLatentsStep` is used to prepare the latents
 - `WanPrepareFirstLastFrameLatentsStep` is used to prepare the latent conditions
 - `WanImage2VideoDenoiseStep` is used to denoise the latents
r$   r%   s    r'   r(   z#WanFLF2VCoreDenoiseStep.description   rD   r)   N)r*   r+   r,   r   r	   r   r   r   r   r-   r.   r/   r(   r$   r)   r'   rM   rM      s?    5O4PQ+MK 

 

r)   rM   c                   4    e Zd ZeegZddgZddgZed        Z	y)WanAutoImageEncoderStepflf2v_image_encoderimage2video_image_encoder
last_imageimagec                      	 y)Nal  Image Encoder step that encode the image to generate the image embeddingsThis is an auto pipeline block that works for image2video tasks. - `WanFLF2VImageEncoderStep` (flf2v) is used when `last_image` is provided. - `WanImage2VideoImageEncoderStep` (image2video) is used when `image` is provided. - if `last_image` or `image` is not provided, step will be skipped.r$   r%   s    r'   r(   z#WanAutoImageEncoderStep.description       U	
r)   N)
r*   r+   r,   rF   r1   r-   r.   block_trigger_inputsr/   r(   r$   r)   r'   rR   rR      s5    -/MNM(*EFK('2
 
r)   rR   c                   4    e Zd ZeegZddgZddgZed        Z	y)WanAutoVaeImageEncoderStepflf2v_vae_image_encoderimage2video_vae_image_encoderrU   rV   c                      	 y)Nas  Vae Image Encoder step that encode the image to generate the image latentsThis is an auto pipeline block that works for image2video tasks. - `WanFLF2VVaeImageEncoderStep` (flf2v) is used when `last_image` is provided. - `WanImage2VideoVaeImageEncoderStep` (image2video) is used when `image` is provided. - if `last_image` or `image` is not provided, step will be skipped.r$   r%   s    r'   r(   z&WanAutoVaeImageEncoderStep.description   rX   r)   N)
r*   r+   r,   rJ   r8   r-   r.   rY   r/   r(   r$   r)   r'   r[   r[      s5    02STM,.MNK('2
 
r)   r[   c                   <    e Zd ZeeegZg dZg dZe	de
fd       Zy)WanAutoDenoiseStep)flf2vimage2video
text2video)rN   r=   Nreturnc                      	 y)Na  Denoise step that iteratively denoise the latents. This is a auto pipeline block that works for text2video and image2video tasks. - `WanCoreDenoiseStep` (text2video) for text2vid tasks. - `WanCoreImage2VideoCoreDenoiseStep` (image2video) for image2video tasks. - if `first_frame_latents` is provided, `WanCoreImage2VideoDenoiseStep` will be used.
 - if `first_frame_latents` is not provided, `WanCoreDenoiseStep` will be used.
r$   r%   s    r'   r(   zWanAutoDenoiseStep.description   s    b	
r)   N)r*   r+   r,   rM   r<   r   r-   r.   rY   r/   strr(   r$   r)   r'   r`   r`      s8    %M
 9KT
S 
 
r)   r`   c                   2    e Zd ZeeeeegZg dZ	e
d        Zy)WanAutoBlocks)text_encoderr4   r9   r"   decodec                      	 y)NzvAuto Modular pipeline for text-to-video using Wan.
- for text-to-video generation, all you need to provide is `prompt`r$   r%   s    r'   r(   zWanAutoBlocks.description      T	
r)   N)r*   r+   r,   r   rR   r[   r`   r   r-   r.   r/   r(   r$   r)   r'   rh   rh      s3    "MK 
 
r)   rh   c                   0    e Zd ZeeeegZg dZe	d        Z
y)Wan22CoreDenoiseStepr   c                      	 y)Nal  denoise block that takes encoded conditions and runs the denoising process.
This is a sequential pipeline blocks:
 - `WanTextInputStep` is used to adjust the batch size of the model inputs
 - `WanSetTimestepsStep` is used to set the timesteps
 - `WanPrepareLatentsStep` is used to prepare the latents
 - `Wan22DenoiseStep` is used to denoise the latents in wan2.2
r$   r%   s    r'   r(   z Wan22CoreDenoiseStep.description  s    Q	
r)   N)r*   r+   r,   r   r   r   r   r-   r.   r/   r(   r$   r)   r'   rn   rn     s/    	M KK
 
r)   rn   c                   D    e Zd Ze edg      eeeegZ	g dZ
ed        Zy)Wan22Image2VideoCoreDenoiseStepr=   r>   r@   c                      	 y)Na>  denoise block that takes encoded text and image latent conditions and runs the denoising process.
This is a sequential pipeline blocks:
 - `WanTextInputStep` is used to adjust the batch size of the model inputs
 - `WanAdditionalInputsStep` is used to adjust the batch size of the latent conditions
 - `WanSetTimestepsStep` is used to set the timesteps
 - `WanPrepareLatentsStep` is used to prepare the latents
 - `WanPrepareFirstFrameLatentsStep` is used to prepare the first frame latent conditions
 - `Wan22Image2VideoDenoiseStep` is used to denoise the latents in wan2.2
r$   r%   s    r'   r(   z+Wan22Image2VideoCoreDenoiseStep.description9  s    \		
r)   N)r*   r+   r,   r   r	   r   r   r
   r   r-   r.   r/   r(   r$   r)   r'   rq   rq   '  s?    5J4KL'#MK 

 

r)   rq   c                   :    e Zd ZeegZddgZddgZede	fd       Z
y)Wan22AutoDenoiseSteprb   rc   r=   Nrd   c                      	 y)Na  Denoise step that iteratively denoise the latents. This is a auto pipeline block that works for text2video and image2video tasks. - `Wan22Image2VideoCoreDenoiseStep` (image2video) for image2video tasks. - `Wan22CoreDenoiseStep` (text2video) for text2vid tasks. - if `first_frame_latents` is provided, `Wan22Image2VideoCoreDenoiseStep` will be used.
 - if `first_frame_latents` is not provided, `Wan22CoreDenoiseStep` will be used.
r$   r%   s    r'   r(   z Wan22AutoDenoiseStep.descriptionO  s    d	
r)   )r*   r+   r,   rq   rn   r-   r.   rY   r/   rf   r(   r$   r)   r'   rt   rt   G  s?    'M !,/K148
S 
 
r)   rt   c                   0    e Zd ZeeeegZg dZe	d        Z
y)Wan22AutoBlocks)ri   r9   r"   rj   c                      	 y)NzyAuto Modular pipeline for text-to-video using Wan2.2.
- for text-to-video generation, all you need to provide is `prompt`r$   r%   s    r'   r(   zWan22AutoBlocks.descriptioni  rl   r)   N)r*   r+   r,   r   r[   rt   r   r-   r.   r/   r(   r$   r)   r'   rw   rw   [  s0    "	MK 
 
r)   rw   ri   r   r    r!   r"   rj   r3   r4   r9   rA   r=   r>   rB   rG   rN   rO   )rc   rb   ra   auto)rc   rb   ry   )zwan2.1zwan2.2N)8utilsr   modular_pipeliner   r   modular_pipeline_utilsr   before_denoiser	   r
   r   r   r   r   decodersr   r"   r   r   r   r   r   encodersr   r   r   r   r   r   r   
get_loggerr*   loggerr   r1   r8   r<   rF   rJ   rM   rR   r[   r`   rh   rn   rq   rt   rw   TEXT2VIDEO_BLOCKSIMAGE2VIDEO_BLOCKSFLF2V_BLOCKSAUTO_BLOCKSTEXT2VIDEO_BLOCKS_WAN22IMAGE2VIDEO_BLOCKS_WAN22AUTO_BLOCKS_WAN22
ALL_BLOCKSr$   r)   r'   <module>r      s    K 3  -    
		H	%

1 
.|%= |H(@ H
$< 
HM7 MR": R
6 
D
0 
"
!3 
"
+ 
,
, 
:
3 
.
&> 
@
- 
(
. 
2 #	+,	"#	-.	12	N#	)*	  $	+,	89	?@	"#	5K`Jabc	-.	12	&(GH	-.	)*   	+,	45	23	9:	"#	5KeJfgh	-.	12	+-PQ	'(	)*  	+,	12	89	&'	)* )	+,	"#	-.	12	$%	)*	  *	+,	?@	"#	-.	12	$%	)*
  #	+,	89	()	)*	  ()	 ./!
r)   