
    ie                     x    d dl Z d dlmZmZmZmZ d dlZd dlZd dl	Z	d dl
mc mZ ddlmZmZmZ  G d de      Zy)    N)ListOptionalTupleUnion   )VaeImageProcessoris_valid_imageis_valid_image_imagelistc                   J   e Zd ZdZddee   dee   dej                  fdZ	 ddej                  de	de
ej                  ej                  eej                  j                     f   fd	Zededed
edeeef   fd       Zedej                  dededej                  fd       Zy)VideoProcessorzSimple video processor.Nheightwidthreturnc           
         t        |t              r`t        |d   t        j                        rC|d   j                  dk(  r1t        j                  dt               t        j                  |d      }t        |t              r`t        |d   t        j                        rC|d   j                  dk(  r1t        j                  dt               t        j                  |d      }t        |t        j                  t        j                  f      r|j                  dk(  rt        |      }nYt        |t              rt        |d         st        |      r|g}n,t        |t              rt        |d         r|}nt        d      t        j                  |D cg c]  }| j!                  |||       c}d      }|j#                  dd	d
dd      }|S c c}w )a  
        Preprocesses input video(s).

        Args:
            video (`List[PIL.Image]`, `List[List[PIL.Image]]`, `torch.Tensor`, `np.array`, `List[torch.Tensor]`, `List[np.array]`):
                The input video. It can be one of the following:
                * List of the PIL images.
                * List of list of PIL images.
                * 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, width)`).
                * 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * List of 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height,
                  width)`).
                * List of 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * 5D NumPy arrays: expected shape for each array `(batch_size, num_frames, height, width,
                  num_channels)`.
                * 5D Torch tensors: expected shape for each array `(batch_size, num_frames, num_channels, height,
                  width)`.
            height (`int`, *optional*, defaults to `None`):
                The height in preprocessed frames of the video. If `None`, will use the `get_default_height_width()` to
                get default height.
            width (`int`, *optional*`, defaults to `None`):
                The width in preprocessed frames of the video. If `None`, will use get_default_height_width()` to get
                the default width.
        r      zPassing `video` as a list of 5d np.ndarray is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d np.ndarray)axiszPassing `video` as a list of 5d torch.Tensor is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d torch.TensorzeInput is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Image)r   r   )dim   r         )
isinstancelistnpndarrayndimwarningswarnFutureWarningconcatenatetorchTensorcatr	   r
   
ValueErrorstack
preprocesspermute)selfvideor   r   imgs        c/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/diffusers/video_processor.pypreprocess_videozVideoProcessor.preprocess_video   sx   2 eT"z%(BJJ'GERSHMM]^L^MMn
 NN5q1EeT"z%(ELL'IeTUhmm_`N`MMp
 IIe!,E
 ebjj%,,78UZZ1_KEt$a)AE]^cEdGEt$)A%()KEw  Y^_RUT__Su_M_efg aAq!, `s   Gr(   output_typec                 R   |j                   d   }g }t        |      D ]<  }||   j                  dddd      }| j                  ||      }|j	                  |       > |dk(  rt        j                  |      }|S |dk(  rt        j                  |      }|S |dk(  st        | d      |S )	z
        Converts a video tensor to a list of frames for export.

        Args:
            video (`torch.Tensor`): The video as a tensor.
            output_type (`str`, defaults to `"np"`): Output type of the postprocessed `video` tensor.
        r   r   r   r   r   ptpilz9 does not exist. Please choose one of ['np', 'pt', 'pil'])	shaperanger&   postprocessappendr   r$   r    r#   )r'   r(   r,   
batch_sizeoutputs	batch_idx	batch_vidbatch_outputs           r*   postprocess_videoz VideoProcessor.postprocess_videoZ   s     [[^
z* 	)Ii(00Aq!<I++I{CLNN<(	)
 $hhw'G  D kk'*G  %},efgg    ratiosc                     t        | |z        t        |j                         fd      }||   }t        |d         t        |d         fS )a  
        Returns the binned height and width based on the aspect ratio.

        Args:
            height (`int`): The height of the image.
            width (`int`): The width of the image.
            ratios (`dict`): A dictionary where keys are aspect ratios and values are tuples of (height, width).

        Returns:
            `Tuple[int, int]`: The closest binned height and width.
        c                 2    t        t        |       z
        S )N)absfloat)ratioars    r*   <lambda>z:VideoProcessor.classify_height_width_bin.<locals>.<lambda>   s    SuPRAR=S r:   )keyr   r   )r?   minkeysint)r   r   r;   closest_ratio
default_hwrA   s        @r*   classify_height_width_binz(VideoProcessor.classify_height_width_bint   sM     6E>"FKKM/STM*
:a=!3z!}#555r:   samples	new_width
new_heightc                    | j                   d   | j                   d   }}||k7  s||k7  rt        ||z  ||z        }t        ||z        }t        ||z        }| j                   \  }}	}
}}| j                  ddddd      j	                  ||
z  |	||      } t        j                  | ||fdd      } ||z
  dz  }||z   }||z
  dz  }||z   }| d	d	d	d	||||f   } | j	                  ||
|	||      j                  ddddd      } | S )
a4  
        Resizes and crops a tensor of videos to the specified dimensions.

        Args:
            samples (`torch.Tensor`):
                A tensor of shape (N, C, T, H, W) where N is the batch size, C is the number of channels, T is the
                number of frames, H is the height, and W is the width.
            new_width (`int`): The desired width of the output videos.
            new_height (`int`): The desired height of the output videos.

        Returns:
            `torch.Tensor`: A tensor containing the resized and cropped videos.
        r   r   r   r   r   bilinearF)sizemodealign_cornersN)r0   maxrF   r&   reshapeFinterpolate)rJ   rK   rL   orig_height
orig_widthr@   resized_widthresized_heightncthwstart_xend_xstart_yend_ys                    r*   resize_and_crop_tensorz%VideoProcessor.resize_and_crop_tensor   sF    #*--"2GMM!4DZ *$
i(?
[0)j2HIE
U 23M u!45N $MMMAq!QooaAq!4<<QUAq!LG mm~}=J^cG
 %y0Q6Gi'E%
2q8Gj(EaGEM75=@AG ooaAz9EMMaQRTUWXZ[\Gr:   )NN)r   )__name__
__module____qualname____doc__r   rF   r    r!   r+   strr   r   r   r   PILImager9   staticmethoddictr   rI   rc    r:   r*   r   r      s    ";hsm ;8TW= ;didpdp ;| 7;\\03	rzz5<<ciioo)>>	?4 6# 6c 64 6ERUWZRZO 6 6" ) ) )RU )Z_ZfZf ) )r:   r   )r   typingr   r   r   r   numpyr   ri   r    torch.nn.functionalnn
functionalrT   image_processorr   r	   r
   r   rm   r:   r*   <module>rt      s4     / /  
    X XV& Vr:   