
    i8                        d dl Z d dlmZmZ d dlmZ d dlZddlmZm	Z	 ddl
mZ ddlmZ  e       rd dlZdZ ej                   d	      Zd
eej&                  eeef   fdZ G d de      Ze G d d             Ze G d d             Zy)    N)	dataclassfield)Enum   )is_psutil_availableis_torch_xpu_available)logging)tracedContinuousBatchingLoggerreturnc                     t         j                  j                         rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }nt               rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }n.t         j                  j                  j                         rt         j                  j                  j                         rWt        j                  d      } t         j                  j                         }|t         j                  j!                         z
  }d}nt        j                  d      } t#               rMt%        j&                         j(                  }t%        j*                         j-                         j.                  }|}nt0        j3                  d       d}d}d}| |||fS )Ncudaxpumpsr   cpuzCannot get memory breakdown on CPU without psutil: returning 0 for all memory values. Please install psutil to get an actual memory breakdown.)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedr   r   backendsr   is_builtdriver_allocated_memoryrecommended_max_memoryr   psutilvirtual_memorytotalProcessmemory_inforssloggererror)r   r   reserved_memoryallocated_memorys       ~/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr+   $   s   zz f%

 

 zz77?LL**44V< ::66v>		!e$				yy66v>KK))33F; 9955f=				(	(	*u~~/A/A/J/J/Le$yy88:'%))*J*J*LLe$ !00288L%~~/;;=AA.OLL< LO <2BBB    c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED r,   r*   r.   r.   K   s*    ?GJ)7HHFr,   r.   c                   &   e Zd ZU dZeed<    ee      Zee	   ed<    ee      Z
ee	   ed<    ee      Zee   ed<   dZedz  ed<   ej                  Zeed	<    eej$                        Zeed
<   dZeeef   ed<   dZee   dz  ed<   defdZy)GenerationOutputa  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        lifespan (tuple[float, float]): The time the request was no longer pending and the time the request finished.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNr'   statuscreated_timer   r   lifespan
timestampsr   c                 <    | j                   t        j                  k(  S N)rJ   r.   r?   selfs    r*   is_finishedzGenerationOutput.is_finishedp   s    {{m4444r,   )r6   r7   r8   r9   str__annotations__r   listrG   intrH   rI   floatr'   r.   r:   rJ   timeperf_counterrK   rM   tuplerN   boolrS   rA   r,   r*   rC   rC   W   s     O!$7JS	7"'"=d3i=!$7Hd5k7E3:)11FM10A0ABL%B$,HeE5L!,%)JUd")5T 5r,   rC   c                   z   e Zd ZU dZeed<   ee   ed<   dZe	ed<   dZ
eed<    ee      Zee   ed	<    ee      Zee   ed
<    ee      Zee   ed<   dZeed<   dZeed<   ej$                  Zeed<   dZedz  ed<   dZeed<   dZe	ed<    eej0                        Zeed<   dZedz  ed<   dZeeef   ed<    ee      Zee   ed<   dZeed<   dZ eed<   d Z!e"defd       Z#e#jH                  d efd!       Z#e"dee   dz  fd"       Z%d# Z&defd$Z'defd%Z(e)d&ede	fd'       Z*d( Z+d) Z,d*edd fd+Z-d-d,Z.y).RequestStateaC  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        initial_tokens (list[int]): The initial prompt tokens.
        num_children (int): The number of children requests
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The tokens IDs remaining to be processed (for split requests).
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int | None): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        streaming (bool): Whether to stream tokens as they're generated
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    rD   initial_tokensFrecord_timestampsr   num_childrenrE   tokens_to_processremaining_prefill_tokensrH   allocated_blocksposition_offset_status   Nmax_new_tokensr   eos_token_id	streamingrK   r'   rL   rM   _timestamps_true_initial_tokens_new_tokens_limitc                 N    | j                   d| _        y | j                   | _        y )Nrm   )rh   rn   rQ   s    r*   __post_init__zRequestState.__post_init__   s#    /3/B/B/JPTPcPcr,   r   c                     | j                   S rP   )rf   rQ   s    r*   rJ   zRequestState.status   s    ||r,   valuec                 (   | j                   t        j                  k(  r#t        j                         df| _        || _         y |t        j                  k(  r8| j
                  d   t        j                         f| _        | j                          || _         y )Nr   r   )rf   r.   r:   rY   rZ   rM   r?   log_end_of_request)rR   rr   s     r*   rJ   zRequestState.status   ss    <<=000!..0"5DM  m,,,!]]1-t/@/@/BCDM##%r,   c                 6    | j                   r| j                  S d S rP   )r`   rk   rQ   s    r*   rN   zRequestState.timestamps   s    #'#9#9tCtCr,   c                    t        | j                        }| j                         }| j                  d   | j                  z
  }| j                  d   | j                  z
  }t
        j                  d| j                   d|d|d|d|
       y )Nr      Request z finished: prefill_len = z decode_len = z start_time = z end_time = )lenr_   generated_lenrM   rK   r&   inforD   )rR   prefill_len
decode_len
start_timeend_times        r*   rt   zRequestState.log_end_of_request   s    $--.'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r,   c                     | j                   S )zCGet the current length of the sequence (prompt + generated tokens).)re   rQ   s    r*   current_lenzRequestState.current_len   s    ###r,   c                 ,    t        | j                        S )z*Get the number of tokens generated so far.)ry   rH   rQ   s    r*   rz   zRequestState.generated_len   s    4(())r,   token_idc                 0   | j                   t        j                  k7  ry| j                  r-| j                  j                  t        j                                || j                  k(  xr | j                  dk7  }| j                         dz
  }|s|| j                  k  r|| j                  d<   |dz  }n?t        j                  d| j                   d|        | j                  j                          |s|| j                  k\  rt        j                   | _         yy)zUpdate the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        Fr   rw   rx   z generated a useless token: T)rJ   r.   r>   r`   rk   appendrY   rZ   ri   rz   rn   rH   r&   warningrD   popr?   )rR   r   is_eosr   s       r*   update_and_check_completionz(RequestState.update_and_check_completion   s     ;;-000 !!##D$5$5$78 T...J43D3D3J((*Q. kD$:$::(0D!!"%1KNNXdoo%66RS[R\]^!!%%'[D$:$::'00DKr,   c           
      n   d| j                    d| j                   d| j                          dt        | j                         dt        | j
                         d| j                   dt        | j                         d| j                   d	| j                   g	}d
dj                  |      z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))rD   rf   rz   ry   rb   rc   re   r_   rd   rH   join)rR   msgs     r*   __repr__zRequestState.__repr__   s    $//*+dll^$$,,./0C 6 6789D$A$A BCD--./!#d&9&9":!;< 5 567 5 567

 #W\\#%66>>r,   c                    | j                   r0| j                   d   t        k(  r| j                   j                          | j                  rI| j                  | j                  d | j                   z   | _         | j                  d| j                   | _        t        | j                  | j                  | j                   g | j                  | j                  | j                  | j                  | j                  	      S )z7Convert the request state to a GenerationOutput object.r   N)	rD   rG   rH   rI   r'   rJ   rK   rM   rN   )rH   TMP_TOKEN_IDr   rl   r_   rC   rD   r'   rJ   rK   rM   rN   rQ   s    r*   to_generation_outputz!RequestState.to_generation_output   s      T%:%:2%>,%N!!%%'$$$($7$78Q8Q8S$TW[WlWl$lD!"&"5"56Q8Q8Q"RD**!22**;;**]]

 
	
r,   new_request_idc                    t        j                         }t        di d|d| j                  d| j                  d| j
                  dd d| j                  dd d| j                  dd d| j                  d	| j                  d
| j                  d| j                  d| j                  d| j                  d|d|dfdg d| j                  d| j                  }|S )ziFork the request into a new request with the same state expect for request_id, created_time and lifespan.rD   r_   ra   rb   Nrc   rH   rd   re   rf   rh   ri   rj   rK   rM   r   rk   r'   r`   rA   )rY   rZ   r^   r_   ra   rb   rc   rH   rd   re   rJ   rh   ri   rj   r'   r`   )rR   r   tnew_requests       r*   forkzRequestState.fork  s#   " 
%
..
 **
 #44Q7	

 &*%B%B1%E
 "2215
 "22
 !00
 KK
  ..
 **
 nn
 
 W
 
  **!
" #44#
& r,   c           
         | j                   r0| j                   d   t        k(  r| j                   j                          | j                  dn!| j                  t	        | j                         z
  }t        | j                  | j                  | j                   z   | j                  | j                  | j                  | j                   z   || j                  | j                        }| j                  t	        | j                        z   |_        |S )a  Creates an equivalent new request by removing the generated tokens and adding them to the initial prompt. The
        created request has THE SAME request_id. Notably, we can retrieve the original request from the created one with
        the _true_initial_tokens attribute.r   N)rD   r_   ra   r`   rb   rh   ri   rj   )rH   r   r   rh   ry   r^   rD   r_   ra   r`   ri   rj   rl   )rR   rh   	new_states      r*   !create_equivalent_initial_requestz.RequestState.create_equivalent_initial_request'  s    
   T%:%:2%>,%N!!%%'!%!4!4!<4CVCVY\]a]r]rYsCs ..1F1FF**"44"11D4I4II)**nn	
	 *.)B)BSI\I\E])]	&r,   )r   r^   )/r6   r7   r8   r9   rT   rU   rV   rW   r`   r\   ra   r   rb   rc   rH   rd   re   r.   r:   rf   rh   ri   rj   rY   rZ   rK   rX   r'   rM   r[   rk   rl   rn   rp   propertyrJ   setterrN   rt   r   rz   r
   r   r   r   r   r   rA   r,   r*   r^   r^   t   s   , OI#t#L##(#>tCy>*/*Ed3iE"'"=d3i=cOS*22G]2!#NC$J#L#It0A0ABL%BE3:$,HeE5L!,$T:Ke: !#!'s'd    ]]M   DDK$. D D
$S $*s *
 !C !D ! !F?
&3 > 0r,   r^   )rY   dataclassesr   r   enumr   r   utilsr   r   utils.loggingr	   utils.metricsr
   r    r   	getLoggerr&   r[   r   rW   r+   r.   rC   r^   rA   r,   r*   <module>r      s     (   @ $ #   
		5	6$Cu||S#s/J)K $CN	D 	 5 5 58 E E Er,   