
    iu                     R   d Z ddlmZ ddlZddlmZ ddlmZmZmZ ddl	m
Z ddlmZ dd	lmZ dd
lmZmZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZmZ ddl m!Z!  ejD                  e#      Z$ejJ                  Z& G d dejN                        Z(	 d@dejN                  dejR                  dejR                  dejR                  dejR                  dz  de*de*fdZ+ G d dejN                        Z, G d dejN                        Z- G d dejN                        Z. G d  d!ejN                        Z/ G d" d#ejN                        Z0 G d$ d%e      Z1 G d& d'ejN                        Z2 G d( d)ejN                        Z3 G d* d+ejN                        Z4 G d, d-ejN                        Z5 G d. d/ejN                        Z6e G d0 d1e             Z7e G d2 d3e7             Z8e G d4 d5e7             Z9 ed67       G d8 d9e7             Z: ed:7       G d; d<e7             Z;e G d= d>e7             Z<g d?Z=y)AzPyTorch LayoutLM model.    )CallableN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward)auto_docstringcan_return_tuplelogging   )LayoutLMConfigc                   4     e Zd ZdZ fdZ	 	 	 	 	 ddZ xZS )LayoutLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t#        |j
                  |j$                        | _        t        j(                  |j*                        | _        | j/                  dt1        j2                  |j                        j5                  d      d       y )N)padding_idxepsposition_idsr   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     x/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr%   zLayoutLMEmbeddings.__init__0   s[   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2H2H&J\J\%]"*6+=+=6CXCXYzz&"<"<=ELL)G)GHOOPWXej 	 	
    c                    ||j                         }n|j                         d d }|d   }||j                  n|j                  }|| j                  d d d |f   }|&t        j                  |t        j
                  |      }|| j                  |      }|}	| j                  |      }
	 | j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df   |d d d d df   z
        }| j                  |d d d d df   |d d d d df   z
        }| j                  |      }|	|
z   |z   |z   |z   |z   |z   |z   |z   }| j                  |      }| j                  |      }|S # t        $ r}t        d      |d }~ww xY w)Nr"   r   dtypedevicer      r   z:The `bbox`coordinate values should be within 0-1000 range.)sizerG   r    r;   zeroslongr*   r,   r.   r/   
IndexErrorr0   r1   r3   r6   r9   )r?   	input_idsbboxtoken_type_idsr    inputs_embedsinput_shape
seq_lengthrG   words_embeddingsr,   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser0   r1   r3   
embeddingss                       rB   forwardzLayoutLMEmbeddings.forwardA   s1     #..*K',,.s3K ^
%.%:!!@T@T,,Q^<L!"[[EJJvVN  00;M("66|D	b'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y $ : :> J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J/
\\*-
)  	bYZ`aa	bs   ,A,F7 7	G GG)NNNNN)__name__
__module____qualname____doc__r%   rZ   __classcell__rA   s   @rB   r   r   -   s!    Q
& 5rC   r   modulequerykeyvalueattention_maskscalingr9   c                    t        j                  ||j                  dd            |z  }|#|d d d d d d d |j                  d   f   }	||	z   }t        j
                  j                  |dt         j                        j                  |j                        }t        j
                  j                  ||| j                        }t        j                  ||      }
|
j                  dd      j                         }
|
|fS )NrH   r   r"   )dimrF   )ptrainingr   )r;   matmul	transposeshaper   
functionalsoftmaxfloat32torF   r9   rk   
contiguous)ra   rb   rc   rd   re   rf   r9   kwargsattn_weightscausal_maskattn_outputs              rB   eager_attention_forwardrx   z   s     <<s}}Q':;gEL!$Q1o		"o%=>#k1==((2U]](SVVW\WbWbcL==((6??([L,,|U3K''1-88:K$$rC   c            
            e Zd Z fdZ	 	 ddej
                  dej                  dz  dedz  deej
                     fdZ	 xZ
S )	LayoutLMSelfAttentionc                 $   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |j                   | _        | j                  dz  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r$   r%   r(   num_attention_headshasattr
ValueErrorr@   intattention_head_sizeall_head_sizer   Linearrb   rc   rd   r7   attention_probs_dropout_probr9   attention_dropoutrf   r>   s     rB   r%   zLayoutLMSelfAttention.__init__   sC    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rC   Nhidden_statesre   output_attentionsreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	t        j                  | j                  j                  t              }
 |
| |||	|f| j                  sdn| j                  | j                  d|\  }} |j                  g |d j!                         }|r||f}|S |f}|S )Nr"   r   rH           )r9   rf   )rn   r   rb   viewrm   rc   rd   r   get_interfacer@   _attn_implementationrx   rk   r   rf   reshapers   )r?   r   re   r   rt   rQ   hidden_shapequery_states
key_statesvalue_statesattention_interfacerw   ru   outputss                 rB   rZ   zLayoutLMSelfAttention.forward   sT    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFH1B;- JUrC   NFr[   r\   r]   r%   r;   TensorFloatTensorbooltuplerZ   r_   r`   s   @rB   rz   rz      sW    60 48).	|| ))D0  $;	 
u||	rC   rz   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LayoutLMSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr   )r$   r%   r   r   r(   denser6   r5   r7   r8   r9   r>   s     rB   r%   zLayoutLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rC   r   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r9   r6   r?   r   r   s      rB   rZ   zLayoutLMSelfOutput.forward   7    

=1]3}|'CDrC   r[   r\   r]   r%   r;   r   rZ   r_   r`   s   @rB   r   r      1    >U\\  RWR^R^ rC   r   c            
            e Zd Z fdZ	 	 ddej
                  dej                  dz  dedz  deej
                     fdZ	 xZ
S )	LayoutLMAttentionc                 b    t         |           t        |      | _        t	        |      | _        y r   )r$   r%   rz   r?   r   outputr>   s     rB   r%   zLayoutLMAttention.__init__   s&    )&1	(0rC   Nr   re   r   r   c                 n     | j                   |f||d|}| j                  |d   |      }|f|dd  z   }|S N)re   r   r   r   )r?   r   )r?   r   re   r   rt   self_outputsattention_outputr   s           rB   rZ   zLayoutLMAttention.forward   s\     !tyy
)/
 	
  ;;|AF#%QR(88rC   r   r   r`   s   @rB   r   r      sW    1 48).	|| ))D0  $;	 
u||	rC   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r$   r%   r   r   r(   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnr>   s     rB   r%   zLayoutLMIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$rC   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r?   r   s     rB   rZ   zLayoutLMIntermediate.forward   s&    

=100?rC   r   r`   s   @rB   r   r      s#    9U\\ ell rC   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LayoutLMOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r$   r%   r   r   r   r(   r   r6   r5   r7   r8   r9   r>   s     rB   r%   zLayoutLMOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rC   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      rB   rZ   zLayoutLMOutput.forward
  r   rC   r   r`   s   @rB   r   r     r   rC   r   c            
            e Zd Z fdZ	 	 d	dej
                  dej                  dz  dedz  deej
                     fdZ	d Z
 xZS )
LayoutLMLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y )Nr   )
r$   r%   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r>   s     rB   r%   zLayoutLMLayer.__init__  sI    '-'E'E$*6208$V,rC   Nr   re   r   r   c                      | j                   |f||d|}|d   }|dd  }t        | j                  | j                  | j                  |      }|f|z   }|S r   )r   r   feed_forward_chunkr   r   )	r?   r   re   r   rt   self_attention_outputsr   r   layer_outputs	            rB   rZ   zLayoutLMLayer.forward  s     "0"
)/"
 	"
 2!4(,0##T%A%A4CSCSUe
  /G+rC   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   )r?   r   intermediate_outputr   s       rB   r   z LayoutLMLayer.feed_forward_chunk2  s,    "//0@A{{#68HIrC   r   )r[   r\   r]   r%   r;   r   r   r   r   rZ   r   r_   r`   s   @rB   r   r     s\    - 48).	|| ))D0  $;	 
u||	.rC   r   c                        e Zd Z fdZe	 	 	 	 d
dej                  dej                  dz  dedz  dedz  dedz  de	ej                     e
z  fd	       Z xZS )LayoutLMEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w r   )
r$   r%   r@   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r?   r@   irA   s      rB   r%   zLayoutLMEncoder.__init__:  sN    ]]5IaIaCb#caM&$9#cd
&+# $ds   A#Nr   re   r   output_hidden_statesreturn_dictr   c                     |rdnd }|rdnd }t        | j                        D ])  \  }	}
|r||fz   } |
|||fi |}|d   }|s!||d   fz   }+ |r||fz   }t        |||      S )N r   r   )last_hidden_stater   
attentions)	enumerater   r   )r?   r   re   r   r   r   rt   all_hidden_statesall_self_attentionsr   layer_modulelayer_outputss               rB   rZ   zLayoutLMEncoder.forward@  s     #7BD$5b4(4 	POA|#$58H$H!(! 	M *!,M &9]1=M<O&O#	P   1]4D D++*
 	
rC   )NFFT)r[   r\   r]   r%   r   r;   r   r   r   r   r   rZ   r_   r`   s   @rB   r   r   9  s    ,  48).,1#'"
||"
 ))D0"
  $;	"

 #Tk"
 D["
 
u||		."
 "
rC   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r$   r%   r   r   r(   r   Tanh
activationr>   s     rB   r%   zLayoutLMPooler.__init__h  s9    YYv1163E3EF
'')rC   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r   )r?   r   first_token_tensorpooled_outputs       rB   rZ   zLayoutLMPooler.forwardm  s6     +1a40

#566rC   r   r`   s   @rB   r   r   g  s#    $
U\\ ell rC   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )r$   r%   r   r   r(   r   r   r   r   r
   transform_act_fnr6   r5   r>   s     rB   r%   z(LayoutLMPredictionHeadTransform.__init__x  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrC   r   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   r6   r   s     rB   rZ   z'LayoutLMPredictionHeadTransform.forward  s4    

=1--m<}5rC   r   r`   s   @rB   r   r   w  s$    UU\\ ell rC   r   c                   $     e Zd Z fdZd Z xZS )LayoutLMLMPredictionHeadc                    t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        y )NT)bias)r$   r%   r   	transformr   r   r(   r'   decoder	Parameterr;   rJ   r   r>   s     rB   r%   z!LayoutLMLMPredictionHead.__init__  s[    8@ yy!3!3V5F5FTRLLV->->!?@	rC   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     rB   rZ   z LayoutLMLMPredictionHead.forward  s$    }5]3rC   )r[   r\   r]   r%   rZ   r_   r`   s   @rB   r   r     s    ArC   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMOnlyMLMHeadc                 B    t         |           t        |      | _        y r   )r$   r%   r   predictionsr>   s     rB   r%   zLayoutLMOnlyMLMHead.__init__  s    3F;rC   sequence_outputr   c                 (    | j                  |      }|S r   )r   )r?   r   prediction_scoress      rB   rZ   zLayoutLMOnlyMLMHead.forward  s     ,,_=  rC   r   r`   s   @rB   r   r     s#    <!u|| ! !rC   r   c                   Z     e Zd ZU eed<   dZdZ ej                          fd       Z	 xZ
S )LayoutLMPreTrainedModelr@   layoutlmTc                 X   t         |   |       t        |t              r t	        j
                  |j                         yt        |t              rZt	        j                  |j                  t        j                  |j                  j                  d         j                  d             yy)zInitialize the weightsr"   r!   N)r$   _init_weightsr   r   initzeros_r   r   copy_r    r;   r<   rn   r=   )r?   ra   rA   s     rB   r   z%LayoutLMPreTrainedModel._init_weights  sx     	f%f67KK$ 23JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 4rC   )r[   r\   r]   r   __annotations__base_model_prefixsupports_gradient_checkpointingr;   no_gradr   r_   r`   s   @rB   r   r     s1    "&*#U]]_i irC   r   c                   6    e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dedz  dedz  dedz  deez  fd              Z xZS )LayoutLMModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        | j                          y r   )
r$   r%   r@   r   rY   r   encoderr   pooler	post_initr>   s     rB   r%   zLayoutLMModel.__init__  sG     ,V4&v.$V, 	rC   c                 .    | j                   j                  S r   rY   r*   r?   s    rB   get_input_embeddingsz"LayoutLMModel.get_input_embeddings  s    ...rC   c                 &    || j                   _        y r   r  )r?   rd   s     rB   set_input_embeddingsz"LayoutLMModel.set_input_embeddings  s    */'rC   NrM   rN   re   rO   r    rP   r   r   r   r   c
                    ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      ||j                  n|j                  }|t        j                  ||      }|&t        j                  |t        j                  |      }|)t        j                  |dz   t        j                  |      }|j                  d      j                  d	      }|j                  | j                  
      }d|z
  t        j                  | j                        j                   z  }| j#                  |||||      }| j%                  ||||d      }|d   }| j'                  |      }t)        |||j*                  |j,                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMModel
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> outputs = model(
        ...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
        ... )

        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer"   z5You have to specify either input_ids or inputs_embeds)rG   rE   )   r   rH   )rF   g      ?)rM   rN   r    rO   rP   T)r   r   r   r   )r   pooler_outputr   r   )r@   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskrI   rG   r;   onesrJ   rK   	unsqueezerr   rF   finfominrY   r  r  r   r   r   )r?   rM   rN   re   rO   r    rP   r   r   r   rt   rQ   rG   extended_attention_maskembedding_outputencoder_outputsr   r   s                     rB   rZ   zLayoutLMModel.forward  s   j 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN<;;{T1FSD"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@EKKPTPZPZD[D_D_"_??%)' + 
 ,,#/!5 ' 
 *!,O4)-')77&11	
 	
rC   )	NNNNNNNNN)r[   r\   r]   r%   r  r  r   r   r;   
LongTensorr   r   r   r   rZ   r_   r`   s   @rB   r	  r	    s   	/0  .2(,37260426)-,0#'h
##d*h
 %h
 ))D0	h

 ((4/h
 &&-h
 ((4/h
  $;h
 #Tkh
 D[h
 
+	+h
  h
rC   r	  c                   f    e Zd ZdddZ fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 	 dd	e
j                  dz  d
e
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  dedz  dedz  dedz  deez  fd              Z xZS )LayoutLMForMaskedLMzcls.predictions.biasz*layoutlm.embeddings.word_embeddings.weight)zcls.predictions.decoder.biaszcls.predictions.decoder.weightc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )r$   r%   r	  r   r   clsr  r>   s     rB   r%   zLayoutLMForMaskedLM.__init__;  s4     %f-&v. 	rC   c                 B    | j                   j                  j                  S r   r   rY   r*   r  s    rB   r  z(LayoutLMForMaskedLM.get_input_embeddingsD      }}''777rC   c                 B    | j                   j                  j                  S r   )r$  r   r   r  s    rB   get_output_embeddingsz)LayoutLMForMaskedLM.get_output_embeddingsG  s    xx##+++rC   c                     || j                   j                  _        |j                  | j                   j                  _        y r   )r$  r   r   r   )r?   new_embeddingss     rB   set_output_embeddingsz)LayoutLMForMaskedLM.set_output_embeddingsJ  s,    '5$$2$7$7!rC   NrM   rN   re   rO   r    rP   labelsr   r   r   r   c                 p   |
|
n| j                   j                  }
| j                  ||||||||	d	      }|d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }t        |||j                  |j                        S )a2	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "[MASK]"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=labels,
        ... )

        >>> loss = outputs.loss
        ```NT)re   rO   r    rP   r   r   r   r   r"   losslogitsr   r   )
r@   r  r   r$  r   r   r'   r   r   r   )r?   rM   rN   re   rO   r    rP   r-  r   r   r   rt   r   r   r   masked_lm_lossloss_fcts                    rB   rZ   zLayoutLMForMaskedLM.forwardN  s    @ &1%<k$++B]B]--))%'/!5   

 "!* HH_5')H%!&&r4;;+A+ABBN
 $!//))	
 	
rC   
NNNNNNNNNN)r[   r\   r]   _tied_weights_keysr%   r  r)  r,  r   r   r;   r   r   r   r   r   rZ   r_   r`   s   @rB   r"  r"  4  s:    )?*V
8,8  .2(,37260426*.)-,0#'\
##d*\
 %\
 ))D0	\

 ((4/\
 &&-\
 ((4/\
   4'\
  $;\
 #Tk\
 D[\
 
	\
  \
rC   r"  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                   P    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  de
dz  de
dz  de
dz  deez  fd              Z xZS )!LayoutLMForSequenceClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r   r$   r%   
num_labelsr	  r   r   r7   r8   r9   r   r(   
classifierr  r>   s     rB   r%   z*LayoutLMForSequenceClassification.__init__  i      ++%f-zz&"<"<=))F$6$68I8IJ 	rC   c                 B    | j                   j                  j                  S r   r&  r  s    rB   r  z6LayoutLMForSequenceClassification.get_input_embeddings  r'  rC   NrM   rN   re   rO   r    rP   r-  r   r   r   r   c                    |
|
n| j                   j                  }
| j                  ||||||||	d	      }|d   }| j                  |      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }t!        |||j"                  |j$                  	      S )
aB	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> sequence_label = torch.tensor([1])

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=sequence_label,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NT	rM   rN   re   rO   r    rP   r   r   r   r   
regressionsingle_label_classificationmulti_label_classificationr"   r/  )r@   r  r   r9   r<  problem_typer;  rF   r;   rK   r   r   squeezer   r   r   r   r   r   )r?   rM   rN   re   rO   r    rP   r-  r   r   r   rt   r   r   r1  r0  r3  s                    rB   rZ   z)LayoutLMForSequenceClassification.forward  s   @ &1%<k$++B]B]--))%'/!5   

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rC   r4  )r[   r\   r]   r%   r  r   r   r;   r   r   r   r   r   rZ   r_   r`   s   @rB   r8  r8    s!   8  .2(,37260426*.)-,0#'m
##d*m
 %m
 ))D0	m

 ((4/m
 &&-m
 ((4/m
   4'm
  $;m
 #Tkm
 D[m
 
)	)m
  m
rC   r8  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                   P    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  de
dz  de
dz  de
dz  deez  fd              Z xZS )LayoutLMForTokenClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r   r:  r>   s     rB   r%   z'LayoutLMForTokenClassification.__init__=  r=  rC   c                 B    | j                   j                  j                  S r   r&  r  s    rB   r  z3LayoutLMForTokenClassification.get_input_embeddingsG  r'  rC   NrM   rN   re   rO   r    rP   r-  r   r   r   r   c                 ~   |
|
n| j                   j                  }
| j                  ||||||||	d	      }|d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }t        |||j                  |j                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=token_labels,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTr@  r   r"   r/  )r@   r  r   r9   r<  r   r   r;  r   r   r   )r?   rM   rN   re   rO   r    rP   r-  r   r   r   rt   r   r   r1  r0  r3  s                    rB   rZ   z&LayoutLMForTokenClassification.forwardJ  s    | &1%<k$++B]B]--))%'/!5   

 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
rC   r4  )r[   r\   r]   r%   r  r   r   r;   r   r   r   r   r   rZ   r_   r`   s   @rB   rG  rG  5  s!   8  .2(,37260426*.)-,0#'Y
##d*Y
 %Y
 ))D0	Y

 ((4/Y
 &&-Y
 ((4/Y
   4'Y
  $;Y
 #TkY
 D[Y
 
&	&Y
  Y
rC   rG  c                   r    e Zd Zd fd	Zd Zee	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  de
dz  de
dz  de
dz  deez  fd              Z xZS )LayoutLMForQuestionAnsweringc                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y)z
        has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
            Whether or not to add visual segment embeddings.
        N)
r$   r%   r;  r	  r   r   r   r(   
qa_outputsr  )r?   r@   has_visual_segment_embeddingrA   s      rB   r%   z%LayoutLMForQuestionAnswering.__init__  sU    
 	  ++%f-))F$6$68I8IJ 	rC   c                 B    | j                   j                  j                  S r   r&  r  s    rB   r  z1LayoutLMForQuestionAnswering.get_input_embeddings  r'  rC   NrM   rN   re   rO   r    rP   start_positionsend_positionsr   r   r   r   c                    ||n| j                   j                  }| j                  |||||||	|
d	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   d	z  }t        ||||j                  |j                  
      S )a4	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Example:

        In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
        of what it thinks the answer is (the span of the answer within the texts parsed from the image).

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
        >>> from datasets import load_dataset
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
        >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

        >>> dataset = load_dataset("nielsr/funsd", split="train")
        >>> example = dataset[0]
        >>> question = "what's his name?"
        >>> words = example["words"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(
        ...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
        ... )
        >>> bbox = []
        >>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
        ...     if s == 1:
        ...         bbox.append(boxes[w])
        ...     elif i == tokenizer.sep_token_id:
        ...         bbox.append([1000] * 4)
        ...     else:
        ...         bbox.append([0] * 4)
        >>> encoding["bbox"] = torch.tensor([bbox])

        >>> word_ids = encoding.word_ids(0)
        >>> outputs = model(**encoding)
        >>> loss = outputs.loss
        >>> start_scores = outputs.start_logits
        >>> end_scores = outputs.end_logits
        >>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
        >>> print(" ".join(words[start : end + 1]))
        M. Hamann P. Harper, P. Martinez
        ```NTr@  r   r   r"   )ri   )ignore_indexrH   )r0  start_logits
end_logitsr   r   )r@   r  r   rN  splitrE  rs   lenrI   clampr   r   r   r   )r?   rM   rN   re   rO   r    rP   rQ  rR  r   r   r   rt   r   r   r1  rU  rV  
total_lossignored_indexr3  
start_lossend_losss                          rB   rZ   z$LayoutLMForQuestionAnswering.forward  s   D &1%<k$++B]B]--))%'/!5   

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J+%!!//))
 	
rC   )T)NNNNNNNNNNN)r[   r\   r]   r%   r  r   r   r;   r   r   r   r   r   rZ   r_   r`   s   @rB   rL  rL    s:   8  .2(,372604263715)-,0#'l
##d*l
 %l
 ))D0	l

 ((4/l
 &&-l
 ((4/l
 ))D0l
 ''$.l
  $;l
 #Tkl
 D[l
 
-	-l
  l
rC   rL  )r"  r8  rG  rL  r	  r   )r   )>r^   collections.abcr   r;   r   torch.nnr   r   r    r	   r  activationsr
   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   pytorch_utilsr   utilsr   r   r   configuration_layoutlmr   
get_loggerr[   loggerr6   r4   Moduler   r   floatrx   rz   r   r   r   r   r   r   r   r   r   r   r   r	  r"  r8  rG  rL  __all__r   rC   rB   <module>rm     s_    $   A A & ! 9  G 6 > > 2 
		H	% LL I Ih %II%<<% 
% <<	%
 LL4'% % %05BII 5r 		 2299  RYY #. #N*
bii *
\RYY  bii $ryy "!")) ! io i i |
+ |
 |
~ w
1 w
 w
t }
(? }
}
@ i
%< i
i
X @
#: @
 @
FrC   