
    eik"                        d dl mZ d dlmZ d dlZd dlmc mZ d dlm	Z	mZ d dl
mZ d dlmZ  G d dej                        Zy)	    )annotations)IterableN)Tensornn)util)SentenceTransformerc                  b     e Zd Z	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZddZddZedd       Z xZS )	MegaBatchMarginLossc                    t         |           || _        || _        || _        || _        |r| j                  | _        y| j                  | _        y)a  
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.

        Args:
            model: SentenceTransformerModel
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                divisor for the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
                from datasets import Dataset

                train_batch_size = 250
                train_mini_batch_size = 32

                model = SentenceTransformer('all-MiniLM-L6-v2')
                train_dataset = Dataset.from_dict({
                    "anchor": [f"This is sentence number {i}" for i in range(500)],
                    "positive": [f"This is sentence number {i}" for i in range(1, 501)],
                })
                loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                args = SentenceTransformerTrainingArguments(
                    output_dir="output",
                    per_device_train_batch_size=train_batch_size,
                )
                trainer = SentenceTransformerTrainer(
                    model=model,
                    args=args,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        N)	super__init__modelpositive_marginnegative_marginmini_batch_sizeforward_mini_batchedforward_non_mini_batchedforward)selfr   r   r   use_mini_batched_versionr   	__class__s         z/home/obispo/Crisostomo_bridge/mision_env/lib/python3.12/site-packages/sentence_transformers/losses/MegaBatchMarginLoss.pyr   zMegaBatchMarginLoss.__init__   sL    T 	
...4Lt00RVRoRo    c           
     j   |\  }}t        |j                               }t        |t        t	        |                     }g }t        j                         5  | j                  j                          t        d|| j                        D ]h  }|| j                  z   }	|j                         D 
ci c]  \  }
}|
|||	  }}
}|j                  | j                  |      d   j                                j | j                  j                          d d d        t        j                  |d      }t        j                   t        |      t        |      |j"                        }t        dt        |      | j                        D ]  }|| j                  z   }	| j                  |D ci c]  }|||   ||	  c}      d   }|D ci c]  }|g  }}t        j                         5  t%        j&                  ||      }|d|||	 z  z
  }t        j(                  |d      \  }}d d d        D ]#  }|D ]  }||   j                  ||   |           % |D ]  }t        j*                  ||         ||<    | j                  |D ci c]  }|||   ||	  c}      d   }| j                  |      d   }|j,                  |j,                  k(  sJ |j,                  |j,                  k(  sJ t/        j0                  ||      }t/        j0                  ||      }t/        j2                  | j4                  |z
        t/        j2                  || j6                  z
        z   }|j9                         }|	t              k  s|j;                           S c c}}
w # 1 sw Y   xY wc c}w c c}w # 1 sw Y   xY wc c}w )Nr   sentence_embeddingdim)device      )listkeyslennextitertorchno_gradr   evalranger   itemsappenddetachtraincateyer   r   pytorch_cos_simmaxstackshapeFcosine_similarityrelur   r   meanbackward)r   sentence_featureslabelsanchorpositivefeature_names
batch_sizeall_positive_emb	start_idxend_idxkvinput_mini_batchdiagonal_matrixkey
anchor_embhard_negative_features
cos_scoresnegative_scoresnegatives_maxnegatives_idshard_negative_idpositive_embnegative_emb
pos_cosine
neg_cosinelossess                              r   r   z(MegaBatchMarginLoss.forward_mini_batched_   s   ,V[[]+$tH~"678
]]_ 	JJOO"1j$2F2FG e	#d&:&::HPHX#Y1Aq7';$;#Y #Y ''

3C(DEY(Z(a(a(cde JJ	 !99%51=))C(8$93?O;PYiYpYpq q#&6"79M9MN (	"I$"6"66GTa$bSS&+i*H%H$bc$J :G%G#c2g%G"%G Q!11*>NO
_Yw%G!GG   05yya/P,}Q %2 X ( XC*3/66x}EU7VWXX % W.3kk:PQT:U.V&s+W  ::Xe&fQTsHSM)G,L'L&fg$L  ::&<=>RSL##|'9'9999##|'9'9999 ,,ZFJ,,ZFJVVD00:=>
UYUiUiHiAjjF[[]F Z(!Q(	"T e $Z		 	 %c &HQ Q 'gs>   AN,N
<AN!N
?
N<N#<N0
NN#N-	c                   |D cg c]  }| j                  |      d    }}|\  }}t        j                  ||      }t        j                  |      }|dt        j
                  |j                  d|j                  iz  z
  }	t        j                  |	d      \  }
}t        j                  | j                  |z
        t        j                  |
| j                  z
        z   }|j                         S c c}w )Nr   r   r   r    r   )r   r   r0   r&   diagonalr/   r3   r   r1   r4   r6   r   r   r7   )r   r9   r:   sentence_featurerepsembeddings_aembeddings_brI   positive_scoresrJ   rK   _rR   s                r   r   z,MegaBatchMarginLoss.forward_non_mini_batched   s    [lmGW

+,-ABmm%)"l)),E
..4$		:++FJ4E4EFF
 !99_!<q,,>?!&&Y]YmYmImBnn{{} ns   C0c                     y)Na  
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
 )r   s    r   citationzMegaBatchMarginLoss.citation   s    r   )g?g333333?T2   )r   r   r   floatr   r_   r   boolr   intreturnNone)r9   zIterable[dict[str, Tensor]]r:   r   rb   r   )rb   str)	__name__
__module____qualname__r   r   r   propertyr]   __classcell__)r   s   @r   r
   r
      s~     "%!$)-!Op"Op Op 	Op
 #'Op Op 
Opb<~  r   r
   )
__future__r   collections.abcr   r&   torch.nn.functionalr   
functionalr4   r   sentence_transformersr   )sentence_transformers.SentenceTransformerr   Moduler
   r\   r   r   <module>rq      s/    " $     & In")) nr   