U
    <Af                     @   s   d dl Z d dlZd dlmZmZmZmZmZmZ d dl	m
  mZ d dlmZm
Z
 d dlmZ d dlmZ d dlmZ G dd dZG d	d
 d
e
jZdS )    N)AnyDictIterableListOptionalUnion)Tensornn)SentenceTransformer)CachedGISTEmbedLoss)"CachedMultipleNegativesRankingLossc                   @   sX   e Zd ZddddZddddZeeddd	Zeeef eeef d
ddZ	dS )ForwardDecoratorNreturnc                 C   s"   || _ d | _g | _d | _d| _d S Nr   )fndimcache	cache_dimidx)selfr    r   O/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/losses/MatryoshkaLoss.py__init__   s
    zForwardDecorator.__init__c                 C   s   || _ d| _d S r   )r   r   )r   r   r   r   r   set_dim   s    zForwardDecorator.set_dim)tensorr   c                 C   sP   |j d }| j|kr*td| j d| |dd | jf }tj|ddd}|S )Nz
Dimension zL in matryoshka_dims cannot be greater than the model's embedding dimension: .   )pr   )shaper   
ValueErrorF	normalize)r   r   Z
tensor_dimr   r   r   shrink   s    

zForwardDecorator.shrink)featuresr   c                 C   sx   | j d ks| j | jkr6| |}| j| | j| _ n| j| j }| |d |d< | |d |d< |  jd7  _|S )NZtoken_embeddingsZsentence_embedding   )r   r   r   r   appendr   r#   )r   r$   outputr   r   r   __call__$   s    

zForwardDecorator.__call__)
__name__
__module____qualname__r   r   r   r#   r   strr(   r   r   r   r   r      s   
r   c                	       s   e Zd Zdeejee eee	e
ef   edd fddZeeeef  eedddZeeef d	d
dZeed	ddZ  ZS )MatryoshkaLossNr   )modellossmatryoshka_dimsmatryoshka_weightsn_dims_per_stepr   c                    s   t    || _|| _t|tr.tjddd t|trFtjddd |dkr\dgt	| }t
||}t
t|dd d	d
 \| _| _|| _dS )a  
        The MatryoshkaLoss can be seen as a loss *modifier* that allows you to use other loss functions at various
        different embedding dimensions. This is useful for when you want to train a model where users have the option
        to lower the embedding dimension to improve their embedding comparison speed and costs.

        Args:
            model: SentenceTransformer model
            loss: The loss function to be used, e.g.
                :class:`MultipleNegativesRankingLoss`,
                :class:`CoSENTLoss`, etc.
            matryoshka_dims: A list of embedding dimensions to be used
                for the loss function, e.g. [768, 512, 256, 128, 64].
            matryoshka_weights: A list of weights to be used for the
                loss function, e.g. [1, 1, 1, 1, 1]. If None, then the
                weights will be set to 1 for all dimensions.
            n_dims_per_step: The number of dimensions to use per step.
                If -1, then all dimensions are used. If > 0, then a
                random sample of n_dims_per_step dimensions are used per
                step. The default value is -1.

        References:
            - The concept was introduced in this paper: https://arxiv.org/abs/2205.13147
            - `Matryoshka Embeddings <../../examples/training/matryoshka/README.html>`_

        Requirements:
            1. The base loss cannot be :class:`CachedMultipleNegativesRankingLoss` or :class:`CachedGISTEmbedLoss`.

        Relations:
            - :class:`Matryoshka2dLoss` uses this loss in combination with :class:`AdaptiveLayerLoss` which allows for
                layer reduction for faster inference.

        Input:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | any                                   | any    |
            +---------------------------------------+--------+

        Example:
            ::

                from sentence_transformers import SentenceTransformer, losses, InputExample
                from torch.utils.data import DataLoader

                model = SentenceTransformer("microsoft/mpnet-base")
                train_examples = [
                    InputExample(texts=['Anchor 1', 'Positive 1']),
                    InputExample(texts=['Anchor 2', 'Positive 2']),
                ]
                train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=32)
                train_loss = losses.MultipleNegativesRankingLoss(model=model)
                train_loss = losses.MatryoshkaLoss(model, train_loss, [768, 512, 256, 128, 64])
                model.fit(
                    [(train_dataloader, train_loss)],
                    epochs=10,
                )
        zIMatryoshkaLoss is not compatible with CachedMultipleNegativesRankingLoss.r   )
stacklevelz:MatryoshkaLoss is not compatible with CachedGISTEmbedLoss.Nr%   c                 S   s   | d S r   r   )xr   r   r   <lambda>       z)MatryoshkaLoss.__init__.<locals>.<lambda>T)keyreverse)superr   r.   r/   
isinstancer   warningswarnr   lenzipsortedr0   r1   r2   )r   r.   r/   r0   r1   r2   Zdims_weights	__class__r   r   r   4   s    A



zMatryoshkaLoss.__init__)sentence_featureslabelsr   c           
      C   s   | j j}zt|}|| j _tt| j}| jdkrN| jt|k rNt|| j}d}|D ]6}| j| }| j	| }	|
| ||	| || 7 }qVW 5 || j _X |S )Nr   g        )r.   forwardr   ranger=   r0   r2   randomsampler1   r   r/   )
r   rB   rC   Zoriginal_forwardZdecorated_forwardZdim_indicesr/   r   r   Zweightr   r   r   rD      s    



zMatryoshkaLoss.forwardr   c                 C   s   | j jj| j| j| jdS )N)r/   r0   r1   r2   )r/   rA   r)   r0   r1   r2   r   r   r   r   get_config_dict   s
    zMatryoshkaLoss.get_config_dictc                 C   s   dS )Na  
@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
r   rH   r   r   r   citation   s    zMatryoshkaLoss.citation)Nr   )r)   r*   r+   r
   r	   Moduler   intr   r   floatr   r   r   r,   r   rD   r   rI   propertyrJ   __classcell__r   r   r@   r   r-   3   s     Pr-   )rF   r;   typingr   r   r   r   r   r   Ztorch.nn.functionalr	   Z
functionalr!   Ztorchr   Zsentence_transformersr
   Z0sentence_transformers.losses.CachedGISTEmbedLossr   Z?sentence_transformers.losses.CachedMultipleNegativesRankingLossr   r   rK   r-   r   r   r   r   <module>   s    &