U
    <A·f²  ã                   @   s`   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZmZ e 	e
¡ZG dd„ dejƒZdS )é    N)ÚDictÚList)ÚTensorÚnnc                       sj   e Zd ZdZdee eeef edœ‡ fdd„Zeee	f dœdd„Z
d	d
„ Zdd„ Zedd„ ƒZ‡  ZS )ÚWordWeightszDThis model can weight word embeddings, for example, with idf-values.é   )ÚvocabÚword_weightsÚunknown_word_weightc                    sÊ   t t| ƒ ¡  dddg| _|| _|| _|| _g }d}|D ]F}|}||krR|| }n"| ¡ |krl|| ¡  }n|d7 }| |¡ q8t	 
d |t|ƒ|¡¡ t t|ƒd¡| _| j dt |¡ d¡i¡ dS )	aZ  
        Initializes the WordWeights class.

        Args:
            vocab (List[str]): Vocabulary of the tokenizer.
            word_weights (Dict[str, float]): Mapping of tokens to a float weight value. Word embeddings are multiplied
                by this float value. Tokens in word_weights must not be equal to the vocab (can contain more or less values).
            unknown_word_weight (float, optional): Weight for words in vocab that do not appear in the word_weights lookup.
                These can be, for example, rare words in the vocab where no weight exists. Defaults to 1.
        r   r	   r
   r   r   z:{} of {} words without a weighting value. Set weight to {}ÚweightN)Úsuperr   Ú__init__Úconfig_keysr   r	   r
   ÚlowerÚappendÚloggerÚinfoÚformatÚlenr   Z	EmbeddingÚ	emb_layerZload_state_dictÚtorchZFloatTensorÚ	unsqueeze)Úselfr   r	   r
   ÚweightsZnum_unknown_wordsÚwordr   ©Ú	__class__© úL/tmp/pip-unpacked-wheel-i7fohqg6/sentence_transformers/models/WordWeights.pyr      s0    
  ÿÿzWordWeights.__init__)Úfeaturesc                 C   sl   |d }|d }|   |d ¡ d¡}|| ¡  }t |d¡}| d¡ | ¡ ¡}|| }| ||dœ¡ |S )NÚattention_maskÚtoken_embeddingsZ	input_idséÿÿÿÿr   )r!   Útoken_weights_sum)	r   ZsqueezeÚfloatr   Úsumr   ÚexpandÚsizeÚupdate)r   r   r    r!   Ztoken_weights_rawZtoken_weightsr#   Ztoken_weights_expandedr   r   r   Úforward5   s    zWordWeights.forwardc                    s   ‡ fdd„ˆ j D ƒS )Nc                    s   i | ]}|ˆ j | “qS r   )Ú__dict__)Ú.0Úkey©r   r   r   Ú
<dictcomp>F   s      z/WordWeights.get_config_dict.<locals>.<dictcomp>)r   r-   r   r-   r   Úget_config_dictE   s    zWordWeights.get_config_dictc              	   C   s8   t tj |d¡dƒ}tj|  ¡ |dd W 5 Q R X d S )Núconfig.jsonÚwé   )Úindent)ÚopenÚosÚpathÚjoinÚjsonÚdumpr/   )r   Zoutput_pathZfOutr   r   r   ÚsaveH   s    zWordWeights.savec              	   C   s2   t tj | d¡ƒ}t |¡}W 5 Q R X tf |ŽS )Nr0   )r4   r5   r6   r7   r8   Úloadr   )Z
input_pathZfInÚconfigr   r   r   r;   L   s    zWordWeights.load)r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Ústrr   r$   r   r   r)   r/   r:   Ústaticmethodr;   Ú__classcell__r   r   r   r   r      s   $&r   )r8   Úloggingr5   Útypingr   r   r   r   r   Ú	getLoggerr=   r   ÚModuler   r   r   r   r   Ú<module>   s   
