U
    DAf                  	   @   s   d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZ edZd dlZW 5 Q R X eeZed	 ZeG d
d dZdS )    N)Path)AnyDictListLiteralOptionalUnionget_args)Document	componentdefault_from_dictdefault_to_dictlogging)
ByteStream)
LazyImport)ComponentDevicez@Run 'pip install "openai-whisper>=20231106"' to install whisper.)basezbase.enlargezlarge-v1zlarge-v2zlarge-v3Zmediumz	medium.ensmallzsmall.enZtinyztiny.enc                   @   s   e Zd ZdZdeee eeee	f  dddZ
dddd	Zeee	f dd
dZeeee	f d dddZejee ddeeeeef  eeee	f  dddZeeeeef  ee dddZeeeeef  eee	f dddZdS )LocalWhisperTranscriberaM  
    Transcribes audio files using OpenAI's Whisper model in your local machine.

    For the supported audio formats, languages, and other parameters, see the
    [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
    [github repository](https://github.com/openai/whisper).

    Usage example:
    ```python
    from haystack.components.audio import LocalWhisperTranscriber

    whisper = LocalWhisperTranscriber(model="small")
    whisper.warm_up()
    transcription = whisper.run(audio_files=["path/to/audio/file"])
    ```
    r   Nmodeldevicewhisper_paramsc                 C   sZ   t   |ttkr4td| ddtt d|| _|p@i | _t	|| _
d| _dS )a  
        Creates an instance of the LocalWhisperTranscriber component.

        :param model:
            Name of the model to use. Set it to one of the following values:
        :type model:
            Literal["tiny", "small", "medium", "large", "large-v2"]
        :param device:
            The device on which the model is loaded. If `None`, the default device is automatically selected.
        zModel name 'z$' not recognized. Choose one among: z, .N)whisper_importcheckr	   WhisperLocalModel
ValueErrorjoinr   r   r   Zresolve_devicer   _model)selfr   r   r    r"   K/tmp/pip-unpacked-wheel-z752163x/haystack/components/audio/whisper_local.py__init__6   s    
z LocalWhisperTranscriber.__init__)returnc                 C   s"   | j stj| j| j d| _ dS )z,
        Loads the model in memory.
        )r   N)r    whisperZ
load_modelr   r   Zto_torchr!   r"   r"   r#   warm_upP   s    zLocalWhisperTranscriber.warm_upc                 C   s   t | | j| j | jdS )z{
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        r   )r   r   r   to_dictr   r'   r"   r"   r#   r)   W   s    zLocalWhisperTranscriber.to_dict)datar%   c                 C   s2   |d }| ddk	r(t|d |d< t| |S )z
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from.
        :returns:
            The deserialized component.
        Zinit_parametersr   N)getr   	from_dictr   )clsr*   Zinit_paramsr"   r"   r#   r,   `   s    
z!LocalWhisperTranscriber.from_dict)	documents)sourcesr   c                 C   s6   | j dkrtd|dkr | j}| j|f|}d|iS )a6  
        Transcribes the audio files into a list of Documents, one for each input file.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [github repo](https://github.com/openai/whisper).

        :param audio_files:
            A list of paths or binary streams to transcribe.

        :returns: A dictionary with the following keys:
            - `documents`: A list of Documents, one for each file. The content of the document is the transcription
                text, while the document's metadata contains the values returned by the Whisper model, such as the
                alignment data and the path to the audio file used for the transcription.
        Nz`The component LocalWhisperTranscriber was not warmed up. Run 'warm_up()' before calling 'run()'.r.   )r    RuntimeErrorr   
transcribe)r!   r/   r   r.   r"   r"   r#   runo   s    
zLocalWhisperTranscriber.run)r/   r%   c           	      K   sP   | j |f|}g }| D ]0\}}|d}t|d|i|d}|| q|S )a  
        Transcribes the audio files into a list of Documents, one for each input file.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [github repo](https://github.com/openai/whisper).

        :param audio_files:
            A list of paths or binary streams to transcribe.
        :returns:
            A list of Documents, one for each file.
        textZ
audio_file)contentmeta)_raw_transcribeitemspopr
   append)	r!   r/   kwargstranscriptionsr.   pathZ
transcriptr4   docr"   r"   r#   r1      s    
z"LocalWhisperTranscriber.transcribec           	   
   K   s   | j dkrtd|dd}i }|D ]}t|ts<t|n
|jd}t|tr|dkrtj	dd}t|j
}|| W 5 Q R X | j jt|f|}|s|dd |||< q&|S )a'  
        Transcribes the given audio files. Returns the output of the model, a dictionary, for each input file.

        For the supported audio formats, languages, and other parameters, see the
        [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
        [github repo](https://github.com/openai/whisper).

        :param audio_files:
            A list of paths or binary streams to transcribe.
        :returns:
            A dictionary mapping 'file_path' to 'transcription'.
        NzBModel is not loaded, please run 'warm_up()' before calling 'run()'return_segmentsF	file_path)deletesegments)r    r0   r8   
isinstancer   r   r5   r+   tempfileNamedTemporaryFilenameZto_filer1   str)	r!   r/   r:   r>   r;   sourcer<   fpZtranscriptionr"   r"   r#   r6      s    


z'LocalWhisperTranscriber._raw_transcribe)r   NN)N)__name__
__module____qualname____doc__r   r   r   r   rF   r   r$   r(   r)   classmethodr,   r   Zoutput_typesr   r
   r   r   r   r2   r1   r6   r"   r"   r"   r#   r   #   s"      	."r   )rC   pathlibr   typingr   r   r   r   r   r   r	   Zhaystackr
   r   r   r   r   Zhaystack.dataclassesr   Zhaystack.lazy_importsr   Zhaystack.utilsr   r   r&   	getLoggerrI   loggerr   r   r"   r"   r"   r#   <module>   s   $

