U
    5Afq+                  	   @   s   d dl Z d dlZd dlZd dlmZmZmZ d dlZe	e
ejdddZde
eeee ddd	Zde
eee
 eeeeef ef  eee d
ddZde
ee
e
f edddZe
dddZdd ZdS )    N)OptionalTupleUnion)bpayloadsampling_ratereturnc                 C   s   | }d}d}dddd|d|d|d	d
ddg}z.t j|t jt jd}|| }W 5 Q R X W n, tk
r } ztd|W 5 d}~X Y nX |d }	t|	tj}
|
j	d dkrtd|
S )z?
    Helper function to read an audio file through ffmpeg.
    1f32leffmpeg-izpipe:0-ac-ar-f-hide_banner	-loglevelquietpipe:1)stdinstdoutzFffmpeg was not found but is required to load audio files from filenameNr   a  Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote URL, ensure that the URL is the full address to **download** the audio file.)

subprocessPopenPIPEcommunicateFileNotFoundError
ValueErrornp
frombufferfloat32shape)r   r   aracformat_for_conversionffmpeg_commandffmpeg_processZoutput_streamerrorZ	out_bytesaudio r&   F/tmp/pip-unpacked-wheel-zw5xktn0/transformers/pipelines/audio_utils.pyffmpeg_read
   s:    r(   r	   )r   chunk_length_sr!   ffmpeg_input_devicec                 c   s   |  }d}|dkrd}n|dkr&d}nt d| dt }|dkrTd	}|pPd
}	n,|dkrjd}|pfd}	n|dkrd}|p~t }	dd|d|	d|d|d|ddddddg}
tt| | | }t|
|}|D ]
}|V  qdS )a  
    Helper function to read audio from a microphone using ffmpeg. The default input device will be used unless another
    input device is specified using the `ffmpeg_input_device` argument. Uses 'alsa' on Linux, 'avfoundation' on MacOS and
    'dshow' on Windows.

    Arguments:
        sampling_rate (`int`):
            The sampling_rate to use when reading the data from the microphone. Try using the model's sampling_rate to
            avoid resampling later.
        chunk_length_s (`float` or `int`):
            The length of the maximum chunk of audio to be sent returned.
        format_for_conversion (`str`, defaults to `f32le`):
            The name of the format of the audio samples to be returned by ffmpeg. The standard is `f32le`, `s16le`
            could also be used.
        ffmpeg_input_device (`str`, *optional*):
            The indentifier of the input device to be used by ffmpeg (i.e. ffmpeg's '-i' argument). If unset,
            the default input device will be used. See `https://www.ffmpeg.org/ffmpeg-devices.html#Input-Devices`
            for how to specify and list input devices.
    Returns:
        A generator yielding audio chunks of `chunk_length_s` seconds as `bytes` objects of length
        `int(round(sampling_rate * chunk_length_s)) * size_of_sample`.
    r   s16le   r	      Unhandled format ` `. Please use `s16le` or `f32le`LinuxZalsadefaultDarwinZavfoundationz:defaultWindowsdshowr
   r   r   r   r   z-fflagsZnobufferr   r   r   r   N)r   platformsystem_get_microphone_nameintround_ffmpeg_stream)r   r)   r!   r*   r   r    size_of_sampler6   Zformat_Zinput_r"   	chunk_leniteratoritemr&   r&   r'   ffmpeg_microphone1   sN    



r?   )r   r)   stream_chunk_sstride_length_sr!   r*   c                 c   sb  |dk	r|}n|}t | |||d}|dkr6tj}d}	n$|dkrJtj}d}	ntd| d|dkrj|d	 }tt| | |	 }
t|ttfr||g}tt| |d
  |	 }tt| |d  |	 }t	j	
 }t	j|d}t||
||fddD ]n}tj|d |d|d< |d d
 |	 |d d |	 f|d< | |d< ||7 }t	j	
 |d|  krVq|V  qdS )a	  
    Helper function to read audio from a microphone using ffmpeg. This will output `partial` overlapping chunks starting
    from `stream_chunk_s` (if it is defined) until `chunk_length_s` is reached. It will make use of striding to avoid
    errors on the "sides" of the various chunks. The default input device will be used unless another input device is
    specified using the `ffmpeg_input_device` argument. Uses 'alsa' on Linux, 'avfoundation' on MacOS and 'dshow' on Windows.

    Arguments:
        sampling_rate (`int`):
            The sampling_rate to use when reading the data from the microphone. Try using the model's sampling_rate to
            avoid resampling later.
        chunk_length_s (`float` or `int`):
            The length of the maximum chunk of audio to be sent returned. This includes the eventual striding.
        stream_chunk_s (`float` or `int`):
            The length of the minimal temporary audio to be returned.
        stride_length_s (`float` or `int` or `(float, float)`, *optional*):
            The length of the striding to be used. Stride is used to provide context to a model on the (left, right) of
            an audio sample but without using that part to actually make the prediction. Setting this does not change
            the length of the chunk.
        format_for_conversion (`str`, *optional*, defaults to `f32le`):
            The name of the format of the audio samples to be returned by ffmpeg. The standard is `f32le`, `s16le`
            could also be used.
        ffmpeg_input_device (`str`, *optional*):
            The identifier of the input device to be used by ffmpeg (i.e. ffmpeg's '-i' argument). If unset,
            the default input device will be used. See `https://www.ffmpeg.org/ffmpeg-devices.html#Input-Devices`
            for how to specify and list input devices.
    Return:
        A generator yielding dictionaries of the following form

        `{"sampling_rate": int, "raw": np.array(), "partial" bool}` With optionally a `"stride" (int, int)` key if
        `stride_length_s` is defined.

        `stride` and `raw` are all expressed in `samples`, and `partial` is a boolean saying if the current yield item
        is a whole chunk, or a partial temporary result to be later replaced by another larger chunk.
    N)r!   r*   r+   r,   r	   r-   r.   r/      r      )secondsT)stridestreamraw)dtyperE   r   
   )r?   r   Zint16r   r   r8   r9   
isinstancefloatdatetimenow	timedeltachunk_bytes_iterr   )r   r)   r@   rA   r!   r*   Zchunk_sZ
microphonerH   r;   r<   stride_leftstride_rightZ
audio_timedeltar>   r&   r&   r'   ffmpeg_microphone_live{   sF    *   
rS   F)r<   rE   rF   c           
      c   s   d}|\}}|| |kr2t d| d| d| d}| D ]}||7 }|rvt||k rv|df}|d| |ddV  q:t||kr:||f}|d| |d	}	|rd
|	d< |	V  |}||| | d }qvq:t||kr||dfd	}	|rd
|	d< |	V  dS )z
    Reads raw bytes from an iterator and does chunks of length `chunk_len`. Optionally adds `stride` to each chunks to
    get overlaps. `stream` is used to return partial results even if a full `chunk_len` is not yet available.
        z5Stride needs to be strictly smaller than chunk_len: (z, z) vs r   NT)rG   rE   partial)rG   rE   FrU   )r   len)
r=   r<   rE   rF   accrP   rQ   Z_stride_leftrG   r>   r&   r&   r'   rO      s2    rO   )buflenc              
   c   sv   d}z@t j| t j|d$}|j|}|dkr0q8|V  qW 5 Q R X W n, tk
rp } ztd|W 5 d}~X Y nX dS )zJ
    Internal function to create the generator of data through ffmpeg
    i   )r   bufsizerT   zHffmpeg was not found but is required to stream audio files from filenameN)r   r   r   r   readr   r   )r"   rX   rY   r#   rG   r$   r&   r&   r'   r:      s    r:   c                  C   s   dddddddg} z\t j| dt jd	d
}dd |j D }|rl|d dd }td|  d| W S W n tk
r   td Y nX dS )z3
    Retrieve the microphone name in Windows .
    r
   z-list_devicestruer   r4   r    Tzutf-8)textstderrencodingc                 S   s   g | ]}d |kr|qS )z(audio)r&   ).0liner&   r&   r'   
<listcomp>
  s      z(_get_microphone_name.<locals>.<listcomp>r   "rC   zUsing microphone: zaudio=zOffmpeg was not found. Please install it or make sure it is in your system PATH.r1   )r   runr   r^   
splitlinessplitprintr   )commandZffmpeg_devicesZmicrophone_linesZmicrophone_namer&   r&   r'   r7     s    r7   )r	   N)NNr	   N)F)rL   r5   r   typingr   r   r   Znumpyr   bytesr8   arrayr(   rK   strr?   rS   boolrO   r:   r7   r&   r&   r&   r'   <module>   s8   *  M    T#