U
    DAf                  	   @   s   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ edZd d	lmZ W 5 Q R X eeZeG d
d dZdS )    N)Path)AnyDictListOptionalUnion)Document	componentlogging)get_bytestream_from_sourcenormalize_metadata)
ByteStream)
LazyImportzRun 'pip install python-pptx')Presentationc                
   @   sz   e Zd ZdZdd ZejedddZe	j
ee ddeeeeef  eeeeef eeeef  f  d	d
dZdS )PPTXToDocumenta  
    Converts PPTX files to Documents.

    Usage example:
    ```python
    from haystack.components.converters.pptx import PPTXToDocument

    converter = PPTXToDocument()
    results = converter.run(sources=["sample.pptx"], meta={"date_added": datetime.now().isoformat()})
    documents = results["documents"]
    print(documents[0].content)
    # 'This is the text from the PPTX file.'
    ```
    c                 C   s   t   dS )z5
        Create an PPTXToDocument component.
        N)pptx_importcheck)self r   G/tmp/pip-unpacked-wheel-z752163x/haystack/components/converters/pptx.py__init__&   s    zPPTXToDocument.__init__)file_contentreturnc                 C   s\   t |}g }|jD ]:}g }|jD ]}t|dr ||j q |d| qd|}|S )z1
        Converts the PPTX file to text.
        text
)r   ZslidesZshapeshasattrappendr   join)r   r   Zpptx_presentationZtext_all_slidesZslideZtext_on_slideshaper   r   r   r   _convert,   s    



zPPTXToDocument._convert)	documentsN)sourcesmetac                 C   s   g }t |t|d}t||D ]\}}zt|}W n: tk
rl } ztjd||d W Y qW 5 d}~X Y nX z| t	|j
}	W n: tk
r } ztjd||d W Y qW 5 d}~X Y nX |j|}
|t|	|
d qd|iS )a  
        Converts PPTX files to Documents.

        :param sources:
            List of file paths or ByteStream objects.
        :param meta:
            Optional metadata to attach to the Documents.
            This value can be either a list of dictionaries or a single dictionary.
            If it's a single dictionary, its content is added to the metadata of all produced Documents.
            If it's a list, the length of the list must match the number of sources, because the two lists will
            be zipped.
            If `sources` contains ByteStream objects, their `meta` will be added to the output Documents.

        :returns:
            A dictionary with the following keys:
            - `documents`: Created Documents
        )Zsources_countz4Could not read {source}. Skipping it. Error: {error})sourceerrorNzECould not read {source} and convert it to Document, skipping. {error})contentr#   r!   )r   lenzipr   	Exceptionloggerwarningr    ioBytesIOdatar#   r   r   )r   r"   r#   r!   Z	meta_listr$   metadataZ
bytestreamer   Zmerged_metadatar   r   r   run;   s(      
zPPTXToDocument.run)N)__name__
__module____qualname____doc__r   r,   r-   strr    r	   Zoutput_typesr   r   r   r   r   r   r   r   r1   r   r   r   r   r      s    "r   )r,   pathlibr   typingr   r   r   r   r   Zhaystackr   r	   r
   Z$haystack.components.converters.utilsr   r   Zhaystack.dataclassesr   Zhaystack.lazy_importsr   r   Zpptxr   	getLoggerr2   r*   r   r   r   r   r   <module>   s   

