U
    DAf                  	   @   s   d dl mZ d dlmZmZmZmZmZ d dlmZ d dl	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ edZd d	lmZ d d
lmZ W 5 Q R X eeZeG dd dZdS )    )Path)AnyDictListOptionalUnion)tqdm)Document	componentlogging)get_bytestream_from_sourcenormalize_metadata)
ByteStream)
LazyImportz+Run 'pip install markdown-it-py mdit_plain')
MarkdownIt)RendererPlainc                
   @   sr   e Zd ZdZdeedddZejee	 ddee
eeef  ee
eeef eeeef  f  d	d
dZdS )MarkdownToDocumenta  
    Converts a Markdown file into a text Document.

    Usage example:
    ```python
    from haystack.components.converters import MarkdownToDocument

    converter = MarkdownToDocument()
    results = converter.run(sources=["path/to/sample.md"], meta={"date_added": datetime.now().isoformat()})
    documents = results["documents"]
    print(documents[0].content)
    # 'This is a text from the markdown file.'
    ```
    FT)table_to_single_lineprogress_barc                 C   s   t   || _|| _dS )z
        Create a MarkdownToDocument component.

        :param table_to_single_line:
            If True converts table contents into a single line.
        :param progress_bar:
            If True shows a progress bar when running.
        N)markdown_conversion_importscheckr   r   )selfr   r    r   K/tmp/pip-unpacked-wheel-z752163x/haystack/components/converters/markdown.py__init__(   s    	zMarkdownToDocument.__init__)	documentsN)sourcesmetac                 C   s  t td}| jr|d g }t|t|d}tt||t|d| j dD ]\}}zt	|}W n: t
k
r }	 ztjd||	d W Y qLW 5 d}	~	X Y nX z|jd	}
||
}W n: t
k
r } ztjd
||d W Y qLW 5 d}~X Y nX |j|}t||d}|| qLd|iS )a  
        Converts a list of Markdown files to Documents.

        :param sources:
            List of file paths or ByteStream objects.
        :param meta:
            Optional metadata to attach to the Documents.
            This value can be either a list of dictionaries or a single dictionary.
            If it's a single dictionary, its content is added to the metadata of all produced Documents.
            If it's a list, the length of the list must match the number of sources, because the two lists will
            be zipped.
            If `sources` contains ByteStream objects, their `meta` will be added to the output Documents.

        :returns:
            A dictionary with the following keys:
            - `documents`: List of created Documents
        )Zrenderer_clstable)r   Zsources_countz&Converting markdown files to Documents)totaldescdisablez4Could not read {source}. Skipping it. Error: {error})sourceerrorNzutf-8zAFailed to extract text from {source}. Skipping it. Error: {error})contentr   r   )r   r   r   enabler   lenr   zipr   r   	Exceptionloggerwarningdatadecoderenderr   r	   append)r   r   r   parserr   Z	meta_listr"   metadataZ
bytestreameZfile_contenttextZconversion_eZmerged_metadatadocumentr   r   r   run6   s<    


zMarkdownToDocument.run)FT)N)__name__
__module____qualname____doc__boolr   r
   Zoutput_typesr   r	   r   strr   r   r   r   r   r4   r   r   r   r   r      s    "r   N)pathlibr   typingr   r   r   r   r   r   Zhaystackr	   r
   r   Z$haystack.components.converters.utilsr   r   Zhaystack.dataclassesr   Zhaystack.lazy_importsr   r   Zmarkdown_itr   Zmdit_plain.rendererr   	getLoggerr5   r)   r   r   r   r   r   <module>   s   

