U
    DAf]                     @   sx   d dl mZ d dlmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZ eeZe
G dd dZdS )	    )Path)AnyDictListOptionalUnion)Document	componentlogging)get_bytestream_from_sourcenormalize_metadata)
ByteStreamc                
   @   sp   e Zd ZdZdedddZejee	 ddee
eeef  ee
eeef eeeef  f  dd	d
ZdS )TextFileToDocumentai  
    Converts text files to Documents.

    Usage example:
    ```python
    from haystack.components.converters.txt import TextFileToDocument

    converter = TextFileToDocument()
    results = converter.run(sources=["sample.txt"])
    documents = results["documents"]
    print(documents[0].content)
    # 'This is the content from the txt file.'
    ```
    utf-8encodingc                 C   s
   || _ dS )a  
        Create a TextFileToDocument component.

        :param encoding:
            The encoding of the text files.
            Note that if the encoding is specified in the metadata of a source ByteStream,
            it will override this value.
        Nr   )selfr    r   F/tmp/pip-unpacked-wheel-z752163x/haystack/components/converters/txt.py__init__    s    	zTextFileToDocument.__init__)	documentsN)sourcesmetac                 C   s   g }t |t|d}t||D ]\}}zt|}W n: tk
rl } ztjd||d W Y qW 5 d}~X Y nX z |jd| j	}	|j
|	}
W n: tk
r } ztjd||d W Y qW 5 d}~X Y nX |j|}t|
|d}|| qd|iS )	a  
        Converts text files to Documents.

        :param sources:
            List of HTML file paths or ByteStream objects.
        :param meta:
            Optional metadata to attach to the Documents.
            This value can be either a list of dictionaries or a single dictionary.
            If it's a single dictionary, its content is added to the metadata of all produced Documents.
            If it's a list, the length of the list must match the number of sources, because the two lists will
            be zipped.
            If `sources` contains ByteStream objects, their `meta` will be added to the output Documents.

        :returns:
            A dictionary with the following keys:
            - `documents`: Created Documents
        )Zsources_countz4Could not read {source}. Skipping it. Error: {error})sourceerrorNr   zDCould not convert file {source}. Skipping it. Error message: {error})contentr   r   )r   lenzipr   	Exceptionloggerwarningr   getr   datadecoder   append)r   r   r   r   Z	meta_listr   metadataZ
bytestreamer   textZmerged_metadatadocumentr   r   r   run+   s,      
zTextFileToDocument.run)r   )N)__name__
__module____qualname____doc__strr   r	   Zoutput_typesr   r   r   r   r   r   r   r   r)   r   r   r   r   r      s    "r   N)pathlibr   typingr   r   r   r   r   Zhaystackr   r	   r
   Z$haystack.components.converters.utilsr   r   Zhaystack.dataclassesr   	getLoggerr*   r   r   r   r   r   r   <module>   s   
