U
    DAfE                     @   s:   d dl mZmZmZ d dlmZmZ eG dd dZdS )    )DictListOptional)Document	componentc                	   @   sh   e Zd ZdZd
ee ee dddZeje	e
 dde	e
 ee ee eee	e
 f ddd	ZdS )LostInTheMiddleRankera  
    A LostInTheMiddle Ranker.

    Ranks documents based on the 'lost in the middle' order so that the most relevant documents are either at the
    beginning or end, while the least relevant are in the middle.

    LostInTheMiddleRanker assumes that some prior component in the pipeline has already ranked documents by relevance
    and requires no query as input but only documents. It is typically used as the last component before building a
    prompt for an LLM to prepare the input context for the LLM.

    Lost in the Middle ranking lays out document contents into LLM context so that the most relevant contents are at
    the beginning or end of the input context, while the least relevant is in the middle of the context. See the
    paper ["Lost in the Middle: How Language Models Use Long Contexts"](https://arxiv.org/abs/2307.03172) for more
    details.

    Usage example:
    ```python
    from haystack.components.rankers import LostInTheMiddleRanker
    from haystack import Document

    ranker = LostInTheMiddleRanker()
    docs = [Document(content="Paris"), Document(content="Berlin"), Document(content="Madrid")]
    result = ranker.run(documents=docs)
    for doc in result["documents"]:
        print(doc.content)
    ```
    N)word_count_thresholdtop_kc                 C   sR   t |tr"|dkr"td| dt |trB|dkrBtd| || _|| _dS )aS  
        Initialize the LostInTheMiddleRanker.

        If 'word_count_threshold' is specified, this ranker includes all documents up until the point where adding
        another document would exceed the 'word_count_threshold'. The last document that causes the threshold to
        be breached will be included in the resulting list of documents, but all subsequent documents will be
        discarded.

        :param word_count_threshold: The maximum total number of words across all documents selected by the ranker.
        :param top_k: The maximum number of documents to return.
        r   (Invalid value for word_count_threshold: #. word_count_threshold must be > 0.top_k must be > 0, but got N)
isinstanceint
ValueErrorr   r	   )selfr   r	    r   R/tmp/pip-unpacked-wheel-z752163x/haystack/components/rankers/lost_in_the_middle.py__init__(   s    
zLostInTheMiddleRanker.__init__)	documents)r   r	   r   returnc           
         s~  t |tr"|dkr"td| dt |trB|dkrBtd| |sNdg iS |pV| j}|p`| j}|rr|d| n| t dkrd iS tdd	  D rtd
d}ttt }dg}|r d j	rt d j	
 }||krd d giS |dd D ]b}t|d t|d  }||| |r  | j	r |t | j	
 7 }||kr  qdq  fdd|D }	d|	iS )a  
        Reranks documents based on the "lost in the middle" order.

        :param documents: List of Documents to reorder.
        :param top_k: The maximum number of documents to return.
        :param word_count_threshold: The maximum total number of words across all documents selected by the ranker.
        :returns:
            A dictionary with the following keys:
            - `documents`: Reranked list of Documents

        :raises ValueError:
            If any of the documents is not textual.
        r   r
   r   r   r   N   c                 s   s   | ]}|j d k V  qdS )textN)content_type).0docr   r   r   	<genexpr>c   s     z,LostInTheMiddleRanker.run.<locals>.<genexpr>zUSome provided documents are not textual; LostInTheMiddleRanker can process only text.   c                    s   g | ]} | qS r   r   )r   idxZdocuments_to_reorderr   r   
<listcomp>   s     z-LostInTheMiddleRanker.run.<locals>.<listcomp>)r   r   r   r	   r   lenanylistrangecontentsplitinsert)
r   r   r	   r   Z
word_countZdocument_indexZlost_in_the_middle_indicesZdoc_idxZinsertion_indexZranked_docsr   r   r   run>   s>    




zLostInTheMiddleRanker.run)NN)NN)__name__
__module____qualname____doc__r   r   r   r   Zoutput_typesr   r   r   strr'   r   r   r   r   r   
   s        r   N)typingr   r   r   Zhaystackr   r   r   r   r   r   r   <module>   s   