U
    5Af                     @   s   d dl mZ d dlmZmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZmZ e	 rjd dlmZ ddlmZ e rd d	lZdd
lmZ e rddlmZ ddlmZ e
eZeeddG dd deZd	S )    )UserDict)ListUnion   )add_end_docstringsis_tf_availableis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)Image)
load_imageN)6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)9TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)stable_softmaxT)Zhas_image_processorc                       sh   e Zd ZdZ fddZeeee ded f d fddZdd	 Z	dddZ
dd Zdd Z  ZS )#ZeroShotImageClassificationPipelineaL  
    Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
    provide an image and a set of `candidate_labels`.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    [{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]

    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["black and white", "photorealist", "painting"],
    ... )
    [{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-image-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
    c                    s4   t  jf | t| d | | jdkr*tnt d S )NZvisiontf)super__init__r   Zcheck_model_type	frameworkr   r   )selfkwargs	__class__ Y/tmp/pip-unpacked-wheel-zw5xktn0/transformers/pipelines/zero_shot_image_classification.pyr   B   s    
z,ZeroShotImageClassificationPipeline.__init__r   )imagesc                    s   t  j|f|S )a  
        Assign labels to the image(s) passed as inputs.

        Args:
            images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

            candidate_labels (`List[str]`):
                The candidate labels for this image

            hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`):
                The sentence used in cunjunction with *candidate_labels* to attempt the image classification by
                replacing the placeholder with the candidate_labels. Then likelihood is estimated by using
                logits_per_image

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries containing result, one dictionary per proposed label. The dictionaries contain the
            following keys:

            - **label** (`str`) -- The label identified by the model. It is one of the suggested `candidate_label`.
            - **score** (`float`) -- The score attributed by the model for that label (between 0 and 1).
        )r   __call__)r   r   r   r   r   r   r    L   s    z,ZeroShotImageClassificationPipeline.__call__c                 K   sJ   i }d|kr|d |d< d|kr,|d |d< d|kr@|d |d< |i i fS )Ncandidate_labelstimeouthypothesis_templater   )r   r   Zpreprocess_paramsr   r   r   _sanitize_parametersm   s    z8ZeroShotImageClassificationPipeline._sanitize_parametersNThis is a photo of {}.c           	         s   t ||d}| j|g| jd}| jdkr4|| j}||d<  fdd|D }| jjjdkr`dnd	}| j|| j|d
}|g|d< |S )N)r"   )r   return_tensorsptr!   c                    s   g | ]}  |qS r   )format).0xr#   r   r   
<listcomp>~   s     zBZeroShotImageClassificationPipeline.preprocess.<locals>.<listcomp>siglip
max_lengthT)r&   paddingtext_inputs)	r   Zimage_processorr   toZtorch_dtypemodelconfig
model_type	tokenizer)	r   imager!   r#   r"   inputs	sequencesr/   r0   r   r+   r   
preprocessx   s    

z.ZeroShotImageClassificationPipeline.preprocessc                 C   sX   | d}| d}t|d tr,|d }n|d d }| jf ||}||jd}|S )Nr!   r0   r   )r!   logits)pop
isinstancer   r2   Zlogits_per_image)r   Zmodel_inputsr!   r0   outputsmodel_outputsr   r   r   _forward   s    


z,ZeroShotImageClassificationPipeline._forwardc                 C   s   | d}|d d }| jdkrX| jjjdkrXt|d}| }t	|t
s|g}nj| jdkr|jddd}| }t	|t
s|g}n4| jdkrt|dd	}|  }ntd
| j dd tt||dd dD }|S )Nr!   r:   r   r'   r-   )Zdimr   )ZaxiszUnsupported framework: c                 S   s   g | ]\}}||d qS ))scorelabelr   )r)   rA   Zcandidate_labelr   r   r   r,      s   zCZeroShotImageClassificationPipeline.postprocess.<locals>.<listcomp>c                 S   s
   | d  S )Nr   r   )r*   r   r   r   <lambda>       zAZeroShotImageClassificationPipeline.postprocess.<locals>.<lambda>)key)r;   r   r2   r3   r4   torchZsigmoidZsqueezetolistr<   listZsoftmaxr   Znumpy
ValueErrorsortedzip)r   r>   r!   r:   ZprobsZscoresresultr   r   r   postprocess   s(    




z/ZeroShotImageClassificationPipeline.postprocess)Nr%   N)__name__
__module____qualname____doc__r   r   strr   r    r$   r9   r?   rM   __classcell__r   r   r   r   r       s    
&!
r   )collectionsr   typingr   r   utilsr   r   r   r	   r
   r   baser   r   ZPILr   Zimage_utilsr   rF   Zmodels.auto.modeling_autor   Zmodels.auto.modeling_tf_autor   Ztf_utilsr   Z
get_loggerrN   loggerr   r   r   r   r   <module>   s    
