U
    5Af0                     @   s*  d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZ eeeeeeddfZedejZedejZedejejB ZedejZG dd	 d	eZG d
d deZeeeef dddZeedddZeedddZeeee ee ee f dddZ eedddZ!dS )    N)	AnyCallableDictOptionalTupleUnionget_args
get_originget_type_hints.z(^(.*?)[\n\s]*(Args:|Returns:|Raises:|\Z)z0\n\s*Args:\n\s*(.*?)[\n\s]*(Returns:|Raises:|\Z)a1  
(?:^|\n)  # Match the start of the args block, or a newline
\s*(\w+):\s*  # Capture the argument name and strip spacing
(.*?)\s*  # Capture the argument description, which can span multiple lines, and strip trailing spacing
(?=\n\s*\w+:|\Z)  # Stop when you hit the next argument or the end of the block
z*\n\s*Returns:\n\s*(.*?)[\n\s]*(Raises:|\Z)c                   @   s   e Zd ZdZdS )TypeHintParsingExceptionzJException raised for errors in parsing type hints to generate JSON schemasN__name__
__module____qualname____doc__ r   r   J/tmp/pip-unpacked-wheel-zw5xktn0/transformers/utils/chat_template_utils.pyr   (   s   r   c                   @   s   e Zd ZdZdS )DocstringParsingExceptionzJException raised for errors in parsing docstrings to generate JSON schemasNr   r   r   r   r   r   .   s   r   )
param_typereturnc              
   C   s8   t dditdditdditdditi i}|| ddiS )Ntypeintegernumberstringbooleanobject)intfloatstrboolr   get)r   Ztype_mappingr   r   r   _get_json_schema_type4   s         r!   )hintr   c                 C   s  t | }t| }|d krFz
t| W S  tk
r@   td| Y nX n:|tkrdd |D }t|dkrr|d }n2tdd |D rdtd	d |D i}nd
|i}t	d |krd|d< |S |t
kr|sddiS dt|d dS n|tkrL| sddiS t|dkr&tdt| dd dd|kr8tdddd |D dS |tkrddi}t|dkr|t|d |d< |S td| d S )NzGCouldn't parse this type hint, likely due to a custom class or object: c                 S   s    g | ]}|t d k	rt|qS )N)r   _parse_type_hint.0tr   r   r   
<listcomp>M   s      z$_parse_type_hint.<locals>.<listcomp>   r   c                 s   s   | ]}t |d  tV  qdS )r   N)
isinstancer   r%   subtyper   r   r   	<genexpr>Q   s     z#_parse_type_hint.<locals>.<genexpr>r   c                 S   s   g | ]}|d  qS )r   r   r*   r   r   r   r'   S   s     ZanyOfTZnullablearray)r   itemszThe type hint ztyping. a1   is a Tuple with a single element, which we do not automatically convert to JSON schema as it is rarely necessary. If this input can contain more than one element, we recommend using a List[] type instead, or if it really is a single element, remove the Tuple[] wrapper and just pass the element directly..znConversion of '...' is not supported in Tuple type hints. Use List[] types for variable-length inputs instead.c                 S   s   g | ]}t |qS r   )r#   r$   r   r   r   r'   s   s     )r   ZprefixItemsr      ZadditionalProperties)r	   r   r!   KeyErrorr   r   lenallsortedr   listr#   tupler   replacedict)r"   originargsZsubtypesreturn_dictoutr   r   r   r#   ?   sT    
 



r#   )funcr   c           	      C   s   t | }t| }g }|j D ]F\}}|jtjjkrNtd|j	 d| j
 |jtjjkr || q i }| D ]\}}t|||< qtd|d}|r||d< |S )Nz	Argument z$ is missing a type hint in function r   )r   
propertiesrequired)r
   inspect	signature
parametersr.   
annotation	Parameteremptyr   namer   defaultappendr#   )	r=   Z
type_hintsrA   r?   
param_nameparamr>   r   schemar   r   r   "_convert_type_hints_to_json_schema   s    

rL   )	docstringr   c           	      C   s   t | }t| }t| }|r0|d nd}|rF|d nd}|r\|d nd}|dk	rddd |dD }t	|}dd |D }ni }|||fS )a  
    Parses a Google-style docstring to extract the function description,
    argument descriptions, and return description.

    Args:
        docstring (str): The docstring to parse.

    Returns:
        The function description, arguments, and return description.
    r(   N
c                 S   s   g | ]}|  r|qS r   strip)r%   liner   r   r   r'      s      z1parse_google_format_docstring.<locals>.<listcomp>c              	   S   s(   i | ] }|d  t dd|d  qS )r   z	\s*\n+\s* r(   )resubrP   )r%   matchr   r   r   
<dictcomp>   s      z1parse_google_format_docstring.<locals>.<dictcomp>)
description_researchargs_re
returns_regrouprP   joinsplitargs_split_refindall)	rM   Zdescription_matchZ
args_matchZreturns_matchdescriptionZdocstring_argsZreturnsmatchesZ	args_dictr   r   r   parse_google_format_docstring   s    



rb   c                 C   s   t | }|s td| j d| }t|\}}}t| }|d dd }dk	rf|dk	rf||d< |d  D ]\}}||krtd| j d| d|| }	t	j
d	|	t	jd
}
|
rdd t|
dD |d< |
jd|
   }	|	|d< qr| j||d}|dk	r||d< d|dS )a  
    This function generates a JSON schema for a given function, based on its docstring and type hints. This is
    mostly used for passing lists of tools to a chat template. The JSON schema contains the name and description of
    the function, as well as the names, types and descriptions for each of its arguments. `get_json_schema()` requires
    that the function has a docstring, and that each argument has a description in the docstring, in the standard
    Google docstring format shown below. It also requires that all the function arguments have a valid Python type hint.

    Although it is not required, a `Returns` block can also be added, which will be included in the schema. This is
    optional because most chat templates ignore the return value of the function.

    Args:
        func: The function to generate a JSON schema for.

    Returns:
        A dictionary containing the JSON schema for the function.

    Examples:
    ```python
    >>> def multiply(x: float, y: float):
    >>>    '''
    >>>    A function that multiplies two numbers
    >>>
    >>>    Args:
    >>>        x: The first number to multiply
    >>>        y: The second number to multiply
    >>>    '''
    >>>    return x * y
    >>>
    >>> print(get_json_schema(multiply))
    {
        "name": "multiply",
        "description": "A function that multiplies two numbers",
        "parameters": {
            "type": "object",
            "properties": {
                "x": {"type": "number", "description": "The first number to multiply"},
                "y": {"type": "number", "description": "The second number to multiply"}
            },
            "required": ["x", "y"]
        }
    }
    ```

    The general use for these schemas is that they are used to generate tool descriptions for chat templates that
    support them, like so:

    ```python
    >>> from transformers import AutoTokenizer
    >>> from transformers.utils import get_json_schema
    >>>
    >>> def multiply(x: float, y: float):
    >>>    '''
    >>>    A function that multiplies two numbers
    >>>
    >>>    Args:
    >>>        x: The first number to multiply
    >>>        y: The second number to multiply
    >>>    return x * y
    >>>    '''
    >>>
    >>> multiply_schema = get_json_schema(multiply)
    >>> tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
    >>> messages = [{"role": "user", "content": "What is 179 x 4571?"}]
    >>> formatted_chat = tokenizer.apply_chat_template(
    >>>     messages,
    >>>     tools=[multiply_schema],
    >>>     chat_template="tool_use",
    >>>     return_dict=True,
    >>>     return_tensors="pt",
    >>>     add_generation_prompt=True
    >>> )
    >>> # The formatted chat can now be passed to model.generate()
    ```

    Each argument description can also have an optional `(choices: ...)` block at the end, such as
    `(choices: ["tea", "coffee"])`, which will be parsed into an `enum` field in the schema. Note that this will
    only be parsed correctly if it is at the end of the line:

    ```python
    >>> def drink_beverage(beverage: str):
    >>>    '''
    >>>    A function that drinks a beverage
    >>>
    >>>    Args:
    >>>        beverage: The beverage to drink (choices: ["tea", "coffee"])
    >>>    '''
    >>>    pass
    >>>
    >>> print(get_json_schema(drink_beverage))
    ```
    {
        'name': 'drink_beverage',
        'description': 'A function that drinks a beverage',
        'parameters': {
            'type': 'object',
            'properties': {
                'beverage': {
                    'type': 'string',
                    'enum': ['tea', 'coffee'],
                    'description': 'The beverage to drink'
                    }
                },
            'required': ['beverage']
        }
    }
    z Cannot generate JSON schema for z because it has no docstring!r>   r   Nr`   z< because the docstring has no description for the argument ''z\(choices:\s*(.*?)\)\s*$)flagsc                 S   s   g | ]}|  qS r   rO   )r%   cr   r   r   r'   5  s     z#get_json_schema.<locals>.<listcomp>r(   enum)rF   r`   rB   function)r   rg   )r@   getdocr   r   rP   rb   rL   popr.   rS   rX   
IGNORECASEjsonloadsr[   r   start)r=   docZmain_docZparam_descriptionsZ
return_docZjson_schemar;   argrK   descZenum_choicesoutputr   r   r   get_json_schema   s4    k


rr   )"r@   rk   rS   typingr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   ZBASIC_TYPEScompileDOTALLrW   rY   VERBOSEr^   rZ   	Exceptionr   r   r!   r#   rL   rb   rr   r   r   r   r   <module>   s$   ,

A&!