U
    @fp{                  	   @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZmZmZmZmZmZmZmZmZ erd dlZd dlmZ edZnedZedZedZeeegef ZG d	d
 d
Z G dd de Z!G dd de Z"G dd de Z#G dd de Z$G dd de Z%G dd de Z&G dd de Z'G dd deeef Z(G dd de Z)de iZ*de+d< d&d d!d"d#d$d%Z,e e!e%e"e$e#e&e'e)f	D ]Z-e,e- qdS )'    )annotationsN)FutureThreadPoolExecutor)	TYPE_CHECKINGAnyCallableClassVarGeneric
NamedTupleOptionalOrderedDictTypeVar)	ParamSpecPTZfsspecc                   @  sn   e Zd ZU dZdZded< dddddd	d
ZddddddZddddZddddZ	ddddZ
dS )	BaseCacheag  Pass-though cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    noneClassVar[str]nameintFetcherNone	blocksizefetchersizereturnc                 C  s.   || _ d| _|| _|| _d| _d| _d| _d S Nr   )r   nblocksr   r   	hit_count
miss_counttotal_requested_bytesselfr   r   r    r$   2/tmp/pip-unpacked-wheel-yjyh6wby/fsspec/caching.py__init__:   s    zBaseCache.__init__
int | Nonebytesstartstopr   c                 C  s<   |d krd}|d kr| j }|| j ks,||kr0dS | ||S )Nr       )r   r   r#   r*   r+   r$   r$   r%   _fetchD   s    zBaseCache._fetchr   c                 C  s   d| _ d| _d| _dS )zAReset hit and miss counts for a more ganular report e.g. by file.r   N)r   r    r!   r#   r$   r$   r%   _reset_statsM   s    zBaseCache._reset_statsstrc                 C  s0   | j dkr| jdkrdS d| j| j | j| jf S )z2Return a formatted string of the cache statistics.r    z3 , %s: %d hits, %d misses, %d total requested bytes)r   r    r   r!   r0   r$   r$   r%   
_log_statsS   s    zBaseCache._log_statsc                 C  s@   d| j j d| j d| j d| j d| j d| j d| j dS )	Nz

        <z:
            block size  :   z
            block count :   z
            file size   :   z
            cache hits  :   z
            cache misses:   z$
            total requested bytes: z
>
        )	__class____name__r   r   r   r   r    r!   r0   r$   r$   r%   __repr___   s    zBaseCache.__repr__N)r6   
__module____qualname____doc__r   __annotations__r&   r.   r1   r4   r7   r$   r$   r$   r%   r   (   s   

	r   c                      st   e Zd ZdZdZdddddddd	 fd
dZddddZddddddZddddZdddddZ	  Z
S )	MMapCachezmemory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix
    mmapNr   r   z
str | Nonezset[int] | Noner   )r   r   r   locationblocksr   c                   s8   t  ||| |d krt n|| _|| _|  | _d S N)superr&   setr?   r>   	_makefilecache)r#   r   r   r   r>   r?   r5   r$   r%   r&   w   s    zMMapCache.__init__zmmap.mmap | bytearrayr/   c                 C  s   dd l }dd l}| jdkr t S | jd ks8tj| js| jd krT| }t	 | _
nt| jd}|| jd  |d |  nt| jd}| | | jS )Nr   zwb+      1zr+b)r=   tempfiler   	bytearrayr>   ospathexistsTemporaryFilerB   r?   openseekwriteflushfileno)r#   r=   rH   fdr$   r$   r%   rC      s    




zMMapCache._makefiler'   r(   r*   endr   c           
   	     sD  t d| d|  |d kr"d}|d kr0 j}| jksB||krFdS | j }| j } fddt||d D } fddt||d D }  jt|7  _  jt|7  _|r6|d}| j }t	| j  j}	  j
|	| 7  _
t d	| d
| d|	 d  ||	 j||	<  j| q j|| S )NzMMap cache fetching -r   r,   c                   s   g | ]}| j kr|qS r$   r?   .0ir0   r$   r%   
<listcomp>   s     
 z$MMapCache._fetch.<locals>.<listcomp>rF   c                   s   g | ]}| j kr|qS r$   rW   rX   r0   r$   r%   r[      s     
 zMMap get block #z ())loggerdebugr   r   ranger    lenr   popminr!   r   rD   r?   add)
r#   r*   rU   Zstart_blockZ	end_blockZneedhitsrZ   Zsstartsendr$   r0   r%   r.      s,    



zMMapCache._fetchdict[str, Any]c                 C  s   | j  }|d= |S )NrD   )__dict__copyr#   stater$   r$   r%   __getstate__   s    
zMMapCache.__getstate__rj   r   c                 C  s   | j | |  | _d S r@   )rg   updaterC   rD   ri   r$   r$   r%   __setstate__   s    zMMapCache.__setstate__)NN)r6   r8   r9   r:   r   r&   rC   r.   rk   rn   __classcell__r$   r$   rE   r%   r<   l   s     r<   c                      sB   e Zd ZdZdZddddd fddZd	d	d
dddZ  ZS )ReadAheadCachea!  Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    Z	readaheadr   r   r   r   c                   s&   t  ||| d| _d| _d| _d S )Nr,   r   )rA   r&   rD   r*   rU   r"   rE   r$   r%   r&      s    zReadAheadCache.__init__r'   r(   rT   c                 C  s>  |d krd}|d ks|| j kr$| j }|| j ks6||kr:dS || }|| jkr~|| jkr~|  jd7  _| j|| j || j  S | j|  kr| jk rn n6|  jd7  _| j|| j d  }|t|8 }| j}n|  jd7  _d}t| j || j }|  j	|| 7  _	| 
||| _|| _| jt| j | _|| jd |  S Nr   r,   rF   )r   r*   rU   r   rD   r    r`   rb   r   r!   r   )r#   r*   rU   lpartr$   r$   r%   r.      s.    zReadAheadCache._fetchr6   r8   r9   r:   r   r&   r.   ro   r$   r$   rE   r%   rp      s   rp   c                      sB   e Zd ZdZdZddddd fddZd	d	d
dddZ  ZS )FirstChunkCachezCaches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    firstr   r   r   r   c                   s&   ||kr|}t  ||| d | _d S r@   )rA   r&   rD   r"   rE   r$   r%   r&      s    zFirstChunkCache.__init__r'   r(   rT   c                 C  s0  |pd}|| j kr td dS t|| j }|| jk r | jd kr|  jd7  _|| jkr|  j|7  _| d|}|d | j | _||d  S | d| j| _|  j| j7  _| j|| }|| jkr|  j|| j 7  _|| | j|7 }|  j	d7  _	|S |  jd7  _|  j|| 7  _| ||S d S )Nr   z,FirstChunkCache: requested start > file sizer,   rF   )
r   r]   r^   rb   r   rD   r    r!   r   r   )r#   r*   rU   datars   r$   r$   r%   r.      s0    




zFirstChunkCache._fetchrt   r$   r$   rE   r%   ru      s   ru   c                      s   e Zd ZdZdZddddddd fdd	Zd
d ZddddZdddddZddddddZ	ddd fddZ
ddddddddZ  ZS )
BlockCachea  
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    Z
blockcache    r   r   r   r   r   r   	maxblocksr   c                   s<   t  ||| t|| | _|| _t|| j| _	d S r@   )
rA   r&   mathceilr   r{   	functools	lru_cache_fetch_block_fetch_block_cachedr#   r   r   r   r{   rE   r$   r%   r&   7  s    zBlockCache.__init__c                 C  s
   | j  S z
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        r   
cache_infor0   r$   r$   r%   r   ?  s    	zBlockCache.cache_inforf   r/   c                 C  s   | j }|d= |S )Nr   rg   ri   r$   r$   r%   rk   J  s    zBlockCache.__getstate__rl   c                 C  s&   | j | t|d | j| _d S )Nr{   )rg   rm   r~   r   r   r   ri   r$   r$   r%   rn   O  s    zBlockCache.__setstate__r'   r(   rT   c                 C  st   |d krd}|d kr| j }|| j ks,||kr0dS || j }|| j }t||d D ]}| | qR| j||||dS )Nr   r,   rF   start_block_numberend_block_number)r   r   r_   r   _read_cache)r#   r*   rU   r   r   block_numberr$   r$   r%   r.   U  s     

zBlockCache._fetch)r   r   c                   st   || j kr"td| d| j  d|| j }|| j }|  j|| 7  _|  jd7  _td| t ||}|S )=
        Fetch the block of data for `block_number`.
        'block_number=(' is greater than the number of blocks (r\   rF   zBlockCache fetching block %d)	r   
ValueErrorr   r!   r    r]   inforA   r.   )r#   r   r*   rU   block_contentsrE   r$   r%   r   l  s    


zBlockCache._fetch_blockr*   rU   r   r   r   c           	      C  s   || j  }|| j  }|  jd7  _||kr@| |}||| S | ||d g}|t| jt|d | || |d|  d|S dS z
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        rF   Nr,   r   r   r   extendmapr_   appendjoin	r#   r*   rU   r   r   Z	start_posZend_posblockoutr$   r$   r%   r   ~  s    


zBlockCache._read_cache)ry   )r6   r8   r9   r:   r   r&   r   rk   rn   r.   r   r   ro   r$   r$   rE   r%   rx     s    rx   c                      s^   e Zd ZU dZdZded< dddddd	d
 fddZddddddZddddZ  Z	S )
BytesCacheaK  Cache which holds data in a in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    r(   r   r   Tr   r   boolr   )r   r   r   trimr   c                   s,   t  ||| d| _d | _d | _|| _d S )Nr,   )rA   r&   rD   r*   rU   r   )r#   r   r   r   r   rE   r$   r%   r&     s
    zBytesCache.__init__r'   rT   c                 C  s  |d krd}|d kr| j }|| j ks,||kr0dS | jd k	r|| jkr| jd k	r|| jk r|| j }|  jd7  _| j||| |  S | jrt| j || j }n|}||ks|| j krdS | jd ks|| jk r| jd ks|| jkr|  j|| 7  _|  jd7  _| 	||| _|| _nF| jd k	s.t
| jd k	s>t
|  jd7  _|| jk r| jd ksv| j| | jkr|  j|| 7  _| 	||| _|| _n4|  j| j| 7  _| 	|| j}|| _|| j | _n| jd k	rd|| jkrd| j| j krnh|| j | jkr6|  j|| 7  _| 	||| _|| _n.|  j|| j 7  _| 	| j|}| j| | _| jt| j | _|| j }| j||| |  }| jr| j| j | jd  }|dkr|  j| j| 7  _| j| j| d  | _|S rq   )r   r*   rU   r   rD   r   rb   r!   r    r   AssertionErrorr`   r   )r#   r*   rU   offsetZbendnewr   numr$   r$   r%   r.     s|    



zBytesCache._fetchr/   c                 C  s
   t | jS r@   )r`   rD   r0   r$   r$   r%   __len__  s    zBytesCache.__len__)T)
r6   r8   r9   r:   r   r;   r&   r.   r   ro   r$   r$   rE   r%   r     s   
 	Ir   c                      sP   e Zd ZU dZdZded< dddddd	d
 fddZddddddZ  ZS )AllBytesz!Cache entire contents of the fileallr   r   Nr'   zFetcher | Nonezbytes | Noner   )r   r   r   rw   r   c                   sN   t  ||| |d krD|  jd7  _|  j| j7  _| d| j}|| _d S )NrF   r   )rA   r&   r    r!   r   r   rw   )r#   r   r   r   rw   rE   r$   r%   r&     s    zAllBytes.__init__r(   r)   c                 C  s   |  j d7  _ | j|| S )NrF   )r   rw   r-   r$   r$   r%   r.     s    zAllBytes._fetch)NNNN	r6   r8   r9   r:   r   r;   r&   r.   ro   r$   r$   rE   r%   r     s   
    r   c                      sV   e Zd ZU dZdZded< ddddd	d
dd fddZdddd fddZ  ZS )KnownPartsOfAFilea  
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes.
    strict: bool, default True
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    partsr   r   NTr   r   z&Optional[dict[tuple[int, int], bytes]]r   r   )r   r   r   rw   strict_c                   s   t  ||| || _|rt| }|d g}||d g}	|dd  D ]f\}
}|d \}}|
|kr||f|d< |	d  ||
|f7  < qL||
|f |	||
|f qLtt||	| _	ni | _	d S )Nr   rF   )
rA   r&   r   sortedkeysra   r   dictziprw   )r#   r   r   r   rw   r   r   Zold_offsetsoffsetsr?   r*   r+   Zstart0Zstop0rE   r$   r%   r&   =  s    	
zKnownPartsOfAFile.__init__r'   r(   r)   c                   s:  |d krd}|d kr| j }d}| j D ]\\}}}||  krH|k r(n q(|| }|||| |  }| jr||  kr|krn n.|d|| t|  7 }|  jd7  _|  S |} qq(| jd krtd||f dt	d||f d t
d| d	|  |  j|| 7  _|  jd7  _|t || S )
Nr   r,       rF   z&Read is outside the known file parts: z. z%. IO/caching performance may be poor!z!KnownPartsOfAFile cache fetching rV   )r   rw   itemsr   r`   r   r   r   warningswarnr]   r^   r!   r    rA   r.   )r#   r*   r+   r   Zloc0loc1rw   offrE   r$   r%   r.   [  s0    
zKnownPartsOfAFile._fetch)NTr   r$   r$   rE   r%   r   $  s   
  r   c                   @  sv   e Zd ZdZG dd deZdddddd	d
ZddddddZdddddZddddddZ	ddddZ
dS )UpdatableLRUzg
    Custom implementation of LRU cache that allows updating keys

    Used by BackgroudBlockCache
    c                   @  s.   e Zd ZU ded< ded< ded< ded< dS )UpdatableLRU.CacheInfor   rd   missesmaxsizecurrsizeN)r6   r8   r9   r;   r$   r$   r$   r%   	CacheInfo  s   
r      zCallable[P, T]r   r   )funcmax_sizer   c                 C  s0   t  | _|| _|| _d| _d| _t | _	d S r   )
collectionsr   _cache_func	_max_size_hits_misses	threadingLock_lock)r#   r   r   r$   r$   r%   r&     s    
zUpdatableLRU.__init__zP.argszP.kwargsr   )argskwargsr   c              
   O  s   |rt d|  | j@ || jkrX| j| |  jd7  _| j| W  5 Q R  S W 5 Q R X | j||}| j< || j|< |  jd7  _t| j| j	kr| jj
dd W 5 Q R X |S )Nz Got unexpected keyword argument rF   Flast)	TypeErrorr   r   r   move_to_endr   r   r   r`   r   popitem)r#   r   r   resultr$   r$   r%   __call__  s    
 
zUpdatableLRU.__call__r   r   )r   r   c              
   G  s(   | j  || jkW  5 Q R  S Q R X d S r@   )r   r   )r#   r   r$   r$   r%   is_key_cached  s    zUpdatableLRU.is_key_cached)r   r   r   c              	   G  s>   | j . || j|< t| j| jkr0| jjdd W 5 Q R X d S )NFr   )r   r   r`   r   r   )r#   r   r   r$   r$   r%   add_key  s    
zUpdatableLRU.add_keyr   r/   c              
   C  s<   | j , | j| jt| j| j| jdW  5 Q R  S Q R X d S )N)r   r   rd   r   )r   r   r   r`   r   r   r   r0   r$   r$   r%   r     s    zUpdatableLRU.cache_infoN)r   )r6   r8   r9   r:   r
   r   r&   r   r   r   r   r$   r$   r$   r%   r     s   r   c                      s   e Zd ZU dZdZded< d"dddddd	 fd
dZddddZddddZddddZ	ddddddZ
d#dddd fddZddddddd d!Z  ZS )$BackgroundBlockCachea  
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    Z
backgroundr   r   ry   r   r   r   rz   c                   sZ   t  ||| t|| | _|| _t| j|| _t	dd| _
d | _d | _t | _d S )NrF   max_workers)rA   r&   r|   r}   r   r{   r   r   r   r   _thread_executor_fetch_future_block_number_fetch_futurer   r   _fetch_future_lockr   rE   r$   r%   r&     s    zBackgroundBlockCache.__init__r   r/   c                 C  s
   | j  S r   r   r0   r$   r$   r%   r     s    	zBackgroundBlockCache.cache_inforf   c                 C  s(   | j }|d= |d= |d= |d= |d= |S )Nr   r   r   r   r   r   ri   r$   r$   r%   rk     s    z!BackgroundBlockCache.__getstate__c                 C  sD   | j | t| j|d | _tdd| _d | _d | _t	
 | _d S )Nr{   rF   r   )rg   rm   r   r   r   r   r   r   r   r   r   r   ri   r$   r$   r%   rn     s    z!BackgroundBlockCache.__setstate__r'   r(   rT   c           
   	   C  s  |d krd}|d kr| j }|| j ks,||kr0dS || j }|| j }d }d }| j | jd k	r| jd k	slt| j rtd | j	
| j | j d | _d | _n:t|| j  ko|kn  }|r| j}| j}d | _d | _W 5 Q R X |d k	rtd | j	
| | t||d D ]}| 	| q|d }	| jF | jd kr~|	| jkr~| j	|	s~|	| _| j| j|	d| _W 5 Q R X | j||||dS )Nr   r,   z3BlockCache joined background fetch without waiting.z(BlockCache waiting for background fetch.rF   asyncr   )r   r   r   r   r   r   doner]   r   r   r   r   r   r_   r   r   r   Zsubmitr   r   )
r#   r*   rU   r   r   Zfetch_future_block_numberZfetch_futureZ	must_joinr   Zend_block_plus_1r$   r$   r%   r.   
  sz    




 

 
  zBackgroundBlockCache._fetchsyncr2   )r   log_infor   c                   sv   || j kr"td| d| j  d|| j }|| j }td|| |  j|| 7  _|  jd7  _t ||}|S )r   r   r   r\   z!BlockCache fetching block (%s) %drF   )	r   r   r   r]   r   r!   r    rA   r.   )r#   r   r   r*   rU   r   rE   r$   r%   r   V  s    


z!BackgroundBlockCache._fetch_blockr   c           	      C  s   || j  }|| j  }|  jd7  _||kr@| |}||| S | ||d g}|t| jt|d | || |d|  d|S dS r   r   r   r$   r$   r%   r   h  s    


z BackgroundBlockCache._read_cache)ry   )r   )r6   r8   r9   r:   r   r;   r&   r   rk   rn   r.   r   r   ro   r$   r$   rE   r%   r     s   
 	Lr   z!dict[str | None, type[BaseCache]]cachesFztype[BaseCache]r   r   )clsclobberr   c                 C  s6   | j }|s*|tkr*td|dt|  | t|< dS )z'Register' cache implementation.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
    zCache with name z is already known: N)r   r   r   )r   r   r   r$   r$   r%   register_cache  s    r   )F).
__future__r   r   r~   loggingr|   rJ   r   r   concurrent.futuresr   r   typingr   r   r   r   r	   r
   r   r   r   r=   Ztyping_extensionsr   r   r   	getLoggerr]   r   r(   r   r   r<   rp   ru   rx   r   r   r   r   r   r   r;   r   cr$   r$   r$   r%   <module>   sZ    ,

DV.. 
ee< Q 