Upgrade CacheControl to 0.12.10

Pradyun Gedam 2022-01-21 15:41:36 +00:00
parent 78ef0b216f
commit 5f5a0b3560
16 changed files with 152 additions and 43 deletions

news/cachecontrol.vendor.rst

@@ -0,0 +1 @@
+Upgrade CacheControl to 0.12.10

src/pip/_vendor/cachecontrol/LICENSE.txt

@@ -1,4 +1,4 @@
-Copyright 2015 Eric Larson
+Copyright 2012-2021 Eric Larson

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -8,8 +8,6 @@ You may obtain a copy of the License at
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-implied.
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

src/pip/_vendor/cachecontrol/__init__.py

@@ -1,11 +1,18 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """CacheControl import Interface.

 Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = "Eric Larson"
 __email__ = "eric@ionrock.org"
-__version__ = "0.12.6"
+__version__ = "0.12.10"

 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
 from .controller import CacheController
+
+import logging
+logging.getLogger(__name__).addHandler(logging.NullHandler())
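
A quick way to confirm the result of this bump locally (a sanity check, not part of the commit) is to import the vendored copy through pip's vendor namespace:

```python
# Sanity check: the vendored package should now report the new version.
from pip._vendor import cachecontrol

assert cachecontrol.__version__ == "0.12.10"
```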

src/pip/_vendor/cachecontrol/_cmd.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import logging

 from pip._vendor import requests

src/pip/_vendor/cachecontrol/adapter.py

@@ -1,16 +1,20 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import types
 import functools
 import zlib

 from pip._vendor.requests.adapters import HTTPAdapter

-from .controller import CacheController
+from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
 from .cache import DictCache
 from .filewrapper import CallbackFileWrapper


 class CacheControlAdapter(HTTPAdapter):
-    invalidating_methods = {"PUT", "DELETE"}
+    invalidating_methods = {"PUT", "PATCH", "DELETE"}

     def __init__(
         self,
@@ -93,7 +97,7 @@ class CacheControlAdapter(HTTPAdapter):
                 response = cached_response

             # We always cache the 301 responses
-            elif response.status == 301:
+            elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
                 self.controller.cache_response(request, response)
             else:
                 # Wrap the response file with a wrapper that will cache the
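
The practical effect of the adapter change is that 308 Permanent Redirect responses are now cached the same way 301s always were. A minimal sketch of the membership test the adapter now performs, using the PERMANENT_REDIRECT_STATUSES tuple defined in controller.py below:

```python
from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES

# Permanent redirects (301, 308) are cacheable; temporary ones (302, 307) are not.
for status in (301, 308, 302, 307):
    verdict = "cached" if int(status) in PERMANENT_REDIRECT_STATUSES else "not cached"
    print(status, verdict)
```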

src/pip/_vendor/cachecontrol/cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The cache object API for implementing caches. The default is a thread
 safe in-memory dictionary.
@@ -10,7 +14,7 @@ class BaseCache(object):
     def get(self, key):
         raise NotImplementedError()

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         raise NotImplementedError()

     def delete(self, key):
@@ -29,7 +33,7 @@ class DictCache(BaseCache):
     def get(self, key):
         return self.data.get(key, None)

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         with self.lock:
             self.data.update({key: value})
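
DictCache accepts the new `expires` hint but ignores it; the parameter exists so that backends which can expire entries (such as the Redis backend) receive a lifetime from the controller. A sketch of a hypothetical backend that acts on the hint, assuming `expires` arrives as a number of seconds, which is how the controller code below passes it:

```python
import time

from pip._vendor.cachecontrol.cache import BaseCache


class ExpiringDictCache(BaseCache):
    """Hypothetical in-memory cache that honours the ``expires`` hint."""

    def __init__(self):
        self.data = {}

    def get(self, key):
        record = self.data.get(key)
        if record is None:
            return None
        value, deadline = record
        if deadline is not None and time.time() >= deadline:
            # The entry outlived its hint; treat it as a miss.
            self.delete(key)
            return None
        return value

    def set(self, key, value, expires=None):
        # ``expires`` is a lifetime in seconds (e.g. max-age), or None.
        deadline = time.time() + expires if expires else None
        self.data[key] = (value, deadline)

    def delete(self, key):
        self.data.pop(key, None)
```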

src/pip/_vendor/cachecontrol/caches/__init__.py

@@ -1,2 +1,6 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .file_cache import FileCache  # noqa
 from .redis_cache import RedisCache  # noqa

src/pip/_vendor/cachecontrol/caches/file_cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import hashlib
 import os
 from textwrap import dedent
@@ -114,7 +118,7 @@ class FileCache(BaseCache):
         except FileNotFoundError:
             return None

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         name = self._fn(key)

         # Make sure the directory exists
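
FileCache only grows the extra parameter to stay compatible with the new BaseCache signature; entries on disk are still validated by the controller's header logic rather than deleted on expiry. Typical usage is unchanged (a sketch; the directory name here is arbitrary):

```python
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl
from pip._vendor.cachecontrol.caches import FileCache

# Persist cached responses on disk instead of the default in-memory dict.
sess = CacheControl(requests.Session(), cache=FileCache(".web_cache"))
resp = sess.get("https://example.com/")
```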

src/pip/_vendor/cachecontrol/caches/redis_cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import division

 from datetime import datetime

src/pip/_vendor/cachecontrol/compat.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 try:
     from urllib.parse import urljoin
 except ImportError:
@@ -9,7 +13,6 @@ try:
 except ImportError:
     import pickle

-
 # Handle the case where the requests module has been patched to not have
 # urllib3 bundled as part of its source.
 try:

src/pip/_vendor/cachecontrol/controller.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The httplib2 algorithms ported for use with requests.
 """
@@ -17,6 +21,8 @@ logger = logging.getLogger(__name__)

 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

+PERMANENT_REDIRECT_STATUSES = (301, 308)
+

 def parse_uri(uri):
     """Parses a URI using the regex given in Appendix B of RFC 3986.
@@ -37,7 +43,7 @@ class CacheController(object):
         self.cache = DictCache() if cache is None else cache
         self.cache_etags = cache_etags
         self.serializer = serializer or Serializer()
-        self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
+        self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)

     @classmethod
     def _urlnorm(cls, uri):
@@ -147,17 +153,18 @@ class CacheController(object):
             logger.warning("Cache entry deserialization failed, entry ignored")
             return False

-        # If we have a cached 301, return it immediately. We don't
-        # need to test our response for other headers b/c it is
+        # If we have a cached permanent redirect, return it immediately. We
+        # don't need to test our response for other headers b/c it is
         # intrinsically "cacheable" as it is Permanent.
         #
         # See:
         #   https://tools.ietf.org/html/rfc7231#section-6.4.2
         #
         # Client can try to refresh the value by repeating the request
         # with cache busting headers as usual (ie no-cache).
-        if resp.status == 301:
+        if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
             msg = (
-                'Returning cached "301 Moved Permanently" response '
+                "Returning cached permanent redirect response "
                 "(ignoring date and etag information)"
             )
             logger.debug(msg)
@@ -261,6 +268,11 @@ class CacheController(object):
         response_headers = CaseInsensitiveDict(response.headers)

+        if "date" in response_headers:
+            date = calendar.timegm(parsedate_tz(response_headers["date"]))
+        else:
+            date = 0
+
         # If we've been given a body, our response has a Content-Length, that
         # Content-Length is valid then we can check to see if the body we've
         # been given matches the expected size, and if it doesn't we'll just
@@ -304,35 +316,62 @@ class CacheController(object):
         # If we've been given an etag, then keep the response
         if self.cache_etags and "etag" in response_headers:
+            expires_time = 0
+            if response_headers.get("expires"):
+                expires = parsedate_tz(response_headers["expires"])
+                if expires is not None:
+                    expires_time = calendar.timegm(expires) - date
+
+            expires_time = max(expires_time, 14 * 86400)
+
+            logger.debug("etag object cached for {0} seconds".format(expires_time))
             logger.debug("Caching due to etag")
             self.cache.set(
-                cache_url, self.serializer.dumps(request, response, body=body)
+                cache_url,
+                self.serializer.dumps(request, response, body),
+                expires=expires_time,
             )

-        # Add to the cache any 301s. We do this before looking that
-        # the Date headers.
-        elif response.status == 301:
-            logger.debug("Caching permanant redirect")
-            self.cache.set(cache_url, self.serializer.dumps(request, response))
+        # Add to the cache any permanent redirects. We do this before looking
+        # that the Date headers.
+        elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
+            logger.debug("Caching permanent redirect")
+            self.cache.set(cache_url, self.serializer.dumps(request, response, b""))

         # Add to the cache if the response headers demand it. If there
         # is no date header then we can't do anything about expiring
         # the cache.
         elif "date" in response_headers:
-            date = calendar.timegm(parsedate_tz(response_headers["date"]))
             # cache when there is a max-age > 0
             if "max-age" in cc and cc["max-age"] > 0:
                 logger.debug("Caching b/c date exists and max-age > 0")
+                expires_time = cc["max-age"]
                 self.cache.set(
-                    cache_url, self.serializer.dumps(request, response, body=body)
+                    cache_url,
+                    self.serializer.dumps(request, response, body),
+                    expires=expires_time,
                 )

             # If the request can expire, it means we should cache it
             # in the meantime.
             elif "expires" in response_headers:
                 if response_headers["expires"]:
-                    logger.debug("Caching b/c of expires header")
+                    expires = parsedate_tz(response_headers["expires"])
+                    if expires is not None:
+                        expires_time = calendar.timegm(expires) - date
+                    else:
+                        expires_time = None
+
+                    logger.debug(
+                        "Caching b/c of expires header. expires in {0} seconds".format(
+                            expires_time
+                        )
+                    )
                     self.cache.set(
-                        cache_url, self.serializer.dumps(request, response, body=body)
+                        cache_url,
+                        self.serializer.dumps(request, response, body=body),
+                        expires=expires_time,
                    )

     def update_cached_response(self, request, response):
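
The expiry arithmetic deserves a worked example: for etag'd responses the hint is `Expires - Date`, floored at 14 days, so revalidateable entries aren't evicted too eagerly by expiry-aware backends. With hypothetical header values:

```python
import calendar
from email.utils import parsedate_tz

# Hypothetical response headers.
date_hdr = "Fri, 21 Jan 2022 15:00:00 GMT"
expires_hdr = "Fri, 28 Jan 2022 15:00:00 GMT"

date = calendar.timegm(parsedate_tz(date_hdr))
expires_time = calendar.timegm(parsedate_tz(expires_hdr)) - date
print(expires_time)  # 604800 (7 days)

# The etag branch never hints below two weeks:
expires_time = max(expires_time, 14 * 86400)
print(expires_time)  # 1209600 (14 days)
```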

src/pip/_vendor/cachecontrol/filewrapper.py

@@ -1,4 +1,9 @@
-from io import BytesIO
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from tempfile import NamedTemporaryFile
+import mmap


 class CallbackFileWrapper(object):
@@ -11,10 +16,17 @@ class CallbackFileWrapper(object):
     This class uses members with a double underscore (__) leading prefix so as
     not to accidentally shadow an attribute.

+    The data is stored in a temporary file until it is all available. As long
+    as the temporary files directory is disk-based (sometimes it's a
+    memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
+    pressure is high. For small files the disk usually won't be used at all,
+    it'll all be in the filesystem memory cache, so there should be no
+    performance impact.
     """

     def __init__(self, fp, callback):
-        self.__buf = BytesIO()
+        self.__buf = NamedTemporaryFile("rb+", delete=True)
         self.__fp = fp
         self.__callback = callback
@@ -49,7 +61,19 @@ class CallbackFileWrapper(object):

     def _close(self):
         if self.__callback:
-            self.__callback(self.__buf.getvalue())
+            if self.__buf.tell() == 0:
+                # Empty file:
+                result = b""
+            else:
+                # Return the data without actually loading it into memory,
+                # relying on Python's buffer API and mmap(). mmap() just gives
+                # a view directly into the filesystem's memory cache, so it
+                # doesn't result in duplicate memory use.
+                self.__buf.seek(0, 0)
+                result = memoryview(
+                    mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
+                )
+            self.__callback(result)

         # We assign this to None here, because otherwise we can get into
         # really tricky problems where the CPython interpreter dead locks
@@ -58,9 +82,16 @@ class CallbackFileWrapper(object):
        # and allows the garbage collector to do it's thing normally.
        self.__callback = None

+        # Closing the temporary file releases memory and frees disk space.
+        # Important when caching big files.
+        self.__buf.close()
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
-        self.__buf.write(data)
+        if data:
+            # We may be dealing with b'', a sign that things are over:
+            # it's passed e.g. after we've already closed self.__buf.
+            self.__buf.write(data)
+
         if self.__is_fp_closed():
             self._close()
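
Replacing BytesIO with a named temporary file plus mmap() means a large response body never has to exist as a single in-memory bytes object: the callback receives a zero-copy view backed by the kernel's page cache. A self-contained sketch of the same trick (payload and sizes hypothetical):

```python
import mmap
from tempfile import NamedTemporaryFile

buf = NamedTemporaryFile("rb+", delete=True)
buf.write(b"example payload " * 1024)
buf.flush()  # make sure buffered bytes reach the file before mapping

if buf.tell() == 0:
    result = b""  # mmap() cannot map an empty file
else:
    buf.seek(0, 0)
    # A zero-copy view into the filesystem cache; no duplicate allocation.
    result = memoryview(mmap.mmap(buf.fileno(), 0, access=mmap.ACCESS_READ))

print(len(result), bytes(result[:15]))
buf.close()  # frees the disk space; on POSIX the mapping stays readable
```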

src/pip/_vendor/cachecontrol/heuristics.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import calendar
 import time

src/pip/_vendor/cachecontrol/serialize.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import base64
 import io
 import json
@@ -17,24 +21,18 @@
     return _b64_decode_bytes(s).decode("utf8")

-class Serializer(object):
+_default_body_read = object()
+
+
+class Serializer(object):
     def dumps(self, request, response, body=None):
         response_headers = CaseInsensitiveDict(response.headers)

         if body is None:
+            # When a body isn't passed in, we'll read the response. We
+            # also update the response with a new file handler to be
+            # sure it acts as though it was never read.
             body = response.read(decode_content=False)
-
-            # NOTE: 99% sure this is dead code. I'm only leaving it
-            #       here b/c I don't have a test yet to prove
-            #       it. Basically, before using
-            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
-            #       this made an effort to reset the file handle. The
-            #       `CallbackFileWrapper` short circuits this code by
-            #       setting the body as the content is consumed, the
-            #       result being a `body` argument is *always* passed
-            #       into cache_response, and in turn,
-            #       `Serializer.dump`.
             response._fp = io.BytesIO(body)

         # NOTE: This is all a bit weird, but it's really important that on

src/pip/_vendor/cachecontrol/wrapper.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .adapter import CacheControlAdapter
 from .cache import DictCache
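
wrapper.py only gains the license header; it remains the public entry point that wires DictCache and CacheControlAdapter together. For context, the documented basic usage (upstream import path shown; pip itself imports via pip._vendor):

```python
import requests
from cachecontrol import CacheControl

# Wrap a session; GET traffic is routed through CacheControlAdapter
# backed by the default in-memory DictCache.
sess = CacheControl(requests.Session())

resp = sess.get("https://example.com/")  # goes to the network
resp = sess.get("https://example.com/")  # may be answered from cache
```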

src/pip/_vendor/vendor.txt

@@ -1,4 +1,4 @@
-CacheControl==0.12.6 # Make sure to update the license in pyproject.toml for this.
+CacheControl==0.12.10 # Make sure to update the license in pyproject.toml for this.
 colorama==0.4.4
 distlib==0.3.3
 distro==1.6.0