Upgrade CacheControl to 0.12.11

This commit is contained in:
Pradyun Gedam 2022-04-22 15:38:00 +01:00
parent c247ddce12
commit e747a22b57
9 changed files with 137 additions and 43 deletions

View File

@ -0,0 +1 @@
Upgrade CacheControl to 0.12.11

View File

@ -8,7 +8,7 @@ Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.12.10"
__version__ = "0.12.11"
from .wrapper import CacheControl
from .adapter import CacheControlAdapter

View File

@ -41,3 +41,25 @@ class DictCache(BaseCache):
with self.lock:
if key in self.data:
self.data.pop(key)
class SeparateBodyBaseCache(BaseCache):
    """
    In this variant, the body is not stored mixed in with the metadata, but is
    passed in (as a bytes-like object) in a separate call to ``set_body()``.

    That is, the expected interaction pattern is::

        cache.set(key, serialized_metadata)
        cache.set_body(key, body)

    Similarly, the body should be loaded separately via ``get_body()``.
    """

    def set_body(self, key, body):
        # Abstract hook: concrete subclasses must persist ``body`` under ``key``.
        raise NotImplementedError()

    def get_body(self, key):
        """
        Return the body as file-like object.
        """
        # Abstract hook: subclasses return a readable stream, or None on a miss
        # (see SeparateBodyFileCache.get_body).
        raise NotImplementedError()

View File

@ -2,5 +2,8 @@
#
# SPDX-License-Identifier: Apache-2.0
from .file_cache import FileCache # noqa
from .redis_cache import RedisCache # noqa
from .file_cache import FileCache, SeparateBodyFileCache
from .redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]

View File

@ -6,7 +6,7 @@ import hashlib
import os
from textwrap import dedent
from ..cache import BaseCache
from ..cache import BaseCache, SeparateBodyBaseCache
from ..controller import CacheController
try:
@ -57,7 +57,8 @@ def _secure_open_write(filename, fmode):
raise
class FileCache(BaseCache):
class _FileCacheMixin:
"""Shared implementation for both FileCache variants."""
def __init__(
self,
@ -120,20 +121,25 @@ class FileCache(BaseCache):
def set(self, key, value, expires=None):
name = self._fn(key)
self._write(name, value)
def _write(self, path, data: bytes):
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(name), self.dirmode)
os.makedirs(os.path.dirname(path), self.dirmode)
except (IOError, OSError):
pass
with self.lock_class(name) as lock:
with self.lock_class(path) as lock:
# Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value)
fh.write(data)
def delete(self, key):
name = self._fn(key)
def _delete(self, key, suffix):
name = self._fn(key) + suffix
if not self.forever:
try:
os.remove(name)
@ -141,6 +147,38 @@ class FileCache(BaseCache):
pass
class FileCache(_FileCacheMixin, BaseCache):
    """
    Traditional FileCache: body is stored in memory, so not suitable for large
    downloads.
    """

    def delete(self, key):
        # Only one file per key (metadata + body together), so no suffix.
        self._delete(key, "")
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
    """
    Memory-efficient FileCache: body is stored in a separate file, reducing
    peak memory usage.
    """

    def get_body(self, key):
        # The body lives in a sibling file named "<metadata-file>.body".
        name = self._fn(key) + ".body"
        try:
            # NOTE(review): the returned file object is presumably consumed
            # and closed by the caller — confirm against the controller.
            return open(name, "rb")
        except FileNotFoundError:
            # A missing body file is a cache miss, not an error.
            return None

    def set_body(self, key, body):
        name = self._fn(key) + ".body"
        # Reuse the mixin's locked, secure write path for the body file.
        self._write(name, body)

    def delete(self, key):
        # Remove both the metadata file and the separate body file.
        self._delete(key, "")
        self._delete(key, ".body")
def url_to_file_path(url, filecache):
"""Return the file cache path based on the URL.

View File

@ -19,9 +19,11 @@ class RedisCache(BaseCache):
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, value)
else:
elif isinstance(expires, datetime):
expires = expires - datetime.utcnow()
self.conn.setex(key, int(expires.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
def delete(self, key):
self.conn.delete(key)

View File

@ -13,7 +13,7 @@ from email.utils import parsedate_tz
from pip._vendor.requests.structures import CaseInsensitiveDict
from .cache import DictCache
from .cache import DictCache, SeparateBodyBaseCache
from .serialize import Serializer
@ -27,15 +27,14 @@ PERMANENT_REDIRECT_STATUSES = (301, 308)
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
"""An interface to see if request should cached or not.
"""
"""An interface to see if request should be cached or not."""
def __init__(
self, cache=None, cache_etags=True, serializer=None, status_codes=None
@ -147,8 +146,13 @@ class CacheController(object):
logger.debug("No cache entry available")
return False
if isinstance(self.cache, SeparateBodyBaseCache):
body_file = self.cache.get_body(cache_url)
else:
body_file = None
# Check whether it can be deserialized
resp = self.serializer.loads(request, cache_data)
resp = self.serializer.loads(request, cache_data, body_file)
if not resp:
logger.warning("Cache entry deserialization failed, entry ignored")
return False
@ -251,6 +255,26 @@ class CacheController(object):
return new_headers
def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
    """
    Store the data in the cache.
    """
    if isinstance(self.cache, SeparateBodyBaseCache):
        # We pass in the body separately; just put a placeholder empty
        # string in the metadata.
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, response, b""),
            expires=expires_time,
        )
        # Persist the body through the cache's dedicated body channel.
        self.cache.set_body(cache_url, body)
    else:
        # Legacy caches store metadata and body together in one entry.
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, response, body),
            expires=expires_time,
        )
def cache_response(self, request, response, body=None, status_codes=None):
"""
Algorithm for caching requests.
@ -326,17 +350,13 @@ class CacheController(object):
logger.debug("etag object cached for {0} seconds".format(expires_time))
logger.debug("Caching due to etag")
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body),
expires=expires_time,
)
self._cache_set(cache_url, request, response, body, expires_time)
# Add to the cache any permanent redirects. We do this before looking
# that the Date headers.
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
logger.debug("Caching permanent redirect")
self.cache.set(cache_url, self.serializer.dumps(request, response, b""))
self._cache_set(cache_url, request, response, b"")
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
@ -347,10 +367,12 @@ class CacheController(object):
if "max-age" in cc and cc["max-age"] > 0:
logger.debug("Caching b/c date exists and max-age > 0")
expires_time = cc["max-age"]
self.cache.set(
self._cache_set(
cache_url,
self.serializer.dumps(request, response, body),
expires=expires_time,
request,
response,
body,
expires_time,
)
# If the request can expire, it means we should cache it
@ -368,10 +390,12 @@ class CacheController(object):
expires_time
)
)
self.cache.set(
self._cache_set(
cache_url,
self.serializer.dumps(request, response, body=body),
expires=expires_time,
request,
response,
body,
expires_time,
)
def update_cached_response(self, request, response):
@ -410,6 +434,6 @@ class CacheController(object):
cached_response.status = 200
# update our cache
self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
self._cache_set(cache_url, request, cached_response)
return cached_response

View File

@ -44,7 +44,7 @@ class Serializer(object):
# enough to have msgpack know the difference.
data = {
u"response": {
u"body": body,
u"body": body, # Empty bytestring if body is stored separately
u"headers": dict(
(text_type(k), text_type(v)) for k, v in response.headers.items()
),
@ -69,7 +69,7 @@ class Serializer(object):
return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
def loads(self, request, data):
def loads(self, request, data, body_file=None):
# Short circuit if we've been given an empty set of data
if not data:
return
@ -92,14 +92,14 @@ class Serializer(object):
# Dispatch to the actual load method for the given version
try:
return getattr(self, "_loads_v{}".format(ver))(request, data)
return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
return
def prepare_response(self, request, cached):
def prepare_response(self, request, cached, body_file=None):
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
@ -125,7 +125,10 @@ class Serializer(object):
cached["response"]["headers"] = headers
try:
body = io.BytesIO(body_raw)
if body_file is None:
body = io.BytesIO(body_raw)
else:
body = body_file
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
@ -137,21 +140,22 @@ class Serializer(object):
return HTTPResponse(body=body, preload_content=False, **cached["response"])
def _loads_v0(self, request, data):
def _loads_v0(self, request, data, body_file=None):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
def _loads_v1(self, request, data):
def _loads_v1(self, request, data, body_file=None):
try:
cached = pickle.loads(data)
except ValueError:
return
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)
def _loads_v2(self, request, data):
def _loads_v2(self, request, data, body_file=None):
assert body_file is None
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except (ValueError, zlib.error):
@ -169,18 +173,18 @@ class Serializer(object):
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)
def _loads_v3(self, request, data):
def _loads_v3(self, request, data, body_file):
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
def _loads_v4(self, request, data):
def _loads_v4(self, request, data, body_file=None):
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
return
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)

View File

@ -1,4 +1,4 @@
CacheControl==0.12.10 # Make sure to update the license in pyproject.toml for this.
CacheControl==0.12.11 # Make sure to update the license in pyproject.toml for this.
colorama==0.4.4
distlib==0.3.3
distro==1.6.0