Upgrade CacheControl to 0.12.10

Pradyun Gedam 2022-01-21 15:41:36 +00:00
parent 78ef0b216f
commit 5f5a0b3560
16 changed files with 152 additions and 43 deletions

news/cachecontrol.vendor.rst

@@ -0,0 +1 @@
+Upgrade CacheControl to 0.12.10

src/pip/_vendor/cachecontrol/LICENSE.txt

@@ -1,4 +1,4 @@
-Copyright 2015 Eric Larson
+Copyright 2012-2021 Eric Larson

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -8,8 +8,6 @@ You may obtain a copy of the License at
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-implied.
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

src/pip/_vendor/cachecontrol/__init__.py

@@ -1,11 +1,18 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """CacheControl import Interface.

 Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = "Eric Larson"
 __email__ = "eric@ionrock.org"
-__version__ = "0.12.6"
+__version__ = "0.12.10"

 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
 from .controller import CacheController
+
+import logging
+logging.getLogger(__name__).addHandler(logging.NullHandler())
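
A quick way to confirm the result of this bump locally (a sanity check, not part of the commit) is to import the vendored copy through pip's vendor namespace:

```python
# Sanity check: the vendored package should now report the new version.
from pip._vendor import cachecontrol

assert cachecontrol.__version__ == "0.12.10"
```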

src/pip/_vendor/cachecontrol/_cmd.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import logging

 from pip._vendor import requests

src/pip/_vendor/cachecontrol/adapter.py

@@ -1,16 +1,20 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import types
 import functools
 import zlib

 from pip._vendor.requests.adapters import HTTPAdapter

-from .controller import CacheController
+from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
 from .cache import DictCache
 from .filewrapper import CallbackFileWrapper


 class CacheControlAdapter(HTTPAdapter):
-    invalidating_methods = {"PUT", "DELETE"}
+    invalidating_methods = {"PUT", "PATCH", "DELETE"}

     def __init__(
         self,
@@ -93,7 +97,7 @@ class CacheControlAdapter(HTTPAdapter):
                 response = cached_response

             # We always cache the 301 responses
-            elif response.status == 301:
+            elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
                 self.controller.cache_response(request, response)
             else:
                 # Wrap the response file with a wrapper that will cache the
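
The practical effect of the adapter change is that 308 Permanent Redirect responses are now cached the same way 301s always were. A minimal sketch of the membership test the adapter now performs, using the PERMANENT_REDIRECT_STATUSES tuple defined in controller.py below:

```python
from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES

# Permanent redirects (301, 308) are cacheable; temporary ones (302, 307) are not.
for status in (301, 308, 302, 307):
    verdict = "cached" if int(status) in PERMANENT_REDIRECT_STATUSES else "not cached"
    print(status, verdict)
```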

src/pip/_vendor/cachecontrol/cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The cache object API for implementing caches. The default is a thread
 safe in-memory dictionary.
@@ -10,7 +14,7 @@ class BaseCache(object):
     def get(self, key):
         raise NotImplementedError()

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         raise NotImplementedError()

     def delete(self, key):
@@ -29,7 +33,7 @@ class DictCache(BaseCache):
     def get(self, key):
         return self.data.get(key, None)

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         with self.lock:
             self.data.update({key: value})
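
DictCache accepts the new `expires` hint but ignores it; the parameter exists so that backends which can expire entries (such as the Redis backend) receive a lifetime from the controller. A sketch of a hypothetical backend that acts on the hint, assuming `expires` arrives as a number of seconds, which is how the controller code below passes it:

```python
import time

from pip._vendor.cachecontrol.cache import BaseCache


class ExpiringDictCache(BaseCache):
    """Hypothetical in-memory cache that honours the ``expires`` hint."""

    def __init__(self):
        self.data = {}

    def get(self, key):
        record = self.data.get(key)
        if record is None:
            return None
        value, deadline = record
        if deadline is not None and time.time() >= deadline:
            # The entry outlived its hint; treat it as a miss.
            self.delete(key)
            return None
        return value

    def set(self, key, value, expires=None):
        # ``expires`` is a lifetime in seconds (e.g. max-age), or None.
        deadline = time.time() + expires if expires else None
        self.data[key] = (value, deadline)

    def delete(self, key):
        self.data.pop(key, None)
```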

src/pip/_vendor/cachecontrol/caches/__init__.py

@@ -1,2 +1,6 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .file_cache import FileCache  # noqa
 from .redis_cache import RedisCache  # noqa

src/pip/_vendor/cachecontrol/caches/file_cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import hashlib
 import os
 from textwrap import dedent
@@ -114,7 +118,7 @@ class FileCache(BaseCache):
         except FileNotFoundError:
             return None

-    def set(self, key, value):
+    def set(self, key, value, expires=None):
         name = self._fn(key)

         # Make sure the directory exists
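
FileCache only grows the extra parameter to stay compatible with the new BaseCache signature; entries on disk are still validated by the controller's header logic rather than deleted on expiry. Typical usage is unchanged (a sketch; the directory name here is arbitrary):

```python
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl
from pip._vendor.cachecontrol.caches import FileCache

# Persist cached responses on disk instead of the default in-memory dict.
sess = CacheControl(requests.Session(), cache=FileCache(".web_cache"))
resp = sess.get("https://example.com/")
```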

src/pip/_vendor/cachecontrol/caches/redis_cache.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import division

 from datetime import datetime

src/pip/_vendor/cachecontrol/compat.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 try:
     from urllib.parse import urljoin
 except ImportError:
@@ -9,7 +13,6 @@ try:
 except ImportError:
     import pickle

-
 # Handle the case where the requests module has been patched to not have
 # urllib3 bundled as part of its source.
 try:

src/pip/_vendor/cachecontrol/controller.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 """
 The httplib2 algorithms ported for use with requests.
 """
@@ -17,6 +21,8 @@ logger = logging.getLogger(__name__)

 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

+PERMANENT_REDIRECT_STATUSES = (301, 308)
+

 def parse_uri(uri):
     """Parses a URI using the regex given in Appendix B of RFC 3986.
@@ -37,7 +43,7 @@ class CacheController(object):
         self.cache = DictCache() if cache is None else cache
         self.cache_etags = cache_etags
         self.serializer = serializer or Serializer()
-        self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
+        self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)

     @classmethod
     def _urlnorm(cls, uri):
@@ -147,17 +153,18 @@ class CacheController(object):
             logger.warning("Cache entry deserialization failed, entry ignored")
             return False

-        # If we have a cached 301, return it immediately. We don't
-        # need to test our response for other headers b/c it is
+        # If we have a cached permanent redirect, return it immediately. We
+        # don't need to test our response for other headers b/c it is
         # intrinsically "cacheable" as it is Permanent.
         #
         # See:
         #   https://tools.ietf.org/html/rfc7231#section-6.4.2
         #
         # Client can try to refresh the value by repeating the request
         # with cache busting headers as usual (ie no-cache).
-        if resp.status == 301:
+        if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
             msg = (
-                'Returning cached "301 Moved Permanently" response '
+                "Returning cached permanent redirect response "
                 "(ignoring date and etag information)"
             )
             logger.debug(msg)
@@ -261,6 +268,11 @@ class CacheController(object):
         response_headers = CaseInsensitiveDict(response.headers)

+        if "date" in response_headers:
+            date = calendar.timegm(parsedate_tz(response_headers["date"]))
+        else:
+            date = 0
+
         # If we've been given a body, our response has a Content-Length, that
         # Content-Length is valid then we can check to see if the body we've
         # been given matches the expected size, and if it doesn't we'll just
@@ -304,35 +316,62 @@ class CacheController(object):
         # If we've been given an etag, then keep the response
         if self.cache_etags and "etag" in response_headers:
+            expires_time = 0
+            if response_headers.get("expires"):
+                expires = parsedate_tz(response_headers["expires"])
+                if expires is not None:
+                    expires_time = calendar.timegm(expires) - date
+
+            expires_time = max(expires_time, 14 * 86400)
+
+            logger.debug("etag object cached for {0} seconds".format(expires_time))
             logger.debug("Caching due to etag")
             self.cache.set(
-                cache_url, self.serializer.dumps(request, response, body=body)
+                cache_url,
+                self.serializer.dumps(request, response, body),
+                expires=expires_time,
             )

-        # Add to the cache any 301s. We do this before looking that
-        # the Date headers.
-        elif response.status == 301:
-            logger.debug("Caching permanant redirect")
-            self.cache.set(cache_url, self.serializer.dumps(request, response))
+        # Add to the cache any permanent redirects. We do this before looking
+        # that the Date headers.
+        elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
+            logger.debug("Caching permanent redirect")
+            self.cache.set(cache_url, self.serializer.dumps(request, response, b""))

         # Add to the cache if the response headers demand it. If there
         # is no date header then we can't do anything about expiring
         # the cache.
         elif "date" in response_headers:
-            date = calendar.timegm(parsedate_tz(response_headers["date"]))
             # cache when there is a max-age > 0
             if "max-age" in cc and cc["max-age"] > 0:
                 logger.debug("Caching b/c date exists and max-age > 0")
+                expires_time = cc["max-age"]
                 self.cache.set(
-                    cache_url, self.serializer.dumps(request, response, body=body)
+                    cache_url,
+                    self.serializer.dumps(request, response, body),
+                    expires=expires_time,
                 )

             # If the request can expire, it means we should cache it
             # in the meantime.
             elif "expires" in response_headers:
                 if response_headers["expires"]:
-                    logger.debug("Caching b/c of expires header")
+                    expires = parsedate_tz(response_headers["expires"])
+                    if expires is not None:
+                        expires_time = calendar.timegm(expires) - date
+                    else:
+                        expires_time = None
+
+                    logger.debug(
+                        "Caching b/c of expires header. expires in {0} seconds".format(
+                            expires_time
+                        )
+                    )
                     self.cache.set(
-                        cache_url, self.serializer.dumps(request, response, body=body)
+                        cache_url,
+                        self.serializer.dumps(request, response, body=body),
+                        expires=expires_time,
                    )

     def update_cached_response(self, request, response):
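
The expiry arithmetic deserves a worked example: for etag'd responses the hint is `Expires - Date`, floored at 14 days, so revalidateable entries aren't evicted too eagerly by expiry-aware backends. With hypothetical header values:

```python
import calendar
from email.utils import parsedate_tz

# Hypothetical response headers.
date_hdr = "Fri, 21 Jan 2022 15:00:00 GMT"
expires_hdr = "Fri, 28 Jan 2022 15:00:00 GMT"

date = calendar.timegm(parsedate_tz(date_hdr))
expires_time = calendar.timegm(parsedate_tz(expires_hdr)) - date
print(expires_time)  # 604800 (7 days)

# The etag branch never hints below two weeks:
expires_time = max(expires_time, 14 * 86400)
print(expires_time)  # 1209600 (14 days)
```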

src/pip/_vendor/cachecontrol/filewrapper.py

@@ -1,4 +1,9 @@
-from io import BytesIO
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from tempfile import NamedTemporaryFile
+import mmap


 class CallbackFileWrapper(object):
@@ -11,10 +16,17 @@ class CallbackFileWrapper(object):
     This class uses members with a double underscore (__) leading prefix so as
     not to accidentally shadow an attribute.

+    The data is stored in a temporary file until it is all available. As long
+    as the temporary files directory is disk-based (sometimes it's a
+    memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
+    pressure is high. For small files the disk usually won't be used at all,
+    it'll all be in the filesystem memory cache, so there should be no
+    performance impact.
     """

     def __init__(self, fp, callback):
-        self.__buf = BytesIO()
+        self.__buf = NamedTemporaryFile("rb+", delete=True)
         self.__fp = fp
         self.__callback = callback
@@ -49,7 +61,19 @@ class CallbackFileWrapper(object):

     def _close(self):
         if self.__callback:
-            self.__callback(self.__buf.getvalue())
+            if self.__buf.tell() == 0:
+                # Empty file:
+                result = b""
+            else:
+                # Return the data without actually loading it into memory,
+                # relying on Python's buffer API and mmap(). mmap() just gives
+                # a view directly into the filesystem's memory cache, so it
+                # doesn't result in duplicate memory use.
+                self.__buf.seek(0, 0)
+                result = memoryview(
+                    mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
+                )
+            self.__callback(result)

         # We assign this to None here, because otherwise we can get into
         # really tricky problems where the CPython interpreter dead locks
@@ -58,9 +82,16 @@ class CallbackFileWrapper(object):
        # and allows the garbage collector to do it's thing normally.
        self.__callback = None

+        # Closing the temporary file releases memory and frees disk space.
+        # Important when caching big files.
+        self.__buf.close()
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
-        self.__buf.write(data)
+        if data:
+            # We may be dealing with b'', a sign that things are over:
+            # it's passed e.g. after we've already closed self.__buf.
+            self.__buf.write(data)
+
         if self.__is_fp_closed():
             self._close()
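
Replacing BytesIO with a named temporary file plus mmap() means a large response body never has to exist as a single in-memory bytes object: the callback receives a zero-copy view backed by the kernel's page cache. A self-contained sketch of the same trick (payload and sizes hypothetical):

```python
import mmap
from tempfile import NamedTemporaryFile

buf = NamedTemporaryFile("rb+", delete=True)
buf.write(b"example payload " * 1024)
buf.flush()  # make sure buffered bytes reach the file before mapping

if buf.tell() == 0:
    result = b""  # mmap() cannot map an empty file
else:
    buf.seek(0, 0)
    # A zero-copy view into the filesystem cache; no duplicate allocation.
    result = memoryview(mmap.mmap(buf.fileno(), 0, access=mmap.ACCESS_READ))

print(len(result), bytes(result[:15]))
buf.close()  # frees the disk space; on POSIX the mapping stays readable
```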

src/pip/_vendor/cachecontrol/heuristics.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import calendar
 import time

src/pip/_vendor/cachecontrol/serialize.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import base64
 import io
 import json
@@ -17,24 +21,18 @@
     return _b64_decode_bytes(s).decode("utf8")

-class Serializer(object):
+_default_body_read = object()
+
+
+class Serializer(object):
     def dumps(self, request, response, body=None):
         response_headers = CaseInsensitiveDict(response.headers)

         if body is None:
+            # When a body isn't passed in, we'll read the response. We
+            # also update the response with a new file handler to be
+            # sure it acts as though it was never read.
             body = response.read(decode_content=False)
-
-            # NOTE: 99% sure this is dead code. I'm only leaving it
-            #       here b/c I don't have a test yet to prove
-            #       it. Basically, before using
-            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
-            #       this made an effort to reset the file handle. The
-            #       `CallbackFileWrapper` short circuits this code by
-            #       setting the body as the content is consumed, the
-            #       result being a `body` argument is *always* passed
-            #       into cache_response, and in turn,
-            #       `Serializer.dump`.
             response._fp = io.BytesIO(body)

         # NOTE: This is all a bit weird, but it's really important that on

src/pip/_vendor/cachecontrol/wrapper.py

@@ -1,3 +1,7 @@
+# SPDX-FileCopyrightText: 2015 Eric Larson
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from .adapter import CacheControlAdapter
 from .cache import DictCache
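
wrapper.py only gains the license header; it remains the public entry point that wires DictCache and CacheControlAdapter together. For context, the documented basic usage (upstream import path shown; pip itself imports via pip._vendor):

```python
import requests
from cachecontrol import CacheControl

# Wrap a session; GET traffic is routed through CacheControlAdapter
# backed by the default in-memory DictCache.
sess = CacheControl(requests.Session())

resp = sess.get("https://example.com/")  # goes to the network
resp = sess.get("https://example.com/")  # may be answered from cache
```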

src/pip/_vendor/vendor.txt

@@ -1,4 +1,4 @@
-CacheControl==0.12.6 # Make sure to update the license in pyproject.toml for this.
+CacheControl==0.12.10 # Make sure to update the license in pyproject.toml for this.
 colorama==0.4.4
 distlib==0.3.3
 distro==1.6.0