From 0337e3399d98a8df019af03d071833befd46e5ea Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Tue, 19 Jan 2016 17:16:25 -0500 Subject: [PATCH] Upgrade CacheControl to 0.11.6 --- pip/_vendor/cachecontrol/__init__.py | 2 +- pip/_vendor/cachecontrol/_cmd.py | 60 ++++++++++++++++ pip/_vendor/cachecontrol/compat.py | 6 ++ pip/_vendor/cachecontrol/controller.py | 94 ++++++++++++++++++++------ pip/_vendor/cachecontrol/heuristics.py | 12 ++-- pip/_vendor/cachecontrol/serialize.py | 12 +++- pip/_vendor/vendor.txt | 2 +- 7 files changed, 159 insertions(+), 29 deletions(-) create mode 100644 pip/_vendor/cachecontrol/_cmd.py diff --git a/pip/_vendor/cachecontrol/__init__.py b/pip/_vendor/cachecontrol/__init__.py index d6af9b934..724e220dc 100644 --- a/pip/_vendor/cachecontrol/__init__.py +++ b/pip/_vendor/cachecontrol/__init__.py @@ -4,7 +4,7 @@ Make it easy to import from cachecontrol without long namespaces. """ __author__ = 'Eric Larson' __email__ = 'eric@ionrock.org' -__version__ = '0.11.5' +__version__ = '0.11.6' from .wrapper import CacheControl from .adapter import CacheControlAdapter diff --git a/pip/_vendor/cachecontrol/_cmd.py b/pip/_vendor/cachecontrol/_cmd.py new file mode 100644 index 000000000..afdcc88c2 --- /dev/null +++ b/pip/_vendor/cachecontrol/_cmd.py @@ -0,0 +1,60 @@ +import logging + +from pip._vendor import requests + +from pip._vendor.cachecontrol.adapter import CacheControlAdapter +from pip._vendor.cachecontrol.cache import DictCache +from pip._vendor.cachecontrol.controller import logger + +from argparse import ArgumentParser + + +def setup_logging(): + logger.setLevel(logging.DEBUG) + handler = logging.StreamHandler() + logger.addHandler(handler) + + +def get_session(): + adapter = CacheControlAdapter( + DictCache(), + cache_etags=True, + serializer=None, + heuristic=None, + ) + sess = requests.Session() + sess.mount('http://', adapter) + sess.mount('https://', adapter) + + sess.cache_controller = adapter.controller + return sess + + +def get_args(): + parser = ArgumentParser() + parser.add_argument('url', help='The URL to try and cache') + return parser.parse_args() + + +def main(args=None): + args = get_args() + sess = get_session() + + # Make a request to get a response + resp = sess.get(args.url) + + # Turn on logging + setup_logging() + + # try setting the cache + sess.cache_controller.cache_response(resp.request, resp.raw) + + # Now try to get it + if sess.cache_controller.cached_request(resp.request): + print('Cached!') + else: + print('Not cached :(') + + +if __name__ == '__main__': + main() diff --git a/pip/_vendor/cachecontrol/compat.py b/pip/_vendor/cachecontrol/compat.py index 9878becff..018e6ac5a 100644 --- a/pip/_vendor/cachecontrol/compat.py +++ b/pip/_vendor/cachecontrol/compat.py @@ -12,3 +12,9 @@ except ImportError: from pip._vendor.requests.packages.urllib3.response import HTTPResponse from pip._vendor.requests.packages.urllib3.util import is_fp_closed + +# Replicate some six behaviour +try: + text_type = (unicode,) +except NameError: + text_type = (str,) diff --git a/pip/_vendor/cachecontrol/controller.py b/pip/_vendor/cachecontrol/controller.py index f489b98cd..6e591f8b0 100644 --- a/pip/_vendor/cachecontrol/controller.py +++ b/pip/_vendor/cachecontrol/controller.py @@ -1,6 +1,7 @@ """ The httplib2 algorithms ported for use with requests. """ +import logging import re import calendar import time @@ -12,6 +13,8 @@ from .cache import DictCache from .serialize import Serializer +logger = logging.getLogger(__name__) + URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") @@ -86,23 +89,28 @@ class CacheController(object): return False. """ cache_url = self.cache_url(request.url) + logger.debug('Looking up "%s" in the cache', cache_url) cc = self.parse_cache_control(request.headers) - # non-caching states - no_cache = True if 'no-cache' in cc else False - if 'max-age' in cc and cc['max-age'] == 0: - no_cache = True - - # Bail out if no-cache was set - if no_cache: + # Bail out if the request insists on fresh data + if 'no-cache' in cc: + logger.debug('Request header has "no-cache", cache bypassed') return False - # It is in the cache, so lets see if it is going to be - # fresh enough - resp = self.serializer.loads(request, self.cache.get(cache_url)) + if 'max-age' in cc and cc['max-age'] == 0: + logger.debug('Request header has "max_age" as 0, cache bypassed') + return False - # Check to see if we have a cached object + # Request allows serving from the cache, let's see if we find something + cache_data = self.cache.get(cache_url) + if cache_data is None: + logger.debug('No cache entry available') + return False + + # Check whether it can be deserialized + resp = self.serializer.loads(request, cache_data) if not resp: + logger.warning('Cache entry deserialization failed, entry ignored') return False # If we have a cached 301, return it immediately. We don't @@ -114,14 +122,19 @@ class CacheController(object): # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). if resp.status == 301: + msg = ('Returning cached "301 Moved Permanently" response ' + '(ignoring date and etag information)') + logger.debug(msg) return resp headers = CaseInsensitiveDict(resp.headers) if not headers or 'date' not in headers: - # With date or etag, the cached response can never be used - # and should be deleted. if 'etag' not in headers: + # Without date or etag, the cached response can never be used + # and should be deleted. + logger.debug('Purging cached response: no date or etag') self.cache.delete(cache_url) + logger.debug('Ignoring cached response: no date') return False now = time.time() @@ -129,6 +142,7 @@ class CacheController(object): parsedate_tz(headers['date']) ) current_age = max(0, now - date) + logger.debug('Current age based on date: %i', current_age) # TODO: There is an assumption that the result will be a # urllib3 response object. This may not be best since we @@ -142,6 +156,8 @@ class CacheController(object): # Check the max-age pragma in the cache control header if 'max-age' in resp_cc and resp_cc['max-age'].isdigit(): freshness_lifetime = int(resp_cc['max-age']) + logger.debug('Freshness lifetime from max-age: %i', + freshness_lifetime) # If there isn't a max-age, check for an expires header elif 'expires' in headers: @@ -149,11 +165,16 @@ class CacheController(object): if expires is not None: expire_time = calendar.timegm(expires) - date freshness_lifetime = max(0, expire_time) + logger.debug("Freshness lifetime from expires: %i", + freshness_lifetime) - # determine if we are setting freshness limit in the req + # Determine if we are setting freshness limit in the + # request. Note, this overrides what was in the response. if 'max-age' in cc: try: freshness_lifetime = int(cc['max-age']) + logger.debug('Freshness lifetime from request max-age: %i', + freshness_lifetime) except ValueError: freshness_lifetime = 0 @@ -164,15 +185,20 @@ class CacheController(object): min_fresh = 0 # adjust our current age by our min fresh current_age += min_fresh + logger.debug('Adjusted current age from min-fresh: %i', + current_age) - # see how fresh we actually are - fresh = (freshness_lifetime > current_age) - - if fresh: + # Return entry if it is fresh enough + if freshness_lifetime > current_age: + logger.debug('The response is "fresh", returning cached response') + logger.debug('%i > %i', freshness_lifetime, current_age) return resp # we're not fresh. If we don't have an Etag, clear it out if 'etag' not in headers: + logger.debug( + 'The cached response is "stale" with no etag, purging' + ) self.cache.delete(cache_url) # return the original handler @@ -202,23 +228,48 @@ class CacheController(object): """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests - if response.status not in [200, 203, 300, 301]: + cacheable_status_codes = [200, 203, 300, 301] + if response.status not in cacheable_status_codes: + logger.debug( + 'Status code %s not in %s', + response.status, + cacheable_status_codes + ) return response_headers = CaseInsensitiveDict(response.headers) + # If we've been given a body, our response has a Content-Length, that + # Content-Length is valid then we can check to see if the body we've + # been given matches the expected size, and if it doesn't we'll just + # skip trying to cache it. + if (body is not None and + "content-length" in response_headers and + response_headers["content-length"].isdigit() and + int(response_headers["content-length"]) != len(body)): + return + cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) cache_url = self.cache_url(request.url) + logger.debug('Updating cache with response from "%s"', cache_url) # Delete it from the cache if we happen to have it stored there - no_store = cc.get('no-store') or cc_req.get('no-store') + no_store = False + if cc.get('no-store'): + no_store = True + logger.debug('Response header has "no-store"') + if cc_req.get('no-store'): + no_store = True + logger.debug('Request header has "no-store"') if no_store and self.cache.get(cache_url): + logger.debug('Purging existing cache entry to honor "no-store"') self.cache.delete(cache_url) # If we've been given an etag, then keep the response if self.cache_etags and 'etag' in response_headers: + logger.debug('Caching due to etag') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), @@ -227,6 +278,7 @@ class CacheController(object): # Add to the cache any 301s. We do this before looking that # the Date headers. elif response.status == 301: + logger.debug('Caching permanant redirect') self.cache.set( cache_url, self.serializer.dumps(request, response) @@ -239,6 +291,7 @@ class CacheController(object): # cache when there is a max-age > 0 if cc and cc.get('max-age'): if int(cc['max-age']) > 0: + logger.debug('Caching b/c date exists and max-age > 0') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), @@ -248,6 +301,7 @@ class CacheController(object): # in the meantime. elif 'expires' in response_headers: if response_headers['expires']: + logger.debug('Caching b/c of expires header') self.cache.set( cache_url, self.serializer.dumps(request, response, body=body), diff --git a/pip/_vendor/cachecontrol/heuristics.py b/pip/_vendor/cachecontrol/heuristics.py index 01b631416..94715a4e7 100644 --- a/pip/_vendor/cachecontrol/heuristics.py +++ b/pip/_vendor/cachecontrol/heuristics.py @@ -40,10 +40,14 @@ class BaseHeuristic(object): return {} def apply(self, response): - warning_header_value = self.warning(response) - response.headers.update(self.update_headers(response)) - if warning_header_value is not None: - response.headers.update({'Warning': warning_header_value}) + updated_headers = self.update_headers(response) + + if updated_headers: + response.headers.update(updated_headers) + warning_header_value = self.warning(response) + if warning_header_value is not None: + response.headers.update({'Warning': warning_header_value}) + return response diff --git a/pip/_vendor/cachecontrol/serialize.py b/pip/_vendor/cachecontrol/serialize.py index e803a880a..ffbfbf6f8 100644 --- a/pip/_vendor/cachecontrol/serialize.py +++ b/pip/_vendor/cachecontrol/serialize.py @@ -5,7 +5,7 @@ import zlib from pip._vendor.requests.structures import CaseInsensitiveDict -from .compat import HTTPResponse, pickle +from .compat import HTTPResponse, pickle, text_type def _b64_encode_bytes(b): @@ -16,6 +16,12 @@ def _b64_encode_str(s): return _b64_encode_bytes(s.encode("utf8")) +def _b64_encode(s): + if isinstance(s, text_type): + return _b64_encode_str(s) + return _b64_encode_bytes(s) + + def _b64_decode_bytes(b): return base64.b64decode(b.encode("ascii")) @@ -48,7 +54,7 @@ class Serializer(object): "response": { "body": _b64_encode_bytes(body), "headers": dict( - (_b64_encode_str(k), _b64_encode_str(v)) + (_b64_encode(k), _b64_encode(v)) for k, v in response.headers.items() ), "status": response.status, @@ -69,7 +75,7 @@ class Serializer(object): # Encode our Vary headers to ensure they can be serialized as JSON data["vary"] = dict( - (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v) + (_b64_encode(k), _b64_encode(v) if v is not None else v) for k, v in data["vary"].items() ) diff --git a/pip/_vendor/vendor.txt b/pip/_vendor/vendor.txt index b959735a9..72231f75a 100644 --- a/pip/_vendor/vendor.txt +++ b/pip/_vendor/vendor.txt @@ -3,7 +3,7 @@ html5lib==1.0b8 six==1.10.0 colorama==0.3.6 requests==2.9.1 -CacheControl==0.11.5 +CacheControl==0.11.6 lockfile==0.12.2 progress==1.2 ipaddress==1.0.16 # Only needed on 2.6 and 2.7