Use legacy cache entries when they exist.

Pip 20 changes the cache key format to include the
interpreter name. To avoid invalidating all existing caches,
we continue using existing cache entries that were computed
with the legacy algorithm. This should not regress issue #3025
because wheels cached in such legacy entries should have
the Python implementation tag set.
This commit is contained in:
Stéphane Bidoul (ACSONE) 2019-11-17 11:44:20 +01:00
parent c4ef6163e5
commit 66ba51ca7d
No known key found for this signature in database
GPG Key ID: BCAB2555446B5B92
2 changed files with 70 additions and 8 deletions

View File

@ -4,7 +4,6 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
import errno
import hashlib
import logging
import os
@ -62,6 +61,34 @@ class Cache(object):
_valid_formats = {"source", "binary"}
assert self.allowed_formats.union(_valid_formats) == _valid_formats
def _get_cache_path_parts_legacy(self, link):
# type: (Link) -> List[str]
"""Get parts of part that must be os.path.joined with cache_dir
Legacy cache key (pip < 20) for compatibility with older caches.
"""
# We want to generate an url to use as our cache key, we don't want to
# just re-use the URL because it might have other items in the fragment
# and we don't care about those.
key_parts = [link.url_without_fragment]
if link.hash_name is not None and link.hash is not None:
key_parts.append("=".join([link.hash_name, link.hash]))
key_url = "#".join(key_parts)
# Encode our key url with sha224, we'll use this because it has similar
# security properties to sha256, but with a shorter total output (and
# thus less secure). However the differences don't make a lot of
# difference for our use case here.
hashed = hashlib.sha224(key_url.encode()).hexdigest()
# We want to nest the directories some to prevent having a ton of top
# level directories where we might run out of sub directories on some
# FS.
parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
return parts
def _get_cache_path_parts(self, link):
# type: (Link) -> List[str]
"""Get parts of part that must be os.path.joined with cache_dir
@ -116,13 +143,19 @@ class Cache(object):
if not self.allowed_formats.intersection(formats):
return []
root = self.get_path_for_link(link)
try:
return os.listdir(root)
except OSError as err:
if err.errno in {errno.ENOENT, errno.ENOTDIR}:
return []
raise
candidates = []
path = self.get_path_for_link(link)
if os.path.isdir(path):
candidates.extend(os.listdir(path))
# TODO remove legacy path lookup in pip>=21
legacy_path = self.get_path_for_link_legacy(link)
if os.path.isdir(legacy_path):
candidates.extend(os.listdir(legacy_path))
return candidates
def get_path_for_link_legacy(self, link):
    # type: (Link) -> str
    """Return the legacy cache directory for link.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
def get_path_for_link(self, link):
# type: (Link) -> str
@ -164,6 +197,11 @@ class SimpleWheelCache(Cache):
cache_dir, format_control, {"binary"}
)
def get_path_for_link_legacy(self, link):
    # type: (Link) -> str
    """Return the directory computed from the legacy cache key for link.

    The path is cache_dir, then "wheels", then the legacy path parts.
    """
    legacy_parts = self._get_cache_path_parts_legacy(link)
    segments = [self.cache_dir, "wheels"]
    segments.extend(legacy_parts)
    return os.path.join(*segments)
def get_path_for_link(self, link):
# type: (Link) -> str
"""Return a directory to store cached wheels for link
@ -256,6 +294,10 @@ class WheelCache(Cache):
self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
self._ephem_cache = EphemWheelCache(format_control)
def get_path_for_link_legacy(self, link):
    # type: (Link) -> str
    """Delegate the legacy path lookup for link to the wrapped wheel cache."""
    wheel_cache = self._wheel_cache
    return wheel_cache.get_path_for_link_legacy(link)
def get_path_for_link(self, link):
    # type: (Link) -> str
    """Delegate the path lookup for link to the wrapped wheel cache."""
    wheel_cache = self._wheel_cache
    return wheel_cache.get_path_for_link(link)

View File

@ -49,3 +49,23 @@ def test_cache_hash():
assert h == "c7d60d08b1079254d236e983501fa26c016d58d16010725b27ed0af2"
h = _hash_dict({"url": "https://g.c/o/r", "subdirectory": "sd"})
assert h == "9cba35d4ccf04b7cde751b44db347fd0f21fa47d1276e32f9d47864c"
def test_get_path_for_link_legacy(tmpdir):
    """
    Check that a cache entry created with the legacy hashing mechanism is
    still returned as a candidate, alongside entries stored under the
    current cache path.
    """
    wheel_cache = WheelCache(tmpdir, FormatControl())
    link = Link("https://g.c/o/r")
    current_dir = wheel_cache.get_path_for_link(link)
    legacy_dir = wheel_cache.get_path_for_link_legacy(link)
    assert current_dir != legacy_dir

    # Put one empty wheel file in each location, current path first.
    wheels_by_dir = (
        (current_dir, "test-pyz-none-any.whl"),
        (legacy_dir, "test-pyx-none-any.whl"),
    )
    for directory, wheel_name in wheels_by_dir:
        ensure_dir(directory)
        with open(os.path.join(directory, wheel_name), "w"):
            pass

    found = set(wheel_cache._get_candidates(link, "test"))
    assert found == {"test-pyx-none-any.whl", "test-pyz-none-any.whl"}