mirror of https://github.com/pypa/pip
New cache key generation algorithm
Instead of building an URL-ish string that could be complex to describe and reproduce, generate a dictionary that is hashed with a simple algorithm.
This commit is contained in:
parent
e0165e7b30
commit
c4ef6163e5
|
@ -22,13 +22,25 @@ from pip._internal.utils.typing import MYPY_CHECK_RUNNING
|
|||
from pip._internal.utils.urls import path_to_url
|
||||
|
||||
if MYPY_CHECK_RUNNING:
|
||||
from typing import Optional, Set, List, Any
|
||||
from typing import Optional, Set, List, Any, Dict
|
||||
from pip._internal.models.format_control import FormatControl
|
||||
from pip._internal.pep425tags import Pep425Tag
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _hash_dict(d):
|
||||
# type: (Dict[str, str]) -> str
|
||||
"""Return a sha224 of a dictionary where keys and values are strings."""
|
||||
h = hashlib.new('sha224')
|
||||
for k in sorted(d.keys()):
|
||||
h.update(k.encode())
|
||||
h.update("=".encode())
|
||||
h.update(d[k].encode())
|
||||
h.update(b"\0")
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
class Cache(object):
|
||||
"""An abstract class - provides cache directories for data from links
|
||||
|
||||
|
@ -58,32 +70,28 @@ class Cache(object):
|
|||
# We want to generate an url to use as our cache key, we don't want to
|
||||
# just re-use the URL because it might have other items in the fragment
|
||||
# and we don't care about those.
|
||||
key_parts = [link.url_without_fragment]
|
||||
key_parts = {"url": link.url_without_fragment}
|
||||
if link.hash_name is not None and link.hash is not None:
|
||||
key_parts.append("=".join([link.hash_name, link.hash]))
|
||||
key_parts[link.hash_name] = link.hash
|
||||
if link.subdirectory_fragment:
|
||||
key_parts.append(
|
||||
"=".join(["subdirectory", link.subdirectory_fragment])
|
||||
)
|
||||
key_url = "#".join(key_parts)
|
||||
key_parts["subdirectory"] = link.subdirectory_fragment
|
||||
|
||||
# Include interpreter name, major and minor version in cache key
|
||||
# to cope with ill-behaved sdists that build a different wheel
|
||||
# depending on the python version their setup.py is being run on,
|
||||
# and don't encode the difference in compatibility tags.
|
||||
# https://github.com/pypa/pip/issues/7296
|
||||
key = "{}-{}.{} {}".format(
|
||||
key_parts["interpreter"] = "{}-{}.{}".format(
|
||||
interpreter_name(),
|
||||
sys.version_info[0],
|
||||
sys.version_info[1],
|
||||
key_url,
|
||||
)
|
||||
|
||||
# Encode our key url with sha224, we'll use this because it has similar
|
||||
# security properties to sha256, but with a shorter total output (and
|
||||
# thus less secure). However the differences don't make a lot of
|
||||
# difference for our use case here.
|
||||
hashed = hashlib.sha224(key.encode()).hexdigest()
|
||||
hashed = _hash_dict(key_parts)
|
||||
|
||||
# We want to nest the directories some to prevent having a ton of top
|
||||
# level directories where we might run out of sub directories on some
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import os
|
||||
|
||||
from pip._internal.cache import WheelCache
|
||||
from pip._internal.cache import WheelCache, _hash_dict
|
||||
from pip._internal.models.format_control import FormatControl
|
||||
from pip._internal.models.link import Link
|
||||
from pip._internal.utils.compat import expanduser
|
||||
|
@ -42,3 +42,10 @@ def test_wheel_name_filter(tmpdir):
|
|||
assert wc.get(link, "package", [("py3", "none", "any")]) is not link
|
||||
# package2 does not match wheel name
|
||||
assert wc.get(link, "package2", [("py3", "none", "any")]) is link
|
||||
|
||||
|
||||
def test_cache_hash():
|
||||
h = _hash_dict({"url": "https://g.c/o/r"})
|
||||
assert h == "c7d60d08b1079254d236e983501fa26c016d58d16010725b27ed0af2"
|
||||
h = _hash_dict({"url": "https://g.c/o/r", "subdirectory": "sd"})
|
||||
assert h == "9cba35d4ccf04b7cde751b44db347fd0f21fa47d1276e32f9d47864c"
|
||||
|
|
Loading…
Reference in New Issue