Fix link hashes

2023-12-13 21:30:23 +01:00 · 2022-06-25 15:37:43 -04:00 · 2022-06-25 15:37:43 -04:00 · 6f9ccfc136
parent d67cac3c13
commit 6f9ccfc136
3 changed files with 21 additions and 18 deletions
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -34,7 +34,7 @@ from pip._vendor.requests import Response
 from pip._vendor.requests.exceptions import RetryError, SSLError

 from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import SUPPORTED_HASHES, Link
+from pip._internal.models.link import Link
 from pip._internal.models.search_scope import SearchScope
 from pip._internal.network.session import PipSession
 from pip._internal.network.utils import raise_for_status
@@ -274,11 +274,6 @@ def _clean_link(url: str) -> str:
    return urllib.parse.urlunparse(result._replace(path=path))


-_HASH_RE = re.compile(
-    r"({choices})=([a-f0-9]+)".format(choices="|".join(SUPPORTED_HASHES))
-)
-
-
 def _create_link_from_element(
    element_attribs: Dict[str, Optional[str]],
    page_url: str,
@@ -295,17 +290,11 @@ def _create_link_from_element(
    pyrequire = element_attribs.get("data-requires-python")
    yanked_reason = element_attribs.get("data-yanked")

-    hashes = {}
-    hm = _HASH_RE.search(url)
-    if hm is not None:
-        hashes[hm.group(1).lower()] = hm.group(2)
-
    link = Link(
        url,
        comes_from=page_url,
        requires_python=pyrequire,
        yanked_reason=yanked_reason,
-        hashes=hashes,
    )

    return link
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)

 # Order matters, earlier hashes have a precedence over later hashes for what
 # we will pick to use.
-SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
+_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")


 class Link(KeyBasedCompareMixin):
@@ -179,18 +179,32 @@ class Link(KeyBasedCompareMixin):
            return None
        return match.group(1)

+    _hash_re = re.compile(
+        r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
+    )
+
    @property
    def hash(self) -> Optional[str]:
-        for hashname in SUPPORTED_HASHES:
+        for hashname in _SUPPORTED_HASHES:
            if hashname in self._hashes:
                return self._hashes[hashname]
+
+        match = self._hash_re.search(self._url)
+        if match:
+            return match.group(2)
+
        return None

    @property
    def hash_name(self) -> Optional[str]:
-        for hashname in SUPPORTED_HASHES:
+        for hashname in _SUPPORTED_HASHES:
            if hashname in self._hashes:
                return hashname
+
+        match = self._hash_re.search(self._url)
+        if match:
+            return match.group(1)
+
        return None

    @property
@@ -284,7 +298,7 @@ def _clean_link(link: Link) -> _CleanResult:
        subdirectory = ""
    # If there are multiple hash values under the same algorithm, use the
    # first one. This matches the behavior of Link.hash_value.
-    hashes = {k: fragment[k][0] for k in SUPPORTED_HASHES if k in fragment}
+    hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
    return _CleanResult(
        parsed=parsed._replace(netloc=netloc, query="", fragment=""),
        query=urllib.parse.parse_qs(parsed.query),
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -201,7 +201,7 @@ def test_get_simple_response_no_head(
                "Cache-Control": "max-age=0",
            },
        ),
-        mock.call().headers.get("Content-Type", ""),
+        mock.call().headers.get("Content-Type", "Unknown"),
    ]
    mock_raise_for_status.assert_called_once_with(resp)

@@ -667,7 +667,7 @@ def test_get_index_content_invalid_content_type(
    assert (
        "pip._internal.index.collector",
        logging.WARNING,
-        "Skipping page {} because the GET request got Content-Type: {}."
+        "Skipping page {} because the GET request got Content-Type: {}. "
        "The only supported Content-Types are application/vnd.pypi.simple.v1+json, "
        "application/vnd.pypi.simple.v1+html, and text/html'".format(url, content_type),
    ) in caplog.record_tuples