
Use data-dist-info-metadata (PEP 658) to decouple resolution from downloading (#11111)

Co-authored-by: Tzu-ping Chung <uranusjr@gmail.com>
Danny McClanahan, 2022-09-10 10:28:57 +00:00, committed by GitHub
parent a66406a409
commit bad03ef931
14 changed files with 834 additions and 180 deletions

news/11111.feature.rst (new file)

@ -0,0 +1 @@
Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet.
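
For context: a PEP 658-enabled index serves each file's core metadata at the file URL plus a ``.metadata`` suffix, and advertises it via the ``data-dist-info-metadata`` attribute on the file's anchor tag; the value is either ``true`` (the metadata file exists, no checksum given) or ``<hashname>=<hashvalue>`` naming a checksum of that file. A minimal sketch using the ``Link`` classmethods this commit adds in models/link.py below (URL and digest are illustrative placeholders):

from pip._internal.models.link import Link

link = Link.from_element(
    {
        "href": "/holygrail-1.0-py3-none-any.whl",
        "data-dist-info-metadata": "sha256=0123abcd",  # placeholder digest
    },
    page_url="https://index.example.com/holygrail/",
    base_url="https://index.example.com/holygrail/",
)
metadata_link = link.metadata_link()
# metadata_link.url == "https://index.example.com/holygrail-1.0-py3-none-any.whl.metadata"
# metadata_link.link_hash == LinkHash("sha256", "0123abcd"), checked on download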


@ -335,8 +335,8 @@ class MetadataInconsistent(InstallationError):
"""Built metadata contains inconsistent information.
This is raised when the metadata contains values (e.g. name and version)
that do not match the information previously obtained from sdist filename
or user-supplied ``#egg=`` value.
that do not match the information previously obtained from sdist filename,
user-supplied ``#egg=`` value, or an install requirement name.
"""
def __init__(
@ -348,11 +348,10 @@ class MetadataInconsistent(InstallationError):
self.m_val = m_val
def __str__(self) -> str:
template = (
"Requested {} has inconsistent {}: "
"filename has {!r}, but metadata has {!r}"
return (
f"Requested {self.ireq} has inconsistent {self.field}: "
f"expected {self.f_val!r}, but metadata has {self.m_val!r}"
)
return template.format(self.ireq, self.field, self.f_val, self.m_val)
class LegacyInstallFailure(DiagnosticPipError):
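
With the f-string rewrite, the message reports the expected value rather than assuming it came from a filename, which also fits the PEP 658 name-consistency check added in prepare.py below. Roughly (the first argument is normally an InstallRequirement; any object with a sensible str() shows the format):

from pip._internal.exceptions import MetadataInconsistent

err = MetadataInconsistent("simple==1.0", "Name", "simple", "other-name")
print(err)
# Requested simple==1.0 has inconsistent Name: expected 'simple', but metadata has 'other-name'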


@ -9,10 +9,8 @@ import itertools
import json
import logging
import os
import re
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from html.parser import HTMLParser
from optparse import Values
from typing import (
@ -39,7 +37,7 @@ from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import pairwise, redact_auth_from_url
from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs
from .sources import CandidatesFromPage, LinkSource, build_source
@ -51,7 +49,6 @@ else:
logger = logging.getLogger(__name__)
HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]
@ -191,94 +188,6 @@ def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]:
return None
def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
return urllib.parse.quote(urllib.parse.unquote(part))
def _clean_file_url_path(part: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))
# percent-encoded: /
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
def _clean_url_path(path: str, is_local_path: bool) -> str:
"""
Clean the path portion of a URL.
"""
if is_local_path:
clean_func = _clean_file_url_path
else:
clean_func = _clean_url_path_part
# Split on the reserved characters prior to cleaning so that
# revision strings in VCS URLs are properly preserved.
parts = _reserved_chars_re.split(path)
cleaned_parts = []
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
cleaned_parts.append(clean_func(to_clean))
# Normalize %xx escapes (e.g. %2f -> %2F)
cleaned_parts.append(reserved.upper())
return "".join(cleaned_parts)
def _clean_link(url: str) -> str:
"""
Make sure a link is fully quoted.
For example, if ' ' occurs in the URL, it will be replaced with "%20",
and without double-quoting other characters.
"""
# Split the URL into parts according to the general structure
# `scheme://netloc/path;parameters?query#fragment`.
result = urllib.parse.urlparse(url)
# If the netloc is empty, then the URL refers to a local filesystem path.
is_local_path = not result.netloc
path = _clean_url_path(result.path, is_local_path=is_local_path)
return urllib.parse.urlunparse(result._replace(path=path))
def _create_link_from_element(
element_attribs: Dict[str, Optional[str]],
page_url: str,
base_url: str,
) -> Optional[Link]:
"""
Convert an anchor element's attributes in a simple repository page to a Link.
"""
href = element_attribs.get("href")
if not href:
return None
url = _clean_link(urllib.parse.urljoin(base_url, href))
pyrequire = element_attribs.get("data-requires-python")
yanked_reason = element_attribs.get("data-yanked")
link = Link(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
)
return link
class CacheablePageContent:
def __init__(self, page: "IndexContent") -> None:
assert page.cache_link_parsing
@ -326,25 +235,10 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
data = json.loads(page.content)
for file in data.get("files", []):
file_url = file.get("url")
if file_url is None:
link = Link.from_json(file, page.url)
if link is None:
continue
# The Link.yanked_reason expects an empty string instead of a boolean.
yanked_reason = file.get("yanked")
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
# The Link.yanked_reason expects None instead of False
elif not yanked_reason:
yanked_reason = None
yield Link(
_clean_link(urllib.parse.urljoin(page.url, file_url)),
comes_from=page.url,
requires_python=file.get("requires-python"),
yanked_reason=yanked_reason,
hashes=file.get("hashes", {}),
)
yield link
return
parser = HTMLLinkParser(page.url)
@ -354,11 +248,7 @@ def parse_links(page: "IndexContent") -> Iterable[Link]:
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
link = _create_link_from_element(
anchor,
page_url=url,
base_url=base_url,
)
link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
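
The quoting helper itself is unchanged apart from its new name and new home in models/link.py; per its docstring, for example (URL illustrative; the leading underscore marks it as internal):

from pip._internal.models.link import _ensure_quoted_url

_ensure_quoted_url("https://example.com/some dir/file 1.0.tar.gz")
# -> "https://example.com/some%20dir/file%201.0.tar.gz"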


@ -103,3 +103,25 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio
:param canonical_name: Normalized project name of the given wheel.
"""
return select_backend().Distribution.from_wheel(wheel, canonical_name)
def get_metadata_distribution(
metadata_contents: bytes,
filename: str,
canonical_name: str,
) -> BaseDistribution:
"""Get the dist representation of the specified METADATA file contents.
This returns a Distribution instance from the chosen backend sourced from the data
in `metadata_contents`.
:param metadata_contents: Contents of a METADATA file within a dist, or one served
via PEP 658.
:param filename: Filename for the dist this metadata represents.
:param canonical_name: Normalized project name of the given dist.
"""
return select_backend().Distribution.from_metadata_file_contents(
metadata_contents,
filename,
canonical_name,
)
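
A sketch of how the preparer (in prepare.py, further down) uses this entry point; the bytes here are a truncated stand-in for real METADATA contents:

from pip._internal.metadata import get_metadata_distribution

metadata_contents = b"Metadata-Version: 2.1\nName: simple\nVersion: 1.0\n"
dist = get_metadata_distribution(metadata_contents, "simple-1.0.tar.gz", "simple")
dist.raw_name  # -> "simple"; checked against the requirement name during preparation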


@ -113,6 +113,24 @@ class BaseDistribution(Protocol):
"""
raise NotImplementedError()
@classmethod
def from_metadata_file_contents(
cls,
metadata_contents: bytes,
filename: str,
project_name: str,
) -> "BaseDistribution":
"""Load the distribution from the contents of a METADATA file.
This is used to implement PEP 658 by generating a "shallow" dist object that can
be used for resolution without downloading or building the actual dist yet.
:param metadata_contents: The contents of a METADATA file.
:param filename: File name for the dist with this metadata.
:param project_name: Name of the project this dist represents.
"""
raise NotImplementedError()
@classmethod
def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution":
"""Load the distribution from a given wheel.


@ -28,6 +28,7 @@ from pip._internal.metadata.base import (
)
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file
from ._compat import BasePath, get_dist_name
@ -109,6 +110,23 @@ class Distribution(BaseDistribution):
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)
@classmethod
def from_metadata_file_contents(
cls,
metadata_contents: bytes,
filename: str,
project_name: str,
) -> BaseDistribution:
# Generate temp dir to contain the metadata file, and write the file contents.
temp_dir = pathlib.Path(
TempDirectory(kind="metadata", globally_managed=True).path
)
metadata_path = temp_dir / "METADATA"
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
return cls(dist, metadata_path.parent, None)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:


@ -33,7 +33,7 @@ class EntryPoint(NamedTuple):
group: str
class WheelMetadata:
class InMemoryMetadata:
"""IMetadataProvider that reads metadata files from a dictionary.
This also maps metadata decoding exceptions to our internal exception type.
@ -92,12 +92,29 @@ class Distribution(BaseDistribution):
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
return cls(dist)
@classmethod
def from_metadata_file_contents(
cls,
metadata_contents: bytes,
filename: str,
project_name: str,
) -> BaseDistribution:
metadata_dict = {
"METADATA": metadata_contents,
}
dist = pkg_resources.DistInfoDistribution(
location=filename,
metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
return cls(dist)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
try:
with wheel.as_zipfile() as zf:
info_dir, _ = parse_wheel(zf, name)
metadata_text = {
metadata_dict = {
path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path)
for path in zf.namelist()
if path.startswith(f"{info_dir}/")
@ -108,7 +125,7 @@ class Distribution(BaseDistribution):
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
dist = pkg_resources.DistInfoDistribution(
location=wheel.location,
metadata=WheelMetadata(metadata_text, wheel.location),
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
return cls(dist)


@ -1,11 +1,14 @@
import functools
import itertools
import logging
import os
import posixpath
import re
import urllib.parse
from dataclasses import dataclass
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Mapping,
@ -18,6 +21,7 @@ from typing import (
from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.misc import (
pairwise,
redact_auth_from_url,
split_auth_from_netloc,
splitext,
@ -36,6 +40,119 @@ logger = logging.getLogger(__name__)
_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
@dataclass(frozen=True)
class LinkHash:
"""Links to content may have embedded hash values. This class parses those.
`name` must be a member of `_SUPPORTED_HASHES`.
This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to
be JSON-serializable to conform to PEP 610, this class contains the logic for
parsing a hash name and value for correctness, and then checking whether that hash
conforms to a schema with `.is_hash_allowed()`."""
name: str
value: str
_hash_re = re.compile(
# NB: we do not validate that the second group (.*) is a valid hex
# digest. Instead, we simply keep that string in this class, and then check it
# against Hashes when hash-checking is needed. This is easier to debug than
# proactively discarding an invalid hex digest, as we handle incorrect hashes
# and malformed hashes in the same place.
r"({choices})=(.*)".format(
choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
),
)
def __post_init__(self) -> None:
assert self._hash_re.match(f"{self.name}={self.value}")
@classmethod
@functools.lru_cache(maxsize=None)
def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]:
"""Search a string for a checksum algorithm name and encoded output value."""
match = cls._hash_re.search(url)
if match is None:
return None
name, value = match.groups()
return cls(name=name, value=value)
def as_hashes(self) -> Hashes:
"""Return a Hashes instance which checks only for the current hash."""
return Hashes({self.name: [self.value]})
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
Return True if the current hash is allowed by `hashes`.
"""
if hashes is None:
return False
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
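
The parsing behaviour, as exercised by the new unit tests at the bottom of this diff:

from pip._internal.models.link import LinkHash

LinkHash.split_hash_name_and_value("https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe")
# -> LinkHash(name="sha256", value="aa113592bbe")
LinkHash.split_hash_name_and_value("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe")
# -> None: "sha500" is not in _SUPPORTED_HASHES, so it is discarded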
def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
return urllib.parse.quote(urllib.parse.unquote(part))
def _clean_file_url_path(part: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))
# percent-encoded: /
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
def _clean_url_path(path: str, is_local_path: bool) -> str:
"""
Clean the path portion of a URL.
"""
if is_local_path:
clean_func = _clean_file_url_path
else:
clean_func = _clean_url_path_part
# Split on the reserved characters prior to cleaning so that
# revision strings in VCS URLs are properly preserved.
parts = _reserved_chars_re.split(path)
cleaned_parts = []
for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
cleaned_parts.append(clean_func(to_clean))
# Normalize %xx escapes (e.g. %2f -> %2F)
cleaned_parts.append(reserved.upper())
return "".join(cleaned_parts)
def _ensure_quoted_url(url: str) -> str:
"""
Make sure a link is fully quoted.
For example, if ' ' occurs in the URL, it will be replaced with "%20",
and without double-quoting other characters.
"""
# Split the URL into parts according to the general structure
# `scheme://netloc/path;parameters?query#fragment`.
result = urllib.parse.urlparse(url)
# If the netloc is empty, then the URL refers to a local filesystem path.
is_local_path = not result.netloc
path = _clean_url_path(result.path, is_local_path=is_local_path)
return urllib.parse.urlunparse(result._replace(path=path))
class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL"""
@ -46,6 +163,8 @@ class Link(KeyBasedCompareMixin):
"comes_from",
"requires_python",
"yanked_reason",
"dist_info_metadata",
"link_hash",
"cache_link_parsing",
]
@ -55,6 +174,8 @@ class Link(KeyBasedCompareMixin):
comes_from: Optional[Union[str, "IndexContent"]] = None,
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
dist_info_metadata: Optional[str] = None,
link_hash: Optional[LinkHash] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
@ -72,6 +193,14 @@ class Link(KeyBasedCompareMixin):
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
:param dist_info_metadata: the metadata attached to the file, or None if no such
metadata is provided. This is the value of the "data-dist-info-metadata"
attribute, if present, in a simple repository HTML link. This may be parsed
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
information and the specification.
:param link_hash: a checksum for the content the link points to. If not
provided, this will be extracted from the link URL, if the URL has
any checksum.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link
should be cached. PyPI index urls should
@ -94,11 +223,75 @@ class Link(KeyBasedCompareMixin):
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
self.dist_info_metadata = dist_info_metadata
self.link_hash = link_hash or LinkHash.split_hash_name_and_value(self._url)
super().__init__(key=url, defining_class=Link)
self.cache_link_parsing = cache_link_parsing
@classmethod
def from_json(
cls,
file_data: Dict[str, Any],
page_url: str,
) -> Optional["Link"]:
"""
Convert a PyPI JSON document from a simple repository page into a Link.
"""
file_url = file_data.get("url")
if file_url is None:
return None
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
pyrequire = file_data.get("requires-python")
yanked_reason = file_data.get("yanked")
dist_info_metadata = file_data.get("dist-info-metadata")
hashes = file_data.get("hashes", {})
# The Link.yanked_reason expects an empty string instead of a boolean.
if yanked_reason and not isinstance(yanked_reason, str):
yanked_reason = ""
# The Link.yanked_reason expects None instead of False.
elif not yanked_reason:
yanked_reason = None
return cls(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
hashes=hashes,
dist_info_metadata=dist_info_metadata,
)
@classmethod
def from_element(
cls,
anchor_attribs: Dict[str, Optional[str]],
page_url: str,
base_url: str,
) -> Optional["Link"]:
"""
Convert an anchor element's attributes in a simple repository page to a Link.
"""
href = anchor_attribs.get("href")
if not href:
return None
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
pyrequire = anchor_attribs.get("data-requires-python")
yanked_reason = anchor_attribs.get("data-yanked")
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
return cls(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
dist_info_metadata=dist_info_metadata,
)
def __str__(self) -> str:
if self.requires_python:
rp = f" (requires-python:{self.requires_python})"
@ -181,32 +374,36 @@ class Link(KeyBasedCompareMixin):
return None
return match.group(1)
_hash_re = re.compile(
r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES))
)
def metadata_link(self) -> Optional["Link"]:
"""Implementation of PEP 658 parsing."""
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
# gets set.
if self.dist_info_metadata is None:
return None
metadata_url = f"{self.url_without_fragment}.metadata"
link_hash: Optional[LinkHash] = None
# If data-dist-info-metadata="true" is set, then the metadata file exists,
# but there is no information about its checksum or anything else.
if self.dist_info_metadata != "true":
link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
return Link(metadata_url, link_hash=link_hash)
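
The JSON simple-index client path carries the same information under the "dist-info-metadata" key; a sketch mirroring the unit test near the end of this diff:

file_data = {
    "filename": "holygrail-1.0-py3-none-any.whl",
    "url": "/files/holygrail-1.0-py3-none-any.whl",
    "hashes": {"sha256": "sha256 hash"},
    "dist-info-metadata": "sha512=aabdd41",
}
link = Link.from_json(file_data, page_url="https://example.com/simple/")
metadata_link = link.metadata_link()
# metadata_link.url == "https://example.com/files/holygrail-1.0-py3-none-any.whl.metadata"
# metadata_link.link_hash == LinkHash("sha512", "aabdd41"), so the fetch is hash-checked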
def as_hashes(self) -> Optional[Hashes]:
if self.link_hash is not None:
return self.link_hash.as_hashes()
return None
@property
def hash(self) -> Optional[str]:
for hashname in _SUPPORTED_HASHES:
if hashname in self._hashes:
return self._hashes[hashname]
match = self._hash_re.search(self._url)
if match:
return match.group(2)
if self.link_hash is not None:
return self.link_hash.value
return None
@property
def hash_name(self) -> Optional[str]:
for hashname in _SUPPORTED_HASHES:
if hashname in self._hashes:
return hashname
match = self._hash_re.search(self._url)
if match:
return match.group(1)
if self.link_hash is not None:
return self.link_hash.name
return None
@property
@ -236,19 +433,15 @@ class Link(KeyBasedCompareMixin):
@property
def has_hash(self) -> bool:
return self.hash_name is not None
return self.link_hash is not None
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
Return True if the link has a hash and it is allowed.
Return True if the link has a hash and it is allowed by `hashes`.
"""
if hashes is None or not self.has_hash:
if self.link_hash is None:
return False
# Assert non-None so mypy knows self.hash_name and self.hash are str.
assert self.hash_name is not None
assert self.hash is not None
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)
return self.link_hash.is_hash_allowed(hashes)
class _CleanResult(NamedTuple):


@ -19,12 +19,13 @@ from pip._internal.exceptions import (
HashMismatch,
HashUnpinned,
InstallationError,
MetadataInconsistent,
NetworkConnectionError,
PreviousBuildDirError,
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
from pip._internal.metadata import BaseDistribution, get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
@ -346,19 +347,72 @@ class RequirementPreparer:
# showing the user what the hash should be.
return req.hashes(trust_internet=False) or MissingHashes()
def _fetch_metadata_only(
self,
req: InstallRequirement,
) -> Optional[BaseDistribution]:
if self.require_hashes:
logger.debug(
"Metadata-only fetching is not used as hash checking is required",
)
return None
# Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
return self._fetch_metadata_using_link_data_attr(
req
) or self._fetch_metadata_using_lazy_wheel(req.link)
def _fetch_metadata_using_link_data_attr(
self,
req: InstallRequirement,
) -> Optional[BaseDistribution]:
"""Fetch metadata from the data-dist-info-metadata attribute, if possible."""
# (1) Get the link to the metadata file, if provided by the backend.
metadata_link = req.link.metadata_link()
if metadata_link is None:
return None
assert req.req is not None
logger.info(
"Obtaining dependency information for %s from %s",
req.req,
metadata_link,
)
# (2) Download the contents of the METADATA file, separate from the dist itself.
metadata_file = get_http_url(
metadata_link,
self._download,
hashes=metadata_link.as_hashes(),
)
with open(metadata_file.path, "rb") as f:
metadata_contents = f.read()
# (3) Generate a dist just from those file contents.
metadata_dist = get_metadata_distribution(
metadata_contents,
req.link.filename,
req.req.name,
)
# (4) Ensure the Name: field from the METADATA file matches the name from the
# install requirement.
#
# NB: raw_name will fall back to the name from the install requirement if
# the Name: field is not present, but it's noted in the raw_name docstring
# that that should NEVER happen anyway.
if metadata_dist.raw_name != req.req.name:
raise MetadataInconsistent(
req, "Name", req.req.name, metadata_dist.raw_name
)
return metadata_dist
def _fetch_metadata_using_lazy_wheel(
self,
link: Link,
) -> Optional[BaseDistribution]:
"""Fetch metadata using lazy wheel, if possible."""
# --use-feature=fast-deps must be provided.
if not self.use_lazy_wheel:
return None
if self.require_hashes:
logger.debug("Lazy wheel is not used as hash checking is required")
return None
if link.is_file or not link.is_wheel:
logger.debug(
"Lazy wheel is not used as %r does not points to a remote wheel",
"Lazy wheel is not used as %r does not point to a remote wheel",
link,
)
return None
@ -414,13 +468,12 @@ class RequirementPreparer:
) -> BaseDistribution:
"""Prepare a requirement to be obtained from req.link."""
assert req.link
link = req.link
self._log_preparing_link(req)
with indent_log():
# Check if the relevant file is already available
# in the download directory
file_path = None
if self.download_dir is not None and link.is_wheel:
if self.download_dir is not None and req.link.is_wheel:
hashes = self._get_linked_req_hashes(req)
file_path = _check_download_dir(req.link, self.download_dir, hashes)
@ -429,10 +482,10 @@ class RequirementPreparer:
self._downloaded[req.link.url] = file_path
else:
# The file is not available, attempt to fetch only metadata
wheel_dist = self._fetch_metadata_using_lazy_wheel(link)
if wheel_dist is not None:
metadata_dist = self._fetch_metadata_only(req)
if metadata_dist is not None:
req.needs_more_preparation = True
return wheel_dist
return metadata_dist
# None of the optimizations worked, fully prepare the requirement
return self._prepare_linked_requirement(req, parallel_builds)
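
The user-visible effect: against a PEP 658-enabled index, the preparer fetches only the small .metadata file during resolution and defers the dist download, while hash-checking mode skips both fast paths since _fetch_metadata_only() returns None there. The log line produced by the logger.info call above looks like (URL illustrative):

Obtaining dependency information for simple==1.0 from https://index.example.com/files/simple-1.0.tar.gz.metadata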


@ -1,17 +1,24 @@
import os
import re
import shutil
import textwrap
import uuid
from dataclasses import dataclass
from enum import Enum
from hashlib import sha256
from pathlib import Path
from typing import List
from textwrap import dedent
from typing import Callable, Dict, List, Tuple
import pytest
from pip._internal.cli.status_codes import ERROR
from pip._internal.utils.urls import path_to_url
from tests.conftest import MockServer, ScriptFactory
from tests.lib import (
PipTestEnvironment,
TestData,
TestPipResult,
create_basic_sdist_for_package,
create_really_basic_wheel,
)
@ -1230,3 +1237,349 @@ def test_download_use_pep517_propagation(
downloads = os.listdir(download_dir)
assert len(downloads) == 2
class MetadataKind(Enum):
"""All the types of values we might be provided for the data-dist-info-metadata
attribute from PEP 658."""
# Valid: will read metadata from the dist instead.
No = "none"
# Valid: will read the .metadata file, but won't check its hash.
Unhashed = "unhashed"
# Valid: will read the .metadata file and check its hash matches.
Sha256 = "sha256"
# Invalid: will error out after checking the hash.
WrongHash = "wrong-hash"
# Invalid: will error out after failing to fetch the .metadata file.
NoFile = "no-file"
@dataclass(frozen=True)
class Package:
"""Mock package structure used to generate a PyPI repository.
Package name and version should correspond to sdists (.tar.gz files) in our test
data."""
name: str
version: str
filename: str
metadata: MetadataKind
# This will override any dependencies specified in the actual dist's METADATA.
requires_dist: Tuple[str, ...] = ()
def metadata_filename(self) -> str:
"""This is specified by PEP 658."""
return f"{self.filename}.metadata"
def generate_additional_tag(self) -> str:
"""This gets injected into the <a> tag in the generated PyPI index page for this
package."""
if self.metadata == MetadataKind.No:
return ""
if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
return 'data-dist-info-metadata="true"'
if self.metadata == MetadataKind.WrongHash:
return 'data-dist-info-metadata="sha256=WRONG-HASH"'
assert self.metadata == MetadataKind.Sha256
checksum = sha256(self.generate_metadata()).hexdigest()
return f'data-dist-info-metadata="sha256={checksum}"'
def requires_str(self) -> str:
if not self.requires_dist:
return ""
joined = " and ".join(self.requires_dist)
return f"Requires-Dist: {joined}"
def generate_metadata(self) -> bytes:
"""This is written to `self.metadata_filename()` and will override the actual
dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
return dedent(
f"""\
Metadata-Version: 2.1
Name: {self.name}
Version: {self.version}
{self.requires_str()}
"""
).encode("utf-8")
@pytest.fixture(scope="function")
def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]:
"""Generate a PyPI package index.html within a temporary local directory."""
html_dir = tmpdir / "index_html_content"
html_dir.mkdir()
def generate_index_html_subdir(index_html: str) -> Path:
"""Create a new subdirectory after a UUID and write an index.html."""
new_subdir = html_dir / uuid.uuid4().hex
new_subdir.mkdir()
with open(new_subdir / "index.html", "w") as f:
f.write(index_html)
return new_subdir
return generate_index_html_subdir
@pytest.fixture(scope="function")
def html_index_for_packages(
shared_data: TestData,
write_index_html_content: Callable[[str], Path],
) -> Callable[..., Path]:
"""Generate a PyPI HTML package index within a local directory pointing to
blank data."""
def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
"""
Produce a PyPI directory structure pointing to the specified packages.
"""
# (1) Generate the content for a PyPI index.html.
pkg_links = "\n".join(
f' <a href="{pkg}/index.html">{pkg}</a>' for pkg in packages.keys()
)
index_html = f"""\
<!DOCTYPE html>
<html>
<head>
<meta name="pypi:repository-version" content="1.0">
<title>Simple index</title>
</head>
<body>
{pkg_links}
</body>
</html>"""
# (2) Generate the index.html in a new subdirectory of the temp directory.
index_html_subdir = write_index_html_content(index_html)
# (3) Generate subdirectories for individual packages, each with their own
# index.html.
for pkg, links in packages.items():
pkg_subdir = index_html_subdir / pkg
pkg_subdir.mkdir()
download_links: List[str] = []
for package_link in links:
# (3.1) Generate the <a> tag which pip can crawl pointing to this
# specific package version.
download_links.append(
f' <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>' # noqa: E501
)
# (3.2) Copy over the corresponding file in `shared_data.packages`.
shutil.copy(
shared_data.packages / package_link.filename,
pkg_subdir / package_link.filename,
)
# (3.3) Write a metadata file, if applicable.
if package_link.metadata != MetadataKind.NoFile:
with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
f.write(package_link.generate_metadata())
# (3.4) After collating all the download links and copying over the files,
# write an index.html with the generated download links for each
# copied file for this specific package name.
download_links_str = "\n".join(download_links)
pkg_index_content = f"""\
<!DOCTYPE html>
<html>
<head>
<meta name="pypi:repository-version" content="1.0">
<title>Links for {pkg}</title>
</head>
<body>
<h1>Links for {pkg}</h1>
{download_links_str}
</body>
</html>"""
with open(pkg_subdir / "index.html", "w") as f:
f.write(pkg_index_content)
return index_html_subdir
return generate_html_index_for_packages
@pytest.fixture(scope="function")
def download_generated_html_index(
script: PipTestEnvironment,
html_index_for_packages: Callable[[Dict[str, List[Package]]], Path],
tmpdir: Path,
) -> Callable[..., Tuple[TestPipResult, Path]]:
"""Execute `pip download` against a generated PyPI index."""
download_dir = tmpdir / "download_dir"
def run_for_generated_index(
packages: Dict[str, List[Package]],
args: List[str],
allow_error: bool = False,
) -> Tuple[TestPipResult, Path]:
"""
Produce a PyPI directory structure pointing to the specified packages, then
execute `pip download -i ...` pointing to our generated index.
"""
index_dir = html_index_for_packages(packages)
pip_args = [
"download",
"-d",
str(download_dir),
"-i",
path_to_url(str(index_dir)),
*args,
]
result = script.pip(*pip_args, allow_error=allow_error)
return (result, download_dir)
return run_for_generated_index
# The package database we generate for testing PEP 658 support.
_simple_packages: Dict[str, List[Package]] = {
"simple": [
Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
# This will raise a hashing error.
Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
],
"simple2": [
# Override the dependencies here in order to force pip to download
# simple-1.0.tar.gz as well.
Package(
"simple2",
"1.0",
"simple2-1.0.tar.gz",
MetadataKind.Unhashed,
("simple==1.0",),
),
# This will raise an error when pip attempts to fetch the metadata file.
Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
],
"colander": [
# Ensure we can read the dependencies from a metadata file within a wheel
# *without* PEP 658 metadata.
Package(
"colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No
),
],
"compilewheel": [
# Ensure we can override the dependencies of a wheel file by injecting PEP
# 658 metadata.
Package(
"compilewheel",
"1.0",
"compilewheel-1.0-py2.py3-none-any.whl",
MetadataKind.Unhashed,
("simple==1.0",),
),
],
"has-script": [
# Ensure we check PEP 658 metadata hashing errors for wheel files.
Package(
"has-script",
"1.0",
"has.script-1.0-py2.py3-none-any.whl",
MetadataKind.WrongHash,
),
],
"translationstring": [
Package(
"translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No
),
],
"priority": [
# Ensure we check for a missing metadata file for wheels.
Package(
"priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile
),
],
}
@pytest.mark.parametrize(
"requirement_to_download, expected_outputs",
[
("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]),
("simple==2.0", ["simple-2.0.tar.gz"]),
(
"colander",
["colander-0.9.9-py2.py3-none-any.whl", "translationstring-1.1.tar.gz"],
),
(
"compilewheel",
["compilewheel-1.0-py2.py3-none-any.whl", "simple-1.0.tar.gz"],
),
],
)
def test_download_metadata(
download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
expected_outputs: List[str],
) -> None:
"""Verify that if a data-dist-info-metadata attribute is present, then it is used
instead of the actual dist's METADATA."""
_, download_dir = download_generated_html_index(
_simple_packages,
[requirement_to_download],
)
assert sorted(os.listdir(download_dir)) == expected_outputs
@pytest.mark.parametrize(
"requirement_to_download, real_hash",
[
(
"simple==3.0",
"95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
),
(
"has-script",
"16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
),
],
)
def test_incorrect_metadata_hash(
download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
real_hash: str,
) -> None:
"""Verify that if a hash for data-dist-info-metadata is provided, it must match the
actual hash of the metadata file."""
result, _ = download_generated_html_index(
_simple_packages,
[requirement_to_download],
allow_error=True,
)
assert result.returncode != 0
expected_msg = f"""\
Expected sha256 WRONG-HASH
Got {real_hash}"""
assert expected_msg in result.stderr
@pytest.mark.parametrize(
"requirement_to_download, expected_url",
[
("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
],
)
def test_metadata_not_found(
download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
requirement_to_download: str,
expected_url: str,
) -> None:
"""Verify that if a data-dist-info-metadata attribute is provided, that pip will
fetch the .metadata file at the location specified by PEP 658, and error
if unavailable."""
result, _ = download_generated_html_index(
_simple_packages,
[requirement_to_download],
allow_error=True,
)
assert result.returncode != 0
expected_re = re.escape(expected_url)
pattern = re.compile(
f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}"
)
assert pattern.search(result.stderr), (pattern, result.stderr)


@ -1363,7 +1363,7 @@ def test_new_resolver_skip_inconsistent_metadata(script: PipTestEnvironment) ->
)
assert (
" inconsistent version: filename has '3', but metadata has '2'"
" inconsistent version: expected '3', but metadata has '2'"
) in result.stdout, str(result)
script.assert_installed(a="1")


@ -150,14 +150,6 @@ def html5_page(text: str) -> str:
)
def index_page(spec: Dict[str, str]) -> "WSGIApplication":
def link(name: str, value: str) -> str:
return '<a href="{}">{}</a>'.format(value, name)
links = "".join(link(*kv) for kv in spec.items())
return text_html_response(html5_page(links))
def package_page(spec: Dict[str, str]) -> "WSGIApplication":
def link(name: str, value: str) -> str:
return '<a href="{}">{}</a>'.format(value, name)


@ -11,7 +11,7 @@ from pip._internal.exceptions import UnsupportedWheel
from pip._internal.metadata.pkg_resources import (
Distribution,
Environment,
WheelMetadata,
InMemoryMetadata,
)
pkg_resources = pytest.importorskip("pip._vendor.pkg_resources")
@ -99,7 +99,7 @@ def test_wheel_metadata_works() -> None:
dist = Distribution(
pkg_resources.DistInfoDistribution(
location="<in-memory>",
metadata=WheelMetadata({"METADATA": metadata.as_bytes()}, "<in-memory>"),
metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, "<in-memory>"),
project_name=name,
),
)
@ -116,7 +116,7 @@ def test_wheel_metadata_works() -> None:
def test_wheel_metadata_throws_on_bad_unicode() -> None:
metadata = WheelMetadata({"METADATA": b"\xff"}, "<in-memory>")
metadata = InMemoryMetadata({"METADATA": b"\xff"}, "<in-memory>")
with pytest.raises(UnsupportedWheel) as e:
metadata.get_metadata("METADATA")


@ -11,13 +11,12 @@ from unittest import mock
import pytest
from pip._vendor import requests
from pip._vendor.packaging.requirements import Requirement
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.index.collector import (
IndexContent,
LinkCollector,
_clean_link,
_clean_url_path,
_get_index_content,
_get_simple_response,
_make_index_content,
@ -28,7 +27,12 @@ from pip._internal.index.collector import (
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.index import PyPI
from pip._internal.models.link import Link
from pip._internal.models.link import (
Link,
LinkHash,
_clean_url_path,
_ensure_quoted_url,
)
from pip._internal.network.session import PipSession
from tests.lib import TestData, make_test_link_collector
@ -402,13 +406,13 @@ def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
),
],
)
def test_clean_link(url: str, clean_url: str) -> None:
assert _clean_link(url) == clean_url
def test_ensure_quoted_url(url: str, clean_url: str) -> None:
assert _ensure_quoted_url(url) == clean_url
def _test_parse_links_data_attribute(
anchor_html: str, attr: str, expected: Optional[str]
) -> None:
) -> Link:
html = (
"<!DOCTYPE html>"
'<html><head><meta charset="utf-8"><head>'
@ -427,6 +431,7 @@ def _test_parse_links_data_attribute(
(link,) = links
actual = getattr(link, attr)
assert actual == expected
return link
@pytest.mark.parametrize(
@ -454,6 +459,12 @@ def test_parse_links__requires_python(
_test_parse_links_data_attribute(anchor_html, "requires_python", expected)
# TODO: this test generates its own examples to validate the json client implementation
# instead of sharing those examples with the html client testing. We expect this won't
# hide any bugs because operations like resolving PEP 658 metadata should use the same
# code for both types of indices, but it might be nice to explicitly have all our tests
# in test_download.py execute over both html and json indices with
a pytest.mark.parametrize decorator to ensure nothing slips through the cracks.
def test_parse_links_json() -> None:
json_bytes = json.dumps(
{
@ -474,6 +485,14 @@ def test_parse_links_json() -> None:
"requires-python": ">=3.7",
"dist-info-metadata": False,
},
# Same as above, but parsing dist-info-metadata.
{
"filename": "holygrail-1.0-py3-none-any.whl",
"url": "/files/holygrail-1.0-py3-none-any.whl",
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
"requires-python": ">=3.7",
"dist-info-metadata": "sha512=aabdd41",
},
],
}
).encode("utf8")
@ -502,8 +521,25 @@ def test_parse_links_json() -> None:
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
),
Link(
"https://example.com/files/holygrail-1.0-py3-none-any.whl",
comes_from=page.url,
requires_python=">=3.7",
yanked_reason=None,
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
dist_info_metadata="sha512=aabdd41",
),
]
# Ensure the metadata info can be parsed into the correct link.
metadata_link = links[2].metadata_link()
assert metadata_link is not None
assert (
metadata_link.url
== "https://example.com/files/holygrail-1.0-py3-none-any.whl.metadata"
)
assert metadata_link.link_hash == LinkHash("sha512", "aabdd41")
@pytest.mark.parametrize(
"anchor_html, expected",
@ -534,6 +570,48 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
_test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)
# Requirement objects do not == each other unless they point to the same instance!
_pkg1_requirement = Requirement("pkg1==1.0")
@pytest.mark.parametrize(
"anchor_html, expected, link_hash",
[
# Test not present.
(
'<a href="/pkg1-1.0.tar.gz"></a>',
None,
None,
),
# Test with value "true".
(
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="true"></a>',
"true",
None,
),
# Test with a provided hash value.
(
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
"sha256=aa113592bbe",
None,
),
# Test with a provided hash value for both the requirement as well as metadata.
(
'<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
"sha256=aa113592bbe",
LinkHash("sha512", "abc132409cb"),
),
],
)
def test_parse_links__dist_info_metadata(
anchor_html: str,
expected: Optional[str],
link_hash: Optional[LinkHash],
) -> None:
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
assert link.link_hash == link_hash
def test_parse_links_caches_same_page_by_url() -> None:
html = (
"<!DOCTYPE html>"
@ -963,3 +1041,23 @@ def test_link_collector_create_find_links_expansion(
expected_temp2_dir = os.path.normcase(temp2_dir)
assert search_scope.find_links == ["~/temp1", expected_temp2_dir]
assert search_scope.index_urls == ["default_url"]
@pytest.mark.parametrize(
"url, result",
[
(
"https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe",
LinkHash("sha256", "aa113592bbe"),
),
(
"https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
LinkHash("md5", "aa113592bbe"),
),
("https://pypi.org/pip-18.0.tar.gz", None),
# We don't recognize the "sha500" algorithm, so we discard it.
("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None),
],
)
def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
assert LinkHash.split_hash_name_and_value(url) == result