From a814290ff4710ce02fe15b76e22c96afc09ffb1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Sat, 4 Jun 2022 11:12:18 +0200 Subject: [PATCH] Make sure metadata has Requires-Dist and Provides-Extra egg-info distributions may not have the Requires-Dist and Provides-Extra fields in their metadata. For consistency and to provide an unsurprising metadata property, we emulate it by reading requires.txt. --- src/pip/_internal/metadata/base.py | 92 ++++++++++++++++++- .../_internal/metadata/importlib/_dists.py | 92 +------------------ src/pip/_internal/metadata/pkg_resources.py | 3 +- tests/unit/metadata/test_metadata.py | 29 +++++- 4 files changed, 125 insertions(+), 91 deletions(-) diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index f1a1ee62f..d226dec8b 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -1,5 +1,6 @@ import csv import email.message +import functools import json import logging import pathlib @@ -13,6 +14,7 @@ from typing import ( Iterable, Iterator, List, + NamedTuple, Optional, Tuple, Union, @@ -33,6 +35,7 @@ from pip._internal.models.direct_url import ( from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here. from pip._internal.utils.egg_link import egg_link_path_from_sys_path from pip._internal.utils.misc import is_local, normalize_path +from pip._internal.utils.packaging import safe_extra from pip._internal.utils.urls import url_to_path if TYPE_CHECKING: @@ -91,6 +94,12 @@ def _convert_installed_files_path( return str(pathlib.Path(*info, *entry)) +class RequiresEntry(NamedTuple): + requirement: str + extra: str + marker: str + + class BaseDistribution(Protocol): @classmethod def from_directory(cls, directory: str) -> "BaseDistribution": @@ -348,6 +357,17 @@ class BaseDistribution(Protocol): def iter_entry_points(self) -> Iterable[BaseEntryPoint]: raise NotImplementedError() + def _metadata_impl(self) -> email.message.Message: + raise NotImplementedError() + + @functools.lru_cache(maxsize=1) + def _metadata_cached(self) -> email.message.Message: + # When we drop python 3.7 support, move this to the metadata property and use + # functools.cached_property instead of lru_cache. + metadata = self._metadata_impl() + self._add_egg_info_requires(metadata) + return metadata + @property def metadata(self) -> email.message.Message: """Metadata of distribution parsed from e.g. METADATA or PKG-INFO. @@ -357,7 +377,7 @@ class BaseDistribution(Protocol): :raises NoneMetadataError: If the metadata file is available, but does not contain valid metadata. """ - raise NotImplementedError() + return self._metadata_cached() @property def metadata_version(self) -> Optional[str]: @@ -451,6 +471,76 @@ class BaseDistribution(Protocol): or self._iter_declared_entries_from_legacy() ) + def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]: + """Parse a ``requires.txt`` in an egg-info directory. + + This is an INI-ish format where an egg-info stores dependencies. A + section name describes extra other environment markers, while each entry + is an arbitrary string (not a key-value pair) representing a dependency + as a requirement string (no markers). + + There is a construct in ``importlib.metadata`` called ``Sectioned`` that + does mostly the same, but the format is currently considered private. + """ + try: + content = self.read_text("requires.txt") + except FileNotFoundError: + return + extra = marker = "" # Section-less entries don't have markers. + for line in content.splitlines(): + line = line.strip() + if not line or line.startswith("#"): # Comment; ignored. + continue + if line.startswith("[") and line.endswith("]"): # A section header. + extra, _, marker = line.strip("[]").partition(":") + continue + yield RequiresEntry(requirement=line, extra=extra, marker=marker) + + def _iter_egg_info_extras(self) -> Iterable[str]: + """Get extras from the egg-info directory.""" + known_extras = {""} + for entry in self._iter_requires_txt_entries(): + if entry.extra in known_extras: + continue + known_extras.add(entry.extra) + yield entry.extra + + def _iter_egg_info_dependencies(self) -> Iterable[str]: + """Get distribution dependencies from the egg-info directory. + + To ease parsing, this converts a legacy dependency entry into a PEP 508 + requirement string. Like ``_iter_requires_txt_entries()``, there is code + in ``importlib.metadata`` that does mostly the same, but not do exactly + what we need. + + Namely, ``importlib.metadata`` does not normalize the extra name before + putting it into the requirement string, which causes marker comparison + to fail because the dist-info format do normalize. This is consistent in + all currently available PEP 517 backends, although not standardized. + """ + for entry in self._iter_requires_txt_entries(): + if entry.extra and entry.marker: + marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"' + elif entry.extra: + marker = f'extra == "{safe_extra(entry.extra)}"' + elif entry.marker: + marker = entry.marker + else: + marker = "" + if marker: + yield f"{entry.requirement} ; {marker}" + else: + yield entry.requirement + + def _add_egg_info_requires(self, metadata: email.message.Message) -> None: + """Add egg-info requires.txt information to the metadata.""" + if not metadata.get_all("Requires-Dist"): + for dep in self._iter_egg_info_dependencies(): + metadata["Requires-Dist"] = dep + if not metadata.get_all("Provides-Extra"): + for extra in self._iter_egg_info_extras(): + metadata["Provides-Extra"] = extra + class BaseEnvironment: """An environment containing distributions to introspect.""" diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index cf66de54d..ba039c4bf 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -3,16 +3,7 @@ import importlib.metadata import os import pathlib import zipfile -from typing import ( - Collection, - Dict, - Iterable, - Iterator, - Mapping, - NamedTuple, - Optional, - Sequence, -) +from typing import Collection, Dict, Iterable, Iterator, Mapping, Optional, Sequence from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name @@ -92,12 +83,6 @@ class WheelDistribution(importlib.metadata.Distribution): return text -class RequiresEntry(NamedTuple): - requirement: str - extra: str - marker: str - - class Distribution(BaseDistribution): def __init__( self, @@ -187,84 +172,17 @@ class Distribution(BaseDistribution): # importlib.metadata's EntryPoint structure sasitfies BaseEntryPoint. return self._dist.entry_points - @property - def metadata(self) -> email.message.Message: + def _metadata_impl(self) -> email.message.Message: return self._dist.metadata - def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]: - """Parse a ``requires.txt`` in an egg-info directory. - - This is an INI-ish format where an egg-info stores dependencies. A - section name describes extra other environment markers, while each entry - is an arbitrary string (not a key-value pair) representing a dependency - as a requirement string (no markers). - - There is a construct in ``importlib.metadata`` called ``Sectioned`` that - does mostly the same, but the format is currently considered private. - """ - content = self._dist.read_text("requires.txt") - if content is None: - return - extra = marker = "" # Section-less entries don't have markers. - for line in content.splitlines(): - line = line.strip() - if not line or line.startswith("#"): # Comment; ignored. - continue - if line.startswith("[") and line.endswith("]"): # A section header. - extra, _, marker = line.strip("[]").partition(":") - continue - yield RequiresEntry(requirement=line, extra=extra, marker=marker) - - def _iter_egg_info_extras(self) -> Iterable[str]: - """Get extras from the egg-info directory.""" - known_extras = {""} - for entry in self._iter_requires_txt_entries(): - if entry.extra in known_extras: - continue - known_extras.add(entry.extra) - yield entry.extra - def iter_provided_extras(self) -> Iterable[str]: - iterator = ( - self._dist.metadata.get_all("Provides-Extra") - or self._iter_egg_info_extras() + return ( + safe_extra(extra) for extra in self.metadata.get_all("Provides-Extra", []) ) - return (safe_extra(extra) for extra in iterator) - - def _iter_egg_info_dependencies(self) -> Iterable[str]: - """Get distribution dependencies from the egg-info directory. - - To ease parsing, this converts a legacy dependency entry into a PEP 508 - requirement string. Like ``_iter_requires_txt_entries()``, there is code - in ``importlib.metadata`` that does mostly the same, but not do exactly - what we need. - - Namely, ``importlib.metadata`` does not normalize the extra name before - putting it into the requirement string, which causes marker comparison - to fail because the dist-info format do normalize. This is consistent in - all currently available PEP 517 backends, although not standardized. - """ - for entry in self._iter_requires_txt_entries(): - if entry.extra and entry.marker: - marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"' - elif entry.extra: - marker = f'extra == "{safe_extra(entry.extra)}"' - elif entry.marker: - marker = entry.marker - else: - marker = "" - if marker: - yield f"{entry.requirement} ; {marker}" - else: - yield entry.requirement def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]: - req_string_iterator = ( - self._dist.metadata.get_all("Requires-Dist") - or self._iter_egg_info_dependencies() - ) contexts: Sequence[Dict[str, str]] = [{"extra": safe_extra(e)} for e in extras] - for req_string in req_string_iterator: + for req_string in self.metadata.get_all("Requires-Dist", []): req = Requirement(req_string) if not req.marker: yield req diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index ffde8c77e..bf79ba139 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -171,8 +171,7 @@ class Distribution(BaseDistribution): name, _, value = str(entry_point).partition("=") yield EntryPoint(name=name.strip(), value=value.strip(), group=group) - @property - def metadata(self) -> email.message.Message: + def _metadata_impl(self) -> email.message.Message: """ :raises NoneMetadataError: if the distribution reports `has_metadata()` True but `get_metadata()` returns None. diff --git a/tests/unit/metadata/test_metadata.py b/tests/unit/metadata/test_metadata.py index c519ba89a..57e09acf7 100644 --- a/tests/unit/metadata/test_metadata.py +++ b/tests/unit/metadata/test_metadata.py @@ -1,11 +1,12 @@ import logging +from pathlib import Path from typing import cast from unittest import mock import pytest from pip._vendor.packaging.utils import NormalizedName -from pip._internal.metadata import BaseDistribution +from pip._internal.metadata import BaseDistribution, get_directory_distribution from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, ArchiveInfo @@ -40,6 +41,32 @@ def test_dist_get_direct_url_invalid_json( ) +def test_metadata_reads_egg_info_requires_txt(tmp_path: Path) -> None: + """Check Requires-Dist is obtained from requires.txt if absent in PKG-INFO.""" + egg_info_path = tmp_path / "whatever.egg-info" + egg_info_path.mkdir() + dist = get_directory_distribution(str(egg_info_path)) + assert dist.installed_with_setuptools_egg_info + pkg_info_path = egg_info_path / "PKG-INFO" + pkg_info_path.write_text("Name: whatever\n") + egg_info_path.joinpath("requires.txt").write_text("pkga\npkgb\n") + assert dist.metadata.get_all("Requires-Dist") == ["pkga", "pkgb"] + + +def test_metadata_pkg_info_requires_priority(tmp_path: Path) -> None: + """Check Requires-Dist in PKG-INFO has priority over requires.txt.""" + egg_info_path = tmp_path / "whatever.egg-info" + egg_info_path.mkdir() + dist = get_directory_distribution(str(egg_info_path)) + assert dist.installed_with_setuptools_egg_info + pkg_info_path = egg_info_path / "PKG-INFO" + pkg_info_path.write_text( + "Name: whatever\nRequires-Dist: pkgc\nRequires-Dist: pkgd\n" + ) + egg_info_path.joinpath("requires.txt").write_text("pkga\npkgb\n") + assert dist.metadata.get_all("Requires-Dist") == ["pkgc", "pkgd"] + + @mock.patch.object( BaseDistribution, "read_text",