From 2e365bdab1a37c296eae264991c2e88d57f73a67 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 22:11:07 -0400
Subject: [PATCH 1/6] move test_download_metadata mock pypi index utilities to
 conftest.py

---
 tests/conftest.py                 | 242 +++++++++++++++++++++++
 tests/functional/test_download.py | 311 +++++-------------------------
 2 files changed, 286 insertions(+), 267 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index a183cadf2..f481e06c8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,22 +1,32 @@
 import compileall
 import fnmatch
+import http.server
 import io
 import os
 import re
 import shutil
 import subprocess
 import sys
+import threading
 from contextlib import ExitStack, contextmanager
+from dataclasses import dataclass
+from enum import Enum
+from hashlib import sha256
 from pathlib import Path
+from textwrap import dedent
 from typing import (
     TYPE_CHECKING,
+    Any,
     AnyStr,
     Callable,
+    ClassVar,
     Dict,
     Iterable,
     Iterator,
     List,
     Optional,
+    Set,
+    Tuple,
     Union,
 )
 from unittest.mock import patch
@@ -750,3 +760,235 @@ def proxy(request: pytest.FixtureRequest) -> str:
 @pytest.fixture
 def enable_user_site(virtualenv: VirtualEnvironment) -> None:
     virtualenv.user_site_packages = True
+
+
+class MetadataKind(Enum):
+    """All the types of values we might be provided for the data-dist-info-metadata
+    attribute from PEP 658."""
+
+    # Valid: will read metadata from the dist instead.
+    No = "none"
+    # Valid: will read the .metadata file, but won't check its hash.
+    Unhashed = "unhashed"
+    # Valid: will read the .metadata file and check its hash matches.
+    Sha256 = "sha256"
+    # Invalid: will error out after checking the hash.
+    WrongHash = "wrong-hash"
+    # Invalid: will error out after failing to fetch the .metadata file.
+    NoFile = "no-file"
+
+
+@dataclass(frozen=True)
+class FakePackage:
+    """Mock package structure used to generate a PyPI repository.
+
+    FakePackage name and version should correspond to sdists (.tar.gz files) in our test
+    data."""
+
+    name: str
+    version: str
+    filename: str
+    metadata: MetadataKind
+    # This will override any dependencies specified in the actual dist's METADATA.
+    requires_dist: Tuple[str, ...] = ()
+    # This will override the Name specified in the actual dist's METADATA.
+    metadata_name: Optional[str] = None
+
+    def metadata_filename(self) -> str:
+        """This is specified by PEP 658."""
+        return f"{self.filename}.metadata"
+
+    def generate_additional_tag(self) -> str:
+        """This gets injected into the <a> tag in the generated PyPI index page for this
+        package."""
+        if self.metadata == MetadataKind.No:
+            return ""
+        if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
+            return 'data-dist-info-metadata="true"'
+        if self.metadata == MetadataKind.WrongHash:
+            return 'data-dist-info-metadata="sha256=WRONG-HASH"'
+        assert self.metadata == MetadataKind.Sha256
+        checksum = sha256(self.generate_metadata()).hexdigest()
+        return f'data-dist-info-metadata="sha256={checksum}"'
+
+    def requires_str(self) -> str:
+        if not self.requires_dist:
+            return ""
+        joined = " and ".join(self.requires_dist)
+        return f"Requires-Dist: {joined}"
+
+    def generate_metadata(self) -> bytes:
+        """This is written to `self.metadata_filename()` and will override the actual
+        dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
+        return dedent(
+            f"""\
+            Metadata-Version: 2.1
+            Name: {self.metadata_name or self.name}
+            Version: {self.version}
+            {self.requires_str()}
+            """
+        ).encode("utf-8")
+
+
+@pytest.fixture(scope="session")
+def fake_packages() -> Dict[str, List[FakePackage]]:
+    """The package database we generate for testing PEP 658 support."""
+    return {
+        "simple": [
+            FakePackage("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
+            FakePackage("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
+            # This will raise a hashing error.
+            FakePackage("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
+        ],
+        "simple2": [
+            # Override the dependencies here in order to force pip to download
+            # simple-1.0.tar.gz as well.
+            FakePackage(
+                "simple2",
+                "1.0",
+                "simple2-1.0.tar.gz",
+                MetadataKind.Unhashed,
+                ("simple==1.0",),
+            ),
+            # This will raise an error when pip attempts to fetch the metadata file.
+            FakePackage("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
+            # This has a METADATA file with a mismatched name.
+            FakePackage(
+                "simple2",
+                "3.0",
+                "simple2-3.0.tar.gz",
+                MetadataKind.Sha256,
+                metadata_name="not-simple2",
+            ),
+        ],
+        "colander": [
+            # Ensure we can read the dependencies from a metadata file within a wheel
+            # *without* PEP 658 metadata.
+            FakePackage(
+                "colander",
+                "0.9.9",
+                "colander-0.9.9-py2.py3-none-any.whl",
+                MetadataKind.No,
+            ),
+        ],
+        "compilewheel": [
+            # Ensure we can override the dependencies of a wheel file by injecting PEP
+            # 658 metadata.
+            FakePackage(
+                "compilewheel",
+                "1.0",
+                "compilewheel-1.0-py2.py3-none-any.whl",
+                MetadataKind.Unhashed,
+                ("simple==1.0",),
+            ),
+        ],
+        "has-script": [
+            # Ensure we check PEP 658 metadata hashing errors for wheel files.
+            FakePackage(
+                "has-script",
+                "1.0",
+                "has.script-1.0-py2.py3-none-any.whl",
+                MetadataKind.WrongHash,
+            ),
+        ],
+        "translationstring": [
+            FakePackage(
+                "translationstring",
+                "1.1",
+                "translationstring-1.1.tar.gz",
+                MetadataKind.No,
+            ),
+        ],
+        "priority": [
+            # Ensure we check for a missing metadata file for wheels.
+            FakePackage(
+                "priority",
+                "1.0",
+                "priority-1.0-py2.py3-none-any.whl",
+                MetadataKind.NoFile,
+            ),
+        ],
+        "requires-simple-extra": [
+            # Metadata name is not canonicalized.
+            FakePackage(
+                "requires-simple-extra",
+                "0.1",
+                "requires_simple_extra-0.1-py2.py3-none-any.whl",
+                MetadataKind.Sha256,
+                metadata_name="Requires_Simple.Extra",
+            ),
+        ],
+    }
+
+
+@pytest.fixture(scope="session")
+def html_index_for_packages(
+    shared_data: TestData,
+    fake_packages: Dict[str, List[FakePackage]],
+    tmpdir_factory: pytest.TempPathFactory,
+) -> Path:
+    """Generate a PyPI HTML package index within a local directory pointing to
+    synthetic test data."""
+    html_dir = tmpdir_factory.mktemp("fake_index_html_content")
+
+    # (1) Generate the content for a PyPI index.html.
+    pkg_links = "\n".join(
+        f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in fake_packages.keys()
+    )
+    index_html = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Simple index</title>
+  </head>
+  <body>
+{pkg_links}
+  </body>
+</html>"""
+    # (2) Generate the index.html in a new subdirectory of the temp directory.
+    (html_dir / "index.html").write_text(index_html)
+
+    # (3) Generate subdirectories for individual packages, each with their own
+    # index.html.
+    for pkg, links in fake_packages.items():
+        pkg_subdir = html_dir / pkg
+        pkg_subdir.mkdir()
+
+        download_links: List[str] = []
+        for package_link in links:
+            # (3.1) Generate the <a> tag which pip can crawl pointing to this
+            # specific package version.
+            download_links.append(
+                f'    <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>'  # noqa: E501
+            )
+            # (3.2) Copy over the corresponding file in `shared_data.packages`.
+            shutil.copy(
+                shared_data.packages / package_link.filename,
+                pkg_subdir / package_link.filename,
+            )
+            # (3.3) Write a metadata file, if applicable.
+            if package_link.metadata != MetadataKind.NoFile:
+                with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
+                    f.write(package_link.generate_metadata())
+
+        # (3.4) After collating all the download links and copying over the files,
+        # write an index.html with the generated download links for each
+        # copied file for this specific package name.
+        download_links_str = "\n".join(download_links)
+        pkg_index_content = f"""\
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta name="pypi:repository-version" content="1.0">
+    <title>Links for {pkg}</title>
+  </head>
+  <body>
+    <h1>Links for {pkg}</h1>
+{download_links_str}
+  </body>
+</html>"""
+        with open(pkg_subdir / "index.html", "w") as f:
+            f.write(pkg_index_content)
+
+    return html_dir
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index 8da185c06..bedadc704 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1,14 +1,11 @@
+import http.server
 import os
 import re
 import shutil
 import textwrap
-import uuid
-from dataclasses import dataclass
-from enum import Enum
 from hashlib import sha256
 from pathlib import Path
-from textwrap import dedent
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Callable, List, Tuple
 
 import pytest
 
@@ -1237,181 +1234,16 @@ def test_download_use_pep517_propagation(
     assert len(downloads) == 2
 
 
-class MetadataKind(Enum):
-    """All the types of values we might be provided for the data-dist-info-metadata
-    attribute from PEP 658."""
-
-    # Valid: will read metadata from the dist instead.
-    No = "none"
-    # Valid: will read the .metadata file, but won't check its hash.
-    Unhashed = "unhashed"
-    # Valid: will read the .metadata file and check its hash matches.
-    Sha256 = "sha256"
-    # Invalid: will error out after checking the hash.
-    WrongHash = "wrong-hash"
-    # Invalid: will error out after failing to fetch the .metadata file.
-    NoFile = "no-file"
-
-
-@dataclass(frozen=True)
-class Package:
-    """Mock package structure used to generate a PyPI repository.
-
-    Package name and version should correspond to sdists (.tar.gz files) in our test
-    data."""
-
-    name: str
-    version: str
-    filename: str
-    metadata: MetadataKind
-    # This will override any dependencies specified in the actual dist's METADATA.
-    requires_dist: Tuple[str, ...] = ()
-    # This will override the Name specified in the actual dist's METADATA.
-    metadata_name: Optional[str] = None
-
-    def metadata_filename(self) -> str:
-        """This is specified by PEP 658."""
-        return f"{self.filename}.metadata"
-
-    def generate_additional_tag(self) -> str:
-        """This gets injected into the <a> tag in the generated PyPI index page for this
-        package."""
-        if self.metadata == MetadataKind.No:
-            return ""
-        if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]:
-            return 'data-dist-info-metadata="true"'
-        if self.metadata == MetadataKind.WrongHash:
-            return 'data-dist-info-metadata="sha256=WRONG-HASH"'
-        assert self.metadata == MetadataKind.Sha256
-        checksum = sha256(self.generate_metadata()).hexdigest()
-        return f'data-dist-info-metadata="sha256={checksum}"'
-
-    def requires_str(self) -> str:
-        if not self.requires_dist:
-            return ""
-        joined = " and ".join(self.requires_dist)
-        return f"Requires-Dist: {joined}"
-
-    def generate_metadata(self) -> bytes:
-        """This is written to `self.metadata_filename()` and will override the actual
-        dist's METADATA, unless `self.metadata == MetadataKind.NoFile`."""
-        return dedent(
-            f"""\
-            Metadata-Version: 2.1
-            Name: {self.metadata_name or self.name}
-            Version: {self.version}
-            {self.requires_str()}
-            """
-        ).encode("utf-8")
-
-
 @pytest.fixture(scope="function")
-def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]:
-    """Generate a PyPI package index.html within a temporary local directory."""
-    html_dir = tmpdir / "index_html_content"
-    html_dir.mkdir()
-
-    def generate_index_html_subdir(index_html: str) -> Path:
-        """Create a new subdirectory after a UUID and write an index.html."""
-        new_subdir = html_dir / uuid.uuid4().hex
-        new_subdir.mkdir()
-
-        with open(new_subdir / "index.html", "w") as f:
-            f.write(index_html)
-
-        return new_subdir
-
-    return generate_index_html_subdir
-
-
-@pytest.fixture(scope="function")
-def html_index_for_packages(
-    shared_data: TestData,
-    write_index_html_content: Callable[[str], Path],
-) -> Callable[..., Path]:
-    """Generate a PyPI HTML package index within a local directory pointing to
-    blank data."""
-
-    def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path:
-        """
-        Produce a PyPI directory structure pointing to the specified packages.
-        """
-        # (1) Generate the content for a PyPI index.html.
-        pkg_links = "\n".join(
-            f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in packages.keys()
-        )
-        index_html = f"""\
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta name="pypi:repository-version" content="1.0">
-    <title>Simple index</title>
-  </head>
-  <body>
-{pkg_links}
-  </body>
-</html>"""
-        # (2) Generate the index.html in a new subdirectory of the temp directory.
-        index_html_subdir = write_index_html_content(index_html)
-
-        # (3) Generate subdirectories for individual packages, each with their own
-        # index.html.
-        for pkg, links in packages.items():
-            pkg_subdir = index_html_subdir / pkg
-            pkg_subdir.mkdir()
-
-            download_links: List[str] = []
-            for package_link in links:
-                # (3.1) Generate the <a> tag which pip can crawl pointing to this
-                # specific package version.
-                download_links.append(
-                    f'    <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>'  # noqa: E501
-                )
-                # (3.2) Copy over the corresponding file in `shared_data.packages`.
-                shutil.copy(
-                    shared_data.packages / package_link.filename,
-                    pkg_subdir / package_link.filename,
-                )
-                # (3.3) Write a metadata file, if applicable.
-                if package_link.metadata != MetadataKind.NoFile:
-                    with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
-                        f.write(package_link.generate_metadata())
-
-            # (3.4) After collating all the download links and copying over the files,
-            # write an index.html with the generated download links for each
-            # copied file for this specific package name.
-            download_links_str = "\n".join(download_links)
-            pkg_index_content = f"""\
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta name="pypi:repository-version" content="1.0">
-    <title>Links for {pkg}</title>
-  </head>
-  <body>
-    <h1>Links for {pkg}</h1>
-{download_links_str}
-  </body>
-</html>"""
-            with open(pkg_subdir / "index.html", "w") as f:
-                f.write(pkg_index_content)
-
-        return index_html_subdir
-
-    return generate_html_index_for_packages
-
-
-@pytest.fixture(scope="function")
-def download_generated_html_index(
+def download_local_html_index(
     script: PipTestEnvironment,
-    html_index_for_packages: Callable[[Dict[str, List[Package]]], Path],
+    html_index_for_packages: Path,
     tmpdir: Path,
 ) -> Callable[..., Tuple[TestPipResult, Path]]:
     """Execute `pip download` against a generated PyPI index."""
     download_dir = tmpdir / "download_dir"
 
     def run_for_generated_index(
-        packages: Dict[str, List[Package]],
         args: List[str],
         allow_error: bool = False,
     ) -> Tuple[TestPipResult, Path]:
@@ -1419,13 +1251,12 @@ def download_generated_html_index(
         Produce a PyPI directory structure pointing to the specified packages, then
         execute `pip download -i ...` pointing to our generated index.
         """
-        index_dir = html_index_for_packages(packages)
        pip_args = [
             "download",
             "-d",
             str(download_dir),
             "-i",
-            path_to_url(str(index_dir)),
+            path_to_url(str(html_index_for_packages)),
             *args,
         ]
         result = script.pip(*pip_args, allow_error=allow_error)
@@ -1434,84 +1265,35 @@ def download_generated_html_index(
     return run_for_generated_index
 
 
-# The package database we generate for testing PEP 658 support.
-_simple_packages: Dict[str, List[Package]] = {
-    "simple": [
-        Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
-        Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
-        # This will raise a hashing error.
-        Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
-    ],
-    "simple2": [
-        # Override the dependencies here in order to force pip to download
-        # simple-1.0.tar.gz as well.
-        Package(
-            "simple2",
-            "1.0",
-            "simple2-1.0.tar.gz",
-            MetadataKind.Unhashed,
-            ("simple==1.0",),
-        ),
-        # This will raise an error when pip attempts to fetch the metadata file.
-        Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
-        # This has a METADATA file with a mismatched name.
-        Package(
-            "simple2",
-            "3.0",
-            "simple2-3.0.tar.gz",
-            MetadataKind.Sha256,
-            metadata_name="not-simple2",
-        ),
-    ],
-    "colander": [
-        # Ensure we can read the dependencies from a metadata file within a wheel
-        # *without* PEP 658 metadata.
-        Package(
-            "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No
-        ),
-    ],
-    "compilewheel": [
-        # Ensure we can override the dependencies of a wheel file by injecting PEP
-        # 658 metadata.
-        Package(
-            "compilewheel",
-            "1.0",
-            "compilewheel-1.0-py2.py3-none-any.whl",
-            MetadataKind.Unhashed,
-            ("simple==1.0",),
-        ),
-    ],
-    "has-script": [
-        # Ensure we check PEP 658 metadata hashing errors for wheel files.
-        Package(
-            "has-script",
-            "1.0",
-            "has.script-1.0-py2.py3-none-any.whl",
-            MetadataKind.WrongHash,
-        ),
-    ],
-    "translationstring": [
-        Package(
-            "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No
-        ),
-    ],
-    "priority": [
-        # Ensure we check for a missing metadata file for wheels.
-        Package(
-            "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile
-        ),
-    ],
-    "requires-simple-extra": [
-        # Metadata name is not canonicalized.
-        Package(
-            "requires-simple-extra",
-            "0.1",
-            "requires_simple_extra-0.1-py2.py3-none-any.whl",
-            MetadataKind.Sha256,
-            metadata_name="Requires_Simple.Extra",
-        ),
-    ],
-}
+@pytest.fixture(scope="function")
+def download_server_html_index(
+    script: PipTestEnvironment,
+    tmpdir: Path,
+    html_index_with_onetime_server: http.server.ThreadingHTTPServer,
+) -> Callable[..., Tuple[TestPipResult, Path]]:
+    """Execute `pip download` against a generated PyPI index."""
+    download_dir = tmpdir / "download_dir"
+
+    def run_for_generated_index(
+        args: List[str],
+        allow_error: bool = False,
+    ) -> Tuple[TestPipResult, Path]:
+        """
+        Execute `pip download -i ...` pointing to the generated index served by the
+        mock server.
+        """
+        pip_args = [
+            "download",
+            "-d",
+            str(download_dir),
+            "-i",
+            "http://localhost:8000",
+            *args,
+        ]
+        result = script.pip(*pip_args, allow_error=allow_error)
+        return (result, download_dir)
+
+    return run_for_generated_index
 
 
 @pytest.mark.parametrize(
     "requirement_to_download, expected_outputs",
     [
@@ -1530,14 +1312,13 @@ _simple_packages: Dict[str, List[Package]] = {
     ],
 )
 def test_download_metadata(
-    download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+    download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
     requirement_to_download: str,
     expected_outputs: List[str],
 ) -> None:
     """Verify that if a data-dist-info-metadata attribute is present, then it is used
     instead of the actual dist's METADATA."""
-    _, download_dir = download_generated_html_index(
-        _simple_packages,
+    _, download_dir = download_local_html_index(
         [requirement_to_download],
     )
     assert sorted(os.listdir(download_dir)) == expected_outputs
@@ -1557,14 +1338,13 @@ def test_download_metadata(
 @pytest.mark.parametrize(
     "requirement_to_download, real_hash",
     [
         (
             "simple==3.0",
             "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
         ),
         (
             "has-script",
             "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
         ),
     ],
 )
 def test_incorrect_metadata_hash(
-    download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+    download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
     requirement_to_download: str,
     real_hash: str,
 ) -> None:
     """Verify that if a hash for data-dist-info-metadata is provided, it must match the
     actual hash of the metadata file."""
-    result, _ = download_generated_html_index(
-        _simple_packages,
+    result, _ = download_local_html_index(
         [requirement_to_download],
         allow_error=True,
     )
@@ -1583,15 +1363,14 @@ def test_incorrect_metadata_hash(
 @pytest.mark.parametrize(
     "requirement_to_download, expected_url",
     [
         ("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
         ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
     ],
 )
 def test_metadata_not_found(
-    download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+    download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
     requirement_to_download: str,
     expected_url: str,
 ) -> None:
     """Verify that if a data-dist-info-metadata attribute is provided, that pip will
     fetch the .metadata file at the location specified by PEP 658, and error if
     unavailable."""
-    result, _ = download_generated_html_index(
-        _simple_packages,
+    result, _ = download_local_html_index(
         [requirement_to_download],
         allow_error=True,
     )
@@ -1604,11 +1383,10 @@ def test_metadata_not_found(
 
 
 def test_produces_error_for_mismatched_package_name_in_metadata(
-    download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+    download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
 ) -> None:
     """Verify that the package name from the metadata matches the requested package."""
-    result, _ = download_generated_html_index(
-        _simple_packages,
+    result, _ = download_local_html_index(
         ["simple2==3.0"],
         allow_error=True,
     )
@@ -1628,7 +1406,7 @@ def test_produces_error_for_mismatched_package_name_in_metadata(
     ),
 )
 def test_canonicalizes_package_name_before_verifying_metadata(
-    download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]],
+    download_local_html_index: Callable[..., Tuple[TestPipResult, Path]],
     requirement: str,
 ) -> None:
     """Verify that the package name from the command line and the package's
     METADATA should correspond.
 
     Regression test for https://github.com/pypa/pip/issues/12038
     """
-    result, download_dir = download_generated_html_index(
-        _simple_packages,
+    result, download_dir = download_local_html_index(
         [requirement],
         allow_error=True,
     )

From 50a2fb4f9fca0427c192608c30f3cf536b4e4ed4 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Wed, 2 Aug 2023 00:02:04 -0400
Subject: [PATCH 2/6] add mock server to test that each dist is downloaded
 exactly once

---
 tests/conftest.py                 | 48 +++++++++++++++++++++++++++++
 tests/functional/test_download.py | 51 +++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index f481e06c8..25581af9a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -992,3 +992,51 @@ def html_index_for_packages(
             f.write(pkg_index_content)
 
     return html_dir
+
+
+class OneTimeDownloadHandler(http.server.SimpleHTTPRequestHandler):
+    """Serve files from the current directory, but error if a file is downloaded more
+    than once."""
+
+    _seen_paths: ClassVar[Set[str]] = set()
+
+    def do_GET(self) -> None:
+        if self.path in self._seen_paths:
+            self.send_error(
+                http.HTTPStatus.NOT_FOUND,
+                f"File {self.path} not available more than once!",
+            )
+            return
+        super().do_GET()
+        if not (self.path.endswith("/") or self.path.endswith(".metadata")):
+            self._seen_paths.add(self.path)
+
+
+@pytest.fixture(scope="function")
+def html_index_with_onetime_server(
+    html_index_for_packages: Path,
+) -> Iterator[http.server.ThreadingHTTPServer]:
+    """Serve files from a generated PyPI index, erroring if a file is downloaded more
+    than once.
+
+    Provide `-i http://localhost:8000` to pip invocations to point them at this server.
+    assert expected_msg in result.stderr
+
+
 @pytest.mark.parametrize(
     "requirement_to_download, real_hash",
     [

From 22637722aa7c9da0aaf58be82b6bef16d6efd097 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 20:45:26 -0400
Subject: [PATCH 3/6] fix #11847 for sdists

---
 src/pip/_internal/operations/prepare.py | 50 ++++++++++++++++---------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 8402be01b..81bf48fbb 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -7,7 +7,7 @@
 import mimetypes
 import os
 import shutil
-from typing import Dict, Iterable, List, Optional
+from typing import Dict, Iterable, List, Optional, Set
 
 from pip._vendor.packaging.utils import canonicalize_name
 
@@ -474,6 +474,8 @@ class RequirementPreparer:
             assert req.link
             links_to_fully_download[req.link] = req
 
+        reqs_with_newly_unpacked_source_dirs: Set[Link] = set()
+
         batch_download = self._batch_download(
             links_to_fully_download.keys(),
             temp_dir,
@@ -481,25 +483,35 @@ class RequirementPreparer:
         for link, (filepath, _) in batch_download:
             logger.debug("Downloading link %s to %s", link, filepath)
             req = links_to_fully_download[link]
+            # Record the downloaded file path so wheel reqs can extract a Distribution
+            # in .get_dist().
             req.local_file_path = filepath
-            # TODO: This needs fixing for sdists
-            # This is an emergency fix for #11847, which reports that
-            # distributions get downloaded twice when metadata is loaded
-            # from a PEP 658 standalone metadata file. Setting _downloaded
-            # fixes this for wheels, but breaks the sdist case (tests
-            # test_download_metadata). As PyPI is currently only serving
-            # metadata for wheels, this is not an immediate issue.
-            # Fixing the problem properly looks like it will require a
-            # complete refactoring of the `prepare_linked_requirements_more`
-            # logic, and I haven't a clue where to start on that, so for now
-            # I have fixed the issue *just* for wheels.
-            if req.is_wheel:
-                self._downloaded[req.link.url] = filepath
+            # Record that the file is downloaded so we don't do it again in
+            # _prepare_linked_requirement().
+            self._downloaded[req.link.url] = filepath
+
+            # If this is an sdist, we need to unpack it and set the .source_dir
+            # immediately after downloading, as _prepare_linked_requirement() assumes
+            # the req is either not downloaded at all, or both downloaded and
+            # unpacked. The downloading and unpacking is typically done with
+            # unpack_url(), but we separate the downloading and unpacking steps here in
+            # order to use the BatchDownloader.
+            if not req.is_wheel:
+                hashes = self._get_linked_req_hashes(req)
+                assert filepath == _check_download_dir(req.link, temp_dir, hashes)
+                self._ensure_link_req_src_dir(req, parallel_builds)
+                unpack_file(filepath, req.source_dir)
+                reqs_with_newly_unpacked_source_dirs.add(req.link)
 
         # This step is necessary to ensure all lazy wheels are processed
         # successfully by the 'download', 'wheel', and 'install' commands.
         for req in partially_downloaded_reqs:
-            self._prepare_linked_requirement(req, parallel_builds)
+            self._prepare_linked_requirement(
+                req,
+                parallel_builds,
+                source_dir_exists_already=req.link
+                in reqs_with_newly_unpacked_source_dirs,
+            )
 
     def prepare_linked_requirement(
         self, req: InstallRequirement, parallel_builds: bool = False
@@ -570,7 +582,10 @@ class RequirementPreparer:
         )
 
     def _prepare_linked_requirement(
-        self, req: InstallRequirement, parallel_builds: bool
+        self,
+        req: InstallRequirement,
+        parallel_builds: bool,
+        source_dir_exists_already: bool = False,
     ) -> BaseDistribution:
         assert req.link
         link = req.link
@@ -602,7 +617,8 @@ class RequirementPreparer:
             req.link = req.cached_wheel_source_link
             link = req.link
 
-        self._ensure_link_req_src_dir(req, parallel_builds)
+        if not source_dir_exists_already:
+            self._ensure_link_req_src_dir(req, parallel_builds)
         if link.is_existing_dir():
             local_file = None

From 957ad95c7d2ca70077a61a3adb551824818f929b Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Tue, 1 Aug 2023 21:18:15 -0400
Subject: [PATCH 4/6] add news entry

---
 news/12191.bugfix.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 news/12191.bugfix.rst

diff --git a/news/12191.bugfix.rst b/news/12191.bugfix.rst
new file mode 100644
index 000000000..1f384835f
--- /dev/null
+++ b/news/12191.bugfix.rst
@@ -0,0 +1 @@
+Prevent downloading sdists twice when PEP 658 metadata is present.

From bfa8a5532d45815d2229ba2e2a920fde6bffc800 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Thu, 3 Aug 2023 05:50:37 -0400
Subject: [PATCH 5/6] clean up duplicated code

---
 src/pip/_internal/operations/prepare.py | 54 +++++--------------------
 src/pip/_internal/req/req_install.py    | 29 ++++++++++++-
 tests/conftest.py                       | 52 +++++++++++++-----------
 3 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 81bf48fbb..1b32d7eec 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -7,7 +7,8 @@
 import mimetypes
 import os
 import shutil
-from typing import Dict, Iterable, List, Optional, Set
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional
 
 from pip._vendor.packaging.utils import canonicalize_name
 
@@ -20,7 +21,6 @@ from pip._internal.exceptions import (
     InstallationError,
     MetadataInconsistent,
     NetworkConnectionError,
-    PreviousBuildDirError,
     VcsHashUnsupported,
 )
 from pip._internal.index.package_finder import PackageFinder
@@ -47,7 +47,6 @@ from pip._internal.utils.misc import (
     display_path,
     hash_file,
     hide_url,
-    is_installable_dir,
 )
 from pip._internal.utils.temp_dir import TempDirectory
 from pip._internal.utils.unpacking import unpack_file
@@ -319,21 +318,7 @@ class RequirementPreparer:
             autodelete=True,
             parallel_builds=parallel_builds,
         )
-
-        # If a checkout exists, it's unwise to keep going. version
-        # inconsistencies are logged later, but do not fail the
-        # installation.
-        # FIXME: this won't upgrade when there's an existing
-        # package unpacked in `req.source_dir`
-        # TODO: this check is now probably dead code
-        if is_installable_dir(req.source_dir):
-            raise PreviousBuildDirError(
-                "pip can't proceed with requirements '{}' due to a"
-                "pre-existing build directory ({}). This is likely "
-                "due to a previous installation that failed . pip is "
-                "being responsible and not assuming it can delete this. "
" - "Please delete it and try again.".format(req, req.source_dir) - ) + req.ensure_pristine_source_checkout() def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # By the time this is called, the requirement's link should have @@ -474,8 +459,6 @@ class RequirementPreparer: assert req.link links_to_fully_download[req.link] = req - reqs_with_newly_unpacked_source_dirs: Set[Link] = set() - batch_download = self._batch_download( links_to_fully_download.keys(), temp_dir, @@ -490,28 +473,17 @@ class RequirementPreparer: # _prepare_linked_requirement(). self._downloaded[req.link.url] = filepath - # If this is an sdist, we need to unpack it and set the .source_dir - # immediately after downloading, as _prepare_linked_requirement() assumes - # the req is either not downloaded at all, or both downloaded and - # unpacked. The downloading and unpacking is is typically done with - # unpack_url(), but we separate the downloading and unpacking steps here in - # order to use the BatchDownloader. + # If this is an sdist, we need to unpack it after downloading, but the + # .source_dir won't be set up until we are in _prepare_linked_requirement(). + # Add the downloaded archive to the install requirement to unpack after + # preparing the source dir. if not req.is_wheel: - hashes = self._get_linked_req_hashes(req) - assert filepath == _check_download_dir(req.link, temp_dir, hashes) - self._ensure_link_req_src_dir(req, parallel_builds) - unpack_file(filepath, req.source_dir) - reqs_with_newly_unpacked_source_dirs.add(req.link) + req.needs_unpacked_archive(Path(filepath)) # This step is necessary to ensure all lazy wheels are processed # successfully by the 'download', 'wheel', and 'install' commands. for req in partially_downloaded_reqs: - self._prepare_linked_requirement( - req, - parallel_builds, - source_dir_exists_already=req.link - in reqs_with_newly_unpacked_source_dirs, - ) + self._prepare_linked_requirement(req, parallel_builds) def prepare_linked_requirement( self, req: InstallRequirement, parallel_builds: bool = False @@ -582,10 +554,7 @@ class RequirementPreparer: ) def _prepare_linked_requirement( - self, - req: InstallRequirement, - parallel_builds: bool, - source_dir_exists_already: bool = False, + self, req: InstallRequirement, parallel_builds: bool ) -> BaseDistribution: assert req.link link = req.link @@ -617,8 +586,7 @@ class RequirementPreparer: req.link = req.cached_wheel_source_link link = req.link - if not source_dir_exists_already: - self._ensure_link_req_src_dir(req, parallel_builds) + self._ensure_link_req_src_dir(req, parallel_builds) if link.is_existing_dir(): local_file = None diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 542d6c78f..614c6de9c 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -6,6 +6,7 @@ import sys import uuid import zipfile from optparse import Values +from pathlib import Path from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union from pip._vendor.packaging.markers import Marker @@ -17,7 +18,7 @@ from pip._vendor.packaging.version import parse as parse_version from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment -from pip._internal.exceptions import InstallationError +from pip._internal.exceptions import InstallationError, PreviousBuildDirError from pip._internal.locations import get_scheme from pip._internal.metadata import ( 
     BaseDistribution,
@@ -47,11 +48,13 @@ from pip._internal.utils.misc import (
     backup_dir,
     display_path,
     hide_url,
+    is_installable_dir,
     redact_auth_from_url,
 )
 from pip._internal.utils.packaging import safe_extra
 from pip._internal.utils.subprocess import runner_with_spinner_message
 from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
+from pip._internal.utils.unpacking import unpack_file
 from pip._internal.utils.virtualenv import running_under_virtualenv
 from pip._internal.vcs import vcs
 
@@ -180,6 +183,9 @@ class InstallRequirement:
         # This requirement needs more preparation before it can be built
         self.needs_more_preparation = False
 
+        # This requirement needs to be unpacked before it can be installed.
+        self._archive_source: Optional[Path] = None
+
     def __str__(self) -> str:
         if self.req:
             s = str(self.req)
@@ -645,6 +651,27 @@ class InstallRequirement:
             parallel_builds=parallel_builds,
         )
 
+    def needs_unpacked_archive(self, archive_source: Path) -> None:
+        assert self._archive_source is None
+        self._archive_source = archive_source
+
+    def ensure_pristine_source_checkout(self) -> None:
+        """Ensure the source directory has not yet been built in."""
+        assert self.source_dir is not None
+        if self._archive_source is not None:
+            unpack_file(str(self._archive_source), self.source_dir)
+        elif is_installable_dir(self.source_dir):
+            # If a checkout exists, it's unwise to keep going.
+            # version inconsistencies are logged later, but do not fail
+            # the installation.
+            raise PreviousBuildDirError(
+                "pip can't proceed with requirements '{}' due to a "
+                "pre-existing build directory ({}). This is likely "
+                "due to a previous installation that failed. pip is "
+                "being responsible and not assuming it can delete this. "
+                "Please delete it and try again.".format(self, self.source_dir)
+            )
+
     # For editable installations
     def update_editable(self) -> None:
         if not self.link:
diff --git a/tests/conftest.py b/tests/conftest.py
index 25581af9a..cd9931c66 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -935,17 +935,21 @@ def html_index_for_packages(
     pkg_links = "\n".join(
         f'    <a href="{pkg}/index.html">{pkg}</a>' for pkg in fake_packages.keys()
     )
-    index_html = f"""\
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta name="pypi:repository-version" content="1.0">
-    <title>Simple index</title>
-  </head>
-  <body>
-{pkg_links}
-  </body>
-</html>"""
+    # Output won't be nicely indented because dedent() acts after f-string
+    # arg insertion.
+    index_html = dedent(
+        f"""\
+        <!DOCTYPE html>
+        <html>
+          <head>
+            <meta name="pypi:repository-version" content="1.0">
+            <title>Simple index</title>
+          </head>
+          <body>
+        {pkg_links}
+          </body>
+        </html>"""
+    )
     # (2) Generate the index.html in a new subdirectory of the temp directory.
     (html_dir / "index.html").write_text(index_html)
 
@@ -976,18 +980,20 @@ def html_index_for_packages(
             # (3.4) After collating all the download links and copying over the files,
             # write an index.html with the generated download links for each
             # copied file for this specific package name.
             download_links_str = "\n".join(download_links)
-            pkg_index_content = f"""\
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta name="pypi:repository-version" content="1.0">
-    <title>Links for {pkg}</title>
-  </head>
-  <body>
-    <h1>Links for {pkg}</h1>
-{download_links_str}
-  </body>
-</html>"""
+            pkg_index_content = dedent(
+                f"""\
+                <!DOCTYPE html>
+                <html>
+                  <head>
+                    <meta name="pypi:repository-version" content="1.0">
+                    <title>Links for {pkg}</title>
+                  </head>
+                  <body>
+                    <h1>Links for {pkg}</h1>
+                {download_links_str}
+                  </body>
+                </html>"""
+            )
             with open(pkg_subdir / "index.html", "w") as f:
                 f.write(pkg_index_content)

From 39da6e051a30e90b12608ca90e96e554d82fd15f Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Mon, 14 Aug 2023 07:55:55 -0400
Subject: [PATCH 6/6] use f-string in exception message

---
 src/pip/_internal/req/req_install.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index 614c6de9c..8110114ca 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -665,11 +665,11 @@ class InstallRequirement:
             # version inconsistencies are logged later, but do not fail
             # the installation.
             raise PreviousBuildDirError(
-                "pip can't proceed with requirements '{}' due to a "
-                "pre-existing build directory ({}). This is likely "
+                f"pip can't proceed with requirements '{self}' due to a "
+                f"pre-existing build directory ({self.source_dir}). This is likely "
                 "due to a previous installation that failed. pip is "
                 "being responsible and not assuming it can delete this. "
-                "Please delete it and try again.".format(self, self.source_dir)
+                "Please delete it and try again."
             )
 
     # For editable installations