1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Merge pull request #11137 from sbidoul/download-info-sbi

Add download_info: DirectUrl to InstallRequirement
This commit is contained in:
Stéphane Bidoul 2022-06-01 08:21:12 +02:00 committed by GitHub
commit e58a8a591b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 259 additions and 13 deletions

3
news/11137.feature.rst Normal file
View file

@ -0,0 +1,3 @@
Record in wheel cache entries the URL of the original artifact that was downloaded
to build the cached wheels. The record is named ``origin.json`` and uses the PEP 610
Direct URL format.

View file

@ -5,12 +5,14 @@ import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
@ -19,6 +21,8 @@ from pip._internal.utils.urls import path_to_url
logger = logging.getLogger(__name__)
ORIGIN_JSON_NAME = "origin.json"
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
@ -204,6 +208,10 @@ class CacheEntry:
):
self.link = link
self.persistent = persistent
self.origin: Optional[DirectUrl] = None
origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
if origin_direct_url_path.exists():
self.origin = DirectUrl.from_json(origin_direct_url_path.read_text())
class WheelCache(Cache):
@ -262,3 +270,20 @@ class WheelCache(Cache):
return CacheEntry(retval, persistent=False)
return None
@staticmethod
def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
    """Write ``download_info`` as the origin record of a wheel cache entry.

    The record is the file named ``ORIGIN_JSON_NAME`` inside ``cache_dir``.
    When such a file already exists it is read first: a warning is logged if
    its URL differs from ``download_info.url``, or if the file cannot be read
    or parsed. In every case the file is then (over)written with the JSON
    form of ``download_info``.
    """
    origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
    if origin_path.is_file():
        try:
            # Pin the encoding so the record does not depend on the locale.
            origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
        except Exception as exc:
            # A damaged record must not abort the build: report it and fall
            # through to the write below, which replaces it.
            logger.warning(
                "Could not read origin file %s in cache entry (%s). "
                "Will attempt to overwrite it.",
                origin_path,
                exc,
            )
        else:
            # TODO: use DirectUrl.equivalent when
            # https://github.com/pypa/pip/pull/10564 is merged.
            if origin.url != download_info.url:
                logger.warning(
                    "Origin URL %s in cache entry %s does not match download URL %s. "
                    "This is likely a pip bug or a cache corruption issue.",
                    origin.url,
                    cache_dir,
                    download_info.url,
                )
    origin_path.write_text(download_info.to_json(), encoding="utf-8")

View file

@ -25,6 +25,7 @@ from pip._internal.exceptions import (
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.network.download import BatchDownloader, Downloader
@ -35,9 +36,18 @@ from pip._internal.network.lazy_wheel import (
from pip._internal.network.session import PipSession
from pip._internal.operations.build.build_tracker import BuildTracker
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import (
direct_url_for_editable,
direct_url_from_link,
)
from pip._internal.utils.hashes import Hashes, MissingHashes
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import display_path, hide_url, is_installable_dir
from pip._internal.utils.misc import (
display_path,
hash_file,
hide_url,
is_installable_dir,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
@ -489,6 +499,23 @@ class RequirementPreparer:
hashes.check_against_path(file_path)
local_file = File(file_path, content_type=None)
# If download_info is set, we got it from the wheel cache.
if req.download_info is None:
# Editables don't go through this function (see
# prepare_editable_requirement).
assert not req.editable
req.download_info = direct_url_from_link(link, req.source_dir)
# Make sure we have a hash in download_info. If we got it as part of the
# URL, it will have been verified and we can rely on it. Otherwise we
# compute it from the downloaded file.
if (
isinstance(req.download_info.info, ArchiveInfo)
and not req.download_info.info.hash
and local_file
):
hash = hash_file(local_file.path)[0].hexdigest()
req.download_info.info.hash = f"sha256={hash}"
# For use in later processing,
# preserve the file path on the requirement.
if local_file:
@ -547,6 +574,8 @@ class RequirementPreparer:
)
req.ensure_has_source_dir(self.src_dir)
req.update_editable()
assert req.source_dir
req.download_info = direct_url_for_editable(req.unpacked_source_directory)
dist = _get_prepared_distribution(
req,

View file

@ -26,6 +26,7 @@ from pip._internal.metadata import (
get_default_environment,
get_directory_distribution,
)
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.operations.build.metadata import generate_metadata
from pip._internal.operations.build.metadata_editable import generate_editable_metadata
@ -112,6 +113,10 @@ class InstallRequirement:
self.link = self.original_link = link
self.original_link_is_in_wheel_cache = False
# Information about the location of the artifact that was downloaded. This
# property is guaranteed to be set in resolver results.
self.download_info: Optional[DirectUrl] = None
# Path to any downloaded or already-existing package.
self.local_file_path: Optional[str] = None
if self.link and self.link.is_file:
@ -762,6 +767,7 @@ class InstallRequirement:
if self.is_wheel:
assert self.local_file_path
direct_url = None
# TODO this can be refactored to direct_url = self.download_info
if self.editable:
direct_url = direct_url_for_editable(self.unpacked_source_directory)
elif self.original_link:

View file

@ -45,6 +45,7 @@ from pip._internal.req.req_set import RequirementSet
from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider
from pip._internal.utils import compatibility_tags
from pip._internal.utils.compatibility_tags import get_supported
from pip._internal.utils.direct_url_helpers import direct_url_from_link
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import normalize_version_info
from pip._internal.utils.packaging import check_requires_python
@ -431,6 +432,14 @@ class Resolver(BaseResolver):
logger.debug("Using cached wheel link: %s", cache_entry.link)
if req.link is req.original_link and cache_entry.persistent:
req.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None:
req.download_info = cache_entry.origin
else:
# Legacy cache entry that does not have origin.json.
# download_info may miss the archive_info.hash field.
req.download_info = direct_url_from_link(
req.link, link_is_in_wheel_cache=cache_entry.persistent
)
req.link = cache_entry.link
def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution:

View file

@ -18,6 +18,7 @@ from pip._internal.req.constructors import (
install_req_from_line,
)
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import direct_url_from_link
from pip._internal.utils.misc import normalize_version_info
from .base import Candidate, CandidateVersion, Requirement, format_name
@ -281,12 +282,17 @@ class LinkCandidate(_InstallRequirementBackedCandidate):
version, wheel_version, name
)
if (
cache_entry is not None
and cache_entry.persistent
and template.link is template.original_link
):
ireq.original_link_is_in_wheel_cache = True
if cache_entry is not None:
if cache_entry.persistent and template.link is template.original_link:
ireq.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None:
ireq.download_info = cache_entry.origin
else:
# Legacy cache entry that does not have origin.json.
# download_info may miss the archive_info.hash field.
ireq.download_info = direct_url_from_link(
source_link, link_is_in_wheel_cache=cache_entry.persistent
)
super().__init__(
link=link,

View file

@ -354,6 +354,12 @@ def build(
req.editable and req.permit_editable_wheels,
)
if wheel_file:
# Record the download origin in the cache
if req.download_info is not None:
# download_info is guaranteed to be set because when we build an
# InstallRequirement it has been through the preparer before, but
# let's be cautious.
wheel_cache.record_download_origin(cache_dir, req.download_info)
# Update the link for this.
req.link = Link(path_to_url(wheel_file))
req.local_file_path = req.link.file_path

View file

@ -1550,9 +1550,9 @@ def test_install_builds_wheels(script: PipTestEnvironment, data: TestData) -> No
)
# Must have installed it all
assert expected in str(res), str(res)
wheels = []
wheels: List[str] = []
for _, _, files in os.walk(wheels_cache):
wheels.extend(files)
wheels.extend(f for f in files if f.endswith(".whl"))
# and built wheels for upper and wheelbroken
assert "Building wheel for upper" in str(res), str(res)
assert "Building wheel for wheelb" in str(res), str(res)

View file

@ -5,13 +5,14 @@ import shutil
import sys
import tempfile
from functools import partial
from typing import Iterator, Tuple, cast
from typing import Iterator, Optional, Tuple, cast
from unittest import mock
import pytest
from pip._vendor.packaging.markers import Marker
from pip._vendor.packaging.requirements import Requirement
from pip._internal.cache import WheelCache
from pip._internal.commands import create_command
from pip._internal.commands.install import InstallCommand
from pip._internal.exceptions import (
@ -22,6 +23,9 @@ from pip._internal.exceptions import (
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import select_backend
from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
from pip._internal.network.session import PipSession
from pip._internal.operations.build.build_tracker import get_build_tracker
from pip._internal.operations.prepare import RequirementPreparer
@ -42,7 +46,7 @@ from pip._internal.req.req_file import (
)
from pip._internal.resolution.legacy.resolver import Resolver
from pip._internal.utils.urls import path_to_url
from tests.lib import TestData, make_test_finder, requirements_file
from tests.lib import TestData, make_test_finder, requirements_file, wheel
from tests.lib.path import Path
@ -76,7 +80,10 @@ class TestRequirementSet:
@contextlib.contextmanager
def _basic_resolver(
self, finder: PackageFinder, require_hashes: bool = False
self,
finder: PackageFinder,
require_hashes: bool = False,
wheel_cache: Optional[WheelCache] = None,
) -> Iterator[Resolver]:
make_install_req = partial(
install_req_from_req_string,
@ -105,7 +112,7 @@ class TestRequirementSet:
preparer=preparer,
make_install_req=make_install_req,
finder=finder,
wheel_cache=None,
wheel_cache=wheel_cache,
use_user_site=False,
upgrade_strategy="to-satisfy-only",
ignore_dependencies=False,
@ -342,6 +349,161 @@ class TestRequirementSet:
)
)
def test_download_info_find_links(self, data: TestData) -> None:
    """Check download_info of a requirement resolved through find_links."""
    link_finder = make_test_finder(find_links=[data.find_links])
    with self._basic_resolver(link_finder) as resolver:
        requested = get_processed_req_from_line("simple")
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        # The recorded origin must be an archive and carry a hash.
        assert isinstance(download_info.info, ArchiveInfo)
        assert download_info.info.hash
@pytest.mark.network
def test_download_info_index_url(self) -> None:
    """Check download_info of a requirement resolved through an index URL."""
    index_finder = make_test_finder(index_urls=["https://pypi.org/simple"])
    with self._basic_resolver(index_finder) as resolver:
        requested = get_processed_req_from_line("initools")
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        assert isinstance(download_info.info, ArchiveInfo)
@pytest.mark.network
def test_download_info_web_archive(self) -> None:
    """Check download_info of a requirement given as a direct archive URL."""
    archive_url = "https://github.com/pypa/pip-test-package/tarball/0.1.1"
    expected_hash = (
        "sha256="
        "ad977496000576e1b6c41f6449a9897087ce9da6db4f15b603fe8372af4bf3c6"
    )
    default_finder = make_test_finder()
    with self._basic_resolver(default_finder) as resolver:
        requested = get_processed_req_from_line(
            f"pip-test-package @ {archive_url}"
        )
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        # The origin must point back at the URL that was requested.
        assert download_info.url == archive_url
        assert isinstance(download_info.info, ArchiveInfo)
        assert download_info.info.hash == expected_hash
def test_download_info_archive_legacy_cache(
    self, tmp_path: Path, shared_data: TestData
) -> None:
    """Check that a cache entry without origin.json yields no archive hash."""
    url = path_to_url(shared_data.packages / "simple-1.0.tar.gz")
    default_finder = make_test_finder()
    cache_root = str(tmp_path / "cache")
    wheel_cache = WheelCache(cache_root, FormatControl())
    # Populate the cache entry with a wheel only; no origin.json is written.
    entry_dir = wheel_cache.get_path_for_link(Link(url))
    Path(entry_dir).mkdir(parents=True)
    cached_wheel = wheel.make_wheel(name="simple", version="1.0")
    cached_wheel.save_to_dir(entry_dir)
    with self._basic_resolver(default_finder, wheel_cache=wheel_cache) as resolver:
        requested = get_processed_req_from_line(f"simple @ {url}")
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        assert req.original_link_is_in_wheel_cache
        download_info = req.download_info
        assert download_info
        assert download_info.url == url
        assert isinstance(download_info.info, ArchiveInfo)
        assert not download_info.info.hash
def test_download_info_archive_cache_with_origin(
    self, tmp_path: Path, shared_data: TestData
) -> None:
    """Test download_info hash is set for an archive with a cache entry
    that has origin.json."""
    url = path_to_url(shared_data.packages / "simple-1.0.tar.gz")
    # Named expected_hash so the builtin hash() is not shadowed.
    expected_hash = (
        "sha256=ad977496000576e1b6c41f6449a9897087ce9da6db4f15b603fe8372af4bf3c6"
    )
    finder = make_test_finder()
    wheel_cache = WheelCache(str(tmp_path / "cache"), FormatControl())
    cache_entry_dir = wheel_cache.get_path_for_link(Link(url))
    Path(cache_entry_dir).mkdir(parents=True)
    # Seed the cache entry with an origin record that carries the hash.
    Path(cache_entry_dir).joinpath("origin.json").write_text(
        DirectUrl(url, ArchiveInfo(hash=expected_hash)).to_json()
    )
    wheel.make_wheel(name="simple", version="1.0").save_to_dir(cache_entry_dir)
    with self._basic_resolver(finder, wheel_cache=wheel_cache) as resolver:
        ireq = get_processed_req_from_line(f"simple @ {url}")
        reqset = resolver.resolve([ireq], True)
        assert len(reqset.all_requirements) == 1
        req = reqset.all_requirements[0]
        assert req.original_link_is_in_wheel_cache
        assert req.download_info
        assert req.download_info.url == url
        assert isinstance(req.download_info.info, ArchiveInfo)
        # The hash must be the one stored in origin.json.
        assert req.download_info.info.hash == expected_hash
def test_download_info_local_wheel(self, data: TestData) -> None:
    """Check download_info of a requirement given as a local wheel path."""
    expected_hash = (
        "sha256="
        "e63aa139caee941ec7f33f057a5b987708c2128238357cf905429846a2008718"
    )
    default_finder = make_test_finder()
    with self._basic_resolver(default_finder) as resolver:
        requested = get_processed_req_from_line(
            f"{data.packages}/simplewheel-1.0-py2.py3-none-any.whl"
        )
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        assert download_info.url.startswith("file://")
        assert isinstance(download_info.info, ArchiveInfo)
        assert download_info.info.hash == expected_hash
def test_download_info_local_dir(self, data: TestData) -> None:
    """Check download_info of a requirement given as a local directory URL."""
    default_finder = make_test_finder()
    with self._basic_resolver(default_finder) as resolver:
        dir_url = path_to_url(data.packages / "FSPkg")
        requested = get_processed_req_from_line(f"FSPkg @ {dir_url}")
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        assert download_info.url.startswith("file://")
        assert isinstance(download_info.info, DirInfo)
def test_download_info_local_editable_dir(self, data: TestData) -> None:
    """Check download_info of an editable requirement from a local directory."""
    default_finder = make_test_finder()
    with self._basic_resolver(default_finder) as resolver:
        dir_url = path_to_url(data.packages / "FSPkg")
        requested = get_processed_req_from_line(f"-e {dir_url}#egg=FSPkg")
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        assert download_info.url.startswith("file://")
        assert isinstance(download_info.info, DirInfo)
        # The directory info must be flagged as editable.
        assert download_info.info.editable
@pytest.mark.network
def test_download_info_vcs(self) -> None:
    """Check download_info of a requirement given as a git URL."""
    default_finder = make_test_finder()
    with self._basic_resolver(default_finder) as resolver:
        requested = get_processed_req_from_line(
            "pip-test-package @ git+https://github.com/pypa/pip-test-package"
        )
        resolved = resolver.resolve([requested], True)
        # Exactly one requirement must come out of the resolution.
        assert len(resolved.all_requirements) == 1
        req = resolved.all_requirements[0]
        download_info = req.download_info
        assert download_info
        assert isinstance(download_info.info, VcsInfo)
        # The recorded URL is expected without the "git+" prefix.
        assert download_info.url == "https://github.com/pypa/pip-test-package"
        assert download_info.info.vcs == "git"
class TestInstallRequirement:
def setup(self) -> None: