1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

reuse should_cache(req) logic

This commit is contained in:
Danny McClanahan 2023-09-03 09:12:36 -04:00
parent 0f20f629f3
commit 7591507dfe
No known key found for this signature in database
GPG key ID: CE8D0DA71DEFC1DF
6 changed files with 101 additions and 111 deletions

View file

@ -6,6 +6,7 @@ import hashlib
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
@ -16,14 +17,61 @@ from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
ORIGIN_JSON_NAME = "origin.json"
def _contains_egg_info(s: str) -> bool:
"""Determine whether the string looks like an egg_info.
:param s: The string to parse. E.g. foo-2.1
"""
return bool(_egg_info_re.search(s))
def should_cache(
req: InstallRequirement,
) -> bool:
"""
Return whether a built InstallRequirement can be stored in the persistent
wheel cache, assuming the wheel cache is available, and _should_build()
has determined a wheel needs to be built.
"""
if not req.link:
return False
if req.editable or (not req.source_dir and not req.link.is_wheel):
# never cache editable requirements
return False
if req.link and req.link.is_vcs:
# VCS checkout. Do not cache
# unless it points to an immutable commit hash.
assert not req.editable
assert req.source_dir
vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
assert vcs_backend
if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
return True
return False
assert req.link
base, ext = req.link.splitext()
if _contains_egg_info(base):
return True
# Otherwise, do not cache.
return False
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
@ -244,6 +292,15 @@ class WheelCache(WheelCacheBase):
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)
def resolve_cache_dir(self, req: InstallRequirement) -> str:
"""Return the persistent or temporary cache directory where the built or
downloaded wheel should be stored."""
cache_available = bool(self.cache_dir)
assert req.link, req
if cache_available and should_cache(req):
return self.get_path_for_link(req.link)
return self.get_ephem_path_for_link(req.link)
def get(
self,
link: Link,

View file

@ -113,7 +113,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
def _http_get_download(session: PipSession, link: Link) -> Response:
target_url = link.url.split("#", 1)[0]
target_url = link.url_without_fragment
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
return resp

View file

@ -14,9 +14,8 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.requests.exceptions import InvalidSchema
from pip._internal.cache import LinkMetadataCache
from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.exceptions import (
DirectoryUrlHashUnsupported,
@ -416,7 +415,7 @@ class RequirementPreparer:
)
return None
cached_metadata = self._fetch_cached_metadata(req.link)
cached_metadata = self._fetch_cached_metadata(req)
if cached_metadata is not None:
return cached_metadata
@ -425,21 +424,22 @@ class RequirementPreparer:
) or self._fetch_metadata_using_lazy_wheel(req.link)
# Populate the metadata cache.
if computed_metadata is not None:
self._cache_metadata(req.link, computed_metadata)
self._cache_metadata(req, computed_metadata)
return computed_metadata
def _fetch_cached_metadata(
self,
link: Link,
self, req: InstallRequirement
) -> Optional[BaseDistribution]:
if self._metadata_cache is None:
return None
if not should_cache(req):
return None
try:
cached_path = self._metadata_cache.cache_path(link)
cached_path = self._metadata_cache.cache_path(req.link)
os.makedirs(str(cached_path.parent), exist_ok=True)
with cached_path.open("rb") as f:
logger.debug(
"found cached metadata for link %s at %s", link.url, f.name
"found cached metadata for link %s at %s", req.link, f.name
)
args = json.load(f)
cached_dist = CacheableDist.from_json(args)
@ -447,7 +447,7 @@ class RequirementPreparer:
except (OSError, json.JSONDecodeError, KeyError) as e:
logger.debug(
"no cached metadata for link %s at %s %s(%s)",
link.url,
req.link,
cached_path,
e.__class__.__name__,
str(e),
@ -456,24 +456,26 @@ class RequirementPreparer:
def _cache_metadata(
self,
link: Link,
req: InstallRequirement,
metadata_dist: BaseDistribution,
) -> None:
if self._metadata_cache is None:
return
if not should_cache(req):
return
try:
cached_path = self._metadata_cache.cache_path(link)
cached_path = self._metadata_cache.cache_path(req.link)
os.makedirs(str(cached_path.parent), exist_ok=True)
with cached_path.open("w") as f:
cacheable_dist = CacheableDist.from_dist(link, metadata_dist)
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", link.url, f.name)
logger.debug("caching metadata for link %s at %s", req.link, f.name)
json.dump(args, f)
except (OSError, email.errors.HeaderParseError) as e:
logger.debug(
"could not cache metadata for dist %s from %s: %s(%s)",
metadata_dist,
link,
req.link,
e.__class__.__name__,
str(e),
)
@ -564,17 +566,7 @@ class RequirementPreparer:
links_to_fully_download: Dict[Link, InstallRequirement] = {}
for req in partially_downloaded_reqs:
assert req.link
# If this is e.g. a git url, we don't know how to handle that in the
# BatchDownloader, so leave it for self._prepare_linked_requirement() at the
# end of this method, which knows how to handle any URL.
can_simply_download = True
try:
# This will raise InvalidSchema if our Session can't download it.
self._session.get_adapter(req.link.url)
except InvalidSchema:
can_simply_download = False
if can_simply_download:
links_to_fully_download[req.link] = req
links_to_fully_download[req.link] = req
batch_download = self._batch_download(
links_to_fully_download.keys(),
@ -642,7 +634,7 @@ class RequirementPreparer:
# None of the optimizations worked, fully prepare the requirement
result = self._prepare_linked_requirement(req, parallel_builds)
# Cache the metadata for later.
self._cache_metadata(req.link, result)
self._cache_metadata(req, result)
return result
def _ensure_download_info(self, reqs: Iterable[InstallRequirement]) -> None:

View file

@ -3,7 +3,6 @@
import logging
import os.path
import re
import shutil
from typing import Iterable, List, Optional, Tuple
@ -25,23 +24,12 @@ from pip._internal.utils.setuptools_build import make_setuptools_clean_args
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.urls import path_to_url
from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]]
def _contains_egg_info(s: str) -> bool:
"""Determine whether the string looks like an egg_info.
:param s: The string to parse. E.g. foo-2.1
"""
return bool(_egg_info_re.search(s))
def _should_build(
req: InstallRequirement,
need_wheel: bool,
@ -87,54 +75,6 @@ def should_build_for_install_command(
return _should_build(req, need_wheel=False)
def _should_cache(
req: InstallRequirement,
) -> Optional[bool]:
"""
Return whether a built InstallRequirement can be stored in the persistent
wheel cache, assuming the wheel cache is available, and _should_build()
has determined a wheel needs to be built.
"""
if req.editable or not req.source_dir:
# never cache editable requirements
return False
if req.link and req.link.is_vcs:
# VCS checkout. Do not cache
# unless it points to an immutable commit hash.
assert not req.editable
assert req.source_dir
vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
assert vcs_backend
if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
return True
return False
assert req.link
base, ext = req.link.splitext()
if _contains_egg_info(base):
return True
# Otherwise, do not cache.
return False
def _get_cache_dir(
req: InstallRequirement,
wheel_cache: WheelCache,
) -> str:
"""Return the persistent or temporary cache directory where the built
wheel need to be stored.
"""
cache_available = bool(wheel_cache.cache_dir)
assert req.link
if cache_available and _should_cache(req):
cache_dir = wheel_cache.get_path_for_link(req.link)
else:
cache_dir = wheel_cache.get_ephem_path_for_link(req.link)
return cache_dir
def _verify_one(req: InstallRequirement, wheel_path: str) -> None:
canonical_name = canonicalize_name(req.name or "")
w = Wheel(os.path.basename(wheel_path))
@ -316,7 +256,7 @@ def build(
build_successes, build_failures = [], []
for req in requirements:
assert req.name
cache_dir = _get_cache_dir(req, wheel_cache)
cache_dir = wheel_cache.resolve_cache_dir(req)
wheel_file = _build_one(
req,
cache_dir,

View file

@ -1,13 +1,32 @@
import os
from pathlib import Path
import pytest
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._internal.cache import WheelCache, _hash_dict
from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict
from pip._internal.models.link import Link
from pip._internal.utils.misc import ensure_dir
@pytest.mark.parametrize(
"s, expected",
[
# Trivial.
("pip-18.0", True),
# Ambiguous.
("foo-2-2", True),
("im-valid", True),
# Invalid.
("invalid", False),
("im_invalid", False),
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
result = _contains_egg_info(s)
assert result == expected
def test_falsey_path_none() -> None:
wc = WheelCache("")
assert wc.cache_dir is None

View file

@ -5,7 +5,7 @@ from typing import Optional, cast
import pytest
from pip._internal import wheel_builder
from pip._internal import cache, wheel_builder
from pip._internal.models.link import Link
from pip._internal.operations.build.wheel_legacy import format_command_result
from pip._internal.req.req_install import InstallRequirement
@ -13,24 +13,6 @@ from pip._internal.vcs.git import Git
from tests.lib import _create_test_package
@pytest.mark.parametrize(
"s, expected",
[
# Trivial.
("pip-18.0", True),
# Ambiguous.
("foo-2-2", True),
("im-valid", True),
# Invalid.
("invalid", False),
("im_invalid", False),
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
result = wheel_builder._contains_egg_info(s)
assert result == expected
class ReqMock:
def __init__(
self,
@ -128,7 +110,7 @@ def test_should_build_for_wheel_command(req: ReqMock, expected: bool) -> None:
],
)
def test_should_cache(req: ReqMock, expected: bool) -> None:
assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected
assert cache.should_cache(cast(InstallRequirement, req)) is expected
def test_should_cache_git_sha(tmpdir: Path) -> None:
@ -138,12 +120,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None:
# a link referencing a sha should be cached
url = "git+https://g.c/o/r@" + commit + "#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
assert wheel_builder._should_cache(cast(InstallRequirement, req))
assert cache.should_cache(cast(InstallRequirement, req))
# a link not referencing a sha should not be cached
url = "git+https://g.c/o/r@master#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
assert not wheel_builder._should_cache(cast(InstallRequirement, req))
assert not cache.should_cache(cast(InstallRequirement, req))
def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None: