From 6a8956d7a876508d50851f77ea13a08c96aa17eb Mon Sep 17 00:00:00 2001 From: Pradyun Gedam <3275593+pradyunsg@users.noreply.github.com> Date: Tue, 13 Oct 2020 19:23:53 +0530 Subject: [PATCH] Merge pull request #8932 from uranusjr/new-resolver-lazy-sequence --- news/8023.feature.rst | 2 + .../resolution/resolvelib/factory.py | 59 ++++++----- .../resolution/resolvelib/found_candidates.py | 98 +++++++++++++++++++ .../resolution/resolvelib/provider.py | 91 ++++------------- tests/functional/test_new_resolver.py | 26 +++++ .../resolution_resolvelib/test_requirement.py | 11 ++- 6 files changed, 183 insertions(+), 104 deletions(-) create mode 100644 news/8023.feature.rst create mode 100644 src/pip/_internal/resolution/resolvelib/found_candidates.py diff --git a/news/8023.feature.rst b/news/8023.feature.rst new file mode 100644 index 000000000..c886e9a66 --- /dev/null +++ b/news/8023.feature.rst @@ -0,0 +1,2 @@ +New resolver: Avoid accessing indexes when the installed candidate is preferred +and considered good enough. diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index 172f054fa..8813ab038 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -1,7 +1,5 @@ -import collections import logging -from pip._vendor import six from pip._vendor.packaging.utils import canonicalize_name from pip._internal.exceptions import ( @@ -30,6 +28,7 @@ from .candidates import ( LinkCandidate, RequiresPythonCandidate, ) +from .found_candidates import FoundCandidates from .requirements import ( ExplicitRequirement, RequiresPythonRequirement, @@ -41,6 +40,7 @@ if MYPY_CHECK_RUNNING: FrozenSet, Dict, Iterable, + Iterator, List, Optional, Sequence, @@ -102,7 +102,7 @@ class Factory(object): if not ignore_installed: self._installed_dists = { canonicalize_name(dist.project_name): dist - for dist in get_installed_distributions() + for dist in get_installed_distributions(local_only=False) } else: self._installed_dists = {} @@ -156,6 +156,7 @@ class Factory(object): ireqs, # type: Sequence[InstallRequirement] specifier, # type: SpecifierSet hashes, # type: Hashes + prefers_installed, # type: bool ): # type: (...) -> Iterable[Candidate] if not ireqs: @@ -174,54 +175,49 @@ class Factory(object): hashes &= ireq.hashes(trust_internet=False) extras |= frozenset(ireq.extras) - # We use this to ensure that we only yield a single candidate for - # each version (the finder's preferred one for that version). The - # requirement needs to return only one candidate per version, so we - # implement that logic here so that requirements using this helper - # don't all have to do the same thing later. - candidates = collections.OrderedDict() # type: VersionCandidates - # Get the installed version, if it matches, unless the user # specified `--force-reinstall`, when we want the version from # the index instead. - installed_version = None installed_candidate = None if not self._force_reinstall and name in self._installed_dists: installed_dist = self._installed_dists[name] - installed_version = installed_dist.parsed_version - if specifier.contains(installed_version, prereleases=True): + if specifier.contains(installed_dist.version, prereleases=True): installed_candidate = self._make_candidate_from_dist( dist=installed_dist, extras=extras, template=template, ) - found = self._finder.find_best_candidate( - project_name=name, - specifier=specifier, - hashes=hashes, - ) - for ican in found.iter_applicable(): - if ican.version == installed_version and installed_candidate: - candidate = installed_candidate - else: - candidate = self._make_candidate_from_link( + def iter_index_candidates(): + # type: () -> Iterator[Candidate] + result = self._finder.find_best_candidate( + project_name=name, + specifier=specifier, + hashes=hashes, + ) + # PackageFinder returns earlier versions first, so we reverse. + for ican in reversed(list(result.iter_applicable())): + yield self._make_candidate_from_link( link=ican.link, extras=extras, template=template, name=name, version=ican.version, ) - candidates[ican.version] = candidate - # Yield the installed version even if it is not found on the index. - if installed_version and installed_candidate: - candidates[installed_version] = installed_candidate + return FoundCandidates( + iter_index_candidates, + installed_candidate, + prefers_installed, + ) - return six.itervalues(candidates) - - def find_candidates(self, requirements, constraint): - # type: (Sequence[Requirement], Constraint) -> Iterable[Candidate] + def find_candidates( + self, + requirements, # type: Sequence[Requirement] + constraint, # type: Constraint + prefers_installed, # type: bool + ): + # type: (...) -> Iterable[Candidate] explicit_candidates = set() # type: Set[Candidate] ireqs = [] # type: List[InstallRequirement] for req in requirements: @@ -238,6 +234,7 @@ class Factory(object): ireqs, constraint.specifier, constraint.hashes, + prefers_installed, ) if constraint: diff --git a/src/pip/_internal/resolution/resolvelib/found_candidates.py b/src/pip/_internal/resolution/resolvelib/found_candidates.py new file mode 100644 index 000000000..a669e8936 --- /dev/null +++ b/src/pip/_internal/resolution/resolvelib/found_candidates.py @@ -0,0 +1,98 @@ +import itertools +import operator + +from pip._vendor.six.moves import collections_abc # type: ignore + +from pip._internal.utils.compat import lru_cache +from pip._internal.utils.typing import MYPY_CHECK_RUNNING + +if MYPY_CHECK_RUNNING: + from typing import Callable, Iterator, Optional, Set + + from pip._vendor.packaging.version import _BaseVersion + + from .base import Candidate + + +def _deduplicated_by_version(candidates): + # type: (Iterator[Candidate]) -> Iterator[Candidate] + returned = set() # type: Set[_BaseVersion] + for candidate in candidates: + if candidate.version in returned: + continue + returned.add(candidate.version) + yield candidate + + +def _insert_installed(installed, others): + # type: (Candidate, Iterator[Candidate]) -> Iterator[Candidate] + """Iterator for ``FoundCandidates``. + + This iterator is used when the resolver prefers to upgrade an + already-installed package. Candidates from index are returned in their + normal ordering, except replaced when the version is already installed. + + Since candidates from index are already sorted by reverse version order, + `sorted()` here would keep the ordering mostly intact, only shuffling the + already-installed candidate into the correct position. We put the already- + installed candidate in front of those from the index, so it's put in front + after sorting due to Python sorting's stableness guarentee. + """ + candidates = sorted( + itertools.chain([installed], others), + key=operator.attrgetter("version"), + reverse=True, + ) + return iter(candidates) + + +class FoundCandidates(collections_abc.Sequence): + """A lazy sequence to provide candidates to the resolver. + + The intended usage is to return this from `find_matches()` so the resolver + can iterate through the sequence multiple times, but only access the index + page when remote packages are actually needed. This improve performances + when suitable candidates are already installed on disk. + """ + def __init__( + self, + get_others, # type: Callable[[], Iterator[Candidate]] + installed, # type: Optional[Candidate] + prefers_installed, # type: bool + ): + self._get_others = get_others + self._installed = installed + self._prefers_installed = prefers_installed + + def __getitem__(self, index): + # type: (int) -> Candidate + # Implemented to satisfy the ABC check. This is not needed by the + # resolver, and should not be used by the provider either (for + # performance reasons). + raise NotImplementedError("don't do this") + + def __iter__(self): + # type: () -> Iterator[Candidate] + if not self._installed: + candidates = self._get_others() + elif self._prefers_installed: + candidates = itertools.chain([self._installed], self._get_others()) + else: + candidates = _insert_installed(self._installed, self._get_others()) + return _deduplicated_by_version(candidates) + + def __len__(self): + # type: () -> int + # Implemented to satisfy the ABC check. This is not needed by the + # resolver, and should not be used by the provider either (for + # performance reasons). + raise NotImplementedError("don't do this") + + @lru_cache(maxsize=1) + def __bool__(self): + # type: () -> bool + if self._prefers_installed and self._installed: + return True + return any(self) + + __nonzero__ = __bool__ # XXX: Python 2. diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index 19c4d543f..7d679e5fe 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -54,30 +54,26 @@ class PipProvider(AbstractProvider): self._upgrade_strategy = upgrade_strategy self._user_requested = user_requested - def _sort_matches(self, matches): - # type: (Iterable[Candidate]) -> Sequence[Candidate] + def identify(self, dependency): + # type: (Union[Requirement, Candidate]) -> str + return dependency.name - # The requirement is responsible for returning a sequence of potential - # candidates, one per version. The provider handles the logic of - # deciding the order in which these candidates should be passed to - # the resolver. + def get_preference( + self, + resolution, # type: Optional[Candidate] + candidates, # type: Sequence[Candidate] + information # type: Sequence[Tuple[Requirement, Candidate]] + ): + # type: (...) -> Any + transitive = all(parent is not None for _, parent in information) + return (transitive, bool(candidates)) - # The `matches` argument is a sequence of candidates, one per version, - # which are potential options to be installed. The requirement will - # have already sorted out whether to give us an already-installed - # candidate or a version from PyPI (i.e., it will deal with options - # like --force-reinstall and --ignore-installed). + def find_matches(self, requirements): + # type: (Sequence[Requirement]) -> Iterable[Candidate] + if not requirements: + return [] + name = requirements[0].name - # We now work out the correct order. - # - # 1. If no other considerations apply, later versions take priority. - # 2. An already installed distribution is preferred over any other, - # unless the user has requested an upgrade. - # Upgrades are allowed when: - # * The --upgrade flag is set, and - # - The project was specified on the command line, or - # - The project is a dependency and the "eager" upgrade strategy - # was requested. def _eligible_for_upgrade(name): # type: (str) -> bool """Are upgrades allowed for this project? @@ -96,56 +92,11 @@ class PipProvider(AbstractProvider): return (name in self._user_requested) return False - def sort_key(c): - # type: (Candidate) -> int - """Return a sort key for the matches. - - The highest priority should be given to installed candidates that - are not eligible for upgrade. We use the integer value in the first - part of the key to sort these before other candidates. - - We only pull the installed candidate to the bottom (i.e. most - preferred), but otherwise keep the ordering returned by the - requirement. The requirement is responsible for returning a list - otherwise sorted for the resolver, taking account for versions - and binary preferences as specified by the user. - """ - if c.is_installed and not _eligible_for_upgrade(c.name): - return 1 - return 0 - - return sorted(matches, key=sort_key) - - def identify(self, dependency): - # type: (Union[Requirement, Candidate]) -> str - return dependency.name - - def get_preference( - self, - resolution, # type: Optional[Candidate] - candidates, # type: Sequence[Candidate] - information # type: Sequence[Tuple[Requirement, Optional[Candidate]]] - ): - # type: (...) -> Any - """Return a sort key to determine what dependency to look next. - - A smaller value makes a dependency higher priority. We put direct - (user-requested) dependencies first since they may contain useful - user-specified version ranges. Users tend to expect us to catch - problems in them early as well. - """ - transitive = all(parent is not None for _, parent in information) - return (transitive, len(candidates)) - - def find_matches(self, requirements): - # type: (Sequence[Requirement]) -> Iterable[Candidate] - if not requirements: - return [] - constraint = self._constraints.get( - requirements[0].name, Constraint.empty(), + return self._factory.find_candidates( + requirements, + constraint=self._constraints.get(name, Constraint.empty()), + prefers_installed=(not _eligible_for_upgrade(name)), ) - candidates = self._factory.find_candidates(requirements, constraint) - return reversed(self._sort_matches(candidates)) def is_satisfied_by(self, requirement, candidate): # type: (Requirement, Candidate) -> bool diff --git a/tests/functional/test_new_resolver.py b/tests/functional/test_new_resolver.py index 1dab8d470..1718ab8a8 100644 --- a/tests/functional/test_new_resolver.py +++ b/tests/functional/test_new_resolver.py @@ -1020,3 +1020,29 @@ def test_new_resolver_no_deps_checks_requires_python(script): "{}.{}.{} not in '<2'".format(*sys.version_info[:3]) ) assert message in result.stderr + + +def test_new_resolver_prefers_installed_in_upgrade_if_latest(script): + create_basic_wheel_for_package(script, "pkg", "1") + local_pkg = create_test_package_with_setup(script, name="pkg", version="2") + + # Install the version that's not on the index. + script.pip( + "install", + "--use-feature=2020-resolver", + "--no-cache-dir", + "--no-index", + local_pkg, + ) + + # Now --upgrade should still pick the local version because it's "better". + script.pip( + "install", + "--use-feature=2020-resolver", + "--no-cache-dir", + "--no-index", + "--find-links", script.scratch_path, + "--upgrade", + "pkg", + ) + assert_installed(script, pkg="2") diff --git a/tests/unit/resolution_resolvelib/test_requirement.py b/tests/unit/resolution_resolvelib/test_requirement.py index a03edb6f7..6149fd1ae 100644 --- a/tests/unit/resolution_resolvelib/test_requirement.py +++ b/tests/unit/resolution_resolvelib/test_requirement.py @@ -58,8 +58,10 @@ def test_new_resolver_correct_number_of_matches(test_cases, factory): """Requirements should return the correct number of candidates""" for spec, _, match_count in test_cases: req = factory.make_requirement_from_spec(spec, comes_from=None) - matches = factory.find_candidates([req], Constraint.empty()) - assert len(list(matches)) == match_count + matches = factory.find_candidates( + [req], Constraint.empty(), prefers_installed=False, + ) + assert sum(1 for _ in matches) == match_count def test_new_resolver_candidates_match_requirement(test_cases, factory): @@ -67,7 +69,10 @@ def test_new_resolver_candidates_match_requirement(test_cases, factory): """ for spec, _, _ in test_cases: req = factory.make_requirement_from_spec(spec, comes_from=None) - for c in factory.find_candidates([req], Constraint.empty()): + candidates = factory.find_candidates( + [req], Constraint.empty(), prefers_installed=False, + ) + for c in candidates: assert isinstance(c, Candidate) assert req.is_satisfied_by(c)