Implement "lazy sequence" to avoid Internet access

find_matches() is modified to return a special type that implements
the sequence protocol (instead of a plain list). This special sequence
type tries to use the installed candidate as the first element if
possible, and only accesses indexes when the installed candidate is
considered unsatisfactory.
This commit is contained in:
Tzu-ping Chung 2020-09-28 16:36:04 +08:00
parent 063f2ae74e
commit 8326148149
4 changed files with 179 additions and 110 deletions

View File

@ -1,7 +1,5 @@
import collections
import logging import logging
from pip._vendor import six
from pip._vendor.packaging.utils import canonicalize_name from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.exceptions import ( from pip._internal.exceptions import (
@ -30,6 +28,7 @@ from .candidates import (
LinkCandidate, LinkCandidate,
RequiresPythonCandidate, RequiresPythonCandidate,
) )
from .found_candidates import FoundCandidates
from .requirements import ( from .requirements import (
ExplicitRequirement, ExplicitRequirement,
RequiresPythonRequirement, RequiresPythonRequirement,
@ -41,6 +40,7 @@ if MYPY_CHECK_RUNNING:
Dict, Dict,
FrozenSet, FrozenSet,
Iterable, Iterable,
Iterator,
List, List,
Optional, Optional,
Sequence, Sequence,
@ -98,15 +98,9 @@ class Factory(object):
self._editable_candidate_cache = {} # type: Cache[EditableCandidate] self._editable_candidate_cache = {} # type: Cache[EditableCandidate]
if not ignore_installed: if not ignore_installed:
packages = get_installed_distributions(
local_only=False,
include_editables=True,
editables_only=False,
user_only=False,
paths=None,
)
self._installed_dists = { self._installed_dists = {
canonicalize_name(p.key): p for p in packages canonicalize_name(dist.project_name): dist
for dist in get_installed_distributions(local_only=False)
} }
else: else:
self._installed_dists = {} self._installed_dists = {}
@ -160,6 +154,7 @@ class Factory(object):
ireqs, # type: Sequence[InstallRequirement] ireqs, # type: Sequence[InstallRequirement]
specifier, # type: SpecifierSet specifier, # type: SpecifierSet
hashes, # type: Hashes hashes, # type: Hashes
prefers_installed, # type: bool
): ):
# type: (...) -> Iterable[Candidate] # type: (...) -> Iterable[Candidate]
if not ireqs: if not ireqs:
@ -178,54 +173,49 @@ class Factory(object):
hashes &= ireq.hashes(trust_internet=False) hashes &= ireq.hashes(trust_internet=False)
extras |= frozenset(ireq.extras) extras |= frozenset(ireq.extras)
# We use this to ensure that we only yield a single candidate for
# each version (the finder's preferred one for that version). The
# requirement needs to return only one candidate per version, so we
# implement that logic here so that requirements using this helper
# don't all have to do the same thing later.
candidates = collections.OrderedDict() # type: VersionCandidates
# Get the installed version, if it matches, unless the user # Get the installed version, if it matches, unless the user
# specified `--force-reinstall`, when we want the version from # specified `--force-reinstall`, when we want the version from
# the index instead. # the index instead.
installed_version = None
installed_candidate = None installed_candidate = None
if not self._force_reinstall and name in self._installed_dists: if not self._force_reinstall and name in self._installed_dists:
installed_dist = self._installed_dists[name] installed_dist = self._installed_dists[name]
installed_version = installed_dist.parsed_version if specifier.contains(installed_dist.version, prereleases=True):
if specifier.contains(installed_version, prereleases=True):
installed_candidate = self._make_candidate_from_dist( installed_candidate = self._make_candidate_from_dist(
dist=installed_dist, dist=installed_dist,
extras=extras, extras=extras,
template=template, template=template,
) )
found = self._finder.find_best_candidate( def iter_index_candidates():
project_name=name, # type: () -> Iterator[Candidate]
specifier=specifier, result = self._finder.find_best_candidate(
hashes=hashes, project_name=name,
) specifier=specifier,
for ican in found.iter_applicable(): hashes=hashes,
if ican.version == installed_version and installed_candidate: )
candidate = installed_candidate # PackageFinder returns earlier versions first, so we reverse.
else: for ican in reversed(list(result.iter_applicable())):
candidate = self._make_candidate_from_link( yield self._make_candidate_from_link(
link=ican.link, link=ican.link,
extras=extras, extras=extras,
template=template, template=template,
name=name, name=name,
version=ican.version, version=ican.version,
) )
candidates[ican.version] = candidate
# Yield the installed version even if it is not found on the index. return FoundCandidates(
if installed_version and installed_candidate: iter_index_candidates,
candidates[installed_version] = installed_candidate installed_candidate,
prefers_installed,
)
return six.itervalues(candidates) def find_candidates(
self,
def find_candidates(self, requirements, constraint): requirements, # type: Sequence[Requirement]
# type: (Sequence[Requirement], Constraint) -> Iterable[Candidate] constraint, # type: Constraint
prefers_installed, # type: bool
):
# type: (...) -> Iterable[Candidate]
explicit_candidates = set() # type: Set[Candidate] explicit_candidates = set() # type: Set[Candidate]
ireqs = [] # type: List[InstallRequirement] ireqs = [] # type: List[InstallRequirement]
for req in requirements: for req in requirements:
@ -242,6 +232,7 @@ class Factory(object):
ireqs, ireqs,
constraint.specifier, constraint.specifier,
constraint.hashes, constraint.hashes,
prefers_installed,
) )
if constraint: if constraint:

View File

@ -0,0 +1,122 @@
from pip._vendor.six.moves import collections_abc
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
if MYPY_CHECK_RUNNING:
from typing import Callable, Iterator, Optional, Set
from pip._vendor.packaging.version import _BaseVersion
from .base import Candidate
class _InstalledFirstCandidatesIterator(collections_abc.Iterator):
    """Iterator backing ``FoundCandidates`` when the installed version wins.

    The installed candidate, if there is one, is handed out before anything
    else. The ``get_others`` callable is invoked only once a further
    candidate is requested, and every version is yielded at most once, so
    an entry from ``get_others`` that has the installed version is skipped.
    """

    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
    ):
        self._get_others = get_others
        self._installed = installed
        # Created on first need, so ``get_others`` is not called up front.
        self._others = None  # type: Optional[Iterator[Candidate]]
        # Versions that have already been handed out.
        self._returned = set()  # type: Set[_BaseVersion]

    def __next__(self):
        # type: () -> Candidate
        seen = self._returned
        preferred = self._installed

        # The installed candidate always goes out first, exactly once.
        if preferred and preferred.version not in seen:
            seen.add(preferred.version)
            return preferred

        if self._others is None:
            self._others = self._get_others()

        # Skip versions already yielded; ``StopIteration`` raised by the
        # underlying iterator propagates and ends this iterator as well.
        while True:
            candidate = next(self._others)
            if candidate.version not in seen:
                break
        seen.add(candidate.version)
        return candidate

    next = __next__  # XXX: Python 2.
class _InstalledReplacesCandidatesIterator(collections_abc.Iterator):
    """Iterator for ``FoundCandidates``.

    This iterator is used when the resolver prefers to upgrade an
    already-installed package. Candidates from ``get_others`` are returned
    in the order they are produced, one per version. The installed
    candidate is yielded exactly once:

    * in place of the candidate from ``get_others`` with the same version;
    * otherwise right before the first such candidate with a lower version;
    * otherwise last, after ``get_others`` is exhausted.

    This keeps the installed candidate available even when its version is
    not offered by ``get_others`` at all. The placement assumes that
    ``get_others`` yields candidates from newest to oldest version.
    """

    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
    ):
        self._installed = installed
        self._get_others = get_others
        # Created lazily on the first ``__next__`` call.
        self._others = None  # type: Optional[Iterator[Candidate]]
        # Versions that have already been handed out.
        self._returned = set()  # type: Set[_BaseVersion]
        # Candidate pulled from ``_others`` but held back for one step
        # because the installed candidate had to be yielded before it.
        self._pending = None  # type: Optional[Candidate]

    def _next_unseen(self):
        # type: () -> Optional[Candidate]
        """Return the next candidate with a not-yet-yielded version.

        Returns ``None`` once the underlying iterator is exhausted.
        """
        for cand in self._others:
            if cand.version not in self._returned:
                return cand
        return None

    def __next__(self):
        # type: () -> Candidate
        if self._others is None:
            self._others = self._get_others()

        installed = self._installed
        installed_due = bool(
            installed and installed.version not in self._returned
        )

        # Use the held-back candidate first, if there is one.
        cand = self._pending
        self._pending = None
        if cand is None:
            cand = self._next_unseen()

        if cand is None:
            # Nothing left from ``get_others``: hand out the installed
            # candidate if it has not been yielded yet.
            if not installed_due:
                raise StopIteration
            cand = installed
        elif installed_due:
            if cand.version == installed.version:
                cand = installed
            elif cand.version < installed.version:
                # The installed version is missing from ``get_others``;
                # insert it here and keep ``cand`` for the next call.
                self._pending = cand
                cand = installed

        self._returned.add(cand.version)
        return cand

    next = __next__  # XXX: Python 2.
class FoundCandidates(collections_abc.Sequence):
    """A lazy sequence to provide candidates to the resolver.

    The intended usage is to return this from `find_matches()` so the resolver
    can iterate through the sequence multiple times, but only access the index
    page when remote packages are actually needed. This improves performance
    when suitable candidates are already installed on disk.

    Every iteration builds a fresh iterator, which calls ``get_others``
    again when it needs candidates beyond the installed one. Indexing and
    ``len()`` are implemented by iterating.

    :param get_others: Callable returning an iterator of the candidates
        other than the installed one.
    :param installed: The already-installed candidate, or ``None``.
    :param prefers_installed: Whether the installed candidate should be
        yielded first instead of in its place among the other candidates.
    """

    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
        prefers_installed,  # type: bool
    ):
        self._get_others = get_others
        self._installed = installed
        self._prefers_installed = prefers_installed

    def __getitem__(self, index):
        # type: (int) -> Candidate
        if isinstance(index, int) and index < 0:
            # Counting from the end needs the full length, so the whole
            # sequence has to be materialized for negative indexes.
            return list(self)[index]
        # Non-negative indexes stop iterating as soon as the item is found.
        for i, value in enumerate(self):
            if index == i:
                return value
        raise IndexError(index)

    def __iter__(self):
        # type: () -> Iterator[Candidate]
        if self._prefers_installed:
            klass = _InstalledFirstCandidatesIterator
        else:
            klass = _InstalledReplacesCandidatesIterator
        return klass(self._get_others, self._installed)

    def __len__(self):
        # type: () -> int
        return sum(1 for _ in self)

    def __bool__(self):
        # type: () -> bool
        # Shortcut: the installed candidate is yielded first in this mode,
        # so the answer is known without calling ``get_others``.
        if self._prefers_installed and self._installed:
            return True
        # Non-empty means "has at least one element", whatever the
        # truthiness of that element is.
        for _ in self:
            return True
        return False

    __nonzero__ = __bool__  # XXX: Python 2.

View File

@ -45,30 +45,26 @@ class PipProvider(AbstractProvider):
self._upgrade_strategy = upgrade_strategy self._upgrade_strategy = upgrade_strategy
self._user_requested = user_requested self._user_requested = user_requested
def _sort_matches(self, matches): def identify(self, dependency):
# type: (Iterable[Candidate]) -> Sequence[Candidate] # type: (Union[Requirement, Candidate]) -> str
return dependency.name
# The requirement is responsible for returning a sequence of potential def get_preference(
# candidates, one per version. The provider handles the logic of self,
# deciding the order in which these candidates should be passed to resolution, # type: Optional[Candidate]
# the resolver. candidates, # type: Sequence[Candidate]
information # type: Sequence[Tuple[Requirement, Candidate]]
):
# type: (...) -> Any
transitive = all(parent is not None for _, parent in information)
return (transitive, bool(candidates))
# The `matches` argument is a sequence of candidates, one per version, def find_matches(self, requirements):
# which are potential options to be installed. The requirement will # type: (Sequence[Requirement]) -> Iterable[Candidate]
# have already sorted out whether to give us an already-installed if not requirements:
# candidate or a version from PyPI (i.e., it will deal with options return []
# like --force-reinstall and --ignore-installed). name = requirements[0].name
# We now work out the correct order.
#
# 1. If no other considerations apply, later versions take priority.
# 2. An already installed distribution is preferred over any other,
# unless the user has requested an upgrade.
# Upgrades are allowed when:
# * The --upgrade flag is set, and
# - The project was specified on the command line, or
# - The project is a dependency and the "eager" upgrade strategy
# was requested.
def _eligible_for_upgrade(name): def _eligible_for_upgrade(name):
# type: (str) -> bool # type: (str) -> bool
"""Are upgrades allowed for this project? """Are upgrades allowed for this project?
@ -87,56 +83,11 @@ class PipProvider(AbstractProvider):
return (name in self._user_requested) return (name in self._user_requested)
return False return False
def sort_key(c): return self._factory.find_candidates(
# type: (Candidate) -> int requirements,
"""Return a sort key for the matches. constraint=self._constraints.get(name, Constraint.empty()),
prefers_installed=(not _eligible_for_upgrade(name)),
The highest priority should be given to installed candidates that
are not eligible for upgrade. We use the integer value in the first
part of the key to sort these before other candidates.
We only pull the installed candidate to the bottom (i.e. most
preferred), but otherwise keep the ordering returned by the
requirement. The requirement is responsible for returning a list
otherwise sorted for the resolver, taking account for versions
and binary preferences as specified by the user.
"""
if c.is_installed and not _eligible_for_upgrade(c.name):
return 1
return 0
return sorted(matches, key=sort_key)
def identify(self, dependency):
# type: (Union[Requirement, Candidate]) -> str
return dependency.name
def get_preference(
self,
resolution, # type: Optional[Candidate]
candidates, # type: Sequence[Candidate]
information # type: Sequence[Tuple[Requirement, Optional[Candidate]]]
):
# type: (...) -> Any
"""Return a sort key to determine what dependency to look next.
A smaller value makes a dependency higher priority. We put direct
(user-requested) dependencies first since they may contain useful
user-specified version ranges. Users tend to expect us to catch
problems in them early as well.
"""
transitive = all(parent is not None for _, parent in information)
return (transitive, len(candidates))
def find_matches(self, requirements):
# type: (Sequence[Requirement]) -> Iterable[Candidate]
if not requirements:
return []
constraint = self._constraints.get(
requirements[0].name, Constraint.empty(),
) )
candidates = self._factory.find_candidates(requirements, constraint)
return reversed(self._sort_matches(candidates))
def is_satisfied_by(self, requirement, candidate): def is_satisfied_by(self, requirement, candidate):
# type: (Requirement, Candidate) -> bool # type: (Requirement, Candidate) -> bool

View File

@ -58,7 +58,9 @@ def test_new_resolver_correct_number_of_matches(test_cases, factory):
"""Requirements should return the correct number of candidates""" """Requirements should return the correct number of candidates"""
for spec, _, match_count in test_cases: for spec, _, match_count in test_cases:
req = factory.make_requirement_from_spec(spec, comes_from=None) req = factory.make_requirement_from_spec(spec, comes_from=None)
matches = factory.find_candidates([req], Constraint.empty()) matches = factory.find_candidates(
[req], Constraint.empty(), prefers_installed=False,
)
assert len(list(matches)) == match_count assert len(list(matches)) == match_count
@ -67,7 +69,10 @@ def test_new_resolver_candidates_match_requirement(test_cases, factory):
""" """
for spec, _, _ in test_cases: for spec, _, _ in test_cases:
req = factory.make_requirement_from_spec(spec, comes_from=None) req = factory.make_requirement_from_spec(spec, comes_from=None)
for c in factory.find_candidates([req], Constraint.empty()): candidates = factory.find_candidates(
[req], Constraint.empty(), prefers_installed=False,
)
for c in candidates:
assert isinstance(c, Candidate) assert isinstance(c, Candidate)
assert req.is_satisfied_by(c) assert req.is_satisfied_by(c)