1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Pre-warm find_all_candidates cache in finder

This commit is contained in:
Joe Bylund 2023-05-03 18:56:34 -04:00 committed by Joseph Bylund
parent 07049fe1a6
commit 37bd451db8
4 changed files with 86 additions and 2 deletions

1
news/10480.feature.rst Normal file
View file

@ -0,0 +1 @@
Parallelize network requests when finding package candidates for installation.

View file

@ -1,7 +1,7 @@
import functools
import logging
import os
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, cast
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib import BaseReporter, ResolutionImpossible
@ -19,6 +19,7 @@ from pip._internal.resolution.resolvelib.reporter import (
PipDebuggingReporter,
PipReporter,
)
from pip._internal.utils.parallel import LACK_SEM_OPEN, map_multithread
from .base import Candidate, Requirement
from .factory import Factory
@ -66,6 +67,7 @@ class Resolver(BaseResolver):
self.ignore_dependencies = ignore_dependencies
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None
self._finder = finder
def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
@ -87,6 +89,8 @@ class Resolver(BaseResolver):
reporter,
)
self._prime_finder_cache(provider.identify(r) for r in collected.requirements)
try:
limit_how_complex_resolution_can_be = 200000
result = self._result = resolver.resolve(
@ -164,6 +168,25 @@ class Resolver(BaseResolver):
req.needs_more_preparation = False
return req_set
def _prime_finder_cache(self, project_names: Iterable[str]) -> None:
    """Populate the finder's ``find_all_candidates`` cache.

    Pre-emptively call the finder's ``find_all_candidates`` for each project
    in parallel, in order to avoid later blocking on network requests during
    resolution.

    :param project_names: Names of the projects to look up. Repeated names
        are collapsed so that each project is submitted at most once.
    """
    # Priming is skipped entirely when the parallel helpers report
    # LACK_SEM_OPEN.
    # NOTE(review): presumably this means the platform cannot back a worker
    # pool -- confirm against pip._internal.utils.parallel.
    if LACK_SEM_OPEN:
        return

    # Collapse duplicates while keeping first-seen order. A functools-style
    # cache does not coalesce concurrent calls for the same key, so handing
    # the same name to several workers at once could trigger the same
    # lookup more than once.
    unique_names = list(dict.fromkeys(project_names))
    if not unique_names:
        # Nothing to prime; avoid spinning up workers for no work.
        return

    def _maybe_find_candidates(project_name: str) -> None:
        try:
            self._finder.find_all_candidates(project_name)
        except AttributeError:
            # Priming is only an optimisation, so this failure is
            # deliberately ignored rather than aborting the resolve.
            # NOTE(review): presumably guards against finder objects that
            # do not provide find_all_candidates -- confirm.
            pass

    # Only the cache side effect matters; the mapped results are discarded.
    for _ in map_multithread(_maybe_find_candidates, unique_names):
        pass
def get_installation_order(
self, req_set: RequirementSet
) -> List[InstallRequirement]:

View file

@ -2,12 +2,14 @@ from typing import Dict, List, Optional, Set, Tuple, cast
from unittest import mock
import pytest
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib.resolvers import Result
from pip._vendor.resolvelib.structs import DirectedGraph
from pip._internal.index.package_finder import PackageFinder
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req import InstallRequirement
from pip._internal.req.constructors import install_req_from_line
from pip._internal.req.req_set import RequirementSet
from pip._internal.resolution.resolvelib.resolver import (
@ -297,3 +299,32 @@ def test_new_resolver_topological_weights(
weights = get_topological_weights(graph, requirement_keys)
assert weights == expected_weights
def test_resolver_cache_population(resolver: Resolver) -> None:
    """Priming must fill the finder cache that a later resolve then reuses."""

    def cache_counters() -> Dict[str, int]:
        # Project the cache statistics down to the three counters we assert on.
        info = resolver._finder.find_all_candidates.cache_info()
        return {
            "currsize": info.currsize,
            "hits": info.hits,
            "misses": info.misses,
        }

    # Start from a clean slate: nothing cached, nothing counted.
    resolver._finder.find_all_candidates.cache_clear()
    untouched = {"currsize": 0, "hits": 0, "misses": 0}
    assert cache_counters() == untouched

    # Priming alone stores one entry via a single miss and no hits.
    resolver._prime_finder_cache(["simple"])
    primed_only = {"currsize": 1, "hits": 0, "misses": 1}
    assert cache_counters() == primed_only

    # Wipe the cache again before exercising a full resolution.
    resolver._finder.find_all_candidates.cache_clear()

    requirement = Requirement("simple==3.0")
    simple_req = InstallRequirement(req=requirement, comes_from=None)
    resolver.resolve([simple_req], True)

    # 1-1-1 means the priming step populated the cache (the miss) and the
    # resolution itself was then served from it (the hit).
    primed_then_hit = {"currsize": 1, "hits": 1, "misses": 1}
    assert cache_counters() == primed_then_hit

View file

@ -1,5 +1,5 @@
import logging
from typing import Iterable
from typing import Dict, Iterable
from unittest.mock import Mock, patch
import pytest
@ -566,3 +566,32 @@ def test_find_all_candidates_find_links_and_index(data: TestData) -> None:
versions = finder.find_all_candidates("simple")
# first the find-links versions then the page versions
assert [str(v.version) for v in versions] == ["3.0", "2.0", "1.0", "1.0"]
def test_finder_caching(data: TestData) -> None:
    """find_best_candidate must be served from the find_all_candidates cache.

    The finder itself does not need this for correctness. It is relied upon
    because find_best_candidate consumes its own find_all_candidates cache,
    which allows that cache to be pre-populated before the resolution process
    starts.
    """
    links = [data.find_links]
    indexes = [data.index_url("simple")]
    finder = make_test_finder(find_links=links, index_urls=indexes)

    def cache_counters() -> Dict[str, int]:
        # Project the cache statistics down to the three counters we assert on.
        info = finder.find_all_candidates.cache_info()
        return {
            "currsize": info.currsize,
            "hits": info.hits,
            "misses": info.misses,
        }

    # Start from a clean slate: nothing cached, nothing counted.
    finder.find_all_candidates.cache_clear()
    untouched = {"currsize": 0, "hits": 0, "misses": 0}
    assert cache_counters() == untouched

    # The first find-all call has to miss and leaves one entry behind.
    finder.find_all_candidates("simple")
    after_find_all = {"currsize": 1, "hits": 0, "misses": 1}
    assert cache_counters() == after_find_all

    # A find-best call after a find-all is answered from the cache.
    finder.find_best_candidate("simple")
    after_find_best = {"currsize": 1, "hits": 1, "misses": 1}
    assert cache_counters() == after_find_best