This commit is contained in:
Joseph Bylund 2023-11-29 21:11:30 +08:00 committed by GitHub
commit d000dab6e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 2 deletions

1
news/10480.feature.rst Normal file
View File

@ -0,0 +1 @@
Parallelize network requests when finding package candidates for installation.

View File

@ -2,7 +2,8 @@ import contextlib
import functools
import logging
import os
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast
from multiprocessing.pool import ThreadPool
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, cast
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib import BaseReporter, ResolutionImpossible
@ -69,6 +70,7 @@ class Resolver(BaseResolver):
self.ignore_dependencies = ignore_dependencies
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None
self._finder = finder
def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
@ -90,6 +92,8 @@ class Resolver(BaseResolver):
reporter,
)
self._prime_finder_cache(provider.identify(r) for r in collected.requirements)
try:
limit_how_complex_resolution_can_be = 200000
result = self._result = resolver.resolve(
@ -182,6 +186,17 @@ class Resolver(BaseResolver):
req.needs_more_preparation = False
return req_set
def _prime_finder_cache(self, project_names: Iterable[str]) -> None:
"""Populate finder's find_all_candidates cache
Pre-emptively call the finder's find_all_candidates for each project
in parallel in order to avoid later blocking on network requests during
resolution.
"""
with ThreadPool() as tp:
for _ in tp.imap_unordered(self._finder.find_all_candidates, project_names):
pass
def get_installation_order(
self, req_set: RequirementSet
) -> List[InstallRequirement]:

View File

@ -2,12 +2,14 @@ from typing import Dict, List, Optional, Set, Tuple, cast
from unittest import mock
import pytest
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib.resolvers import Result
from pip._vendor.resolvelib.structs import DirectedGraph
from pip._internal.index.package_finder import PackageFinder
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req import InstallRequirement
from pip._internal.req.constructors import install_req_from_line
from pip._internal.req.req_set import RequirementSet
from pip._internal.resolution.resolvelib.resolver import (
@ -30,6 +32,7 @@ def resolver(preparer: RequirementPreparer, finder: PackageFinder) -> Resolver:
force_reinstall=False,
upgrade_strategy="to-satisfy-only",
)
finder.find_all_candidates.cache_clear()
return resolver
@ -297,3 +300,30 @@ def test_new_resolver_topological_weights(
weights = get_topological_weights(graph, requirement_keys)
assert weights == expected_weights
def test_resolver_cache_population(resolver: Resolver) -> None:
    """The resolver pre-populates the finder's find_all_candidates cache."""

    def cache_stats() -> Dict[str, int]:
        info = resolver._finder.find_all_candidates.cache_info()
        return {
            "currsize": info.currsize,
            "hits": info.hits,
            "misses": info.misses,
        }

    # No calls yet: the cache starts out empty.
    assert cache_stats() == {"currsize": 0, "hits": 0, "misses": 0}

    # Priming adds exactly one entry without producing any hits.
    resolver._prime_finder_cache(["simple"])
    assert cache_stats() == {"currsize": 1, "hits": 0, "misses": 1}

    # Start from a clean cache before running a full resolution.
    resolver._finder.find_all_candidates.cache_clear()

    simple_req = InstallRequirement(
        req=Requirement("simple==3.0"),
        comes_from=None,
    )
    resolver.resolve([simple_req], True)

    # One miss from priming plus one hit during resolution means the
    # priming populated the cache and the resolver then consumed it.
    assert cache_stats() == {"currsize": 1, "hits": 1, "misses": 1}

View File

@ -1,5 +1,5 @@
import logging
from typing import Iterable
from typing import Dict, Iterable
from unittest.mock import Mock, patch
import pytest
@ -566,3 +566,31 @@ def test_find_all_candidates_find_links_and_index(data: TestData) -> None:
versions = finder.find_all_candidates("simple")
# first the find-links versions then the page versions
assert [str(v.version) for v in versions] == ["3.0", "2.0", "1.0", "1.0"]
def test_finder_caching(data: TestData) -> None:
    """find_best_candidate reuses the find_all_candidates cache.

    This is not required for the behavior of the finder itself, but the
    resolver exploits it: find_best_candidate consumes the finder's own
    find_all_candidates cache, so pre-populating that cache before the
    resolution process starts avoids blocking on network requests later.
    """
    finder = make_test_finder(
        find_links=[data.find_links],
        index_urls=[data.index_url("simple")],
    )
    finder.find_all_candidates.cache_clear()

    def cache_stats() -> Dict[str, int]:
        info = finder.find_all_candidates.cache_info()
        return {
            "currsize": info.currsize,
            "hits": info.hits,
            "misses": info.misses,
        }

    # The cache starts out empty before any calls.
    assert cache_stats() == {"currsize": 0, "hits": 0, "misses": 0}

    # The first find_all_candidates call is a miss ...
    finder.find_all_candidates("simple")
    assert cache_stats() == {"currsize": 1, "hits": 0, "misses": 1}

    # ... and a find_best_candidate call for the same project is a hit.
    finder.find_best_candidate("simple")
    assert cache_stats() == {"currsize": 1, "hits": 1, "misses": 1}