Improve Performance of Picking Best Candidate from Indexes

Use a mapping for random lookup instead of list traversal.
This commit is contained in:
Joseph Bylund 2021-04-03 11:25:36 -04:00 committed by GitHub
parent 34fbe69664
commit d2c280be64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 4 deletions

3
.gitignore vendored
View File

@ -48,3 +48,6 @@ tests/data/common_wheels/
# Mac
.DS_Store
# Profiling related artifacts
*.prof

1
news/9748.feature.rst Normal file
View File

@ -0,0 +1 @@
Improve performance when picking the best file from indexes during ``pip install``.

View File

@ -434,6 +434,12 @@ class CandidateEvaluator:
self._project_name = project_name
self._specifier = specifier
self._supported_tags = supported_tags
# Since the index of the tag in the _supported_tags list is used
# as a priority, precompute a map from tag to index/priority to be
# used in wheel.find_most_preferred_tag.
self._wheel_tag_preferences = {
tag: idx for idx, tag in enumerate(supported_tags)
}
def get_applicable_candidates(
self,
@ -512,14 +518,17 @@ class CandidateEvaluator:
if link.is_wheel:
# can raise InvalidWheelFilename
wheel = Wheel(link.filename)
if not wheel.supported(valid_tags):
try:
pri = -(wheel.find_most_preferred_tag(
valid_tags, self._wheel_tag_preferences
))
except ValueError:
raise UnsupportedWheel(
"{} is not a supported wheel for this platform. It "
"can't be sorted.".format(wheel.filename)
)
if self._prefer_binary:
binary_preference = 1
pri = -(wheel.support_index_min(valid_tags))
if wheel.build_tag is not None:
match = re.match(r'^(\d+)(.*)$', wheel.build_tag)
build_tag_groups = match.groups()

View File

@ -2,7 +2,7 @@
name that have meaning.
"""
import re
from typing import List
from typing import Dict, List
from pip._vendor.packaging.tags import Tag
@ -66,8 +66,26 @@ class Wheel:
"""
return min(tags.index(tag) for tag in self.file_tags if tag in tags)
def find_most_preferred_tag(self, tags, tag_to_priority):
    # type: (List[Tag], Dict[Tag, int]) -> int
    """Return the best priority reached by any of this wheel's file tags.

    Each of the wheel's file tags that appears in ``tag_to_priority`` is
    mapped to its priority, and the smallest value (lower means more
    preferred) is returned.

    This is an alternative to ``support_index_min`` that relies on a
    precomputed mapping lookup rather than scanning a large list of tags.

    :param tags: the PEP 425 tags to check the wheel against. This
        implementation does not read it; only ``tag_to_priority`` is
        consulted.
    :param tag_to_priority: a mapping from tag to priority of that tag,
        where lower is more preferred.
    :raises ValueError: If none of the wheel's file tags is present in
        ``tag_to_priority``.
    """
    # Collect the priority of every file tag known to the mapping.
    matched_priorities = [
        tag_to_priority[file_tag]
        for file_tag in self.file_tags
        if file_tag in tag_to_priority
    ]
    # min() raises ValueError on an empty list, which is the documented
    # signal that no file tag is supported.
    return min(matched_priorities)
def supported(self, tags):
# type: (List[Tag]) -> bool
# type: (Iterable[Tag]) -> bool
"""Return whether the wheel is compatible with one of the given tags.
:param tags: the PEP 425 tags to check the wheel against.