Merge pull request #6647 from cjerdonek/issue-6633-yanked-releases

Add support for "yanked" files (PEP 592)
This commit is contained in:
Chris Jerdonek 2019-06-27 00:52:11 -07:00 committed by GitHub
commit fc46a18563
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 316 additions and 36 deletions

2
news/6633.feature Normal file
View File

@ -0,0 +1,2 @@
Respect whether a file has been marked as "yanked" from a simple repository
(see `PEP 592 <https://www.python.org/dev/peps/pep-0592/>`__ for details).

View File

@ -183,7 +183,10 @@ class ListCommand(Command):
if not candidate.version.is_prerelease]
evaluator = finder.candidate_evaluator
best_candidate = evaluator.get_best_candidate(all_candidates)
# Pass allow_yanked=False to ignore yanked versions.
best_candidate = evaluator.get_best_candidate(
all_candidates, allow_yanked=False,
)
if best_candidate is None:
continue

View File

@ -43,15 +43,19 @@ if MYPY_CHECK_RUNNING:
Any, Callable, Iterable, Iterator, List, MutableMapping, Optional,
Sequence, Set, Tuple, Union,
)
import xml.etree.ElementTree
from pip._vendor.packaging.version import _BaseVersion
from pip._vendor.requests import Response
from pip._internal.models.search_scope import SearchScope
from pip._internal.req import InstallRequirement
from pip._internal.download import PipSession
SecureOrigin = Tuple[str, str, Optional[str]]
BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str]
CandidateSortingKey = Tuple[int, _BaseVersion, BuildTag, Optional[int]]
CandidateSortingKey = (
Tuple[int, int, _BaseVersion, BuildTag, Optional[int]]
)
HTMLElement = xml.etree.ElementTree.Element
SecureOrigin = Tuple[str, str, Optional[str]]
__all__ = ['FormatControl', 'FoundCandidates', 'PackageFinder']
@ -454,14 +458,24 @@ class CandidateEvaluator(object):
def _sort_key(self, candidate):
# type: (InstallationCandidate) -> CandidateSortingKey
"""
Function used to generate link sort key for link tuples.
The greater the return value, the more preferred it is.
If not finding wheels, then sorted by version only.
Function to pass as the `key` argument to a call to sorted() to sort
InstallationCandidates by preference.
Returns a tuple such that tuples sorting as greater using Python's
default comparison operator are more preferred.
The preference is as follows:
First and foremost, yanked candidates (in the sense of PEP 592) are
always less preferred than candidates that haven't been yanked. Then:
If not finding wheels, they are sorted by version only.
If finding wheels, then the sort order is by version, then:
1. existing installs
2. wheels ordered via Wheel.support_index_min(self._valid_tags)
3. source archives
If prefer_binary was set, then all wheels are sorted above sources.
Note: it was considered to embed this logic into the Link
comparison operators, but then different sdist links
with the same version, would have to be considered equal
@ -470,9 +484,10 @@ class CandidateEvaluator(object):
support_num = len(valid_tags)
build_tag = tuple() # type: BuildTag
binary_preference = 0
if candidate.location.is_wheel:
link = candidate.location
if link.is_wheel:
# can raise InvalidWheelFilename
wheel = Wheel(candidate.location.filename)
wheel = Wheel(link.filename)
if not self._is_wheel_supported(wheel):
raise UnsupportedWheel(
"%s is not a supported wheel for this platform. It "
@ -487,18 +502,52 @@ class CandidateEvaluator(object):
build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
else: # sdist
pri = -(support_num)
return (binary_preference, candidate.version, build_tag, pri)
yank_value = -1 * int(link.is_yanked) # -1 for yanked.
return (
yank_value, binary_preference, candidate.version, build_tag, pri,
)
def get_best_candidate(self, candidates):
# type: (List[InstallationCandidate]) -> InstallationCandidate
# Don't include an allow_yanked default value to make sure each call
# site considers whether yanked releases are allowed. This also causes
# that decision to be made explicit in the calling code, which helps
# people when reading the code.
def get_best_candidate(
self,
candidates, # type: List[InstallationCandidate]
allow_yanked, # type: bool
):
# type: (...) -> Optional[InstallationCandidate]
"""
Return the best candidate per the instance's sort order, or None if
no candidates are given.
no candidate is acceptable.
:param allow_yanked: Whether to permit returning a yanked candidate
in the sense of PEP 592. If true, a yanked candidate will be
returned only if all candidates have been yanked.
"""
if not candidates:
return None
return max(candidates, key=self._sort_key)
best_candidate = max(candidates, key=self._sort_key)
# Log a warning per PEP 592 if necessary before returning.
link = best_candidate.location
if not link.is_yanked:
return best_candidate
# Otherwise, all the candidates were yanked.
if not allow_yanked:
return None
reason = link.yanked_reason or '<none given>'
msg = (
'The candidate selected for download or install is a '
'yanked version: {candidate}\n'
'Reason for being yanked: {reason}'
).format(candidate=best_candidate, reason=reason)
logger.warning(msg)
return best_candidate
class FoundCandidates(object):
@ -540,13 +589,23 @@ class FoundCandidates(object):
# Again, converting version to str to deal with debundling.
return (c for c in self.iter_all() if str(c.version) in self._versions)
def get_best(self):
# type: () -> Optional[InstallationCandidate]
# Don't include an allow_yanked default value to make sure each call
# site considers whether yanked releases are allowed. This also causes
# that decision to be made explicit in the calling code, which helps
# people when reading the code.
def get_best(self, allow_yanked):
# type: (bool) -> Optional[InstallationCandidate]
"""Return the best candidate available, or None if no applicable
candidates are found.
:param allow_yanked: Whether to permit returning a yanked candidate
in the sense of PEP 592. If true, a yanked candidate will be
returned only if all candidates have been yanked.
"""
candidates = list(self.iter_applicable())
return self._evaluator.get_best_candidate(candidates)
return self._evaluator.get_best_candidate(
candidates, allow_yanked=allow_yanked,
)
class PackageFinder(object):
@ -910,7 +969,7 @@ class PackageFinder(object):
Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
"""
candidates = self.find_candidates(req.name, req.specifier)
best_candidate = candidates.get_best()
best_candidate = candidates.get_best(allow_yanked=True)
installed_version = None # type: Optional[_BaseVersion]
if req.satisfied_by is not None:
@ -1151,6 +1210,37 @@ def _clean_link(url):
return urllib_parse.urlunparse(result._replace(path=path))
def _create_link_from_element(
anchor, # type: HTMLElement
page_url, # type: str
base_url, # type: str
):
# type: (...) -> Optional[Link]
"""
Convert an anchor element in a simple repository page to a Link.
"""
# Returns None when the anchor has no href attribute or an empty one;
# page_url is recorded on the Link as comes_from.
href = anchor.get("href")
if not href:
return None
# Resolve the (possibly relative) href against base_url, then pass the
# result through _clean_link().
url = _clean_link(urllib_parse.urljoin(base_url, href))
# A missing or empty data-requires-python attribute becomes None.
pyrequire = anchor.get('data-requires-python')
pyrequire = unescape(pyrequire) if pyrequire else None
# data-yanked (PEP 592): only a non-empty value is unescaped, so an empty
# string is passed through unchanged as '' (yanked, no reason given) and is
# not collapsed to None the way data-requires-python is above.
yanked_reason = anchor.get('data-yanked')
if yanked_reason:
yanked_reason = unescape(yanked_reason)
link = Link(
url,
comes_from=page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
)
return link
class HTMLPage(object):
"""Represents one page, along with its URL"""
@ -1173,12 +1263,14 @@ class HTMLPage(object):
)
base_url = _determine_base_url(document, self.url)
for anchor in document.findall(".//a"):
if anchor.get("href"):
href = anchor.get("href")
url = _clean_link(urllib_parse.urljoin(base_url, href))
pyrequire = anchor.get('data-requires-python')
pyrequire = unescape(pyrequire) if pyrequire else None
yield Link(url, self.url, requires_python=pyrequire)
link = _create_link_from_element(
anchor,
page_url=self.url,
base_url=base_url,
)
if link is None:
continue
yield link
Search = namedtuple('Search', 'supplied canonical formats')

View File

@ -29,3 +29,8 @@ class InstallationCandidate(KeyBasedCompareMixin):
return "<InstallationCandidate({!r}, {!r}, {!r})>".format(
self.project, self.version, self.location,
)
def __str__(self):
# Human-readable description: the project is rendered with repr() (so it
# appears quoted), followed by the version and the location formatted with
# str(), e.g. "'mypackage' candidate (version 3.0 at <url>)".
return '{!r} candidate (version {} at {})'.format(
self.project, self.version, self.location,
)

View File

@ -19,17 +19,28 @@ class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL
"""
def __init__(self, url, comes_from=None, requires_python=None):
# type: (str, Optional[Union[str, HTMLPage]], Optional[str]) -> None
def __init__(
self,
url, # type: str
comes_from=None, # type: Optional[Union[str, HTMLPage]]
requires_python=None, # type: Optional[str]
yanked_reason=None, # type: Optional[str]
):
# type: (...) -> None
"""
url:
url of the resource pointed to (href of the link)
comes_from:
instance of HTMLPage where the link was found, or string.
requires_python:
String containing the `Requires-Python` metadata field, specified
in PEP 345. This may be specified by a data-requires-python
attribute in the HTML link tag, as described in PEP 503.
:param url: url of the resource pointed to (href of the link)
:param comes_from: instance of HTMLPage where the link was found,
or string.
:param requires_python: String containing the `Requires-Python`
metadata field, specified in PEP 345. This may be specified by
a data-requires-python attribute in the HTML link tag, as
described in PEP 503.
:param yanked_reason: the reason the file has been yanked, if the
file has been yanked, or None if the file hasn't been yanked.
This is the value of the "data-yanked" attribute, if present, in
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
"""
# url can be a UNC windows share
@ -43,6 +54,7 @@ class Link(KeyBasedCompareMixin):
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
super(Link, self).__init__(key=url, defining_class=Link)
@ -176,3 +188,8 @@ class Link(KeyBasedCompareMixin):
return False
return True
@property
def is_yanked(self):
# type: () -> bool
# Compares against None rather than testing truthiness, so an empty-string
# yanked_reason (yanked, but no reason provided) still counts as yanked.
return self.yanked_reason is not None

View File

@ -131,7 +131,11 @@ def pip_version_check(session, options):
trusted_hosts=options.trusted_hosts,
session=session,
)
candidate = finder.find_candidates("pip").get_best()
# Pass allow_yanked=False so we don't suggest upgrading to a
# yanked version.
candidate = finder.find_candidates("pip").get_best(
allow_yanked=False,
)
if candidate is None:
return
pypi_version = str(candidate.version)

View File

@ -7,10 +7,11 @@ from pip._vendor import html5lib, requests
from pip._internal.download import PipSession
from pip._internal.index import (
CandidateEvaluator, Link, PackageFinder, Search,
CandidateEvaluator, HTMLPage, Link, PackageFinder, Search,
_check_link_requires_python, _clean_link, _determine_base_url,
_egg_info_matches, _find_name_version_sep, _get_html_page,
)
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.search_scope import SearchScope
from pip._internal.models.target_python import TargetPython
from tests.lib import CURRENT_PY_VERSION_INFO, make_test_finder
@ -148,6 +149,122 @@ class TestCandidateEvaluator:
)
assert actual == expected
@pytest.mark.parametrize('yanked_reason, expected', [
# Test a non-yanked file.
(None, 0),
# Test a yanked file (has a lower value than non-yanked).
('bad metadata', -1),
])
def test_sort_key__is_yanked(self, yanked_reason, expected):
"""
Test the effect of is_yanked on _sort_key()'s return value.
"""
# The URL ends in .tar.gz -- presumably so the candidate takes the sdist
# branch of _sort_key() rather than the wheel branch; confirm against
# Link.is_wheel.
url = 'https://example.com/mypackage.tar.gz'
link = Link(url, yanked_reason=yanked_reason)
candidate = InstallationCandidate('mypackage', '1.0', link)
evaluator = CandidateEvaluator()
sort_value = evaluator._sort_key(candidate)
# Yanked / non-yanked is reflected in the first element of the tuple.
# The remaining elements of the sort key are deliberately not asserted.
actual = sort_value[0]
assert actual == expected
def make_mock_candidate(self, version, yanked_reason=None):
# Test helper: builds an InstallationCandidate for project 'mypackage'
# whose link URL embeds the given version. Leaving yanked_reason as None
# produces a non-yanked link; any string (including '') marks it yanked.
url = 'https://example.com/pkg-{}.tar.gz'.format(version)
link = Link(url, yanked_reason=yanked_reason)
candidate = InstallationCandidate('mypackage', version, link)
return candidate
@pytest.mark.parametrize('allow_yanked', [True, False])
def test_get_best_candidate__no_candidates(self, allow_yanked):
"""
Test passing an empty list.
"""
# The result must be None for both values of allow_yanked.
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate([], allow_yanked=allow_yanked)
assert actual is None
def test_get_best_candidate__all_yanked__allow_yanked_false(self):
"""
Test all candidates yanked with allow_yanked=False.
"""
# yanked_reason='' means "yanked, no reason given", so both candidates
# below are yanked and nothing may be returned.
candidates = [
self.make_mock_candidate('1.0', yanked_reason=''),
self.make_mock_candidate('2.0', yanked_reason=''),
]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates, allow_yanked=False)
assert actual is None
def test_get_best_candidate__all_yanked__allow_yanked_true(self, caplog):
"""
Test all candidates yanked with allow_yanked=True.
"""
# caplog is pytest's log-capturing fixture; it is used below to inspect
# the warning emitted when a yanked candidate is selected.
candidates = [
self.make_mock_candidate('1.0', yanked_reason='bad metadata #1'),
# Put the best candidate in the middle, to test sorting.
self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'),
self.make_mock_candidate('2.0', yanked_reason='bad metadata #2'),
]
expected_best = candidates[1]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates, allow_yanked=True)
# Since every candidate is yanked, the highest version is still chosen.
assert actual is expected_best
assert str(actual.version) == '3.0'
# Check the log messages.
# Exactly one WARNING is expected; its text names the selected candidate
# (via InstallationCandidate.__str__) and that candidate's yanked reason.
assert len(caplog.records) == 1
record = caplog.records[0]
assert record.levelname == 'WARNING'
assert record.message == (
'The candidate selected for download or install is a yanked '
"version: 'mypackage' candidate "
'(version 3.0 at https://example.com/pkg-3.0.tar.gz)\n'
'Reason for being yanked: bad metadata #3'
)
def test_get_best_candidate__yanked_no_reason_given(self, caplog):
"""
Test the log message when no reason is given.
"""
# An empty yanked_reason must be reported as '<none given>' in the warning
# rather than as an empty string.
candidates = [
self.make_mock_candidate('1.0', yanked_reason=''),
]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates, allow_yanked=True)
assert str(actual.version) == '1.0'
assert len(caplog.records) == 1
record = caplog.records[0]
assert record.levelname == 'WARNING'
assert record.message == (
'The candidate selected for download or install is a yanked '
"version: 'mypackage' candidate "
'(version 1.0 at https://example.com/pkg-1.0.tar.gz)\n'
'Reason for being yanked: <none given>'
)
def test_get_best_candidate__best_yanked_but_not_all(self, caplog):
"""
Test the best candidates being yanked, but not all.
"""
# Versions 4.0 and 3.0 are the newest but are yanked, so the highest
# non-yanked version (2.0) is expected to win even with allow_yanked=True.
candidates = [
self.make_mock_candidate('4.0', yanked_reason='bad metadata #4'),
# Put the best candidate in the middle, to test sorting.
self.make_mock_candidate('2.0'),
self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'),
self.make_mock_candidate('1.0'),
]
expected_best = candidates[1]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates, allow_yanked=True)
assert actual is expected_best
assert str(actual.version) == '2.0'
# Check the log messages.
# No warning is expected because the selected candidate is not yanked.
assert len(caplog.records) == 0
class TestPackageFinder:
@ -521,3 +638,31 @@ def test_clean_link_windows(url, clean_url):
@pytest.mark.skipif("sys.platform == 'win32'")
def test_clean_link_non_windows(url, clean_url):
assert(_clean_link(url) == clean_url)
class TestHTMLPage:
# Each case pairs one anchor tag with the yanked_reason expected on the Link
# that HTMLPage.iter_links() yields for it.
@pytest.mark.parametrize(
('anchor_html, expected'),
[
# Test not present.
('<a href="/pkg1-1.0.tar.gz"></a>', None),
# Test present with no value.
('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', ''),
# Test the empty string.
('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ''),
# Test a non-empty string.
('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', 'error'),
# Test a value with an escaped character.
# Note the entity is written as '&lt' without a trailing semicolon; the
# expected value shows it must still be decoded to '<'.
('<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt 1"></a>',
'version < 1'),
]
)
def test_iter_links__yanked_reason(self, anchor_html, expected):
# Wrap the single anchor in a minimal document and hand it to HTMLPage
# as UTF-8 bytes.
html = '<html><body>{}</body></html>'.format(anchor_html)
html_bytes = html.encode('utf-8')
page = HTMLPage(html_bytes, url='https://example.com/simple/')
links = list(page.iter_links())
# Unpacking into a one-element target also checks that exactly one link
# was produced (it raises ValueError otherwise).
link, = links
actual = link.yanked_reason
assert actual == expected

View File

@ -71,3 +71,15 @@ class TestLink:
url = 'git+https://example.com/package#subdirectory=subdir&egg=eggname'
assert 'eggname' == Link(url).egg_fragment
assert 'subdir' == Link(url).subdirectory_fragment
# None means "not yanked"; any string, including the empty one, means yanked.
@pytest.mark.parametrize('yanked_reason, expected', [
(None, False),
('', True),
('there was a mistake', True),
])
def test_is_yanked(self, yanked_reason, expected):
link = Link(
'https://example.com/wheel.whl',
yanked_reason=yanked_reason,
)
assert link.is_yanked == expected

View File

@ -16,7 +16,7 @@ class MockFoundCandidates(object):
def __init__(self, best):
self._best = best
def get_best(self):
def get_best(self, allow_yanked):
return self._best