mirror of https://github.com/pypa/pip
Start parsing the "data-yanked" attribute.
This commit is contained in:
parent
fdde0b483e
commit
8666bb1a5d
|
@ -43,15 +43,17 @@ if MYPY_CHECK_RUNNING:
|
|||
Any, Callable, Iterable, Iterator, List, MutableMapping, Optional,
|
||||
Sequence, Set, Tuple, Union,
|
||||
)
|
||||
import xml.etree.ElementTree
|
||||
from pip._vendor.packaging.version import _BaseVersion
|
||||
from pip._vendor.requests import Response
|
||||
from pip._internal.models.search_scope import SearchScope
|
||||
from pip._internal.req import InstallRequirement
|
||||
from pip._internal.download import PipSession
|
||||
|
||||
SecureOrigin = Tuple[str, str, Optional[str]]
|
||||
BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str]
|
||||
CandidateSortingKey = Tuple[int, _BaseVersion, BuildTag, Optional[int]]
|
||||
HTMLElement = xml.etree.ElementTree.Element
|
||||
SecureOrigin = Tuple[str, str, Optional[str]]
|
||||
|
||||
|
||||
__all__ = ['FormatControl', 'FoundCandidates', 'PackageFinder']
|
||||
|
@ -1151,6 +1153,37 @@ def _clean_link(url):
|
|||
return urllib_parse.urlunparse(result._replace(path=path))
|
||||
|
||||
|
||||
def _link_from_element(
    anchor,    # type: HTMLElement
    page_url,  # type: str
    base_url,  # type: str
):
    # type: (...) -> Optional[Link]
    """
    Build a Link from a single anchor element of a simple repository page.

    :param anchor: The anchor element to convert.
    :param page_url: URL of the page the anchor was found on; handed to
        the Link as ``comes_from``.
    :param base_url: URL that the anchor's ``href`` is joined against.
    :return: The resulting Link, or None when the anchor has no ``href``
        attribute (or an empty one).
    """
    target = anchor.get("href")
    if not target:
        # Nothing to link to; the caller decides what to do with None.
        return None

    absolute_url = urllib_parse.urljoin(base_url, target)
    cleaned_url = _clean_link(absolute_url)

    # A missing or empty "data-requires-python" is normalized to None.
    requires_python = anchor.get('data-requires-python')
    if requires_python:
        requires_python = unescape(requires_python)
    else:
        requires_python = None

    # Only a non-empty "data-yanked" value is unescaped; any falsy value
    # returned by anchor.get() is passed through to the Link unchanged.
    reason = anchor.get('data-yanked')
    if reason:
        reason = unescape(reason)

    return Link(
        cleaned_url,
        comes_from=page_url,
        requires_python=requires_python,
        yanked_reason=reason,
    )
|
||||
|
||||
|
||||
class HTMLPage(object):
|
||||
"""Represents one page, along with its URL"""
|
||||
|
||||
|
@ -1173,12 +1206,14 @@ class HTMLPage(object):
|
|||
)
|
||||
base_url = _determine_base_url(document, self.url)
|
||||
for anchor in document.findall(".//a"):
|
||||
if anchor.get("href"):
|
||||
href = anchor.get("href")
|
||||
url = _clean_link(urllib_parse.urljoin(base_url, href))
|
||||
pyrequire = anchor.get('data-requires-python')
|
||||
pyrequire = unescape(pyrequire) if pyrequire else None
|
||||
yield Link(url, self.url, requires_python=pyrequire)
|
||||
link = _link_from_element(
|
||||
anchor,
|
||||
page_url=self.url,
|
||||
base_url=base_url,
|
||||
)
|
||||
if link is None:
|
||||
continue
|
||||
yield link
|
||||
|
||||
|
||||
Search = namedtuple('Search', 'supplied canonical formats')
|
||||
|
|
|
@ -7,7 +7,7 @@ from pip._vendor import html5lib, requests
|
|||
|
||||
from pip._internal.download import PipSession
|
||||
from pip._internal.index import (
|
||||
CandidateEvaluator, Link, PackageFinder, Search,
|
||||
CandidateEvaluator, HTMLPage, Link, PackageFinder, Search,
|
||||
_check_link_requires_python, _clean_link, _determine_base_url,
|
||||
_egg_info_matches, _find_name_version_sep, _get_html_page,
|
||||
)
|
||||
|
@ -521,3 +521,31 @@ def test_clean_link_windows(url, clean_url):
|
|||
@pytest.mark.skipif("sys.platform == 'win32'")
|
||||
def test_clean_link_non_windows(url, clean_url):
|
||||
assert(_clean_link(url) == clean_url)
|
||||
|
||||
|
||||
class TestHTMLPage:
    """Tests for HTMLPage."""

    @pytest.mark.parametrize(
        ('anchor_html, expected'),
        [
            # Test not present.
            ('<a href="/pkg1-1.0.tar.gz"></a>', None),
            # Test present with no value.
            ('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', ''),
            # Test the empty string.
            ('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ''),
            # Test a non-empty string.
            ('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', 'error'),
            # Test a value with an escaped character.  The input must carry
            # a real character reference (not a literal "<") so that this
            # case actually exercises the escaping it is named for.
            ('<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt; 1"></a>',
             'version < 1'),
        ]
    )
    def test_iter_links__yanked_reason(self, anchor_html, expected):
        """
        Check that iter_links() yields exactly one Link for the anchor and
        that its yanked_reason equals the expected value.
        """
        html = '<html><body>{}</body></html>'.format(anchor_html)
        html_bytes = html.encode('utf-8')
        page = HTMLPage(html_bytes, url='https://example.com/simple/')
        links = list(page.iter_links())
        # Unpacking into a 1-tuple also asserts that only one link came back.
        link, = links
        actual = link.yanked_reason
        assert actual == expected
|
||||
|
|
Loading…
Reference in New Issue