1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Properly yield results from html5lib parsing

The earlier variant _returned_ an iterable object from a generator. This
did not properly handle the fallback, resulting in the html5lib code
path not being executed.
This commit is contained in:
Pradyun Gedam 2022-01-30 16:13:59 +00:00
parent 6cc96c28cc
commit 80609e8c20
No known key found for this signature in database
GPG key ID: FF99710C4332258E
3 changed files with 15 additions and 1 deletions

1
news/10846.bugfix.rst Normal file
View file

@ -0,0 +1 @@
Properly handle links parsed by html5lib, when using ``--use-deprecated=html5lib``.

View file

@ -343,7 +343,8 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
Parse an HTML document, and yield its anchor elements as Link objects.
"""
if use_deprecated_html5lib:
return _parse_links_html5lib(page)
yield from _parse_links_html5lib(page)
return
parser = HTMLLinkParser()
encoding = page.encoding or "utf-8"

View file

@ -539,6 +539,18 @@ def test_parse_links_caches_same_page_by_url() -> None:
assert "pkg2" in parsed_links_3[0].url
def test_parse_link_handles_deprecated_usage_properly() -> None:
    """Check that ``parse_links`` yields links when html5lib parsing is requested.

    The page contains two anchors; consuming ``parse_links`` with
    ``use_deprecated_html5lib=True`` must produce one link per anchor, in
    document order.
    """
    page = HTMLPage(
        b'<a href="/pkg1-1.0.tar.gz"><a href="/pkg1-2.0.tar.gz">',
        encoding=None,
        url="https://example.com/simple/",
    )

    # Materialize the result so the number of yielded links can be checked.
    links = list(parse_links(page, use_deprecated_html5lib=True))

    assert len(links) == 2
    first, second = links
    assert "pkg1-1.0" in first.url
    assert "pkg1-2.0" in second.url
@mock.patch("pip._internal.index.collector.raise_for_status")
def test_request_http_error(
mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture