Change CollectedLinks to store project_urls.

Chris Jerdonek authored 2019-09-23 06:53:58 -07:00; committed by Chris Hunt
parent 024038cf10
commit bab1e4f8a1
4 changed files with 36 additions and 52 deletions

src/pip/_internal/index/collector.py

@@ -27,8 +27,8 @@ from pip._internal.vcs import is_url, vcs
 if MYPY_CHECK_RUNNING:
     from typing import (
-        Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence,
-        Tuple, Union,
+        Callable, Iterable, List, MutableMapping, Optional, Sequence, Tuple,
+        Union,
     )
     import xml.etree.ElementTree
@@ -435,29 +435,36 @@ def group_locations(locations, expand_dir=False):
 class CollectedLinks(object):
 
     """
-    Encapsulates all the Link objects collected by a call to
-    LinkCollector.collect_links(), stored separately as--
+    Encapsulates the return value of a call to LinkCollector.collect_links().
+
+    The return value includes both URLs to project pages containing package
+    links, as well as individual package Link objects collected from other
+    sources.
+
+    This info is stored separately as:
 
     (1) links from the configured file locations,
     (2) links from the configured find_links, and
-    (3) a dict mapping HTML page url to links from that page.
+    (3) urls to HTML project pages, as described by the PEP 503 simple
+        repository API.
     """
 
     def __init__(
         self,
-        files,       # type: List[Link]
-        find_links,  # type: List[Link]
-        pages,       # type: Dict[str, List[Link]]
+        files,         # type: List[Link]
+        find_links,    # type: List[Link]
+        project_urls,  # type: List[Link]
     ):
         # type: (...) -> None
         """
         :param files: Links from file locations.
         :param find_links: Links from find_links.
-        :param pages: A dict mapping HTML page url to links from that page.
+        :param project_urls: URLs to HTML project pages, as described by
+            the PEP 503 simple repository API.
         """
         self.files = files
         self.find_links = find_links
-        self.pages = pages
+        self.project_urls = project_urls
 
 
 class LinkCollector(object):
@@ -490,19 +497,6 @@ class LinkCollector(object):
         """
         return _get_html_page(location, session=self.session)
 
-    def _get_pages(self, locations):
-        # type: (Iterable[Link]) -> Iterable[HTMLPage]
-        """
-        Yields (page, page_url) from the given locations, skipping
-        locations that have errors.
-        """
-        for location in locations:
-            page = self.fetch_page(location)
-            if page is None:
-                continue
-
-            yield page
-
     def collect_links(self, project_name):
         # type: (str) -> CollectedLinks
         """Find all available links for the given project name.
@@ -544,12 +538,8 @@ class LinkCollector(object):
             lines.append('* {}'.format(link))
         logger.debug('\n'.join(lines))
 
-        pages_links = {}
-        for page in self._get_pages(url_locations):
-            pages_links[page.url] = list(parse_links(page))
-
         return CollectedLinks(
             files=file_links,
             find_links=find_link_links,
-            pages=pages_links,
+            project_urls=url_locations,
         )
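
For orientation, here is a minimal, self-contained sketch (not part of the commit, and not pip's actual classes) of what this change means for the shape of CollectedLinks: project pages are now represented only by their URLs, so fetching and parsing happen later, at the call site.

from typing import List, NamedTuple


class Link(NamedTuple):
    """Stand-in for pip's Link model; only the URL matters here."""
    url: str


class CollectedLinks(NamedTuple):
    """Mirrors the new three-part structure described in the docstring."""
    files: List[Link]          # links from configured file locations
    find_links: List[Link]     # links from the configured find_links
    project_urls: List[Link]   # PEP 503 project page URLs, not yet fetched


collected = CollectedLinks(
    files=[],
    find_links=[Link('file:///srv/wheels/')],
    project_urls=[Link('https://pypi.org/simple/twine/')],
)

# Before this commit, the constructor instead took a `pages` dict mapping
# each page URL to its already-parsed links; now the caller decides when
# (and whether) each project page is actually fetched.
for project_url in collected.project_urls:
    print('would fetch and parse:', project_url.url)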

src/pip/_internal/index/package_finder.py

@@ -19,6 +19,7 @@ from pip._internal.exceptions import (
     InvalidWheelFilename,
     UnsupportedWheel,
 )
+from pip._internal.index.collector import parse_links
 from pip._internal.models.candidate import InstallationCandidate
 from pip._internal.models.format_control import FormatControl
 from pip._internal.models.link import Link
@@ -788,7 +789,8 @@ class PackageFinder(object):
         See LinkEvaluator.evaluate_link() for details on which files
         are accepted.
         """
-        collected_links = self._link_collector.collect_links(project_name)
+        link_collector = self._link_collector
+        collected_links = link_collector.collect_links(project_name)
 
         link_evaluator = self.make_link_evaluator(project_name)
@@ -798,8 +800,16 @@ class PackageFinder(object):
         )
 
         page_versions = []
-        for page_url, page_links in collected_links.pages.items():
-            logger.debug('Analyzing links from page %s', page_url)
+        for project_url in collected_links.project_urls:
+            logger.debug(
+                'Fetching project page and analyzing links: %s', project_url,
+            )
+            html_page = link_collector.fetch_page(project_url)
+            if html_page is None:
+                continue
+
+            page_links = list(parse_links(html_page))
+
             with indent_log():
                 new_versions = self.evaluate_links(
                     link_evaluator,
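
The control flow that moved into PackageFinder can be seen in isolation below. This is a hedged sketch: fetch_page and parse_links are hypothetical stand-ins for pip's LinkCollector.fetch_page() and collector.parse_links(), so the skip-on-failure behavior is runnable outside pip.

from typing import List, Optional


def fetch_page(url):
    # type: (str) -> Optional[str]
    # Stand-in for LinkCollector.fetch_page(), which returns None when a
    # page cannot be retrieved (unsupported scheme, HTTP error, etc.).
    return '<html></html>' if url.startswith('https://') else None


def parse_links(page):
    # type: (str) -> List[str]
    # Stand-in for collector.parse_links(); real pip yields Link objects
    # parsed from the page's anchor tags.
    return []


project_urls = ['https://pypi.org/simple/twine/', 'ftp://bad.example/simple/']
for project_url in project_urls:
    html_page = fetch_page(project_url)
    if html_page is None:
        continue  # a failed fetch skips the page, as _get_pages() did before
    page_links = list(parse_links(html_page))
    print(project_url, '->', len(page_links), 'links')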

tests/functional/test_install_config.py

@@ -91,7 +91,7 @@ def test_command_line_append_flags(script, virtualenv, data):
         'test.pypi.org',
     )
     assert (
-        "Analyzing links from page https://test.pypi.org"
+        "Fetching project page and analyzing links: https://test.pypi.org"
         in result.stdout
     ), str(result)
     virtualenv.clear()
@@ -100,7 +100,7 @@ def test_command_line_append_flags(script, virtualenv, data):
         '--trusted-host', 'test.pypi.org',
     )
     assert (
-        "Analyzing links from page https://test.pypi.org"
+        "Fetching project page and analyzing links: https://test.pypi.org"
         in result.stdout
     )
     assert (
@@ -124,7 +124,7 @@ def test_command_line_appends_correctly(script, data):
     )
     assert (
-        "Analyzing links from page https://test.pypi.org"
+        "Fetching project page and analyzing links: https://test.pypi.org"
         in result.stdout
     ), result.stdout
     assert (

tests/unit/test_collector.py

@@ -456,15 +456,9 @@ class TestLinkCollector(object):
             url, session=link_collector.session,
         )
 
-    @patch('pip._internal.index.collector._get_html_response')
-    def test_collect_links(self, mock_get_html_response, caplog, data):
+    def test_collect_links(self, caplog, data):
         caplog.set_level(logging.DEBUG)
 
-        expected_url = 'https://pypi.org/simple/twine/'
-
-        fake_response = make_fake_html_response(expected_url)
-        mock_get_html_response.return_value = fake_response
-
         link_collector = make_test_link_collector(
             find_links=[data.find_links],
             # Include two copies of the URL to check that the second one
@@ -473,10 +467,6 @@
         )
         actual = link_collector.collect_links('twine')
 
-        mock_get_html_response.assert_called_once_with(
-            expected_url, session=link_collector.session,
-        )
-
         # Spot-check the CollectedLinks return value.
         assert len(actual.files) > 20
         check_links_include(actual.files, names=['simple-1.0.tar.gz'])
@@ -484,13 +474,7 @@
         assert len(actual.find_links) == 1
         check_links_include(actual.find_links, names=['packages'])
 
-        actual_pages = actual.pages
-        assert list(actual_pages) == [expected_url]
-        actual_page_links = actual_pages[expected_url]
-        assert len(actual_page_links) == 1
-        assert actual_page_links[0].url == (
-            'https://pypi.org/abc-1.0.tar.gz#md5=000000000'
-        )
+        assert actual.project_urls == [Link('https://pypi.org/simple/twine/')]
 
         expected_message = dedent("""\
             1 location(s) to search for versions of twine:
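
The test could drop its HTTP mocking because collect_links() no longer performs any network I/O; only URL bookkeeping is left to verify. Below is a standalone analogue of that assertion style, where collect() is a hypothetical stand-in for LinkCollector.collect_links(), illustrating the duplicate-URL skipping the test's comment mentions.

def collect(search_urls):
    # Hypothetical stand-in for LinkCollector.collect_links(): record the
    # project URLs in order, dropping duplicates, without fetching anything.
    seen = set()
    result = []
    for url in search_urls:
        if url not in seen:
            seen.add(url)
            result.append(url)
    return result


def test_collect_records_urls_without_fetching():
    # Two copies of the URL, mirroring the test above: the second is skipped.
    urls = ['https://pypi.org/simple/twine/', 'https://pypi.org/simple/twine/']
    assert collect(urls) == ['https://pypi.org/simple/twine/']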