From bab1e4f8a1a3ba5f5f08207c83a4e0a7a87ea615 Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Mon, 23 Sep 2019 06:53:58 -0700 Subject: [PATCH] Change CollectedLinks to store project_urls. --- src/pip/_internal/index/collector.py | 46 +++++++++-------------- src/pip/_internal/index/package_finder.py | 16 ++++++-- tests/functional/test_install_config.py | 6 +-- tests/unit/test_collector.py | 20 +--------- 4 files changed, 36 insertions(+), 52 deletions(-) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index abf450002..b390cdc82 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -27,8 +27,8 @@ from pip._internal.vcs import is_url, vcs if MYPY_CHECK_RUNNING: from typing import ( - Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence, - Tuple, Union, + Callable, Iterable, List, MutableMapping, Optional, Sequence, Tuple, + Union, ) import xml.etree.ElementTree @@ -435,29 +435,36 @@ def group_locations(locations, expand_dir=False): class CollectedLinks(object): """ - Encapsulates all the Link objects collected by a call to - LinkCollector.collect_links(), stored separately as-- + Encapsulates the return value of a call to LinkCollector.collect_links(). + + The return value includes both URLs to project pages containing package + links, as well as individual package Link objects collected from other + sources. + + This info is stored separately as: (1) links from the configured file locations, (2) links from the configured find_links, and - (3) a dict mapping HTML page url to links from that page. + (3) urls to HTML project pages, as described by the PEP 503 simple + repository API. """ def __init__( self, - files, # type: List[Link] - find_links, # type: List[Link] - pages, # type: Dict[str, List[Link]] + files, # type: List[Link] + find_links, # type: List[Link] + project_urls, # type: List[Link] ): # type: (...) -> None """ :param files: Links from file locations. :param find_links: Links from find_links. - :param pages: A dict mapping HTML page url to links from that page. + :param project_urls: URLs to HTML project pages, as described by + the PEP 503 simple repository API. """ self.files = files self.find_links = find_links - self.pages = pages + self.project_urls = project_urls class LinkCollector(object): @@ -490,19 +497,6 @@ class LinkCollector(object): """ return _get_html_page(location, session=self.session) - def _get_pages(self, locations): - # type: (Iterable[Link]) -> Iterable[HTMLPage] - """ - Yields (page, page_url) from the given locations, skipping - locations that have errors. - """ - for location in locations: - page = self.fetch_page(location) - if page is None: - continue - - yield page - def collect_links(self, project_name): # type: (str) -> CollectedLinks """Find all available links for the given project name. 
@@ -544,12 +538,8 @@ class LinkCollector(object): lines.append('* {}'.format(link)) logger.debug('\n'.join(lines)) - pages_links = {} - for page in self._get_pages(url_locations): - pages_links[page.url] = list(parse_links(page)) - return CollectedLinks( files=file_links, find_links=find_link_links, - pages=pages_links, + project_urls=url_locations, ) diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 9b338e693..36cf91893 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -19,6 +19,7 @@ from pip._internal.exceptions import ( InvalidWheelFilename, UnsupportedWheel, ) +from pip._internal.index.collector import parse_links from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link @@ -788,7 +789,8 @@ class PackageFinder(object): See LinkEvaluator.evaluate_link() for details on which files are accepted. """ - collected_links = self._link_collector.collect_links(project_name) + link_collector = self._link_collector + collected_links = link_collector.collect_links(project_name) link_evaluator = self.make_link_evaluator(project_name) @@ -798,8 +800,16 @@ class PackageFinder(object): ) page_versions = [] - for page_url, page_links in collected_links.pages.items(): - logger.debug('Analyzing links from page %s', page_url) + for project_url in collected_links.project_urls: + logger.debug( + 'Fetching project page and analyzing links: %s', project_url, + ) + html_page = link_collector.fetch_page(project_url) + if html_page is None: + continue + + page_links = list(parse_links(html_page)) + with indent_log(): new_versions = self.evaluate_links( link_evaluator, diff --git a/tests/functional/test_install_config.py b/tests/functional/test_install_config.py index bcf83f163..176976c4e 100644 --- a/tests/functional/test_install_config.py +++ b/tests/functional/test_install_config.py @@ -91,7 +91,7 @@ def test_command_line_append_flags(script, virtualenv, data): 'test.pypi.org', ) assert ( - "Analyzing links from page https://test.pypi.org" + "Fetching project page and analyzing links: https://test.pypi.org" in result.stdout ), str(result) virtualenv.clear() @@ -100,7 +100,7 @@ def test_command_line_append_flags(script, virtualenv, data): '--trusted-host', 'test.pypi.org', ) assert ( - "Analyzing links from page https://test.pypi.org" + "Fetching project page and analyzing links: https://test.pypi.org" in result.stdout ) assert ( @@ -124,7 +124,7 @@ def test_command_line_appends_correctly(script, data): ) assert ( - "Analyzing links from page https://test.pypi.org" + "Fetching project page and analyzing links: https://test.pypi.org" in result.stdout ), result.stdout assert ( diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py index f95ebd8ba..e266ea163 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -456,15 +456,9 @@ class TestLinkCollector(object): url, session=link_collector.session, ) - @patch('pip._internal.index.collector._get_html_response') - def test_collect_links(self, mock_get_html_response, caplog, data): + def test_collect_links(self, caplog, data): caplog.set_level(logging.DEBUG) - expected_url = 'https://pypi.org/simple/twine/' - - fake_response = make_fake_html_response(expected_url) - mock_get_html_response.return_value = fake_response - link_collector = make_test_link_collector( find_links=[data.find_links], # Include two 
copies of the URL to check that the second one @@ -473,10 +467,6 @@ class TestLinkCollector(object): ) actual = link_collector.collect_links('twine') - mock_get_html_response.assert_called_once_with( - expected_url, session=link_collector.session, - ) - # Spot-check the CollectedLinks return value. assert len(actual.files) > 20 check_links_include(actual.files, names=['simple-1.0.tar.gz']) @@ -484,13 +474,7 @@ class TestLinkCollector(object): assert len(actual.find_links) == 1 check_links_include(actual.find_links, names=['packages']) - actual_pages = actual.pages - assert list(actual_pages) == [expected_url] - actual_page_links = actual_pages[expected_url] - assert len(actual_page_links) == 1 - assert actual_page_links[0].url == ( - 'https://pypi.org/abc-1.0.tar.gz#md5=000000000' - ) + assert actual.project_urls == [Link('https://pypi.org/simple/twine/')] expected_message = dedent("""\ 1 location(s) to search for versions of twine:
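
Reviewer note on the resulting data flow: CollectedLinks no longer carries
pre-fetched, pre-parsed pages; it carries the project-page URLs, and
PackageFinder fetches and parses each page only at the point where it
processes that page. The standalone sketch below models this control flow.
It is an illustration, not pip's real API: Link, CollectedLinks, and
gather_page_links here are simplified stand-ins for the classes and the new
PackageFinder loop in this patch, and the sketch runs without pip installed.

    from typing import Callable, List, NamedTuple, Optional


    class Link(NamedTuple):
        """Stand-in for pip._internal.models.link.Link."""
        url: str


    class CollectedLinks(NamedTuple):
        """Mirrors the shape introduced by this patch."""
        files: List[Link]          # links from file locations
        find_links: List[Link]     # links from the configured find_links
        project_urls: List[Link]   # PEP 503 project page URLs, not yet fetched


    def gather_page_links(
        collected: CollectedLinks,
        fetch_page: Callable[[Link], Optional[str]],
        parse_links: Callable[[str], List[Link]],
    ) -> List[Link]:
        """Model of the new PackageFinder loop: fetch each project page
        lazily, skipping pages whose fetch fails, and flatten the links."""
        links: List[Link] = []
        for project_url in collected.project_urls:
            print('Fetching project page and analyzing links: %s' % project_url.url)
            html_page = fetch_page(project_url)
            if html_page is None:
                # A failed fetch skips only this page; links from the other
                # sources (files, find_links) are unaffected, as before.
                continue
            links.extend(parse_links(html_page))
        return links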
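
Exercising the sketch with fakes mirrors the updated unit test: collect_links()
now returns only the project URL, and nothing is fetched until the loop above
runs. The fake_fetch_page and fake_parse_links callables are hypothetical
stand-ins for LinkCollector.fetch_page and collector.parse_links.

    collected = CollectedLinks(
        files=[],
        find_links=[],
        project_urls=[Link('https://pypi.org/simple/twine/')],
    )

    def fake_fetch_page(link: Link) -> Optional[str]:
        # Pretend the network round trip succeeded.
        return '<html><a href="twine-1.0.tar.gz">twine-1.0.tar.gz</a></html>'

    def fake_parse_links(html: str) -> List[Link]:
        # Pretend the anchor tags were parsed out of the page.
        return [Link('https://pypi.org/twine-1.0.tar.gz')]

    print(gather_page_links(collected, fake_fetch_page, fake_parse_links))
    # -> [Link(url='https://pypi.org/twine-1.0.tar.gz')]

Deferring the fetch keeps collect_links() cheap and side-effect free, and it
moves network work and its errors to the point where each project page is
actually needed, which is what the new "Fetching project page and analyzing
links" log message reflects.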