From ed55cde68966e2c0b32e6fd543f4ce066609f516 Mon Sep 17 00:00:00 2001
From: Chris Jerdonek
Date: Wed, 21 Aug 2019 23:46:55 -0700
Subject: [PATCH] Add LinkCollector class to index.py.

---
 src/pip/_internal/index.py  | 208 +++++++++++++++++++++++++-----------
 tests/unit/test_finder.py   |   5 +-
 tests/unit/test_index.py    |  22 ++--
 tests/unit/test_req.py      |   2 +-
 tests/unit/test_req_file.py |   2 +-
 5 files changed, 166 insertions(+), 73 deletions(-)

diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py
index 3ab5cabf1..ed08e0272 100644
--- a/src/pip/_internal/index.py
+++ b/src/pip/_internal/index.py
@@ -48,8 +48,8 @@ from pip._internal.wheel import Wheel
 
 if MYPY_CHECK_RUNNING:
     from typing import (
-        Any, Callable, FrozenSet, Iterable, List, MutableMapping, Optional,
-        Sequence, Set, Text, Tuple, Union,
+        Any, Callable, Dict, FrozenSet, Iterable, List, MutableMapping,
+        Optional, Sequence, Set, Text, Tuple, Union,
     )
     import xml.etree.ElementTree
     from pip._vendor.packaging.version import _BaseVersion
@@ -312,6 +312,123 @@ def group_locations(locations, expand_dir=False):
     return files, urls
 
 
+class CollectedLinks(object):
+
+    """
+    Encapsulates all the Link objects collected by a call to
+    LinkCollector.collect_links(), stored separately as--
+
+    (1) links from the configured file locations,
+    (2) links from the configured find_links, and
+    (3) a dict mapping HTML page url to links from that page.
+    """
+
+    def __init__(
+        self,
+        files,       # type: List[Link]
+        find_links,  # type: List[Link]
+        pages,       # type: Dict[str, List[Link]]
+    ):
+        # type: (...) -> None
+        """
+        :param files: Links from file locations.
+        :param find_links: Links from find_links.
+        :param pages: A dict mapping HTML page url to links from that page.
+        """
+        self.files = files
+        self.find_links = find_links
+        self.pages = pages
+
+
+class LinkCollector(object):
+
+    """
+    Responsible for collecting Link objects from all configured locations,
+    making network requests as needed.
+
+    The class's main entry point is its collect_links() method.
+    """
+
+    def __init__(
+        self,
+        session,       # type: PipSession
+        search_scope,  # type: SearchScope
+    ):
+        # type: (...) -> None
+        self.search_scope = search_scope
+        self.session = session
+
+    @property
+    def find_links(self):
+        # type: () -> List[str]
+        return self.search_scope.find_links
+
+    def _get_pages(self, locations, project_name):
+        # type: (Iterable[Link], str) -> Iterable[HTMLPage]
+        """
+        Yield HTMLPage objects for the given locations, skipping
+        locations that have errors.
+        """
+        seen = set()  # type: Set[Link]
+        for location in locations:
+            if location in seen:
+                continue
+            seen.add(location)
+
+            page = _get_html_page(location, session=self.session)
+            if page is None:
+                continue
+
+            yield page
+
+    def collect_links(self, project_name):
+        # type: (str) -> CollectedLinks
+        """Find all available links for the given project name.
+
+        :return: All the Link objects (unfiltered), as a CollectedLinks
+            object.
+        """
+        search_scope = self.search_scope
+        index_locations = search_scope.get_index_urls_locations(project_name)
+        index_file_loc, index_url_loc = group_locations(index_locations)
+        fl_file_loc, fl_url_loc = group_locations(
+            self.find_links, expand_dir=True,
+        )
+
+        file_links = [
+            Link(url) for url in itertools.chain(index_file_loc, fl_file_loc)
+        ]
+
+        # We trust every directly linked archive in find_links.
+        find_link_links = [Link(url, '-f') for url in self.find_links]
+
+        # We trust every url that the user has given us, whether it was given
+        # via --index-url or --find-links.
+        # We want to filter out anything that does not have a secure origin.
+        url_locations = [
+            link for link in itertools.chain(
+                (Link(url) for url in index_url_loc),
+                (Link(url) for url in fl_url_loc),
+            )
+            if self.session.is_secure_origin(link)
+        ]
+
+        logger.debug('%d location(s) to search for versions of %s:',
+                     len(url_locations), project_name)
+
+        for location in url_locations:
+            logger.debug('* %s', location)
+
+        pages_links = {}
+        for page in self._get_pages(url_locations, project_name):
+            pages_links[page.url] = list(page.iter_links())
+
+        return CollectedLinks(
+            files=file_links,
+            find_links=find_link_links,
+            pages=pages_links,
+        )
+
+
 def _check_link_requires_python(
     link,  # type: Link
     version_info,  # type: Tuple[int, int, int]
@@ -853,8 +970,7 @@ class PackageFinder(object):
 
     def __init__(
         self,
-        search_scope,  # type: SearchScope
-        session,  # type: PipSession
+        link_collector,  # type: LinkCollector
         target_python,  # type: TargetPython
        allow_yanked,  # type: bool
         format_control=None,  # type: Optional[FormatControl]
@@ -866,7 +982,6 @@ class PackageFinder(object):
         This constructor is primarily meant to be used by the create() class
         method and from tests.
 
-        :param session: The Session to use to make requests.
         :param format_control: A FormatControl object, used to control
             the selection of source packages / binary packages when consulting
             the index and links.
@@ -881,10 +996,9 @@ class PackageFinder(object):
         self._allow_yanked = allow_yanked
         self._candidate_prefs = candidate_prefs
         self._ignore_requires_python = ignore_requires_python
+        self._link_collector = link_collector
         self._target_python = target_python
 
-        self.search_scope = search_scope
-        self.session = session
         self.format_control = format_control
 
         # These are boring links that have already been logged somehow.
@@ -925,20 +1039,34 @@ class PackageFinder(object):
             allow_all_prereleases=selection_prefs.allow_all_prereleases,
         )
 
+        link_collector = LinkCollector(
+            session=session,
+            search_scope=search_scope,
+        )
+
         return cls(
             candidate_prefs=candidate_prefs,
-            search_scope=search_scope,
-            session=session,
+            link_collector=link_collector,
             target_python=target_python,
             allow_yanked=selection_prefs.allow_yanked,
             format_control=selection_prefs.format_control,
             ignore_requires_python=selection_prefs.ignore_requires_python,
         )
 
+    @property
+    def search_scope(self):
+        # type: () -> SearchScope
+        return self._link_collector.search_scope
+
+    @search_scope.setter
+    def search_scope(self, search_scope):
+        # type: (SearchScope) -> None
+        self._link_collector.search_scope = search_scope
+
     @property
     def find_links(self):
         # type: () -> List[str]
-        return self.search_scope.find_links
+        return self._link_collector.find_links
 
     @property
     def index_urls(self):
@@ -948,7 +1076,7 @@
     @property
     def trusted_hosts(self):
         # type: () -> Iterable[str]
-        for host_port in self.session.pip_trusted_origins:
+        for host_port in self._link_collector.session.pip_trusted_origins:
             yield build_netloc(*host_port)
 
     @property
@@ -1045,54 +1173,28 @@
         See LinkEvaluator.evaluate_link() for details on which files
         are accepted.
""" - search_scope = self.search_scope - index_locations = search_scope.get_index_urls_locations(project_name) - index_file_loc, index_url_loc = group_locations(index_locations) - fl_file_loc, fl_url_loc = group_locations( - self.find_links, expand_dir=True, - ) - - file_locations = (Link(url) for url in itertools.chain( - index_file_loc, fl_file_loc, - )) - - # We trust every url that the user has given us whether it was given - # via --index-url or --find-links. - # We want to filter out any thing which does not have a secure origin. - url_locations = [ - link for link in itertools.chain( - (Link(url) for url in index_url_loc), - (Link(url) for url in fl_url_loc), - ) - if self.session.is_secure_origin(link) - ] - - logger.debug('%d location(s) to search for versions of %s:', - len(url_locations), project_name) - - for location in url_locations: - logger.debug('* %s', location) + collected_links = self._link_collector.collect_links(project_name) link_evaluator = self.make_link_evaluator(project_name) + find_links_versions = self.evaluate_links( link_evaluator, - # We trust every directly linked archive in find_links - links=(Link(url, '-f') for url in self.find_links), + links=collected_links.find_links, ) page_versions = [] - for page in self._get_pages(url_locations, project_name): - logger.debug('Analyzing links from page %s', page.url) + for page_url, page_links in collected_links.pages.items(): + logger.debug('Analyzing links from page %s', page_url) with indent_log(): new_versions = self.evaluate_links( link_evaluator, - links=page.iter_links(), + links=page_links, ) page_versions.extend(new_versions) file_versions = self.evaluate_links( link_evaluator, - links=file_locations, + links=collected_links.files, ) if file_versions: file_versions.sort(reverse=True) @@ -1228,24 +1330,6 @@ class PackageFinder(object): ) return best_candidate.link - def _get_pages(self, locations, project_name): - # type: (Iterable[Link], str) -> Iterable[HTMLPage] - """ - Yields (page, page_url) from the given locations, skipping - locations that have errors. - """ - seen = set() # type: Set[Link] - for location in locations: - if location in seen: - continue - seen.add(location) - - page = _get_html_page(location, session=self.session) - if page is None: - continue - - yield page - def _find_name_version_sep(fragment, canonical_name): # type: (str, str) -> int diff --git a/tests/unit/test_finder.py b/tests/unit/test_finder.py index cc8fd8fc1..fd68fcf8e 100644 --- a/tests/unit/test_finder.py +++ b/tests/unit/test_finder.py @@ -35,8 +35,9 @@ def make_no_network_finder( find_links=find_links, allow_all_prereleases=allow_all_prereleases, ) - # Replace the PackageFinder object's _get_pages() with a no-op. - finder._get_pages = lambda locations, project_name: [] + # Replace the PackageFinder._link_collector's _get_pages() with a no-op. 
+    link_collector = finder._link_collector
+    link_collector._get_pages = lambda locations, project_name: []
 
     return finder
 
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 9f53c0b4a..26e9ea08c 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -12,7 +12,7 @@ from pip._internal.index import (
     CandidatePreferences,
     FormatControl,
     HTMLPage,
-    Link,
+    LinkCollector,
     LinkEvaluator,
     PackageFinder,
     _check_link_requires_python,
@@ -25,6 +25,7 @@ from pip._internal.index import (
     group_locations,
 )
 from pip._internal.models.candidate import InstallationCandidate
+from pip._internal.models.link import Link
 from pip._internal.models.search_scope import SearchScope
 from pip._internal.models.selection_prefs import SelectionPreferences
 from pip._internal.models.target_python import TargetPython
@@ -684,9 +685,14 @@ class TestPackageFinder:
         # Create a test TargetPython that we can check for.
         target_python = TargetPython(py_version_info=(3, 7))
         format_control = FormatControl(set(), only_binary)
-        finder = PackageFinder(
-            search_scope=SearchScope([], []),
+
+        link_collector = LinkCollector(
             session=PipSession(),
+            search_scope=SearchScope([], []),
+        )
+
+        finder = PackageFinder(
+            link_collector=link_collector,
             target_python=target_python,
             allow_yanked=allow_yanked,
             format_control=format_control,
@@ -725,9 +731,12 @@
             prefer_binary=prefer_binary,
             allow_all_prereleases=allow_all_prereleases,
         )
-        finder = PackageFinder(
-            search_scope=SearchScope([], []),
+        link_collector = LinkCollector(
             session=PipSession(),
+            search_scope=SearchScope([], []),
+        )
+        finder = PackageFinder(
+            link_collector=link_collector,
             target_python=target_python,
             allow_yanked=True,
             candidate_prefs=candidate_prefs,
@@ -773,8 +782,7 @@ def test_group_locations__non_existing_path():
     """
     Test that a non-existing path is ignored.
    """
-    files, urls = group_locations(
-        [os.path.join('this', 'doesnt', 'exist')])
+    files, urls = group_locations([os.path.join('this', 'doesnt', 'exist')])
     assert not urls and not files, "nothing should have been found"
 
diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py
index 479461e48..6085f37f1 100644
--- a/tests/unit/test_req.py
+++ b/tests/unit/test_req.py
@@ -193,7 +193,7 @@
         """
         req_set = RequirementSet(require_hashes=False)
         finder = make_test_finder(find_links=[data.find_links])
-        session = finder.session
+        session = finder._link_collector.session
         command = create_command('install')
         with requirements_file('--require-hashes', tmpdir) as reqs_file:
             options, args = command.parse_args(['-r', reqs_file])
diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py
index 443a76054..81a94a715 100644
--- a/tests/unit/test_req_file.py
+++ b/tests/unit/test_req_file.py
@@ -349,7 +349,7 @@
             "file.txt", 1, finder=finder, session=session,
         ))
         assert list(finder.trusted_hosts) == ['host1', 'host2:8080']
-        session = finder.session
+        session = finder._link_collector.session
         assert session.adapters['https://host1/'] is session._insecure_adapter
         assert (
             session.adapters['https://host2:8080/']
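
Usage sketch (illustrative only, not part of the patch): the snippet below wires the new classes together the way the updated tests and PackageFinder.create() do. The PipSession import path is an assumption based on pip's layout at the time of this change, the index URL and project name are placeholders, and collect_links() makes real network requests against the configured index.

    from pip._internal.download import PipSession  # assumed path circa this patch
    from pip._internal.index import LinkCollector, PackageFinder
    from pip._internal.models.search_scope import SearchScope
    from pip._internal.models.target_python import TargetPython

    session = PipSession()
    search_scope = SearchScope(
        find_links=[],
        index_urls=['https://pypi.org/simple'],  # placeholder index URL
    )

    # The network-facing half of PackageFinder now lives in LinkCollector.
    collector = LinkCollector(session=session, search_scope=search_scope)
    collected = collector.collect_links('pip')  # example project name

    # CollectedLinks keeps the three link sources separate:
    #   .files      : links from local file locations
    #   .find_links : links given via --find-links
    #   .pages      : dict of HTML page url -> links found on that page
    for page_url, links in collected.pages.items():
        print('%s: %d links' % (page_url, len(links)))

    # PackageFinder is now constructed from a LinkCollector, as in the
    # updated tests; create() builds one internally from session and
    # search_scope.
    finder = PackageFinder(
        link_collector=collector,
        target_python=TargetPython(),
        allow_yanked=True,
    )

Splitting link collection out this way keeps PackageFinder focused on evaluating and ranking candidates, while all network access funnels through one object that tests can stub, as make_no_network_finder() in tests/unit/test_finder.py now does.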