Add LinkCollector class to index.py.

This commit is contained in:
Chris Jerdonek 2019-08-21 23:46:55 -07:00
parent 9ae5f1a8df
commit ed55cde689
5 changed files with 166 additions and 73 deletions

View File

@ -48,8 +48,8 @@ from pip._internal.wheel import Wheel
if MYPY_CHECK_RUNNING:
from typing import (
Any, Callable, FrozenSet, Iterable, List, MutableMapping, Optional,
Sequence, Set, Text, Tuple, Union,
Any, Callable, Dict, FrozenSet, Iterable, List, MutableMapping,
Optional, Sequence, Set, Text, Tuple, Union,
)
import xml.etree.ElementTree
from pip._vendor.packaging.version import _BaseVersion
@ -312,6 +312,123 @@ def group_locations(locations, expand_dir=False):
return files, urls
class CollectedLinks(object):

    """
    Container for the Link objects gathered by a single call to
    LinkCollector.collect_links().

    The links are kept in three separate groups rather than merged:

    1. ``files`` -- links derived from the configured file locations,
    2. ``find_links`` -- links derived from the configured find_links, and
    3. ``pages`` -- a dict mapping each HTML page's URL to the links
       found on that page.
    """

    def __init__(
        self,
        files,       # type: List[Link]
        find_links,  # type: List[Link]
        pages,       # type: Dict[str, List[Link]]
    ):
        # type: (...) -> None
        """
        :param files: Links from file locations.
        :param find_links: Links from find_links.
        :param pages: A dict mapping HTML page url to links from that page.
        """
        # Each group is stored verbatim; callers read these attributes
        # directly.
        self.pages = pages
        self.find_links = find_links
        self.files = files
class LinkCollector(object):

    """
    Collects Link objects from every configured location, performing
    network requests where needed.

    The class's main entry point is its collect_links() method.
    """

    def __init__(
        self,
        session,       # type: PipSession
        search_scope,  # type: SearchScope
    ):
        # type: (...) -> None
        # The session handles all HTTP traffic; the search scope supplies
        # the index URLs and find_links locations.
        self.session = session
        self.search_scope = search_scope

    @property
    def find_links(self):
        # type: () -> List[str]
        """The configured find_links, taken from the search scope."""
        return self.search_scope.find_links

    def _get_pages(self, locations, project_name):
        # type: (Iterable[Link], str) -> Iterable[HTMLPage]
        """
        Yield an HTMLPage for each distinct location, skipping duplicate
        locations and locations whose pages cannot be fetched.
        """
        visited = set()  # type: Set[Link]
        for loc in locations:
            if loc not in visited:
                visited.add(loc)
                html_page = _get_html_page(loc, session=self.session)
                # _get_html_page() returns None when the page could not be
                # retrieved or parsed; such locations are silently skipped.
                if html_page is not None:
                    yield html_page

    def collect_links(self, project_name):
        # type: (str) -> CollectedLinks
        """Find all available links for the given project name.

        :return: All the Link objects (unfiltered), as a CollectedLinks
            object.
        """
        index_locations = self.search_scope.get_index_urls_locations(
            project_name,
        )
        index_file_loc, index_url_loc = group_locations(index_locations)
        fl_file_loc, fl_url_loc = group_locations(
            self.find_links, expand_dir=True,
        )

        file_links = [
            Link(url)
            for url in itertools.chain(index_file_loc, fl_file_loc)
        ]

        # Directly linked archives in find_links are always trusted.
        find_link_links = [Link(url, '-f') for url in self.find_links]

        # Every url the user has given us, whether via --index-url or
        # --find-links, is trusted -- but anything that does not have a
        # secure origin is filtered out here.
        candidate_links = itertools.chain(
            (Link(url) for url in index_url_loc),
            (Link(url) for url in fl_url_loc),
        )
        url_locations = [
            link for link in candidate_links
            if self.session.is_secure_origin(link)
        ]

        logger.debug('%d location(s) to search for versions of %s:',
                     len(url_locations), project_name)
        for link in url_locations:
            logger.debug('* %s', link)

        pages_links = {
            page.url: list(page.iter_links())
            for page in self._get_pages(url_locations, project_name)
        }

        return CollectedLinks(
            files=file_links,
            find_links=find_link_links,
            pages=pages_links,
        )
def _check_link_requires_python(
link, # type: Link
version_info, # type: Tuple[int, int, int]
@ -853,8 +970,7 @@ class PackageFinder(object):
def __init__(
self,
search_scope, # type: SearchScope
session, # type: PipSession
link_collector, # type: LinkCollector
target_python, # type: TargetPython
allow_yanked, # type: bool
format_control=None, # type: Optional[FormatControl]
@ -866,7 +982,6 @@ class PackageFinder(object):
This constructor is primarily meant to be used by the create() class
method and from tests.
:param session: The Session to use to make requests.
:param format_control: A FormatControl object, used to control
the selection of source packages / binary packages when consulting
the index and links.
@ -881,10 +996,9 @@ class PackageFinder(object):
self._allow_yanked = allow_yanked
self._candidate_prefs = candidate_prefs
self._ignore_requires_python = ignore_requires_python
self._link_collector = link_collector
self._target_python = target_python
self.search_scope = search_scope
self.session = session
self.format_control = format_control
# These are boring links that have already been logged somehow.
@ -925,20 +1039,34 @@ class PackageFinder(object):
allow_all_prereleases=selection_prefs.allow_all_prereleases,
)
link_collector = LinkCollector(
session=session,
search_scope=search_scope,
)
return cls(
candidate_prefs=candidate_prefs,
search_scope=search_scope,
session=session,
link_collector=link_collector,
target_python=target_python,
allow_yanked=selection_prefs.allow_yanked,
format_control=selection_prefs.format_control,
ignore_requires_python=selection_prefs.ignore_requires_python,
)
@property
def search_scope(self):
# type: () -> SearchScope
return self._link_collector.search_scope
@search_scope.setter
def search_scope(self, search_scope):
# type: (SearchScope) -> None
self._link_collector.search_scope = search_scope
@property
def find_links(self):
# type: () -> List[str]
return self.search_scope.find_links
return self._link_collector.find_links
@property
def index_urls(self):
@ -948,7 +1076,7 @@ class PackageFinder(object):
@property
def trusted_hosts(self):
# type: () -> Iterable[str]
for host_port in self.session.pip_trusted_origins:
for host_port in self._link_collector.session.pip_trusted_origins:
yield build_netloc(*host_port)
@property
@ -1045,54 +1173,28 @@ class PackageFinder(object):
See LinkEvaluator.evaluate_link() for details on which files
are accepted.
"""
search_scope = self.search_scope
index_locations = search_scope.get_index_urls_locations(project_name)
index_file_loc, index_url_loc = group_locations(index_locations)
fl_file_loc, fl_url_loc = group_locations(
self.find_links, expand_dir=True,
)
file_locations = (Link(url) for url in itertools.chain(
index_file_loc, fl_file_loc,
))
# We trust every url that the user has given us whether it was given
# via --index-url or --find-links.
# We want to filter out any thing which does not have a secure origin.
url_locations = [
link for link in itertools.chain(
(Link(url) for url in index_url_loc),
(Link(url) for url in fl_url_loc),
)
if self.session.is_secure_origin(link)
]
logger.debug('%d location(s) to search for versions of %s:',
len(url_locations), project_name)
for location in url_locations:
logger.debug('* %s', location)
collected_links = self._link_collector.collect_links(project_name)
link_evaluator = self.make_link_evaluator(project_name)
find_links_versions = self.evaluate_links(
link_evaluator,
# We trust every directly linked archive in find_links
links=(Link(url, '-f') for url in self.find_links),
links=collected_links.find_links,
)
page_versions = []
for page in self._get_pages(url_locations, project_name):
logger.debug('Analyzing links from page %s', page.url)
for page_url, page_links in collected_links.pages.items():
logger.debug('Analyzing links from page %s', page_url)
with indent_log():
new_versions = self.evaluate_links(
link_evaluator,
links=page.iter_links(),
links=page_links,
)
page_versions.extend(new_versions)
file_versions = self.evaluate_links(
link_evaluator,
links=file_locations,
links=collected_links.files,
)
if file_versions:
file_versions.sort(reverse=True)
@ -1228,24 +1330,6 @@ class PackageFinder(object):
)
return best_candidate.link
def _get_pages(self, locations, project_name):
# type: (Iterable[Link], str) -> Iterable[HTMLPage]
"""
Yields (page, page_url) from the given locations, skipping
locations that have errors.
"""
seen = set() # type: Set[Link]
for location in locations:
if location in seen:
continue
seen.add(location)
page = _get_html_page(location, session=self.session)
if page is None:
continue
yield page
def _find_name_version_sep(fragment, canonical_name):
# type: (str, str) -> int

View File

@ -35,8 +35,9 @@ def make_no_network_finder(
find_links=find_links,
allow_all_prereleases=allow_all_prereleases,
)
# Replace the PackageFinder object's _get_pages() with a no-op.
finder._get_pages = lambda locations, project_name: []
# Replace the PackageFinder._link_collector's _get_pages() with a no-op.
link_collector = finder._link_collector
link_collector._get_pages = lambda locations, project_name: []
return finder

View File

@ -12,7 +12,7 @@ from pip._internal.index import (
CandidatePreferences,
FormatControl,
HTMLPage,
Link,
LinkCollector,
LinkEvaluator,
PackageFinder,
_check_link_requires_python,
@ -25,6 +25,7 @@ from pip._internal.index import (
group_locations,
)
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.models.target_python import TargetPython
@ -684,9 +685,14 @@ class TestPackageFinder:
# Create a test TargetPython that we can check for.
target_python = TargetPython(py_version_info=(3, 7))
format_control = FormatControl(set(), only_binary)
finder = PackageFinder(
search_scope=SearchScope([], []),
link_collector = LinkCollector(
session=PipSession(),
search_scope=SearchScope([], []),
)
finder = PackageFinder(
link_collector=link_collector,
target_python=target_python,
allow_yanked=allow_yanked,
format_control=format_control,
@ -725,9 +731,12 @@ class TestPackageFinder:
prefer_binary=prefer_binary,
allow_all_prereleases=allow_all_prereleases,
)
finder = PackageFinder(
search_scope=SearchScope([], []),
link_collector = LinkCollector(
session=PipSession(),
search_scope=SearchScope([], []),
)
finder = PackageFinder(
link_collector=link_collector,
target_python=target_python,
allow_yanked=True,
candidate_prefs=candidate_prefs,
@ -773,8 +782,7 @@ def test_group_locations__non_existing_path():
"""
Test that a non-existing path is ignored.
"""
files, urls = group_locations(
[os.path.join('this', 'doesnt', 'exist')])
files, urls = group_locations([os.path.join('this', 'doesnt', 'exist')])
assert not urls and not files, "nothing should have been found"

View File

@ -193,7 +193,7 @@ class TestRequirementSet(object):
"""
req_set = RequirementSet(require_hashes=False)
finder = make_test_finder(find_links=[data.find_links])
session = finder.session
session = finder._link_collector.session
command = create_command('install')
with requirements_file('--require-hashes', tmpdir) as reqs_file:
options, args = command.parse_args(['-r', reqs_file])

View File

@ -349,7 +349,7 @@ class TestProcessLine(object):
"file.txt", 1, finder=finder, session=session,
))
assert list(finder.trusted_hosts) == ['host1', 'host2:8080']
session = finder.session
session = finder._link_collector.session
assert session.adapters['https://host1/'] is session._insecure_adapter
assert (
session.adapters['https://host2:8080/']