mirror of https://github.com/pypa/pip
Rewrite collect_links
This introduces a collect_sources() method that does the same thing but, instead of flattening links eagerly, returns each repository entry separately (and returns None for an invalid repository option), so subsequent code can better distinguish which link comes from which repository.
parent a0f604164e
commit a912c5530d
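To make the new shape concrete, here is a minimal sketch of consuming the CollectedSources return value (the `collector` and `candidates_from_page` objects are assumed to be wired up as pip does below; iterating both sequences mirrors what the rewritten find_all_candidates() does):

```python
import itertools

# `collector` is a LinkCollector and `candidates_from_page` a callable;
# both are assumed to exist for this sketch.
sources = collector.collect_sources(
    project_name="twine",
    candidates_from_page=candidates_from_page,
)

# Each entry is one repository option: a LinkSource, or None for an
# invalid option. Nothing is flattened eagerly, so callers can still
# tell which links belong to which repository.
for source in itertools.chain(sources.find_links, sources.index_urls):
    if source is None:
        continue  # An invalid --find-links or --index-url entry.
    print(source.link, list(source.file_links()))
```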
docs/html/development/architecture/package-finding.rst

@@ -101,7 +101,7 @@ One of ``PackageFinder``'s main top-level methods is
 1. Calls its ``find_all_candidates()`` method, which gathers all
    possible package links by reading and parsing the index URL's and
    locations provided by the user (the :ref:`LinkCollector
-   <link-collector-class>` class's ``collect_links()`` method), constructs a
+   <link-collector-class>` class's ``collect_sources()`` method), constructs a
    :ref:`LinkEvaluator <link-evaluator-class>` object to filter out some of
    those links, and then returns a list of ``InstallationCandidates`` (aka
    candidates for install). This corresponds to steps 1-3 of the
@@ -131,7 +131,7 @@ responsible for collecting the raw list of "links" to package files
 The ``LinkCollector`` class takes into account the user's :ref:`--find-links
 <install_--find-links>`, :ref:`--extra-index-url <install_--extra-index-url>`,
 and related options when deciding which locations to collect links from. The
-class's main method is the ``collect_links()`` method. The :ref:`PackageFinder
+class's main method is the ``collect_sources()`` method. The :ref:`PackageFinder
 <package-finder-class>` class invokes this method as the first step of its
 ``find_all_candidates()`` method.
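As the docs describe, collect_sources() is the entry point that PackageFinder drives; a hedged, standalone sketch of calling it directly (assuming pip's internals as of this commit; PipSession, SearchScope, and LinkCollector are pip classes, the argument values are illustrative):

```python
from pip._internal.index.collector import LinkCollector
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession

# Build a collector pointed at PyPI; pip normally derives these from
# the CLI options instead.
collector = LinkCollector(
    session=PipSession(),
    search_scope=SearchScope.create(
        find_links=[], index_urls=["https://pypi.org/simple/"],
    ),
)
sources = collector.collect_sources(
    project_name="twine",
    # pip passes a LinkEvaluator-backed parser here; echoing the link
    # back is enough to see which pages would be consulted.
    candidates_from_page=lambda link: [link],
)
print(sources.index_urls)  # One LinkSource per configured index.
```

collect_sources() itself only classifies each location (no network traffic); fetching happens later, when a remote source's page_candidates() drives the callback.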
src/pip/_internal/index/collector.py

@@ -1,28 +1,27 @@
 """
-The main purpose of this module is to expose LinkCollector.collect_links().
+The main purpose of this module is to expose LinkCollector.collect_sources().
 """
 
 import cgi
+import collections
 import functools
 import html
 import itertools
 import logging
-import mimetypes
 import os
 import re
 import urllib.parse
 import urllib.request
 import xml.etree.ElementTree
-from collections import OrderedDict
 from optparse import Values
 from typing import (
     Callable,
     Iterable,
     List,
     MutableMapping,
+    NamedTuple,
     Optional,
     Sequence,
-    Tuple,
     Union,
 )
@@ -37,8 +36,9 @@ from pip._internal.network.session import PipSession
 from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.filetypes import is_archive_file
 from pip._internal.utils.misc import pairwise, redact_auth_from_url
-from pip._internal.utils.urls import path_to_url, url_to_path
-from pip._internal.vcs import is_url, vcs
+from pip._internal.vcs import vcs
+
+from .sources import CandidatesFromPage, LinkSource, build_source
 
 logger = logging.getLogger(__name__)
@@ -449,107 +449,9 @@ def _get_html_page(link, session=None):
     return None
 
 
-def _remove_duplicate_links(links):
-    # type: (Iterable[Link]) -> List[Link]
-    """
-    Return a list of links, with duplicates removed and ordering preserved.
-    """
-    # We preserve the ordering when removing duplicates because we can.
-    return list(OrderedDict.fromkeys(links))
-
-
-def group_locations(locations, expand_dir=False):
-    # type: (Sequence[str], bool) -> Tuple[List[str], List[str]]
-    """
-    Divide a list of locations into two groups: "files" (archives) and "urls."
-
-    :return: A pair of lists (files, urls).
-    """
-    files = []
-    urls = []
-
-    # puts the url for the given file path into the appropriate list
-    def sort_path(path):
-        # type: (str) -> None
-        url = path_to_url(path)
-        if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
-            urls.append(url)
-        else:
-            files.append(url)
-
-    for url in locations:
-
-        is_local_path = os.path.exists(url)
-        is_file_url = url.startswith('file:')
-
-        if is_local_path or is_file_url:
-            if is_local_path:
-                path = url
-            else:
-                path = url_to_path(url)
-            if os.path.isdir(path):
-                if expand_dir:
-                    path = os.path.realpath(path)
-                    for item in os.listdir(path):
-                        sort_path(os.path.join(path, item))
-                elif is_file_url:
-                    urls.append(url)
-                else:
-                    logger.warning(
-                        "Path '%s' is ignored: it is a directory.", path,
-                    )
-            elif os.path.isfile(path):
-                sort_path(path)
-            else:
-                logger.warning(
-                    "Url '%s' is ignored: it is neither a file "
-                    "nor a directory.", url,
-                )
-        elif is_url(url):
-            # Only add url with clear scheme
-            urls.append(url)
-        else:
-            logger.warning(
-                "Url '%s' is ignored. It is either a non-existing "
-                "path or lacks a specific scheme.", url,
-            )
-
-    return files, urls
-
-
-class CollectedLinks:
-
-    """
-    Encapsulates the return value of a call to LinkCollector.collect_links().
-
-    The return value includes both URLs to project pages containing package
-    links, as well as individual package Link objects collected from other
-    sources.
-
-    This info is stored separately as:
-
-    (1) links from the configured file locations,
-    (2) links from the configured find_links, and
-    (3) urls to HTML project pages, as described by the PEP 503 simple
-        repository API.
-    """
-
-    def __init__(
-        self,
-        files,         # type: List[Link]
-        find_links,    # type: List[Link]
-        project_urls,  # type: List[Link]
-    ):
-        # type: (...) -> None
-        """
-        :param files: Links from file locations.
-        :param find_links: Links from find_links.
-        :param project_urls: URLs to HTML project pages, as described by
-            the PEP 503 simple repository API.
-        """
-        self.files = files
-        self.find_links = find_links
-        self.project_urls = project_urls
+class CollectedSources(NamedTuple):
+    find_links: Sequence[Optional[LinkSource]]
+    index_urls: Sequence[Optional[LinkSource]]
 
 
 class LinkCollector:
@@ -558,7 +460,7 @@ class LinkCollector:
     Responsible for collecting Link objects from all configured locations,
     making network requests as needed.
 
-    The class's main method is its collect_links() method.
+    The class's main method is its collect_sources() method.
     """
 
     def __init__(
@@ -609,51 +511,46 @@ class LinkCollector:
         """
         return _get_html_page(location, session=self.session)
 
-    def collect_links(self, project_name):
-        # type: (str) -> CollectedLinks
-        """Find all available links for the given project name.
-
-        :return: All the Link objects (unfiltered), as a CollectedLinks object.
-        """
-        search_scope = self.search_scope
-        index_locations = search_scope.get_index_urls_locations(project_name)
-        index_file_loc, index_url_loc = group_locations(index_locations)
-        fl_file_loc, fl_url_loc = group_locations(
-            self.find_links, expand_dir=True,
-        )
-
-        file_links = [
-            Link(url) for url in itertools.chain(index_file_loc, fl_file_loc)
-        ]
-
-        # We trust every directly linked archive in find_links
-        find_link_links = [Link(url, '-f') for url in self.find_links]
-
-        # We trust every url that the user has given us whether it was given
-        # via --index-url or --find-links.
-        # We want to filter out anything that does not have a secure origin.
-        url_locations = [
-            link for link in itertools.chain(
-                # Mark PyPI indices as "cache_link_parsing == False" -- this
-                # will avoid caching the result of parsing the page for links.
-                (Link(url, cache_link_parsing=False) for url in index_url_loc),
-                (Link(url) for url in fl_url_loc),
-            )
-            if self.session.is_secure_origin(link)
-        ]
-
-        url_locations = _remove_duplicate_links(url_locations)
-        lines = [
-            '{} location(s) to search for versions of {}:'.format(
-                len(url_locations), project_name,
-            ),
-        ]
-        for link in url_locations:
-            lines.append(f'* {link}')
-        logger.debug('\n'.join(lines))
-
-        return CollectedLinks(
-            files=file_links,
-            find_links=find_link_links,
-            project_urls=url_locations,
+    def collect_sources(
+        self,
+        project_name: str,
+        candidates_from_page: CandidatesFromPage,
+    ) -> CollectedSources:
+        # The OrderedDict calls deduplicate sources by URL.
+        index_url_sources = collections.OrderedDict(
+            build_source(
+                loc,
+                candidates_from_page=candidates_from_page,
+                page_validator=self.session.is_secure_origin,
+                expand_dir=False,
+                cache_link_parsing=False,
+            )
+            for loc in self.search_scope.get_index_urls_locations(project_name)
+        ).values()
+        find_links_sources = collections.OrderedDict(
+            build_source(
+                loc,
+                candidates_from_page=candidates_from_page,
+                page_validator=self.session.is_secure_origin,
+                expand_dir=True,
+                cache_link_parsing=True,
+            )
+            for loc in self.find_links
+        ).values()
+
+        if logger.isEnabledFor(logging.DEBUG):
+            lines = [
+                f"* {s.link}"
+                for s in itertools.chain(find_links_sources, index_url_sources)
+                if s is not None and s.link is not None
+            ]
+            lines = [
+                f"{len(lines)} location(s) to search "
+                f"for versions of {project_name}:"
+            ] + lines
+            logger.debug("\n".join(lines))
+
+        return CollectedSources(
+            find_links=list(find_links_sources),
+            index_urls=list(index_url_sources),
         )
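The OrderedDict construction above works because build_source() returns (url, source) pairs; a tiny self-contained illustration with toy values in place of real sources:

```python
import collections

# Feeding (url, source) pairs into an OrderedDict collapses duplicate
# URLs into a single entry while keeping the position of each URL's
# first occurrence -- the deduplication collect_sources() relies on.
pairs = [
    ("https://pypi.org/simple/", "source-a"),
    ("https://example.com/simple/", "source-b"),
    ("https://pypi.org/simple/", "source-c"),  # Duplicate URL, collapsed.
]
print(list(collections.OrderedDict(pairs)))
# ['https://pypi.org/simple/', 'https://example.com/simple/']
```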
src/pip/_internal/index/package_finder.py

@@ -4,6 +4,7 @@
 # mypy: strict-optional=False
 
 import functools
+import itertools
 import logging
 import re
 from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union
@@ -804,38 +805,41 @@
         See LinkEvaluator.evaluate_link() for details on which files
         are accepted.
         """
-        collected_links = self._link_collector.collect_links(project_name)
-
-        link_evaluator = self.make_link_evaluator(project_name)
-
-        find_links_versions = self.evaluate_links(
-            link_evaluator,
-            links=collected_links.find_links,
-        )
-
-        page_versions = []
-        for project_url in collected_links.project_urls:
-            package_links = self.process_project_url(
-                project_url, link_evaluator=link_evaluator,
-            )
-            page_versions.extend(package_links)
-
-        file_versions = self.evaluate_links(
-            link_evaluator,
-            links=collected_links.files,
-        )
-        if file_versions:
-            file_versions.sort(reverse=True)
-            logger.debug(
-                'Local files found: %s',
-                ', '.join([
-                    url_to_path(candidate.link.url)
-                    for candidate in file_versions
-                ])
-            )
+        link_evaluator = self.make_link_evaluator(project_name)
+
+        collected_sources = self._link_collector.collect_sources(
+            project_name=project_name,
+            candidates_from_page=functools.partial(
+                self.process_project_url,
+                link_evaluator=link_evaluator,
+            ),
+        )
+
+        page_candidates_it = itertools.chain.from_iterable(
+            source.page_candidates()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        page_candidates = list(page_candidates_it)
+
+        file_links_it = itertools.chain.from_iterable(
+            source.file_links()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        file_candidates = self.evaluate_links(
+            link_evaluator,
+            sorted(file_links_it, reverse=True),
+        )
+
+        if logger.isEnabledFor(logging.DEBUG) and file_candidates:
+            paths = [url_to_path(c.link.url) for c in file_candidates]
+            logger.debug("Local files found: %s", ", ".join(paths))
 
         # This is an intentional priority ordering
-        return file_versions + find_links_versions + page_versions
+        return file_candidates + page_candidates
 
     def make_candidate_evaluator(
         self,
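Both chained comprehensions above use the same flattening pattern; isolated here with toy stand-ins for the LinkSource entries:

```python
# collected_sources is a pair of sequences (find_links, index_urls)
# whose entries may be None for invalid repository options. The double
# loop flattens both levels and skips the None entries, as in the
# rewritten find_all_candidates().
collected_sources = (["find-links source", None], ["index source"])
flattened = [
    source
    for sources in collected_sources
    for source in sources
    if source is not None
]
print(flattened)  # ['find-links source', 'index source']
```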
src/pip/_internal/index/sources.py

@@ -0,0 +1,224 @@
+import logging
+import mimetypes
+import os
+import pathlib
+from typing import Callable, Iterable, Optional, Tuple
+
+from pip._internal.models.candidate import InstallationCandidate
+from pip._internal.models.link import Link
+from pip._internal.utils.urls import path_to_url, url_to_path
+from pip._internal.vcs import is_url
+
+logger = logging.getLogger(__name__)
+
+FoundCandidates = Iterable[InstallationCandidate]
+FoundLinks = Iterable[Link]
+CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
+PageValidator = Callable[[Link], bool]
+
+
+class LinkSource:
+    @property
+    def link(self) -> Optional[Link]:
+        """Returns the underlying link, if there's one."""
+        raise NotImplementedError()
+
+    def page_candidates(self) -> FoundCandidates:
+        """Candidates found by parsing an archive listing HTML file."""
+        raise NotImplementedError()
+
+    def file_links(self) -> FoundLinks:
+        """Links found by specifying archives directly."""
+        raise NotImplementedError()
+
+
+def _is_html_file(file_url: str) -> bool:
+    return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
+
+
+class _FlatDirectorySource(LinkSource):
+    """Link source specified by ``--find-links=<path-to-dir>``.
+
+    This looks at the content of the directory, and returns:
+
+    * ``page_candidates``: Links listed on each HTML file in the directory.
+    * ``file_candidates``: Archives in the directory.
+    """
+
+    def __init__(
+        self,
+        candidates_from_page: CandidatesFromPage,
+        path: str,
+    ) -> None:
+        self._candidates_from_page = candidates_from_page
+        self._path = pathlib.Path(os.path.realpath(path))
+
+    @property
+    def link(self) -> Optional[Link]:
+        return None
+
+    def page_candidates(self) -> FoundCandidates:
+        for path in self._path.iterdir():
+            url = path_to_url(str(path))
+            if not _is_html_file(url):
+                continue
+            yield from self._candidates_from_page(Link(url))
+
+    def file_links(self) -> FoundLinks:
+        for path in self._path.iterdir():
+            url = path_to_url(str(path))
+            if _is_html_file(url):
+                continue
+            yield Link(url)
+
+
+class _LocalFileSource(LinkSource):
+    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.
+
+    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
+    the option, it is converted to a URL first. This returns:
+
+    * ``page_candidates``: Links listed on an HTML file.
+    * ``file_candidates``: The non-HTML file.
+    """
+
+    def __init__(
+        self,
+        candidates_from_page: CandidatesFromPage,
+        link: Link,
+    ) -> None:
+        self._candidates_from_page = candidates_from_page
+        self._link = link
+
+    @property
+    def link(self) -> Optional[Link]:
+        return self._link
+
+    def page_candidates(self) -> FoundCandidates:
+        if not _is_html_file(self._link.url):
+            return
+        yield from self._candidates_from_page(self._link)
+
+    def file_links(self) -> FoundLinks:
+        if _is_html_file(self._link.url):
+            return
+        yield self._link
+
+
+class _RemoteFileSource(LinkSource):
+    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.
+
+    This returns:
+
+    * ``page_candidates``: Links listed on an HTML file.
+    * ``file_candidates``: The non-HTML file.
+    """
+
+    def __init__(
+        self,
+        candidates_from_page: CandidatesFromPage,
+        page_validator: PageValidator,
+        link: Link,
+    ) -> None:
+        self._candidates_from_page = candidates_from_page
+        self._page_validator = page_validator
+        self._link = link
+
+    @property
+    def link(self) -> Optional[Link]:
+        return self._link
+
+    def page_candidates(self) -> FoundCandidates:
+        if not self._page_validator(self._link):
+            return
+        yield from self._candidates_from_page(self._link)
+
+    def file_links(self) -> FoundLinks:
+        yield self._link
+
+
+class _IndexDirectorySource(LinkSource):
+    """``--[extra-]index-url=<path-to-directory>``.
+
+    This is treated like a remote URL; ``candidates_from_page`` contains logic
+    for this by appending ``index.html`` to the link.
+    """
+
+    def __init__(
+        self,
+        candidates_from_page: CandidatesFromPage,
+        link: Link,
+    ) -> None:
+        self._candidates_from_page = candidates_from_page
+        self._link = link
+
+    @property
+    def link(self) -> Optional[Link]:
+        return self._link
+
+    def page_candidates(self) -> FoundCandidates:
+        yield from self._candidates_from_page(self._link)
+
+    def file_links(self) -> FoundLinks:
+        return ()
+
+
+def build_source(
+    location: str,
+    *,
+    candidates_from_page: CandidatesFromPage,
+    page_validator: PageValidator,
+    expand_dir: bool,
+    cache_link_parsing: bool,
+) -> Tuple[Optional[str], Optional[LinkSource]]:
+
+    path: Optional[str] = None
+    url: Optional[str] = None
+    if os.path.exists(location):  # Is a local path.
+        url = path_to_url(location)
+        path = location
+    elif location.startswith("file:"):  # A file: URL.
+        url = location
+        path = url_to_path(location)
+    elif is_url(location):
+        url = location
+
+    if url is None:
+        msg = (
+            "Location '%s' is ignored: "
+            "it is either a non-existing path or lacks a specific scheme."
+        )
+        logger.warning(msg, location)
+        return (None, None)
+
+    if path is None:
+        source: LinkSource = _RemoteFileSource(
+            candidates_from_page=candidates_from_page,
+            page_validator=page_validator,
+            link=Link(url, cache_link_parsing=cache_link_parsing),
+        )
+        return (url, source)
+
+    if os.path.isdir(path):
+        if expand_dir:
+            source = _FlatDirectorySource(
+                candidates_from_page=candidates_from_page,
+                path=path,
+            )
+        else:
+            source = _IndexDirectorySource(
+                candidates_from_page=candidates_from_page,
+                link=Link(url, cache_link_parsing=cache_link_parsing),
+            )
+        return (url, source)
+    elif os.path.isfile(path):
+        source = _LocalFileSource(
+            candidates_from_page=candidates_from_page,
+            link=Link(url, cache_link_parsing=cache_link_parsing),
+        )
+        return (url, source)
+    logger.warning(
+        "Location '%s' is ignored: it is neither a file nor a directory.",
+        location,
+    )
+    return (url, None)
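To see how build_source() classifies the three kinds of location, a hypothetical standalone run (the stub callbacks are placeholders, not part of this commit, and the temporary directory stands in for a --find-links path):

```python
import tempfile

from pip._internal.index.sources import build_source

with tempfile.TemporaryDirectory() as tmp_dir:
    for location in [tmp_dir, "https://pypi.org/simple/", "no-such-path"]:
        url, source = build_source(
            location,
            candidates_from_page=lambda link: [],  # Stub callback.
            page_validator=lambda link: True,      # Stub validator.
            expand_dir=True,  # As collect_sources() uses for --find-links.
            cache_link_parsing=True,
        )
        # An existing directory yields _FlatDirectorySource, a remote URL
        # yields _RemoteFileSource, and an unrecognized location yields
        # (None, None) after logging a warning.
        print(location, "->", type(source).__name__ if source else None)
```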
tests/unit/test_collector.py

@@ -1,3 +1,4 @@
+import itertools
 import logging
 import os.path
 import re
@@ -23,10 +24,9 @@ from pip._internal.index.collector import (
     _make_html_page,
     _NotHTML,
     _NotHTTP,
-    _remove_duplicate_links,
-    group_locations,
     parse_links,
 )
+from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
 from pip._internal.models.index import PyPI
 from pip._internal.models.link import Link
 from pip._internal.network.session import PipSession
@@ -587,46 +587,79 @@ def test_get_html_page_directory_append_index(tmpdir):
     assert actual.url == expected_url
 
 
-def test_remove_duplicate_links():
-    links = [
-        # We choose Links that will test that ordering is preserved.
-        Link('https://example.com/2'),
-        Link('https://example.com/1'),
-        Link('https://example.com/2'),
-    ]
-    actual = _remove_duplicate_links(links)
-    assert actual == [
-        Link('https://example.com/2'),
-        Link('https://example.com/1'),
-    ]
-
-
-def test_group_locations__file_expand_dir(data):
+def test_collect_sources__file_expand_dir(data):
     """
-    Test that a file:// dir gets listdir run with expand_dir
+    Test that a file:// dir from --find-links becomes _FlatDirectorySource
     """
-    files, urls = group_locations([data.find_links], expand_dir=True)
-    assert files and not urls, (
-        "files and not urls should have been found "
+    collector = LinkCollector.create(
+        session=pretend.stub(is_secure_origin=None),  # Shouldn't be used.
+        options=pretend.stub(
+            index_url="ignored-by-no-index",
+            extra_index_urls=[],
+            no_index=True,
+            find_links=[data.find_links],
+        ),
+    )
+    sources = collector.collect_sources(
+        project_name=None,  # Shouldn't be used.
+        candidates_from_page=None,  # Shouldn't be used.
+    )
+    assert (
+        not sources.index_urls
+        and len(sources.find_links) == 1
+        and isinstance(sources.find_links[0], _FlatDirectorySource)
+    ), (
+        "Directory source should have been found "
         f"at find-links url: {data.find_links}"
     )
 
 
-def test_group_locations__file_not_find_link(data):
+def test_collect_sources__file_not_find_link(data):
     """
-    Test that a file:// url dir that's not a find-link, doesn't get a listdir
-    run
+    Test that a file:// dir from --index-url doesn't become _FlatDirectorySource
     """
-    files, urls = group_locations([data.index_url("empty_with_pkg")])
-    assert urls and not files, "urls, but not files should have been found"
+    collector = LinkCollector.create(
+        session=pretend.stub(is_secure_origin=None),  # Shouldn't be used.
+        options=pretend.stub(
+            index_url=data.index_url("empty_with_pkg"),
+            extra_index_urls=[],
+            no_index=False,
+            find_links=[],
+        ),
+    )
+    sources = collector.collect_sources(
+        project_name="",
+        candidates_from_page=None,  # Shouldn't be used.
+    )
+    assert (
+        not sources.find_links
+        and len(sources.index_urls) == 1
+        and isinstance(sources.index_urls[0], _IndexDirectorySource)
+    ), "Directory specified as index should be treated as a page"
 
 
-def test_group_locations__non_existing_path():
+def test_collect_sources__non_existing_path():
     """
     Test that a non-existing path is ignored.
     """
-    files, urls = group_locations([os.path.join('this', 'doesnt', 'exist')])
-    assert not urls and not files, "nothing should have been found"
+    collector = LinkCollector.create(
+        session=pretend.stub(is_secure_origin=None),  # Shouldn't be used.
+        options=pretend.stub(
+            index_url="ignored-by-no-index",
+            extra_index_urls=[],
+            no_index=True,
+            find_links=[os.path.join("this", "doesnt", "exist")],
+        ),
+    )
+    sources = collector.collect_sources(
+        project_name=None,  # Shouldn't be used.
+        candidates_from_page=None,  # Shouldn't be used.
+    )
+    assert (
+        not sources.index_urls
+        and sources.find_links == [None]
+    ), "Nothing should have been found"
 
 
 def check_links_include(links, names):
@@ -664,7 +697,7 @@ class TestLinkCollector:
             url, session=link_collector.session,
         )
 
-    def test_collect_links(self, caplog, data):
+    def test_collect_sources(self, caplog, data):
         caplog.set_level(logging.DEBUG)
 
         link_collector = make_test_link_collector(
@@ -673,20 +706,33 @@
             # is skipped.
             index_urls=[PyPI.simple_url, PyPI.simple_url],
         )
-        actual = link_collector.collect_links('twine')
+        collected_sources = link_collector.collect_sources(
+            "twine",
+            candidates_from_page=lambda link: [link],
+        )
 
-        # Spot-check the CollectedLinks return value.
-        assert len(actual.files) > 20
-        check_links_include(actual.files, names=['simple-1.0.tar.gz'])
+        files_it = itertools.chain.from_iterable(
+            source.file_links()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        pages_it = itertools.chain.from_iterable(
+            source.page_candidates()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        files = list(files_it)
+        pages = list(pages_it)
 
-        assert len(actual.find_links) == 1
-        check_links_include(actual.find_links, names=['packages'])
-        # Check that find-links URLs are marked as cacheable.
-        assert actual.find_links[0].cache_link_parsing
+        # Spot-check the returned sources.
+        assert len(files) > 20
+        check_links_include(files, names=["simple-1.0.tar.gz"])
 
-        assert actual.project_urls == [Link('https://pypi.org/simple/twine/')]
+        assert pages == [Link('https://pypi.org/simple/twine/')]
         # Check that index URLs are marked as *un*cacheable.
-        assert not actual.project_urls[0].cache_link_parsing
+        assert not pages[0].cache_link_parsing
 
         expected_message = dedent("""\
             1 location(s) to search for versions of twine: