mirror of
https://github.com/pypa/pip
synced 2023-12-13 21:30:23 +01:00
Rewrite collect_links
This introduces a collect_sources() method to do the same thing, but instead of flattening links eagerly, it returns each repository entry separately (and returns None for invalid repository options), so subsequent code can better distinguish which link comes from which repository.
This commit is contained in:
parent
a0f604164e
commit
a912c5530d
|
@ -101,7 +101,7 @@ One of ``PackageFinder``'s main top-level methods is
|
||||||
1. Calls its ``find_all_candidates()`` method, which gathers all
|
1. Calls its ``find_all_candidates()`` method, which gathers all
|
||||||
possible package links by reading and parsing the index URLs and
|
possible package links by reading and parsing the index URLs and
|
||||||
locations provided by the user (the :ref:`LinkCollector
|
locations provided by the user (the :ref:`LinkCollector
|
||||||
<link-collector-class>` class's ``collect_links()`` method), constructs a
|
<link-collector-class>` class's ``collect_sources()`` method), constructs a
|
||||||
:ref:`LinkEvaluator <link-evaluator-class>` object to filter out some of
|
:ref:`LinkEvaluator <link-evaluator-class>` object to filter out some of
|
||||||
those links, and then returns a list of ``InstallationCandidates`` (aka
|
those links, and then returns a list of ``InstallationCandidates`` (aka
|
||||||
candidates for install). This corresponds to steps 1-3 of the
|
candidates for install). This corresponds to steps 1-3 of the
|
||||||
|
@ -131,7 +131,7 @@ responsible for collecting the raw list of "links" to package files
|
||||||
The ``LinkCollector`` class takes into account the user's :ref:`--find-links
|
The ``LinkCollector`` class takes into account the user's :ref:`--find-links
|
||||||
<install_--find-links>`, :ref:`--extra-index-url <install_--extra-index-url>`,
|
<install_--find-links>`, :ref:`--extra-index-url <install_--extra-index-url>`,
|
||||||
and related options when deciding which locations to collect links from. The
|
and related options when deciding which locations to collect links from. The
|
||||||
class's main method is the ``collect_links()`` method. The :ref:`PackageFinder
|
class's main method is the ``collect_sources()`` method. The :ref:`PackageFinder
|
||||||
<package-finder-class>` class invokes this method as the first step of its
|
<package-finder-class>` class invokes this method as the first step of its
|
||||||
``find_all_candidates()`` method.
|
``find_all_candidates()`` method.
|
||||||
|
|
||||||
|
|
|
@ -1,28 +1,27 @@
|
||||||
"""
|
"""
|
||||||
The main purpose of this module is to expose LinkCollector.collect_links().
|
The main purpose of this module is to expose LinkCollector.collect_sources().
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import cgi
|
import cgi
|
||||||
|
import collections
|
||||||
import functools
|
import functools
|
||||||
import html
|
import html
|
||||||
import itertools
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
from collections import OrderedDict
|
|
||||||
from optparse import Values
|
from optparse import Values
|
||||||
from typing import (
|
from typing import (
|
||||||
Callable,
|
Callable,
|
||||||
Iterable,
|
Iterable,
|
||||||
List,
|
List,
|
||||||
MutableMapping,
|
MutableMapping,
|
||||||
|
NamedTuple,
|
||||||
Optional,
|
Optional,
|
||||||
Sequence,
|
Sequence,
|
||||||
Tuple,
|
|
||||||
Union,
|
Union,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,8 +36,9 @@ from pip._internal.network.session import PipSession
|
||||||
from pip._internal.network.utils import raise_for_status
|
from pip._internal.network.utils import raise_for_status
|
||||||
from pip._internal.utils.filetypes import is_archive_file
|
from pip._internal.utils.filetypes import is_archive_file
|
||||||
from pip._internal.utils.misc import pairwise, redact_auth_from_url
|
from pip._internal.utils.misc import pairwise, redact_auth_from_url
|
||||||
from pip._internal.utils.urls import path_to_url, url_to_path
|
from pip._internal.vcs import vcs
|
||||||
from pip._internal.vcs import is_url, vcs
|
|
||||||
|
from .sources import CandidatesFromPage, LinkSource, build_source
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -449,107 +449,9 @@ def _get_html_page(link, session=None):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _remove_duplicate_links(links):
|
class CollectedSources(NamedTuple):
|
||||||
# type: (Iterable[Link]) -> List[Link]
|
find_links: Sequence[Optional[LinkSource]]
|
||||||
"""
|
index_urls: Sequence[Optional[LinkSource]]
|
||||||
Return a list of links, with duplicates removed and ordering preserved.
|
|
||||||
"""
|
|
||||||
# We preserve the ordering when removing duplicates because we can.
|
|
||||||
return list(OrderedDict.fromkeys(links))
|
|
||||||
|
|
||||||
|
|
||||||
def group_locations(locations, expand_dir=False):
|
|
||||||
# type: (Sequence[str], bool) -> Tuple[List[str], List[str]]
|
|
||||||
"""
|
|
||||||
Divide a list of locations into two groups: "files" (archives) and "urls."
|
|
||||||
|
|
||||||
:return: A pair of lists (files, urls).
|
|
||||||
"""
|
|
||||||
files = []
|
|
||||||
urls = []
|
|
||||||
|
|
||||||
# puts the url for the given file path into the appropriate list
|
|
||||||
def sort_path(path):
|
|
||||||
# type: (str) -> None
|
|
||||||
url = path_to_url(path)
|
|
||||||
if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
|
|
||||||
urls.append(url)
|
|
||||||
else:
|
|
||||||
files.append(url)
|
|
||||||
|
|
||||||
for url in locations:
|
|
||||||
|
|
||||||
is_local_path = os.path.exists(url)
|
|
||||||
is_file_url = url.startswith('file:')
|
|
||||||
|
|
||||||
if is_local_path or is_file_url:
|
|
||||||
if is_local_path:
|
|
||||||
path = url
|
|
||||||
else:
|
|
||||||
path = url_to_path(url)
|
|
||||||
if os.path.isdir(path):
|
|
||||||
if expand_dir:
|
|
||||||
path = os.path.realpath(path)
|
|
||||||
for item in os.listdir(path):
|
|
||||||
sort_path(os.path.join(path, item))
|
|
||||||
elif is_file_url:
|
|
||||||
urls.append(url)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Path '%s' is ignored: it is a directory.", path,
|
|
||||||
)
|
|
||||||
elif os.path.isfile(path):
|
|
||||||
sort_path(path)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Url '%s' is ignored: it is neither a file "
|
|
||||||
"nor a directory.", url,
|
|
||||||
)
|
|
||||||
elif is_url(url):
|
|
||||||
# Only add url with clear scheme
|
|
||||||
urls.append(url)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Url '%s' is ignored. It is either a non-existing "
|
|
||||||
"path or lacks a specific scheme.", url,
|
|
||||||
)
|
|
||||||
|
|
||||||
return files, urls
|
|
||||||
|
|
||||||
|
|
||||||
class CollectedLinks:
|
|
||||||
|
|
||||||
"""
|
|
||||||
Encapsulates the return value of a call to LinkCollector.collect_links().
|
|
||||||
|
|
||||||
The return value includes both URLs to project pages containing package
|
|
||||||
links, as well as individual package Link objects collected from other
|
|
||||||
sources.
|
|
||||||
|
|
||||||
This info is stored separately as:
|
|
||||||
|
|
||||||
(1) links from the configured file locations,
|
|
||||||
(2) links from the configured find_links, and
|
|
||||||
(3) urls to HTML project pages, as described by the PEP 503 simple
|
|
||||||
repository API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
files, # type: List[Link]
|
|
||||||
find_links, # type: List[Link]
|
|
||||||
project_urls, # type: List[Link]
|
|
||||||
):
|
|
||||||
# type: (...) -> None
|
|
||||||
"""
|
|
||||||
:param files: Links from file locations.
|
|
||||||
:param find_links: Links from find_links.
|
|
||||||
:param project_urls: URLs to HTML project pages, as described by
|
|
||||||
the PEP 503 simple repository API.
|
|
||||||
"""
|
|
||||||
self.files = files
|
|
||||||
self.find_links = find_links
|
|
||||||
self.project_urls = project_urls
|
|
||||||
|
|
||||||
|
|
||||||
class LinkCollector:
|
class LinkCollector:
|
||||||
|
@ -558,7 +460,7 @@ class LinkCollector:
|
||||||
Responsible for collecting Link objects from all configured locations,
|
Responsible for collecting Link objects from all configured locations,
|
||||||
making network requests as needed.
|
making network requests as needed.
|
||||||
|
|
||||||
The class's main method is its collect_links() method.
|
The class's main method is its collect_sources() method.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -609,51 +511,46 @@ class LinkCollector:
|
||||||
"""
|
"""
|
||||||
return _get_html_page(location, session=self.session)
|
return _get_html_page(location, session=self.session)
|
||||||
|
|
||||||
def collect_links(self, project_name):
|
def collect_sources(
|
||||||
# type: (str) -> CollectedLinks
|
self,
|
||||||
"""Find all available links for the given project name.
|
project_name: str,
|
||||||
|
candidates_from_page: CandidatesFromPage,
|
||||||
:return: All the Link objects (unfiltered), as a CollectedLinks object.
|
) -> CollectedSources:
|
||||||
"""
|
# The OrderedDict calls deduplicate sources by URL.
|
||||||
search_scope = self.search_scope
|
index_url_sources = collections.OrderedDict(
|
||||||
index_locations = search_scope.get_index_urls_locations(project_name)
|
build_source(
|
||||||
index_file_loc, index_url_loc = group_locations(index_locations)
|
loc,
|
||||||
fl_file_loc, fl_url_loc = group_locations(
|
candidates_from_page=candidates_from_page,
|
||||||
self.find_links, expand_dir=True,
|
page_validator=self.session.is_secure_origin,
|
||||||
)
|
expand_dir=False,
|
||||||
|
cache_link_parsing=False,
|
||||||
file_links = [
|
|
||||||
Link(url) for url in itertools.chain(index_file_loc, fl_file_loc)
|
|
||||||
]
|
|
||||||
|
|
||||||
# We trust every directly linked archive in find_links
|
|
||||||
find_link_links = [Link(url, '-f') for url in self.find_links]
|
|
||||||
|
|
||||||
# We trust every url that the user has given us whether it was given
|
|
||||||
# via --index-url or --find-links.
|
|
||||||
# We want to filter out anything that does not have a secure origin.
|
|
||||||
url_locations = [
|
|
||||||
link for link in itertools.chain(
|
|
||||||
# Mark PyPI indices as "cache_link_parsing == False" -- this
|
|
||||||
# will avoid caching the result of parsing the page for links.
|
|
||||||
(Link(url, cache_link_parsing=False) for url in index_url_loc),
|
|
||||||
(Link(url) for url in fl_url_loc),
|
|
||||||
)
|
)
|
||||||
if self.session.is_secure_origin(link)
|
for loc in self.search_scope.get_index_urls_locations(project_name)
|
||||||
]
|
).values()
|
||||||
|
find_links_sources = collections.OrderedDict(
|
||||||
|
build_source(
|
||||||
|
loc,
|
||||||
|
candidates_from_page=candidates_from_page,
|
||||||
|
page_validator=self.session.is_secure_origin,
|
||||||
|
expand_dir=True,
|
||||||
|
cache_link_parsing=True,
|
||||||
|
)
|
||||||
|
for loc in self.find_links
|
||||||
|
).values()
|
||||||
|
|
||||||
url_locations = _remove_duplicate_links(url_locations)
|
if logger.isEnabledFor(logging.DEBUG):
|
||||||
lines = [
|
lines = [
|
||||||
'{} location(s) to search for versions of {}:'.format(
|
f"* {s.link}"
|
||||||
len(url_locations), project_name,
|
for s in itertools.chain(find_links_sources, index_url_sources)
|
||||||
),
|
if s is not None and s.link is not None
|
||||||
]
|
]
|
||||||
for link in url_locations:
|
lines = [
|
||||||
lines.append(f'* {link}')
|
f"{len(lines)} location(s) to search "
|
||||||
logger.debug('\n'.join(lines))
|
f"for versions of {project_name}:"
|
||||||
|
] + lines
|
||||||
|
logger.debug("\n".join(lines))
|
||||||
|
|
||||||
return CollectedLinks(
|
return CollectedSources(
|
||||||
files=file_links,
|
find_links=list(find_links_sources),
|
||||||
find_links=find_link_links,
|
index_urls=list(index_url_sources),
|
||||||
project_urls=url_locations,
|
|
||||||
)
|
)
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
# mypy: strict-optional=False
|
# mypy: strict-optional=False
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union
|
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union
|
||||||
|
@ -804,38 +805,41 @@ class PackageFinder:
|
||||||
See LinkEvaluator.evaluate_link() for details on which files
|
See LinkEvaluator.evaluate_link() for details on which files
|
||||||
are accepted.
|
are accepted.
|
||||||
"""
|
"""
|
||||||
collected_links = self._link_collector.collect_links(project_name)
|
|
||||||
|
|
||||||
link_evaluator = self.make_link_evaluator(project_name)
|
link_evaluator = self.make_link_evaluator(project_name)
|
||||||
|
|
||||||
find_links_versions = self.evaluate_links(
|
collected_sources = self._link_collector.collect_sources(
|
||||||
link_evaluator,
|
project_name=project_name,
|
||||||
links=collected_links.find_links,
|
candidates_from_page=functools.partial(
|
||||||
|
self.process_project_url,
|
||||||
|
link_evaluator=link_evaluator,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
page_versions = []
|
page_candidates_it = itertools.chain.from_iterable(
|
||||||
for project_url in collected_links.project_urls:
|
source.page_candidates()
|
||||||
package_links = self.process_project_url(
|
for sources in collected_sources
|
||||||
project_url, link_evaluator=link_evaluator,
|
for source in sources
|
||||||
)
|
if source is not None
|
||||||
page_versions.extend(package_links)
|
|
||||||
|
|
||||||
file_versions = self.evaluate_links(
|
|
||||||
link_evaluator,
|
|
||||||
links=collected_links.files,
|
|
||||||
)
|
)
|
||||||
if file_versions:
|
page_candidates = list(page_candidates_it)
|
||||||
file_versions.sort(reverse=True)
|
|
||||||
logger.debug(
|
file_links_it = itertools.chain.from_iterable(
|
||||||
'Local files found: %s',
|
source.file_links()
|
||||||
', '.join([
|
for sources in collected_sources
|
||||||
url_to_path(candidate.link.url)
|
for source in sources
|
||||||
for candidate in file_versions
|
if source is not None
|
||||||
])
|
)
|
||||||
)
|
file_candidates = self.evaluate_links(
|
||||||
|
link_evaluator,
|
||||||
|
sorted(file_links_it, reverse=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
if logger.isEnabledFor(logging.DEBUG) and file_candidates:
|
||||||
|
paths = [url_to_path(c.link.url) for c in file_candidates]
|
||||||
|
logger.debug("Local files found: %s", ", ".join(paths))
|
||||||
|
|
||||||
# This is an intentional priority ordering
|
# This is an intentional priority ordering
|
||||||
return file_versions + find_links_versions + page_versions
|
return file_candidates + page_candidates
|
||||||
|
|
||||||
def make_candidate_evaluator(
|
def make_candidate_evaluator(
|
||||||
self,
|
self,
|
||||||
|
|
224
src/pip/_internal/index/sources.py
Normal file
224
src/pip/_internal/index/sources.py
Normal file
|
@ -0,0 +1,224 @@
|
||||||
|
import logging
|
||||||
|
import mimetypes
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
from typing import Callable, Iterable, Optional, Tuple
|
||||||
|
|
||||||
|
from pip._internal.models.candidate import InstallationCandidate
|
||||||
|
from pip._internal.models.link import Link
|
||||||
|
from pip._internal.utils.urls import path_to_url, url_to_path
|
||||||
|
from pip._internal.vcs import is_url
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
FoundCandidates = Iterable[InstallationCandidate]
|
||||||
|
FoundLinks = Iterable[Link]
|
||||||
|
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]]
|
||||||
|
PageValidator = Callable[[Link], bool]
|
||||||
|
|
||||||
|
|
||||||
|
class LinkSource:
    """Common interface for a location that links can be collected from.

    The base implementations only raise ``NotImplementedError``; the concrete
    sources in this module override all three members.
    """

    @property
    def link(self) -> Optional[Link]:
        """The link wrapped by this source, or ``None`` if it has none."""
        raise NotImplementedError()

    def page_candidates(self) -> FoundCandidates:
        """Candidates obtained by parsing an HTML archive listing."""
        raise NotImplementedError()

    def file_links(self) -> FoundLinks:
        """Links to archives that were specified directly."""
        raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_html_file(file_url: str) -> bool:
    """Tell whether *file_url* is guessed to point at an HTML document.

    The guess comes from ``mimetypes.guess_type`` in non-strict mode; only the
    MIME type is considered, the guessed encoding is ignored.
    """
    guessed_type, _encoding = mimetypes.guess_type(file_url, strict=False)
    return guessed_type == "text/html"
|
||||||
|
|
||||||
|
|
||||||
|
class _FlatDirectorySource(LinkSource):
    """Link source specified by ``--find-links=<path-to-dir>``.

    This looks at the content of the directory, and returns:

    * ``page_candidates``: Candidates produced from each HTML file in the
      directory.
    * ``file_links``: A link to every non-HTML entry in the directory.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        path: str,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        # Keep the canonical (symlink-resolved) form of the directory path.
        self._path = pathlib.Path(os.path.realpath(path))

    @property
    def link(self) -> Optional[Link]:
        """Always ``None``; this source is not backed by a single link."""
        return None

    def page_candidates(self) -> FoundCandidates:
        """Yield the candidates obtained from each HTML file in the directory."""
        for path in self._path.iterdir():
            url = path_to_url(str(path))
            if not _is_html_file(url):
                continue
            yield from self._candidates_from_page(Link(url))

    def file_links(self) -> FoundLinks:
        """Yield a link for each directory entry that is not an HTML file."""
        for path in self._path.iterdir():
            url = path_to_url(str(path))
            if _is_html_file(url):
                continue
            yield Link(url)
|
||||||
|
|
||||||
|
|
||||||
|
class _LocalFileSource(LinkSource):
    """``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``.

    If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to
    the option, it is converted to a URL first. This returns:

    * ``page_candidates``: Candidates produced from the file, if it is an
      HTML file.
    * ``file_links``: The link itself, if the file is not an HTML file.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link to the local file."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the file; yields nothing for a non-HTML file."""
        if not _is_html_file(self._link.url):
            return
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself; yields nothing when the file is HTML."""
        if _is_html_file(self._link.url):
            return
        yield self._link
|
||||||
|
|
||||||
|
|
||||||
|
class _RemoteFileSource(LinkSource):
    """``--find-links=<url>`` or ``--[extra-]index-url=<url>``.

    This returns:

    * ``page_candidates``: Candidates produced from the page behind the link,
      but only when ``page_validator`` accepts the link.
    * ``file_links``: The link itself, unconditionally.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        page_validator: PageValidator,
        link: Link,
    ) -> None:
        self._candidates_from_page = candidates_from_page
        self._page_validator = page_validator
        self._link = link

    @property
    def link(self) -> Optional[Link]:
        """The link to the remote location."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield candidates from the link; nothing if the validator rejects it."""
        if not self._page_validator(self._link):
            return
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Yield the link itself, without consulting the page validator."""
        yield self._link
|
||||||
|
|
||||||
|
|
||||||
|
class _IndexDirectorySource(LinkSource):
    """``--[extra-]index-url=<path-to-directory>``.

    This is treated like a remote URL; ``candidates_from_page`` contains logic
    for this by appending ``index.html`` to the link.
    """

    def __init__(
        self,
        candidates_from_page: CandidatesFromPage,
        link: Link,
    ) -> None:
        self._link = link
        self._candidates_from_page = candidates_from_page

    @property
    def link(self) -> Optional[Link]:
        """The link to the index directory."""
        return self._link

    def page_candidates(self) -> FoundCandidates:
        """Yield whatever ``candidates_from_page`` produces for the link."""
        yield from self._candidates_from_page(self._link)

    def file_links(self) -> FoundLinks:
        """Return an empty tuple; this source never contributes file links."""
        return ()
|
||||||
|
|
||||||
|
|
||||||
|
def build_source(
    location: str,
    *,
    candidates_from_page: CandidatesFromPage,
    page_validator: PageValidator,
    expand_dir: bool,
    cache_link_parsing: bool,
) -> Tuple[Optional[str], Optional[LinkSource]]:
    """Build the link source matching a user-supplied location.

    :param location: An existing local path, a ``file:`` URL, or another URL.
    :param candidates_from_page: Callable handed to the created source.
    :param page_validator: Callable handed to a remote source.
    :param expand_dir: Whether a local directory becomes a flat directory
        source (true) or an index directory source (false).
    :param cache_link_parsing: Forwarded to every ``Link`` created here.
    :return: A ``(url, source)`` pair. Both are ``None`` when the location is
        neither an existing path nor a URL; ``source`` alone is ``None`` when
        the location resolves to a local path that is neither a file nor a
        directory. A warning is logged in both cases.
    """
    local_path: Optional[str]
    if os.path.exists(location):  # Is a local path.
        url, local_path = path_to_url(location), location
    elif location.startswith("file:"):  # A file: URL.
        url, local_path = location, url_to_path(location)
    elif is_url(location):
        url, local_path = location, None
    else:
        msg = (
            "Location '%s' is ignored: "
            "it is either a non-existing path or lacks a specific scheme."
        )
        logger.warning(msg, location)
        return (None, None)

    # No local path means the location can only be reached as a URL.
    if local_path is None:
        remote_link = Link(url, cache_link_parsing=cache_link_parsing)
        return (
            url,
            _RemoteFileSource(
                candidates_from_page=candidates_from_page,
                page_validator=page_validator,
                link=remote_link,
            ),
        )

    if os.path.isdir(local_path):
        if not expand_dir:
            index_link = Link(url, cache_link_parsing=cache_link_parsing)
            return (
                url,
                _IndexDirectorySource(
                    candidates_from_page=candidates_from_page,
                    link=index_link,
                ),
            )
        return (
            url,
            _FlatDirectorySource(
                candidates_from_page=candidates_from_page,
                path=local_path,
            ),
        )

    if os.path.isfile(local_path):
        file_link = Link(url, cache_link_parsing=cache_link_parsing)
        return (
            url,
            _LocalFileSource(
                candidates_from_page=candidates_from_page,
                link=file_link,
            ),
        )

    logger.warning(
        "Location '%s' is ignored: it is neither a file nor a directory.",
        location,
    )
    return (url, None)
|
|
@ -1,3 +1,4 @@
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
|
@ -23,10 +24,9 @@ from pip._internal.index.collector import (
|
||||||
_make_html_page,
|
_make_html_page,
|
||||||
_NotHTML,
|
_NotHTML,
|
||||||
_NotHTTP,
|
_NotHTTP,
|
||||||
_remove_duplicate_links,
|
|
||||||
group_locations,
|
|
||||||
parse_links,
|
parse_links,
|
||||||
)
|
)
|
||||||
|
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
|
||||||
from pip._internal.models.index import PyPI
|
from pip._internal.models.index import PyPI
|
||||||
from pip._internal.models.link import Link
|
from pip._internal.models.link import Link
|
||||||
from pip._internal.network.session import PipSession
|
from pip._internal.network.session import PipSession
|
||||||
|
@ -587,46 +587,79 @@ def test_get_html_page_directory_append_index(tmpdir):
|
||||||
assert actual.url == expected_url
|
assert actual.url == expected_url
|
||||||
|
|
||||||
|
|
||||||
def test_remove_duplicate_links():
|
def test_collect_sources__file_expand_dir(data):
|
||||||
links = [
|
|
||||||
# We choose Links that will test that ordering is preserved.
|
|
||||||
Link('https://example.com/2'),
|
|
||||||
Link('https://example.com/1'),
|
|
||||||
Link('https://example.com/2'),
|
|
||||||
]
|
|
||||||
actual = _remove_duplicate_links(links)
|
|
||||||
assert actual == [
|
|
||||||
Link('https://example.com/2'),
|
|
||||||
Link('https://example.com/1'),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def test_group_locations__file_expand_dir(data):
|
|
||||||
"""
|
"""
|
||||||
Test that a file:// dir gets listdir run with expand_dir
|
Test that a file:// dir from --find-links becomes _FlatDirectorySource
|
||||||
"""
|
"""
|
||||||
files, urls = group_locations([data.find_links], expand_dir=True)
|
collector = LinkCollector.create(
|
||||||
assert files and not urls, (
|
session=pretend.stub(is_secure_origin=None), # Shouldn't be used.
|
||||||
"files and not urls should have been found "
|
options=pretend.stub(
|
||||||
|
index_url="ignored-by-no-index",
|
||||||
|
extra_index_urls=[],
|
||||||
|
no_index=True,
|
||||||
|
find_links=[data.find_links],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
sources = collector.collect_sources(
|
||||||
|
project_name=None, # Shouldn't be used.
|
||||||
|
candidates_from_page=None, # Shouldn't be used.
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
not sources.index_urls
|
||||||
|
and len(sources.find_links) == 1
|
||||||
|
and isinstance(sources.find_links[0], _FlatDirectorySource)
|
||||||
|
), (
|
||||||
|
"Directory source should have been found "
|
||||||
f"at find-links url: {data.find_links}"
|
f"at find-links url: {data.find_links}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_group_locations__file_not_find_link(data):
|
def test_collect_sources__file_not_find_link(data):
|
||||||
"""
|
"""
|
||||||
Test that a file:// url dir that's not a find-link, doesn't get a listdir
|
Test that a file:// dir from --index-url doesn't become _FlatDirectorySource
|
||||||
run
|
run
|
||||||
"""
|
"""
|
||||||
files, urls = group_locations([data.index_url("empty_with_pkg")])
|
collector = LinkCollector.create(
|
||||||
assert urls and not files, "urls, but not files should have been found"
|
session=pretend.stub(is_secure_origin=None), # Shouldn't be used.
|
||||||
|
options=pretend.stub(
|
||||||
|
index_url=data.index_url("empty_with_pkg"),
|
||||||
|
extra_index_urls=[],
|
||||||
|
no_index=False,
|
||||||
|
find_links=[],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
sources = collector.collect_sources(
|
||||||
|
project_name="",
|
||||||
|
candidates_from_page=None, # Shouldn't be used.
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
not sources.find_links
|
||||||
|
and len(sources.index_urls) == 1
|
||||||
|
and isinstance(sources.index_urls[0], _IndexDirectorySource)
|
||||||
|
), "Directory specified as index should be treated as a page"
|
||||||
|
|
||||||
|
|
||||||
def test_group_locations__non_existing_path():
|
def test_collect_sources__non_existing_path():
|
||||||
"""
|
"""
|
||||||
Test that a non-existing path is ignored.
|
Test that a non-existing path is ignored.
|
||||||
"""
|
"""
|
||||||
files, urls = group_locations([os.path.join('this', 'doesnt', 'exist')])
|
collector = LinkCollector.create(
|
||||||
assert not urls and not files, "nothing should have been found"
|
session=pretend.stub(is_secure_origin=None), # Shouldn't be used.
|
||||||
|
options=pretend.stub(
|
||||||
|
index_url="ignored-by-no-index",
|
||||||
|
extra_index_urls=[],
|
||||||
|
no_index=True,
|
||||||
|
find_links=[os.path.join("this", "doesnt", "exist")],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
sources = collector.collect_sources(
|
||||||
|
project_name=None, # Shouldn't be used.
|
||||||
|
candidates_from_page=None, # Shouldn't be used.
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
not sources.index_urls
|
||||||
|
and sources.find_links == [None]
|
||||||
|
), "Nothing should have been found"
|
||||||
|
|
||||||
|
|
||||||
def check_links_include(links, names):
|
def check_links_include(links, names):
|
||||||
|
@ -664,7 +697,7 @@ class TestLinkCollector:
|
||||||
url, session=link_collector.session,
|
url, session=link_collector.session,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_collect_links(self, caplog, data):
|
def test_collect_sources(self, caplog, data):
|
||||||
caplog.set_level(logging.DEBUG)
|
caplog.set_level(logging.DEBUG)
|
||||||
|
|
||||||
link_collector = make_test_link_collector(
|
link_collector = make_test_link_collector(
|
||||||
|
@ -673,20 +706,33 @@ class TestLinkCollector:
|
||||||
# is skipped.
|
# is skipped.
|
||||||
index_urls=[PyPI.simple_url, PyPI.simple_url],
|
index_urls=[PyPI.simple_url, PyPI.simple_url],
|
||||||
)
|
)
|
||||||
actual = link_collector.collect_links('twine')
|
collected_sources = link_collector.collect_sources(
|
||||||
|
"twine",
|
||||||
|
candidates_from_page=lambda link: [link],
|
||||||
|
)
|
||||||
|
|
||||||
# Spot-check the CollectedLinks return value.
|
files_it = itertools.chain.from_iterable(
|
||||||
assert len(actual.files) > 20
|
source.file_links()
|
||||||
check_links_include(actual.files, names=['simple-1.0.tar.gz'])
|
for sources in collected_sources
|
||||||
|
for source in sources
|
||||||
|
if source is not None
|
||||||
|
)
|
||||||
|
pages_it = itertools.chain.from_iterable(
|
||||||
|
source.page_candidates()
|
||||||
|
for sources in collected_sources
|
||||||
|
for source in sources
|
||||||
|
if source is not None
|
||||||
|
)
|
||||||
|
files = list(files_it)
|
||||||
|
pages = list(pages_it)
|
||||||
|
|
||||||
assert len(actual.find_links) == 1
|
# Spot-check the returned sources.
|
||||||
check_links_include(actual.find_links, names=['packages'])
|
assert len(files) > 20
|
||||||
# Check that find-links URLs are marked as cacheable.
|
check_links_include(files, names=["simple-1.0.tar.gz"])
|
||||||
assert actual.find_links[0].cache_link_parsing
|
|
||||||
|
|
||||||
assert actual.project_urls == [Link('https://pypi.org/simple/twine/')]
|
assert pages == [Link('https://pypi.org/simple/twine/')]
|
||||||
# Check that index URLs are marked as *un*cacheable.
|
# Check that index URLs are marked as *un*cacheable.
|
||||||
assert not actual.project_urls[0].cache_link_parsing
|
assert not pages[0].cache_link_parsing
|
||||||
|
|
||||||
expected_message = dedent("""\
|
expected_message = dedent("""\
|
||||||
1 location(s) to search for versions of twine:
|
1 location(s) to search for versions of twine:
|
||||||
|
|
Loading…
Reference in a new issue