mirror of https://github.com/pypa/pip
Merge dc0b3138e8
into a15dd75d98
This commit is contained in:
commit
ba3b272c06
|
@ -0,0 +1 @@
|
|||
Optimized usage of ``--find-links=<path-to-dir>``, by only scanning the relevant directory once, only considering file names that are valid wheel or sdist names, and only considering files in the directory that are related to the install.
|
|
@ -473,6 +473,7 @@ class LinkCollector:
|
|||
page_validator=self.session.is_secure_origin,
|
||||
expand_dir=False,
|
||||
cache_link_parsing=False,
|
||||
project_name=project_name,
|
||||
)
|
||||
for loc in self.search_scope.get_index_urls_locations(project_name)
|
||||
).values()
|
||||
|
@ -483,6 +484,7 @@ class LinkCollector:
|
|||
page_validator=self.session.is_secure_origin,
|
||||
expand_dir=True,
|
||||
cache_link_parsing=True,
|
||||
project_name=project_name,
|
||||
)
|
||||
for loc in self.find_links
|
||||
).values()
|
||||
|
|
|
@ -1,8 +1,17 @@
|
|||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Callable, Iterable, Optional, Tuple
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from pip._vendor.packaging.utils import (
|
||||
InvalidSdistFilename,
|
||||
InvalidVersion,
|
||||
InvalidWheelFilename,
|
||||
canonicalize_name,
|
||||
parse_sdist_filename,
|
||||
parse_wheel_filename,
|
||||
)
|
||||
|
||||
from pip._internal.models.candidate import InstallationCandidate
|
||||
from pip._internal.models.link import Link
|
||||
|
@ -36,6 +45,53 @@ def _is_html_file(file_url: str) -> bool:
|
|||
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
|
||||
|
||||
|
||||
class _FlatDirectoryToUrls:
|
||||
"""Scans directory and caches results"""
|
||||
|
||||
def __init__(self, path: str) -> None:
|
||||
self._path = path
|
||||
self._page_candidates: List[str] = []
|
||||
self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
|
||||
self._scanned_directory = False
|
||||
|
||||
def _scan_directory(self) -> None:
|
||||
"""Scans directory once and populates both page_candidates
|
||||
and project_name_to_urls at the same time
|
||||
"""
|
||||
for entry in os.scandir(self._path):
|
||||
url = path_to_url(entry.path)
|
||||
if _is_html_file(url):
|
||||
self._page_candidates.append(url)
|
||||
continue
|
||||
|
||||
# File must have a valid wheel or sdist name,
|
||||
# otherwise not worth considering as a package
|
||||
try:
|
||||
project_filename = parse_wheel_filename(entry.name)[0]
|
||||
except (InvalidWheelFilename, InvalidVersion):
|
||||
try:
|
||||
project_filename = parse_sdist_filename(entry.name)[0]
|
||||
except (InvalidSdistFilename, InvalidVersion):
|
||||
continue
|
||||
|
||||
self._project_name_to_urls[project_filename].append(url)
|
||||
self._scanned_directory = True
|
||||
|
||||
@property
|
||||
def page_candidates(self) -> List[str]:
|
||||
if not self._scanned_directory:
|
||||
self._scan_directory()
|
||||
|
||||
return self._page_candidates
|
||||
|
||||
@property
|
||||
def project_name_to_urls(self) -> Dict[str, List[str]]:
|
||||
if not self._scanned_directory:
|
||||
self._scan_directory()
|
||||
|
||||
return self._project_name_to_urls
|
||||
|
||||
|
||||
class _FlatDirectorySource(LinkSource):
|
||||
"""Link source specified by ``--find-links=<path-to-dir>``.
|
||||
|
||||
|
@ -45,30 +101,34 @@ class _FlatDirectorySource(LinkSource):
|
|||
* ``file_candidates``: Archives in the directory.
|
||||
"""
|
||||
|
||||
_paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
candidates_from_page: CandidatesFromPage,
|
||||
path: str,
|
||||
project_name: str,
|
||||
) -> None:
|
||||
self._candidates_from_page = candidates_from_page
|
||||
self._path = pathlib.Path(os.path.realpath(path))
|
||||
self._project_name = canonicalize_name(project_name)
|
||||
|
||||
# Get existing instance of _FlatDirectoryToUrls if it exists
|
||||
if path in self._paths_to_urls:
|
||||
self._path_to_urls = self._paths_to_urls[path]
|
||||
else:
|
||||
self._path_to_urls = _FlatDirectoryToUrls(path=path)
|
||||
self._paths_to_urls[path] = self._path_to_urls
|
||||
|
||||
@property
|
||||
def link(self) -> Optional[Link]:
|
||||
return None
|
||||
|
||||
def page_candidates(self) -> FoundCandidates:
|
||||
for path in self._path.iterdir():
|
||||
url = path_to_url(str(path))
|
||||
if not _is_html_file(url):
|
||||
continue
|
||||
for url in self._path_to_urls.page_candidates:
|
||||
yield from self._candidates_from_page(Link(url))
|
||||
|
||||
def file_links(self) -> FoundLinks:
|
||||
for path in self._path.iterdir():
|
||||
url = path_to_url(str(path))
|
||||
if _is_html_file(url):
|
||||
continue
|
||||
for url in self._path_to_urls.project_name_to_urls[self._project_name]:
|
||||
yield Link(url)
|
||||
|
||||
|
||||
|
@ -170,6 +230,7 @@ def build_source(
|
|||
page_validator: PageValidator,
|
||||
expand_dir: bool,
|
||||
cache_link_parsing: bool,
|
||||
project_name: str,
|
||||
) -> Tuple[Optional[str], Optional[LinkSource]]:
|
||||
path: Optional[str] = None
|
||||
url: Optional[str] = None
|
||||
|
@ -203,6 +264,7 @@ def build_source(
|
|||
source = _FlatDirectorySource(
|
||||
candidates_from_page=candidates_from_page,
|
||||
path=path,
|
||||
project_name=project_name,
|
||||
)
|
||||
else:
|
||||
source = _IndexDirectorySource(
|
||||
|
|
|
@ -125,9 +125,6 @@ def test_command_line_append_flags(
|
|||
"Fetching project page and analyzing links: https://test.pypi.org"
|
||||
in result.stdout
|
||||
)
|
||||
assert (
|
||||
f"Skipping link: not a file: {data.find_links}" in result.stdout
|
||||
), f"stdout: {result.stdout}"
|
||||
|
||||
|
||||
@pytest.mark.network
|
||||
|
@ -151,9 +148,6 @@ def test_command_line_appends_correctly(
|
|||
"Fetching project page and analyzing links: https://test.pypi.org"
|
||||
in result.stdout
|
||||
), result.stdout
|
||||
assert (
|
||||
f"Skipping link: not a file: {data.find_links}" in result.stdout
|
||||
), f"stdout: {result.stdout}"
|
||||
|
||||
|
||||
def test_config_file_override_stack(
|
||||
|
|
|
@ -862,7 +862,7 @@ def test_collect_sources__file_expand_dir(data: TestData) -> None:
|
|||
)
|
||||
sources = collector.collect_sources(
|
||||
# Shouldn't be used.
|
||||
project_name=None, # type: ignore[arg-type]
|
||||
project_name="",
|
||||
candidates_from_page=None, # type: ignore[arg-type]
|
||||
)
|
||||
assert (
|
||||
|
@ -960,7 +960,7 @@ class TestLinkCollector:
|
|||
session=link_collector.session,
|
||||
)
|
||||
|
||||
def test_collect_sources(
|
||||
def test_collect_page_sources(
|
||||
self, caplog: pytest.LogCaptureFixture, data: TestData
|
||||
) -> None:
|
||||
caplog.set_level(logging.DEBUG)
|
||||
|
@ -993,9 +993,8 @@ class TestLinkCollector:
|
|||
files = list(files_it)
|
||||
pages = list(pages_it)
|
||||
|
||||
# Spot-check the returned sources.
|
||||
assert len(files) > 20
|
||||
check_links_include(files, names=["simple-1.0.tar.gz"])
|
||||
# Only "twine" should return from collecting sources
|
||||
assert len(files) == 1
|
||||
|
||||
assert [page.link for page in pages] == [Link("https://pypi.org/simple/twine/")]
|
||||
# Check that index URLs are marked as *un*cacheable.
|
||||
|
@ -1010,6 +1009,52 @@ class TestLinkCollector:
|
|||
("pip._internal.index.collector", logging.DEBUG, expected_message),
|
||||
]
|
||||
|
||||
def test_collect_file_sources(
|
||||
self, caplog: pytest.LogCaptureFixture, data: TestData
|
||||
) -> None:
|
||||
caplog.set_level(logging.DEBUG)
|
||||
|
||||
link_collector = make_test_link_collector(
|
||||
find_links=[data.find_links],
|
||||
# Include two copies of the URL to check that the second one
|
||||
# is skipped.
|
||||
index_urls=[PyPI.simple_url, PyPI.simple_url],
|
||||
)
|
||||
collected_sources = link_collector.collect_sources(
|
||||
"singlemodule",
|
||||
candidates_from_page=lambda link: [
|
||||
InstallationCandidate("singlemodule", "0.0.1", link)
|
||||
],
|
||||
)
|
||||
|
||||
files_it = itertools.chain.from_iterable(
|
||||
source.file_links()
|
||||
for sources in collected_sources
|
||||
for source in sources
|
||||
if source is not None
|
||||
)
|
||||
pages_it = itertools.chain.from_iterable(
|
||||
source.page_candidates()
|
||||
for sources in collected_sources
|
||||
for source in sources
|
||||
if source is not None
|
||||
)
|
||||
files = list(files_it)
|
||||
_ = list(pages_it)
|
||||
|
||||
# singlemodule should return files
|
||||
assert len(files) > 0
|
||||
check_links_include(files, names=["singlemodule-0.0.1.tar.gz"])
|
||||
|
||||
expected_message = dedent(
|
||||
"""\
|
||||
1 location(s) to search for versions of singlemodule:
|
||||
* https://pypi.org/simple/singlemodule/"""
|
||||
)
|
||||
assert caplog.record_tuples == [
|
||||
("pip._internal.index.collector", logging.DEBUG, expected_message),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"find_links, no_index, suppress_no_index, expected",
|
||||
|
|
|
@ -128,7 +128,10 @@ class TestWheel:
|
|||
with pytest.raises(DistributionNotFound):
|
||||
finder.find_requirement(req, True)
|
||||
|
||||
assert "Skipping link: invalid wheel filename:" in caplog.text
|
||||
assert (
|
||||
"Could not find a version that satisfies the requirement invalid"
|
||||
" (from versions:" in caplog.text
|
||||
)
|
||||
|
||||
def test_not_find_wheel_not_supported(self, data: TestData) -> None:
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue