Merge pull request #8737 from McSinyx/dl-many

This commit is contained in:
Pradyun Gedam 2020-08-14 10:46:17 +05:30 committed by GitHub
commit 4312b940b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 108 deletions

View File

@ -16,7 +16,6 @@ from pip._internal.exceptions import CommandError, PreviousBuildDirError
from pip._internal.index.collector import LinkCollector
from pip._internal.index.package_finder import PackageFinder
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.network.download import Downloader
from pip._internal.network.session import PipSession
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req.constructors import (
@ -213,8 +212,6 @@ class RequirementCommand(IndexGroupCommand):
"""
Create a RequirementPreparer instance for the given parameters.
"""
downloader = Downloader(session, progress_bar=options.progress_bar)
temp_build_dir_path = temp_build_dir.path
assert temp_build_dir_path is not None
@ -239,7 +236,7 @@ class RequirementCommand(IndexGroupCommand):
build_isolation=options.build_isolation,
req_tracker=req_tracker,
session=session,
downloader=downloader,
progress_bar=options.progress_bar,
finder=finder,
require_hashes=options.require_hashes,
use_user_site=use_user_site,

View File

@ -24,7 +24,7 @@ from pip._internal.utils.misc import (
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
if MYPY_CHECK_RUNNING:
from typing import Iterable, Optional
from typing import Iterable, Optional, Tuple
from pip._vendor.requests.models import Response
@ -141,19 +141,6 @@ def _http_get_download(session, link):
return resp
class Download(object):
    """Plain holder for the three pieces of one HTTP download.

    Stores the response object, the filename chosen for it, and the
    iterable of body chunks exactly as they were passed in.
    """

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        # No validation or copying: callers read these attributes back as-is.
        self.response, self.filename, self.chunks = (
            response, filename, chunks,
        )
class Downloader(object):
def __init__(
self,
@ -164,8 +151,9 @@ class Downloader(object):
self._session = session
self._progress_bar = progress_bar
def __call__(self, link):
# type: (Link) -> Download
def __call__(self, link, location):
# type: (Link, str) -> Tuple[str, str]
"""Download the file given by link into location."""
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
@ -175,8 +163,48 @@ class Downloader(object):
)
raise
return Download(
resp,
_get_http_response_filename(resp, link),
_prepare_download(resp, link, self._progress_bar),
)
filename = _get_http_response_filename(resp, link)
filepath = os.path.join(location, filename)
chunks = _prepare_download(resp, link, self._progress_bar)
with open(filepath, 'wb') as content_file:
for chunk in chunks:
content_file.write(chunk)
content_type = resp.headers.get('Content-Type', '')
return filepath, content_type
class BatchDownloader(object):
    """Callable that downloads a series of links into one directory."""

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        self._session = session
        self._progress_bar = progress_bar

    def __call__(self, links, location):
        # type: (Iterable[Link], str) -> Iterable[Tuple[str, Tuple[str, str]]]
        """Download the files given by links into location.

        This is a generator: each link is fetched only when the result
        is iterated.  For every link it yields
        ``(link.url, (filepath, content_type))``, where ``content_type``
        is the response's ``Content-Type`` header, or ``''`` if absent.

        A NetworkConnectionError raised while fetching is logged at
        critical level and then re-raised.
        """
        for link in links:
            try:
                response = _http_get_download(self._session, link)
            except NetworkConnectionError as error:
                assert error.response is not None
                logger.critical(
                    "HTTP error %s while getting %s",
                    error.response.status_code, link,
                )
                raise

            target_path = os.path.join(
                location, _get_http_response_filename(response, link),
            )
            # Build the chunk iterable before opening the file, as the
            # original statement order does.
            body = _prepare_download(response, link, self._progress_bar)
            with open(target_path, 'wb') as out_file:
                for piece in body:
                    out_file.write(piece)
            mime_type = response.headers.get('Content-Type', '')
            yield link.url, (target_path, mime_type)

View File

@ -26,6 +26,7 @@ from pip._internal.exceptions import (
VcsHashUnsupported,
)
from pip._internal.models.wheel import Wheel
from pip._internal.network.download import BatchDownloader, Downloader
from pip._internal.network.lazy_wheel import (
HTTPRangeRequestUnsupported,
dist_from_wheel_url,
@ -45,16 +46,13 @@ from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
if MYPY_CHECK_RUNNING:
from typing import (
Callable, List, Optional, Tuple,
)
from typing import Callable, Dict, Iterable, List, Optional, Tuple
from mypy_extensions import TypedDict
from pip._vendor.pkg_resources import Distribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.models.link import Link
from pip._internal.network.download import Downloader
from pip._internal.network.session import PipSession
from pip._internal.req.req_install import InstallRequirement
from pip._internal.req.req_tracker import RequirementTracker
@ -106,15 +104,19 @@ def unpack_vcs_link(link, location):
class File(object):
def __init__(self, path, content_type):
# type: (str, str) -> None
# type: (str, Optional[str]) -> None
self.path = path
self.content_type = content_type
if content_type is None:
self.content_type = mimetypes.guess_type(path)[0]
else:
self.content_type = content_type
def get_http_url(
link, # type: Link
downloader, # type: Downloader
download, # type: Downloader
download_dir=None, # type: Optional[str]
hashes=None, # type: Optional[Hashes]
):
@ -129,12 +131,12 @@ def get_http_url(
if already_downloaded_path:
from_path = already_downloaded_path
content_type = mimetypes.guess_type(from_path)[0]
content_type = None
else:
# let's download to a tmp dir
from_path, content_type = _download_http_url(
link, downloader, temp_dir.path, hashes
)
from_path, content_type = download(link, temp_dir.path)
if hashes:
hashes.check_against_path(from_path)
return File(from_path, content_type)
@ -219,16 +221,13 @@ def get_file_url(
# one; no internet-sourced hash will be in `hashes`.
if hashes:
hashes.check_against_path(from_path)
content_type = mimetypes.guess_type(from_path)[0]
return File(from_path, content_type)
return File(from_path, None)
def unpack_url(
link, # type: Link
location, # type: str
downloader, # type: Downloader
download, # type: Downloader
download_dir=None, # type: Optional[str]
hashes=None, # type: Optional[Hashes]
):
@ -260,7 +259,7 @@ def unpack_url(
else:
file = get_http_url(
link,
downloader,
download,
download_dir,
hashes=hashes,
)
@ -273,27 +272,6 @@ def unpack_url(
return file
def _download_http_url(
link, # type: Link
downloader, # type: Downloader
temp_dir, # type: str
hashes, # type: Optional[Hashes]
):
# type: (...) -> Tuple[str, str]
"""Download link url into temp_dir using provided session"""
download = downloader(link)
file_path = os.path.join(temp_dir, download.filename)
with open(file_path, 'wb') as content_file:
for chunk in download.chunks:
content_file.write(chunk)
if hashes:
hashes.check_against_path(file_path)
return file_path, download.response.headers.get('content-type', '')
def _check_download_dir(link, download_dir, hashes):
# type: (Link, str, Optional[Hashes]) -> Optional[str]
""" Check download_dir for previously downloaded file with correct hash
@ -333,7 +311,7 @@ class RequirementPreparer(object):
build_isolation, # type: bool
req_tracker, # type: RequirementTracker
session, # type: PipSession
downloader, # type: Downloader
progress_bar, # type: str
finder, # type: PackageFinder
require_hashes, # type: bool
use_user_site, # type: bool
@ -346,7 +324,8 @@ class RequirementPreparer(object):
self.build_dir = build_dir
self.req_tracker = req_tracker
self._session = session
self.downloader = downloader
self._download = Downloader(session, progress_bar)
self._batch_download = BatchDownloader(session, progress_bar)
self.finder = finder
# Where still-packed archives should be written to. If None, they are
@ -375,6 +354,9 @@ class RequirementPreparer(object):
# Should wheels be downloaded lazily?
self.use_lazy_wheel = lazy_wheel
# Memoized downloaded files, as mapping of url: (path, mime type)
self._downloaded = {} # type: Dict[str, Tuple[str, str]]
@property
def _download_should_save(self):
# type: () -> bool
@ -398,6 +380,13 @@ class RequirementPreparer(object):
else:
logger.info('Collecting %s', req.req or req)
def _get_download_dir(self, link):
# type: (Link) -> Optional[str]
if link.is_wheel and self.wheel_download_dir:
# Download wheels to a dedicated dir when doing `pip wheel`.
return self.wheel_download_dir
return self.download_dir
def _ensure_link_req_src_dir(self, req, download_dir, parallel_builds):
# type: (InstallRequirement, Optional[str], bool) -> None
"""Ensure source_dir of a linked InstallRequirement."""
@ -503,35 +492,52 @@ class RequirementPreparer(object):
return wheel_dist
return self._prepare_linked_requirement(req, parallel_builds)
def prepare_linked_requirement_more(self, req, parallel_builds=False):
# type: (InstallRequirement, bool) -> None
def prepare_linked_requirements_more(self, reqs, parallel_builds=False):
# type: (Iterable[InstallRequirement], bool) -> None
"""Prepare a linked requirement more, if needed."""
if not req.needs_more_preparation:
return
self._prepare_linked_requirement(req, parallel_builds)
reqs = [req for req in reqs if req.needs_more_preparation]
links = [] # type: List[Link]
for req in reqs:
download_dir = self._get_download_dir(req.link)
if download_dir is not None:
hashes = self._get_linked_req_hashes(req)
file_path = _check_download_dir(req.link, download_dir, hashes)
if download_dir is None or file_path is None:
links.append(req.link)
else:
self._downloaded[req.link.url] = file_path, None
# Let's download to a temporary directory.
tmpdir = TempDirectory(kind="unpack", globally_managed=True).path
self._downloaded.update(self._batch_download(links, tmpdir))
for req in reqs:
self._prepare_linked_requirement(req, parallel_builds)
def _prepare_linked_requirement(self, req, parallel_builds):
# type: (InstallRequirement, bool) -> Distribution
assert req.link
link = req.link
if link.is_wheel and self.wheel_download_dir:
# Download wheels to a dedicated dir when doing `pip wheel`.
download_dir = self.wheel_download_dir
else:
download_dir = self.download_dir
download_dir = self._get_download_dir(link)
with indent_log():
self._ensure_link_req_src_dir(req, download_dir, parallel_builds)
try:
local_file = unpack_url(
link, req.source_dir, self.downloader, download_dir,
hashes=self._get_linked_req_hashes(req)
)
except NetworkConnectionError as exc:
raise InstallationError(
'Could not install requirement {} because of HTTP '
'error {} for URL {}'.format(req, exc, link)
)
hashes = self._get_linked_req_hashes(req)
if link.url not in self._downloaded:
try:
local_file = unpack_url(
link, req.source_dir, self._download,
download_dir, hashes,
)
except NetworkConnectionError as exc:
raise InstallationError(
'Could not install requirement {} because of HTTP '
'error {} for URL {}'.format(req, exc, link)
)
else:
file_path, content_type = self._downloaded[link.url]
if hashes:
hashes.check_against_path(file_path)
local_file = File(file_path, content_type)
# For use in later processing, preserve the file path on the
# requirement.

View File

@ -160,9 +160,8 @@ class Resolver(BaseResolver):
req_set.add_named_requirement(ireq)
for actual_req in req_set.all_requirements:
self.factory.preparer.prepare_linked_requirement_more(actual_req)
reqs = req_set.all_requirements
self.factory.preparer.prepare_linked_requirements_more(reqs)
return req_set
def get_installation_order(self, req_set):

View File

@ -10,11 +10,7 @@ from pip._internal.exceptions import HashMismatch
from pip._internal.models.link import Link
from pip._internal.network.download import Downloader
from pip._internal.network.session import PipSession
from pip._internal.operations.prepare import (
_copy_source_tree,
_download_http_url,
unpack_url,
)
from pip._internal.operations.prepare import _copy_source_tree, unpack_url
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.urls import path_to_url
from tests.lib.filesystem import (
@ -39,7 +35,7 @@ def test_unpack_url_with_urllib_response_without_content_type(data):
session = Mock()
session.get = _fake_session_get
downloader = Downloader(session, progress_bar="on")
download = Downloader(session, progress_bar="on")
uri = path_to_url(data.packages.joinpath("simple-1.0.tar.gz"))
link = Link(uri)
@ -48,7 +44,7 @@ def test_unpack_url_with_urllib_response_without_content_type(data):
unpack_url(
link,
temp_dir,
downloader=downloader,
download=download,
download_dir=None,
)
assert set(os.listdir(temp_dir)) == {
@ -79,16 +75,11 @@ def test_download_http_url__no_directory_traversal(mock_raise_for_status,
'content-disposition': 'attachment;filename="../out_dir_file"'
}
session.get.return_value = resp
downloader = Downloader(session, progress_bar="on")
download = Downloader(session, progress_bar="on")
download_dir = tmpdir.joinpath('download')
os.mkdir(download_dir)
file_path, content_type = _download_http_url(
link,
downloader,
download_dir,
hashes=None,
)
file_path, content_type = download(link, download_dir)
# The file should be downloaded to download_dir.
actual = os.listdir(download_dir)
assert actual == ['out_dir_file']
@ -187,11 +178,11 @@ class Test_unpack_url(object):
self.dist_path2 = data.packages.joinpath(self.dist_file2)
self.dist_url = Link(path_to_url(self.dist_path))
self.dist_url2 = Link(path_to_url(self.dist_path2))
self.no_downloader = Mock(side_effect=AssertionError)
self.no_download = Mock(side_effect=AssertionError)
def test_unpack_url_no_download(self, tmpdir, data):
self.prep(tmpdir, data)
unpack_url(self.dist_url, self.build_dir, self.no_downloader)
unpack_url(self.dist_url, self.build_dir, self.no_download)
assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
assert not os.path.isfile(
os.path.join(self.download_dir, self.dist_file))
@ -207,7 +198,7 @@ class Test_unpack_url(object):
with pytest.raises(HashMismatch):
unpack_url(dist_url,
self.build_dir,
downloader=self.no_downloader,
download=self.no_download,
hashes=Hashes({'md5': ['bogus']}))
def test_unpack_url_thats_a_dir(self, tmpdir, data):
@ -215,7 +206,7 @@ class Test_unpack_url(object):
dist_path = data.packages.joinpath("FSPkg")
dist_url = Link(path_to_url(dist_path))
unpack_url(dist_url, self.build_dir,
downloader=self.no_downloader,
download=self.no_download,
download_dir=self.download_dir)
assert os.path.isdir(os.path.join(self.build_dir, 'fspkg'))

View File

@ -18,7 +18,6 @@ from pip._internal.exceptions import (
InvalidWheelFilename,
PreviousBuildDirError,
)
from pip._internal.network.download import Downloader
from pip._internal.network.session import PipSession
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req import InstallRequirement, RequirementSet
@ -87,7 +86,7 @@ class TestRequirementSet(object):
build_isolation=True,
req_tracker=tracker,
session=session,
downloader=Downloader(session, progress_bar="on"),
progress_bar='on',
finder=finder,
require_hashes=require_hashes,
use_user_site=False,