Merge pull request #8737 from McSinyx/dl-many

Pradyun Gedam 2020-08-14 10:46:17 +05:30 committed by GitHub
commit 4312b940b4
7 changed files with 128 additions and 108 deletions

View File

@@ -16,7 +16,6 @@ from pip._internal.exceptions import CommandError, PreviousBuildDirError
 from pip._internal.index.collector import LinkCollector
 from pip._internal.index.package_finder import PackageFinder
 from pip._internal.models.selection_prefs import SelectionPreferences
-from pip._internal.network.download import Downloader
 from pip._internal.network.session import PipSession
 from pip._internal.operations.prepare import RequirementPreparer
 from pip._internal.req.constructors import (
@@ -213,8 +212,6 @@ class RequirementCommand(IndexGroupCommand):
         """
         Create a RequirementPreparer instance for the given parameters.
         """
-        downloader = Downloader(session, progress_bar=options.progress_bar)
-
         temp_build_dir_path = temp_build_dir.path
         assert temp_build_dir_path is not None
@@ -239,7 +236,7 @@ class RequirementCommand(IndexGroupCommand):
             build_isolation=options.build_isolation,
             req_tracker=req_tracker,
             session=session,
-            downloader=downloader,
+            progress_bar=options.progress_bar,
             finder=finder,
             require_hashes=options.require_hashes,
             use_user_site=use_user_site,
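
Note: the hunks above move downloader construction out of the command layer; the CLI now passes only the progress-bar option string and RequirementPreparer builds its own download helpers. A minimal self-contained sketch of that wiring pattern, using illustrative stand-in classes rather than pip's real ones:

# Illustrative stand-ins only; pip's real classes take many more arguments.
class Downloader(object):
    def __init__(self, session, progress_bar):
        self._session = session
        self._progress_bar = progress_bar


class BatchDownloader(object):
    def __init__(self, session, progress_bar):
        self._session = session
        self._progress_bar = progress_bar


class RequirementPreparer(object):
    def __init__(self, session, progress_bar):
        # The preparer now owns both download helpers instead of
        # receiving a pre-built Downloader from the command.
        self._download = Downloader(session, progress_bar)
        self._batch_download = BatchDownloader(session, progress_bar)


preparer = RequirementPreparer(session=object(), progress_bar="on")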

View File

@@ -24,7 +24,7 @@ from pip._internal.utils.misc import (
 from pip._internal.utils.typing import MYPY_CHECK_RUNNING

 if MYPY_CHECK_RUNNING:
-    from typing import Iterable, Optional
+    from typing import Iterable, Optional, Tuple

     from pip._vendor.requests.models import Response
@@ -141,19 +141,6 @@ def _http_get_download(session, link):
     return resp


-class Download(object):
-    def __init__(
-        self,
-        response,  # type: Response
-        filename,  # type: str
-        chunks,  # type: Iterable[bytes]
-    ):
-        # type: (...) -> None
-        self.response = response
-        self.filename = filename
-        self.chunks = chunks
-
-
 class Downloader(object):
     def __init__(
         self,
@@ -164,8 +151,9 @@ class Downloader(object):
         self._session = session
         self._progress_bar = progress_bar

-    def __call__(self, link):
-        # type: (Link) -> Download
+    def __call__(self, link, location):
+        # type: (Link, str) -> Tuple[str, str]
+        """Download the file given by link into location."""
         try:
             resp = _http_get_download(self._session, link)
         except NetworkConnectionError as e:
@@ -175,8 +163,48 @@ class Downloader(object):
             )
             raise

-        return Download(
-            resp,
-            _get_http_response_filename(resp, link),
-            _prepare_download(resp, link, self._progress_bar),
-        )
+        filename = _get_http_response_filename(resp, link)
+        filepath = os.path.join(location, filename)
+
+        chunks = _prepare_download(resp, link, self._progress_bar)
+        with open(filepath, 'wb') as content_file:
+            for chunk in chunks:
+                content_file.write(chunk)
+        content_type = resp.headers.get('Content-Type', '')
+        return filepath, content_type
+
+
+class BatchDownloader(object):
+    def __init__(
+        self,
+        session,  # type: PipSession
+        progress_bar,  # type: str
+    ):
+        # type: (...) -> None
+        self._session = session
+        self._progress_bar = progress_bar
+
+    def __call__(self, links, location):
+        # type: (Iterable[Link], str) -> Iterable[Tuple[str, Tuple[str, str]]]
+        """Download the files given by links into location."""
+        for link in links:
+            try:
+                resp = _http_get_download(self._session, link)
+            except NetworkConnectionError as e:
+                assert e.response is not None
+                logger.critical(
+                    "HTTP error %s while getting %s",
+                    e.response.status_code, link,
+                )
+                raise
+
+            filename = _get_http_response_filename(resp, link)
+            filepath = os.path.join(location, filename)
+
+            chunks = _prepare_download(resp, link, self._progress_bar)
+            with open(filepath, 'wb') as content_file:
+                for chunk in chunks:
+                    content_file.write(chunk)
+            content_type = resp.headers.get('Content-Type', '')
+            yield link.url, (filepath, content_type)
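
Note: for orientation, a hedged usage sketch of the two callables added above, assuming pip at this revision, where Downloader.__call__(link, location) writes the file and returns (filepath, content_type) and BatchDownloader lazily yields (url, (filepath, content_type)) pairs. The package URL below is hypothetical and the calls would hit the network.

import tempfile

from pip._internal.models.link import Link
from pip._internal.network.download import BatchDownloader, Downloader
from pip._internal.network.session import PipSession

session = PipSession()
download = Downloader(session, progress_bar="on")
batch_download = BatchDownloader(session, progress_bar="on")

link = Link("https://example.com/packages/example-1.0.tar.gz")  # hypothetical URL
location = tempfile.mkdtemp(prefix="unpack-")

# Single download: writes the file into `location` and returns its path.
filepath, content_type = download(link, location)

# Batch download: yields results keyed by the original URL.
for url, (filepath, content_type) in batch_download([link], location):
    print(url, "->", filepath, content_type)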

View File

@@ -26,6 +26,7 @@ from pip._internal.exceptions import (
     VcsHashUnsupported,
 )
 from pip._internal.models.wheel import Wheel
+from pip._internal.network.download import BatchDownloader, Downloader
 from pip._internal.network.lazy_wheel import (
     HTTPRangeRequestUnsupported,
     dist_from_wheel_url,
@@ -45,16 +46,13 @@ from pip._internal.utils.unpacking import unpack_file
 from pip._internal.vcs import vcs

 if MYPY_CHECK_RUNNING:
-    from typing import (
-        Callable, List, Optional, Tuple,
-    )
+    from typing import Callable, Dict, Iterable, List, Optional, Tuple

     from mypy_extensions import TypedDict
     from pip._vendor.pkg_resources import Distribution

     from pip._internal.index.package_finder import PackageFinder
     from pip._internal.models.link import Link
-    from pip._internal.network.download import Downloader
     from pip._internal.network.session import PipSession
     from pip._internal.req.req_install import InstallRequirement
     from pip._internal.req.req_tracker import RequirementTracker
@@ -106,15 +104,19 @@ def unpack_vcs_link(link, location):
 class File(object):
     def __init__(self, path, content_type):
-        # type: (str, str) -> None
+        # type: (str, Optional[str]) -> None
         self.path = path
-        self.content_type = content_type
+        if content_type is None:
+            self.content_type = mimetypes.guess_type(path)[0]
+        else:
+            self.content_type = content_type


 def get_http_url(
     link,  # type: Link
-    downloader,  # type: Downloader
+    download,  # type: Downloader
     download_dir=None,  # type: Optional[str]
     hashes=None,  # type: Optional[Hashes]
 ):
@@ -129,12 +131,12 @@ def get_http_url(
     if already_downloaded_path:
         from_path = already_downloaded_path
-        content_type = mimetypes.guess_type(from_path)[0]
+        content_type = None
     else:
         # let's download to a tmp dir
-        from_path, content_type = _download_http_url(
-            link, downloader, temp_dir.path, hashes
-        )
+        from_path, content_type = download(link, temp_dir.path)
+        if hashes:
+            hashes.check_against_path(from_path)

     return File(from_path, content_type)
@@ -219,16 +221,13 @@ def get_file_url(
     # one; no internet-sourced hash will be in `hashes`.
     if hashes:
         hashes.check_against_path(from_path)
-
-    content_type = mimetypes.guess_type(from_path)[0]
-
-    return File(from_path, content_type)
+    return File(from_path, None)


 def unpack_url(
     link,  # type: Link
     location,  # type: str
-    downloader,  # type: Downloader
+    download,  # type: Downloader
     download_dir=None,  # type: Optional[str]
     hashes=None,  # type: Optional[Hashes]
 ):
@@ -260,7 +259,7 @@ def unpack_url(
     else:
         file = get_http_url(
             link,
-            downloader,
+            download,
             download_dir,
             hashes=hashes,
         )
@@ -273,27 +272,6 @@ def unpack_url(
     return file


-def _download_http_url(
-    link,  # type: Link
-    downloader,  # type: Downloader
-    temp_dir,  # type: str
-    hashes,  # type: Optional[Hashes]
-):
-    # type: (...) -> Tuple[str, str]
-    """Download link url into temp_dir using provided session"""
-    download = downloader(link)
-    file_path = os.path.join(temp_dir, download.filename)
-    with open(file_path, 'wb') as content_file:
-        for chunk in download.chunks:
-            content_file.write(chunk)
-    if hashes:
-        hashes.check_against_path(file_path)
-    return file_path, download.response.headers.get('content-type', '')
-
-
 def _check_download_dir(link, download_dir, hashes):
     # type: (Link, str, Optional[Hashes]) -> Optional[str]
     """ Check download_dir for previously downloaded file with correct hash
@@ -333,7 +311,7 @@ class RequirementPreparer(object):
         build_isolation,  # type: bool
         req_tracker,  # type: RequirementTracker
         session,  # type: PipSession
-        downloader,  # type: Downloader
+        progress_bar,  # type: str
         finder,  # type: PackageFinder
         require_hashes,  # type: bool
         use_user_site,  # type: bool
@@ -346,7 +324,8 @@ class RequirementPreparer(object):
         self.build_dir = build_dir
         self.req_tracker = req_tracker
        self._session = session
-        self.downloader = downloader
+        self._download = Downloader(session, progress_bar)
+        self._batch_download = BatchDownloader(session, progress_bar)
         self.finder = finder

         # Where still-packed archives should be written to. If None, they are
@@ -375,6 +354,9 @@ class RequirementPreparer(object):
         # Should wheels be downloaded lazily?
         self.use_lazy_wheel = lazy_wheel

+        # Memoized downloaded files, as mapping of url: (path, mime type)
+        self._downloaded = {}  # type: Dict[str, Tuple[str, str]]
+
     @property
     def _download_should_save(self):
         # type: () -> bool
@@ -398,6 +380,13 @@ class RequirementPreparer(object):
         else:
             logger.info('Collecting %s', req.req or req)

+    def _get_download_dir(self, link):
+        # type: (Link) -> Optional[str]
+        if link.is_wheel and self.wheel_download_dir:
+            # Download wheels to a dedicated dir when doing `pip wheel`.
+            return self.wheel_download_dir
+        return self.download_dir
+
     def _ensure_link_req_src_dir(self, req, download_dir, parallel_builds):
         # type: (InstallRequirement, Optional[str], bool) -> None
         """Ensure source_dir of a linked InstallRequirement."""
@@ -503,35 +492,52 @@ class RequirementPreparer(object):
             return wheel_dist
         return self._prepare_linked_requirement(req, parallel_builds)

-    def prepare_linked_requirement_more(self, req, parallel_builds=False):
-        # type: (InstallRequirement, bool) -> None
+    def prepare_linked_requirements_more(self, reqs, parallel_builds=False):
+        # type: (Iterable[InstallRequirement], bool) -> None
         """Prepare a linked requirement more, if needed."""
-        if not req.needs_more_preparation:
-            return
-        self._prepare_linked_requirement(req, parallel_builds)
+        reqs = [req for req in reqs if req.needs_more_preparation]
+        links = []  # type: List[Link]
+        for req in reqs:
+            download_dir = self._get_download_dir(req.link)
+            if download_dir is not None:
+                hashes = self._get_linked_req_hashes(req)
+                file_path = _check_download_dir(req.link, download_dir, hashes)
+            if download_dir is None or file_path is None:
+                links.append(req.link)
+            else:
+                self._downloaded[req.link.url] = file_path, None
+
+        # Let's download to a temporary directory.
+        tmpdir = TempDirectory(kind="unpack", globally_managed=True).path
+        self._downloaded.update(self._batch_download(links, tmpdir))
+        for req in reqs:
+            self._prepare_linked_requirement(req, parallel_builds)

     def _prepare_linked_requirement(self, req, parallel_builds):
         # type: (InstallRequirement, bool) -> Distribution
         assert req.link
         link = req.link
-        if link.is_wheel and self.wheel_download_dir:
-            # Download wheels to a dedicated dir when doing `pip wheel`.
-            download_dir = self.wheel_download_dir
-        else:
-            download_dir = self.download_dir
+        download_dir = self._get_download_dir(link)

         with indent_log():
             self._ensure_link_req_src_dir(req, download_dir, parallel_builds)
-            try:
-                local_file = unpack_url(
-                    link, req.source_dir, self.downloader, download_dir,
-                    hashes=self._get_linked_req_hashes(req)
-                )
-            except NetworkConnectionError as exc:
-                raise InstallationError(
-                    'Could not install requirement {} because of HTTP '
-                    'error {} for URL {}'.format(req, exc, link)
-                )
+            hashes = self._get_linked_req_hashes(req)
+            if link.url not in self._downloaded:
+                try:
+                    local_file = unpack_url(
+                        link, req.source_dir, self._download,
+                        download_dir, hashes,
+                    )
+                except NetworkConnectionError as exc:
+                    raise InstallationError(
+                        'Could not install requirement {} because of HTTP '
+                        'error {} for URL {}'.format(req, exc, link)
+                    )
+            else:
+                file_path, content_type = self._downloaded[link.url]
+                if hashes:
+                    hashes.check_against_path(file_path)
+                local_file = File(file_path, content_type)

             # For use in later processing, preserve the file path on the
             # requirement.
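
Note: the net effect of the preparer changes above is a per-URL download memo: requirements whose archive is already in a download directory are recorded as (path, None), everything else is fetched in a single batch into a temporary directory, and _prepare_linked_requirement consults the memo before falling back to unpack_url. A self-contained sketch of that memoization pattern follows, with illustrative names and a stubbed batch downloader rather than pip's code:

import os
import tempfile


def batch_download(urls, location):
    # Stand-in for BatchDownloader: yields (url, (filepath, content_type)).
    for url in urls:
        filepath = os.path.join(location, os.path.basename(url))
        with open(filepath, "wb") as f:
            f.write(b"placeholder")  # the real code streams response chunks
        yield url, (filepath, "application/octet-stream")


downloaded = {}  # url -> (path, content_type), like RequirementPreparer._downloaded
already_have = {"https://example.com/pkgs/a-1.0.tar.gz": "/cache/a-1.0.tar.gz"}
wanted = [
    "https://example.com/pkgs/a-1.0.tar.gz",
    "https://example.com/pkgs/b-2.0.tar.gz",
]

links = []
for url in wanted:
    if url in already_have:
        # Previously downloaded and hash-checked: reuse it; MIME type guessed later.
        downloaded[url] = (already_have[url], None)
    else:
        links.append(url)

tmpdir = tempfile.mkdtemp(prefix="unpack-")
downloaded.update(batch_download(links, tmpdir))
for url, (path, content_type) in downloaded.items():
    print(url, "->", path, content_type)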

View File

@@ -160,9 +160,8 @@ class Resolver(BaseResolver):
             req_set.add_named_requirement(ireq)

-        for actual_req in req_set.all_requirements:
-            self.factory.preparer.prepare_linked_requirement_more(actual_req)
-
+        reqs = req_set.all_requirements
+        self.factory.preparer.prepare_linked_requirements_more(reqs)
         return req_set

     def get_installation_order(self, req_set):

View File

@@ -10,11 +10,7 @@ from pip._internal.exceptions import HashMismatch
 from pip._internal.models.link import Link
 from pip._internal.network.download import Downloader
 from pip._internal.network.session import PipSession
-from pip._internal.operations.prepare import (
-    _copy_source_tree,
-    _download_http_url,
-    unpack_url,
-)
+from pip._internal.operations.prepare import _copy_source_tree, unpack_url
 from pip._internal.utils.hashes import Hashes
 from pip._internal.utils.urls import path_to_url
 from tests.lib.filesystem import (
@@ -39,7 +35,7 @@ def test_unpack_url_with_urllib_response_without_content_type(data):
     session = Mock()
     session.get = _fake_session_get
-    downloader = Downloader(session, progress_bar="on")
+    download = Downloader(session, progress_bar="on")

     uri = path_to_url(data.packages.joinpath("simple-1.0.tar.gz"))
     link = Link(uri)
@@ -48,7 +44,7 @@ def test_unpack_url_with_urllib_response_without_content_type(data):
     unpack_url(
         link,
         temp_dir,
-        downloader=downloader,
+        download=download,
         download_dir=None,
     )
     assert set(os.listdir(temp_dir)) == {
@@ -79,16 +75,11 @@ def test_download_http_url__no_directory_traversal(mock_raise_for_status,
         'content-disposition': 'attachment;filename="../out_dir_file"'
     }
     session.get.return_value = resp
-    downloader = Downloader(session, progress_bar="on")
+    download = Downloader(session, progress_bar="on")

     download_dir = tmpdir.joinpath('download')
     os.mkdir(download_dir)
-    file_path, content_type = _download_http_url(
-        link,
-        downloader,
-        download_dir,
-        hashes=None,
-    )
+    file_path, content_type = download(link, download_dir)
     # The file should be downloaded to download_dir.
     actual = os.listdir(download_dir)
     assert actual == ['out_dir_file']
@@ -187,11 +178,11 @@ class Test_unpack_url(object):
         self.dist_path2 = data.packages.joinpath(self.dist_file2)
         self.dist_url = Link(path_to_url(self.dist_path))
         self.dist_url2 = Link(path_to_url(self.dist_path2))
-        self.no_downloader = Mock(side_effect=AssertionError)
+        self.no_download = Mock(side_effect=AssertionError)

     def test_unpack_url_no_download(self, tmpdir, data):
         self.prep(tmpdir, data)
-        unpack_url(self.dist_url, self.build_dir, self.no_downloader)
+        unpack_url(self.dist_url, self.build_dir, self.no_download)
         assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
         assert not os.path.isfile(
             os.path.join(self.download_dir, self.dist_file))
@@ -207,7 +198,7 @@ class Test_unpack_url(object):
         with pytest.raises(HashMismatch):
             unpack_url(dist_url,
                        self.build_dir,
-                       downloader=self.no_downloader,
+                       download=self.no_download,
                        hashes=Hashes({'md5': ['bogus']}))

     def test_unpack_url_thats_a_dir(self, tmpdir, data):
@@ -215,7 +206,7 @@ class Test_unpack_url(object):
         dist_path = data.packages.joinpath("FSPkg")
         dist_url = Link(path_to_url(dist_path))
         unpack_url(dist_url, self.build_dir,
-                   downloader=self.no_downloader,
+                   download=self.no_download,
                    download_dir=self.download_dir)
         assert os.path.isdir(os.path.join(self.build_dir, 'fspkg'))

View File

@@ -18,7 +18,6 @@ from pip._internal.exceptions import (
     InvalidWheelFilename,
     PreviousBuildDirError,
 )
-from pip._internal.network.download import Downloader
 from pip._internal.network.session import PipSession
 from pip._internal.operations.prepare import RequirementPreparer
 from pip._internal.req import InstallRequirement, RequirementSet
@@ -87,7 +86,7 @@ class TestRequirementSet(object):
                 build_isolation=True,
                 req_tracker=tracker,
                 session=session,
-                downloader=Downloader(session, progress_bar="on"),
+                progress_bar='on',
                 finder=finder,
                 require_hashes=require_hashes,
                 use_user_site=False,