pip/tests/unit/test_download.py

import hashlib
import os
from io import BytesIO
from shutil import copy, rmtree
from tempfile import mkdtemp

import pytest
from mock import Mock, patch

from pip._vendor.six.moves.urllib import request as urllib_request

import pip
from pip.download import (
MultiDomainBasicAuth, PipSession, SafeFileCache, path_to_url,
unpack_file_url, unpack_http_url, url_to_path
)
from pip.exceptions import HashMismatch
from pip.index import Link
from pip.utils.hashes import Hashes
from tests.lib import create_file


def test_unpack_http_url_with_urllib_response_without_content_type(data):
"""
It should download and unpack files even if no Content-Type header exists
"""
    _real_session = PipSession()

    def _fake_session_get(*args, **kwargs):
resp = _real_session.get(*args, **kwargs)
del resp.headers["Content-Type"]
        return resp

    session = Mock()
    session.get = _fake_session_get

    uri = path_to_url(data.packages.join("simple-1.0.tar.gz"))
link = Link(uri)
temp_dir = mkdtemp()
try:
unpack_http_url(
link,
temp_dir,
download_dir=None,
session=session,
)
assert set(os.listdir(temp_dir)) == set([
'PKG-INFO', 'setup.cfg', 'setup.py', 'simple', 'simple.egg-info'
])
    finally:
        rmtree(temp_dir)


def test_user_agent():
    assert PipSession().headers["User-Agent"].startswith(
        "pip/%s" % pip.__version__)
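

# FakeStream and MockResponse fake just enough of the requests/urllib3
# response interface (a ``raw`` body exposing read() and stream(), plus
# headers and raise_for_status()) for unpack_http_url to consume a canned
# payload without any network access.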
class FakeStream(object):

    def __init__(self, contents):
        self._io = BytesIO(contents)

    def read(self, size, decode_content=None):
        return self._io.read(size)

    def stream(self, size, decode_content=None):
        yield self._io.read(size)


class MockResponse(object):

    def __init__(self, contents):
        self.raw = FakeStream(contents)

    def raise_for_status(self):
        pass


@patch('pip.download.unpack_file')
def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
2013-05-30 23:03:04 +02:00
"""
If already-downloaded file has bad checksum, re-download.
"""
base_url = 'http://www.example.com/somepackage.tgz'
contents = b'downloaded'
download_hash = hashlib.new('sha1', contents)
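    # The "#sha1=..." fragment is how a Link carries its expected hash;
    # unpack_http_url validates any pre-existing download against it.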
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    session = Mock()
session.get = Mock()
response = session.get.return_value = MockResponse(contents)
response.headers = {'content-type': 'application/x-tar'}
response.url = base_url
download_dir = mkdtemp()
try:
downloaded_file = os.path.join(download_dir, 'somepackage.tgz')
create_file(downloaded_file, 'some contents')
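        # Seed the download dir with a file whose contents do not match the
        # hash in the link fragment, so a fresh download is forced.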
unpack_http_url(
link,
'location',
download_dir=download_dir,
session=session,
hashes=Hashes({'sha1': [download_hash.hexdigest()]})
)
# despite existence of downloaded file with bad hash, downloaded again
session.get.assert_called_once_with(
'http://www.example.com/somepackage.tgz',
headers={"Accept-Encoding": "identity"},
stream=True,
)
# cached file is replaced with newly downloaded file
with open(downloaded_file) as fh:
assert fh.read() == 'downloaded'
finally:
        rmtree(download_dir)


@pytest.mark.skipif("sys.platform == 'win32'")
def test_path_to_url_unix():
assert path_to_url('/tmp/file') == 'file:///tmp/file'
path = os.path.join(os.getcwd(), 'file')
    assert path_to_url('file') == 'file://' + urllib_request.pathname2url(path)


@pytest.mark.skipif("sys.platform == 'win32'")
def test_url_to_path_unix():
    assert url_to_path('file:///tmp/file') == '/tmp/file'


@pytest.mark.skipif("sys.platform != 'win32'")
def test_path_to_url_win():
assert path_to_url('c:/tmp/file') == 'file:///C:/tmp/file'
assert path_to_url('c:\\tmp\\file') == 'file:///C:/tmp/file'
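    # For a UNC path, the host lands in the URL authority, not in the path.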
assert path_to_url(r'\\unc\as\path') == 'file://unc/as/path'
path = os.path.join(os.getcwd(), 'file')
    assert path_to_url('file') == 'file:' + urllib_request.pathname2url(path)


@pytest.mark.skipif("sys.platform != 'win32'")
def test_url_to_path_win():
assert url_to_path('file:///c:/tmp/file') == 'C:\\tmp\\file'
    assert url_to_path('file://unc/as/path') == r'\\unc\as\path'


@pytest.mark.skipif("sys.platform != 'win32'")
def test_url_to_path_path_to_url_symmetry_win():
path = r'C:\tmp\file'
assert url_to_path(path_to_url(path)) == path
unc_path = r'\\unc\share\path'
    assert url_to_path(path_to_url(unc_path)) == unc_path


class Test_unpack_file_url(object):

    def prep(self, tmpdir, data):
self.build_dir = tmpdir.join('build')
self.download_dir = tmpdir.join('download')
os.mkdir(self.build_dir)
os.mkdir(self.download_dir)
self.dist_file = "simple-1.0.tar.gz"
self.dist_file2 = "simple-2.0.tar.gz"
self.dist_path = data.packages.join(self.dist_file)
self.dist_path2 = data.packages.join(self.dist_file2)
self.dist_url = Link(path_to_url(self.dist_path))
        self.dist_url2 = Link(path_to_url(self.dist_path2))

    def test_unpack_file_url_no_download(self, tmpdir, data):
self.prep(tmpdir, data)
unpack_file_url(self.dist_url, self.build_dir)
assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
assert not os.path.isfile(
            os.path.join(self.download_dir, self.dist_file))

    def test_unpack_file_url_and_download(self, tmpdir, data):
self.prep(tmpdir, data)
unpack_file_url(self.dist_url, self.build_dir,
download_dir=self.download_dir)
assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
        assert os.path.isfile(os.path.join(self.download_dir, self.dist_file))

    def test_unpack_file_url_download_already_exists(self, tmpdir,
data, monkeypatch):
self.prep(tmpdir, data)
# add in previous download (copy simple-2.0 as simple-1.0)
# so we can tell it didn't get overwritten
dest_file = os.path.join(self.download_dir, self.dist_file)
copy(self.dist_path2, dest_file)
with open(self.dist_path2, 'rb') as f:
dist_path2_md5 = hashlib.md5(f.read()).hexdigest()
unpack_file_url(self.dist_url, self.build_dir,
download_dir=self.download_dir)
# our hash should be the same, i.e. not overwritten by simple-1.0 hash
with open(dest_file, 'rb') as f:
            assert dist_path2_md5 == hashlib.md5(f.read()).hexdigest()

    def test_unpack_file_url_bad_hash(self, tmpdir, data,
monkeypatch):
"""
Test when the file url hash fragment is wrong
"""
self.prep(tmpdir, data)
self.dist_url.url = "%s#md5=bogus" % self.dist_url.url
with pytest.raises(HashMismatch):
unpack_file_url(self.dist_url,
self.build_dir,
hashes=Hashes({'md5': ['bogus']}))
def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
monkeypatch):
"""
Test when existing download has different hash from the file url
fragment
"""
self.prep(tmpdir, data)
# add in previous download (copy simple-2.0 as simple-1.0 so it's wrong
# hash)
dest_file = os.path.join(self.download_dir, self.dist_file)
copy(self.dist_path2, dest_file)
with open(self.dist_path, 'rb') as f:
dist_path_md5 = hashlib.md5(f.read()).hexdigest()
with open(dest_file, 'rb') as f:
dist_path2_md5 = hashlib.md5(f.read()).hexdigest()
assert dist_path_md5 != dist_path2_md5
self.dist_url.url = "%s#md5=%s" % (
self.dist_url.url,
dist_path_md5
)
unpack_file_url(self.dist_url, self.build_dir,
download_dir=self.download_dir,
hashes=Hashes({'md5': [dist_path_md5]}))
        # confirm the hash is for simple-1.0
# the previous bad download has been removed
with open(dest_file, 'rb') as f:
            assert hashlib.md5(f.read()).hexdigest() == dist_path_md5

    def test_unpack_file_url_thats_a_dir(self, tmpdir, data):
self.prep(tmpdir, data)
dist_path = data.packages.join("FSPkg")
dist_url = Link(path_to_url(dist_path))
unpack_file_url(dist_url, self.build_dir,
download_dir=self.download_dir)
        assert os.path.isdir(os.path.join(self.build_dir, 'fspkg'))


class TestSafeFileCache:
    """
    The no_perms tests are useless on Windows, since SafeFileCache uses
    pip.utils.filesystem.check_path_owner, which is based on os.geteuid,
    which is absent on Windows.
    """

    def test_cache_roundtrip(self, tmpdir):
cache_dir = tmpdir.join("test-cache")
cache_dir.makedirs()
cache = SafeFileCache(cache_dir)
assert cache.get("test key") is None
cache.set("test key", b"a test string")
assert cache.get("test key") == b"a test string"
cache.delete("test key")
assert cache.get("test key") is None
@pytest.mark.skipif("sys.platform == 'win32'")
def test_safe_get_no_perms(self, tmpdir, monkeypatch):
cache_dir = tmpdir.join("unreadable-cache")
cache_dir.makedirs()
        os.chmod(cache_dir, 0o000)
monkeypatch.setattr(os.path, "exists", lambda x: True)
cache = SafeFileCache(cache_dir)
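        # Should be a silent no-op, not an error, even though the cache
        # directory is unreadable.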
cache.get("foo")
@pytest.mark.skipif("sys.platform == 'win32'")
def test_safe_set_no_perms(self, tmpdir):
cache_dir = tmpdir.join("unreadable-cache")
cache_dir.makedirs()
        os.chmod(cache_dir, 0o000)
cache = SafeFileCache(cache_dir)
cache.set("foo", b"bar")
@pytest.mark.skipif("sys.platform == 'win32'")
def test_safe_delete_no_perms(self, tmpdir):
cache_dir = tmpdir.join("unreadable-cache")
cache_dir.makedirs()
        os.chmod(cache_dir, 0o000)
cache = SafeFileCache(cache_dir)
cache.delete("foo")
class TestPipSession:
def test_cache_defaults_off(self):
session = PipSession()
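        # Without a cache directory, neither adapter should be the caching
        # variant (recognizable by its ``cache`` attribute).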
assert not hasattr(session.adapters["http://"], "cache")
assert not hasattr(session.adapters["https://"], "cache")
def test_cache_is_enabled(self, tmpdir):
session = PipSession(cache=tmpdir.join("test-cache"))
assert hasattr(session.adapters["https://"], "cache")
assert (session.adapters["https://"].cache.directory ==
tmpdir.join("test-cache"))
def test_http_cache_is_not_enabled(self, tmpdir):
session = PipSession(cache=tmpdir.join("test-cache"))
assert not hasattr(session.adapters["http://"], "cache")
def test_insecure_host_cache_is_not_enabled(self, tmpdir):
session = PipSession(
cache=tmpdir.join("test-cache"),
insecure_hosts=["example.com"],
)
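        # Insecure hosts get their own adapter, which does not cache.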
assert not hasattr(session.adapters["https://example.com/"], "cache")
def test_parse_credentials():
auth = MultiDomainBasicAuth()
assert auth.parse_credentials("foo:bar@example.com") == ('foo', 'bar')
assert auth.parse_credentials("foo@example.com") == ('foo', None)
assert auth.parse_credentials("example.com") == (None, None)
# URL-encoded reserved characters:
assert auth.parse_credentials("user%3Aname:%23%40%5E@example.com") \
== ("user:name", "#@^")