Cached or already-downloaded files with bad hashes are ignored.

This commit is contained in:
Carl Meyer 2013-05-30 16:43:58 -06:00
parent 388d1b53a7
commit b3a791223c
3 changed files with 91 additions and 6 deletions

View File

@ -573,7 +573,25 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
if link.hash:
download_hash = _get_hash_from_file(temp_location, link)
logger.notify('File was already downloaded %s' % already_downloaded)
else:
if download_hash:
try:
_check_hash(download_hash, link)
except HashMismatch:
logger.warn(
'Cached or previously-downloaded file %s has bad hash, '
're-downloading.' % temp_location
)
temp_location = None
if already_downloaded:
os.unlink(already_downloaded)
already_downloaded = None
if already_cached:
os.unlink(cache_file)
os.unlink(cache_content_type_file)
already_cached = False
if not temp_location:
resp = _get_response_from_url(target_url, link)
content_type = resp.info().get('content-type', '')
filename = link.filename # fallback
@ -595,6 +613,7 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
filename += ext
temp_location = os.path.join(temp_dir, filename)
download_hash = _download_url(resp, link, temp_location)
if link.hash and link.hash_name:
_check_hash(download_hash, link)
if download_dir and not already_downloaded:
@ -602,7 +621,7 @@ def unpack_http_url(link, location, download_cache, download_dir=None):
unpack_file(temp_location, location, content_type, link)
if cache_file and cache_file != temp_location:
cache_download(cache_file, temp_location, content_type)
if cache_file is None and not already_downloaded:
if not (already_cached or already_downloaded):
os.unlink(temp_location)
os.rmdir(temp_dir)

View File

@ -582,7 +582,6 @@ def cache_download(target_file, temp_location, content_type):
fp = open(target_file+'.content-type', 'w')
fp.write(content_type)
fp.close()
os.unlink(temp_location)
def unpack_file(filename, location, content_type, link):

View File

@ -1,9 +1,11 @@
import hashlib
import os
from shutil import rmtree
from tempfile import mkdtemp
import pip
from mock import patch
import pip
from pip.backwardcompat import urllib, BytesIO, b
from pip.download import (_get_response_from_url as _get_response_from_url_original,
path_to_url2, unpack_http_url, URLOpener)
from pip.index import Link
@ -36,13 +38,78 @@ def test_user_agent():
assert user_agent[1].startswith("pip/%s" % pip.__version__)
def test_bad_cache_checksum():
def _write_file(fn, contents):
    """Create (or overwrite) the text file at path ``fn`` with ``contents``."""
    with open(fn, 'w') as outfile:
        outfile.write(contents)
class MockResponse(object):
    """Minimal fake HTTP response: exposes only ``read()``, backed by an
    in-memory buffer holding the fixed byte string given at construction."""

    def __init__(self, contents):
        # Buffer the canned payload so read() behaves like a real file-ish
        # response object (supports partial reads and EOF).
        self._buffer = BytesIO(contents)

    def read(self, *args, **kwargs):
        return self._buffer.read(*args, **kwargs)
@patch('pip.download.unpack_file')
@patch('pip.download._get_response_from_url')
def test_unpack_http_url_bad_cache_checksum(mock_get_response, mock_unpack_file):
    """A cached file whose hash does not match the link's fragment must be
    discarded and the package fetched again from the network."""
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    # Link carries the sha1 of the *fresh* payload, so the stale cache entry
    # written below is guaranteed to mismatch.
    sha = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + sha.hexdigest())
    response = mock_get_response.return_value = MockResponse(contents)
    response.info = lambda: {'content-type': 'application/x-tar'}
    response.geturl = lambda: base_url
    cache_root = mkdtemp()
    try:
        # Seed the cache with content that cannot hash to sha1(contents).
        cache_file = os.path.join(cache_root, urllib.quote(base_url, ''))
        cache_ct_file = cache_file + '.content-type'
        _write_file(cache_file, 'some contents')
        _write_file(cache_ct_file, 'application/x-tar')
        unpack_http_url(link, 'location', download_cache=cache_root)
        # The bad-hash cache entry must not short-circuit the download.
        mock_get_response.assert_called_once_with(base_url, link)
        # The stale cache entry is overwritten by the fresh download.
        with open(cache_file) as cached:
            assert cached.read() == 'downloaded'
    finally:
        rmtree(cache_root)
def test_bad_already_downloaded_checksum():
@patch('pip.download.unpack_file')
@patch('pip.download._get_response_from_url')
def test_unpack_http_url_bad_downloaded_checksum(mock_get_response, mock_unpack_file):
    """A previously downloaded file whose hash does not match the link's
    fragment must be discarded and the package fetched again."""
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    # The link's sha1 fragment matches the fresh payload, never the stale
    # file planted in download_dir below.
    sha = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + sha.hexdigest())
    response = mock_get_response.return_value = MockResponse(contents)
    response.info = lambda: {'content-type': 'application/x-tar'}
    response.geturl = lambda: base_url
    download_dir = mkdtemp()
    try:
        # Plant an already-downloaded file with contents that mismatch the hash.
        downloaded_file = os.path.join(download_dir, 'somepackage.tgz')
        _write_file(downloaded_file, 'some contents')
        unpack_http_url(link, 'location', download_cache=None, download_dir=download_dir)
        # The bad-hash local file must not short-circuit the download.
        mock_get_response.assert_called_once_with(base_url, link)
        # The stale downloaded file is replaced by the fresh download.
        with open(downloaded_file) as downloaded:
            assert downloaded.read() == 'downloaded'
    finally:
        rmtree(download_dir)