1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Use Accept-Encoding: identity when requesting files

This commit is contained in:
Donald Stufft 2014-03-26 10:58:46 -04:00
parent 6e173ebe3f
commit 22c562429a
3 changed files with 30 additions and 1 deletions

View file

@ -19,6 +19,10 @@
* Added a virtualenv-specific configuration file. (:pull:`1364`)
* Send `Accept-Encoding: identity` when downloading files in an attempt to
convince some servers that double-compress the downloaded file to stop doing
so. (:pull:`1688`)
**1.5.4 (2014-02-21)**

View file

@ -582,7 +582,30 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
# let's download to a tmp dir
if not temp_location:
try:
resp = session.get(target_url, stream=True)
resp = session.get(
target_url,
# We use Accept-Encoding: identity here because requests
# defaults to accepting compressed responses. This breaks in
# a variety of ways depending on how the server is configured.
# - Some servers will notice that the file isn't a compressible
# file and will leave the file alone, with an empty
# Content-Encoding
# - Some servers will notice that the file is already
# compressed and will leave the file alone and will add a
# Content-Encoding: gzip header
# - Some servers won't notice anything at all and will take
# a file that's already been compressed and compress it again
# and set the Content-Encoding: gzip header
# By setting this to request only the identity encoding, we're
# hoping to eliminate the third case. Hopefully there does not
# exist a server which when given a file will notice it is
# already compressed and that you're not asking for a
# compressed file and will then decompress it before sending,
# because if that's the case I don't think it'll ever be
# possible to make this work.
headers={"Accept-Encoding": "identity"},
stream=True,
)
resp.raise_for_status()
except requests.HTTPError as exc:
logger.fatal("HTTP error %s while getting %s" %

View file

@ -109,6 +109,7 @@ def test_unpack_http_url_bad_cache_checksum(mock_unpack_file):
# despite existence of cached file with bad hash, downloaded again
session.get.assert_called_once_with(
"http://www.example.com/somepackage.tgz",
headers={"Accept-Encoding": "identity"},
stream=True,
)
# cached file is replaced with newly downloaded file
@ -151,6 +152,7 @@ def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
# despite existence of downloaded file with bad hash, downloaded again
session.get.assert_called_once_with(
'http://www.example.com/somepackage.tgz',
headers={"Accept-Encoding": "identity"},
stream=True,
)
# cached file is replaced with newly downloaded file