mirror of https://github.com/pypa/pip
utils: decode requirement files according to their BOM if present
This commit is contained in:
parent
0c73957b6d
commit
e2889268bd
|
@ -29,6 +29,7 @@ from pip.models import PyPI
|
|||
from pip.utils import (splitext, rmtree, format_size, display_path,
|
||||
backup_dir, ask_path_exists, unpack_file,
|
||||
ARCHIVE_EXTENSIONS, consume, call_subprocess)
|
||||
from pip.utils.encoding import auto_decode
|
||||
from pip.utils.filesystem import check_path_owner
|
||||
from pip.utils.logging import indent_log
|
||||
from pip.utils.setuptools_build import SETUPTOOLS_SHIM
|
||||
|
@ -413,8 +414,8 @@ def get_file_content(url, comes_from=None, session=None):
|
|||
else:
|
||||
return resp.url, resp.content
|
||||
try:
|
||||
with open(url) as f:
|
||||
content = f.read()
|
||||
with open(url, 'rb') as f:
|
||||
content = auto_decode(f.read())
|
||||
except IOError as exc:
|
||||
raise InstallationError(
|
||||
'Could not open requirements file: %s' % str(exc)
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
import codecs
|
||||
import locale
|
||||
|
||||
|
||||
# Byte-order marks paired with the codec used to decode the bytes that
# follow them.  auto_decode() returns on the first match, so the order of
# this list matters: the UTF-32 marks must be tested before the UTF-16
# ones because the UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE
# BOM (FF FE).  Only explicit-endian entries are listed; the native-endian
# constants codecs.BOM_UTF16 / codecs.BOM_UTF32 are aliases of one of them.
BOMS = [
    (codecs.BOM_UTF8, 'utf-8'),
    (codecs.BOM_UTF32_BE, 'utf-32-be'),
    (codecs.BOM_UTF32_LE, 'utf-32-le'),
    (codecs.BOM_UTF16_BE, 'utf-16-be'),
    (codecs.BOM_UTF16_LE, 'utf-16-le'),
]


def auto_decode(data):
    """Decode a byte string, using its BOM (if any) to pick the encoding.

    :param data: the raw bytes to decode.
    :returns: the decoded text, with the BOM itself removed.
    :raises UnicodeDecodeError: if the bytes are not valid in the
        selected encoding.

    When no known BOM is present, fall back to
    locale.getpreferredencoding(False), like open() on Python 3.

    Note: UTF-16 LE content whose first character is U+0000 is
    indistinguishable from UTF-32 LE and is decoded as the latter.
    """
    for bom, encoding in BOMS:
        if data.startswith(bom):
            # Strip the mark so it does not leak into the text as U+FEFF.
            return data[len(bom):].decode(encoding)
    return data.decode(locale.getpreferredencoding(False))
|
|
@ -1,3 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
util tests
|
||||
|
||||
|
@ -15,6 +17,7 @@ from mock import Mock, patch
|
|||
from pip.exceptions import HashMismatch, HashMissing, InstallationError
|
||||
from pip.utils import (egg_link_path, get_installed_distributions,
|
||||
untar_file, unzip_file, rmtree, normalize_path)
|
||||
from pip.utils.encoding import auto_decode
|
||||
from pip.utils.hashes import Hashes, MissingHashes
|
||||
from pip._vendor.six import BytesIO
|
||||
|
||||
|
@ -447,3 +450,18 @@ class TestHashes(object):
|
|||
assert Hashes({'sha256': 'dummy'})
|
||||
assert not Hashes()
|
||||
assert not Hashes({})
|
||||
|
||||
|
||||
class TestEncoding(object):
    """Unit tests for auto_decode from pip.utils.encoding."""

    def test_auto_decode_utf16_le(self):
        # UTF-16 little-endian BOM followed by the encoded requirement.
        bom = b'\xff\xfe'
        payload = (
            b'D\x00j\x00a\x00n\x00g\x00o\x00=\x00'
            b'=\x001\x00.\x004\x00.\x002\x00'
        )
        decoded = auto_decode(bom + payload)
        assert decoded == "Django==1.4.2"

    def test_auto_decode_utf8_no_bom(self):
        # No BOM here; the expected round trip presumes the environment's
        # preferred encoding can decode UTF-8 bytes.
        expected = u"Fort de café"
        raw = expected.encode('utf8')
        assert auto_decode(raw) == expected
|
||||
|
|
Loading…
Reference in New Issue