mirror of
https://github.com/pypa/pip
synced 2023-12-13 21:30:23 +01:00
Merge pull request #6311 from hroncok/i6054
Fix utils.encoding.auto_decode() LookupError with invalid encodings
This commit is contained in:
commit
4589ed464e
4
news/6054.bugfix
Normal file
4
news/6054.bugfix
Normal file
|
@ -0,0 +1,4 @@
|
|||
Fix ``utils.encoding.auto_decode()`` ``LookupError`` with invalid encodings.
|
||||
``utils.encoding.auto_decode()`` was broken when decoding Big Endian BOM
|
||||
byte-strings on Little Endian platforms, or vice versa.
|
||||
|
|
@ -9,13 +9,13 @@ if MYPY_CHECK_RUNNING:
|
|||
from typing import List, Tuple, Text
|
||||
|
||||
BOMS = [
|
||||
(codecs.BOM_UTF8, 'utf8'),
|
||||
(codecs.BOM_UTF16, 'utf16'),
|
||||
(codecs.BOM_UTF16_BE, 'utf16-be'),
|
||||
(codecs.BOM_UTF16_LE, 'utf16-le'),
|
||||
(codecs.BOM_UTF32, 'utf32'),
|
||||
(codecs.BOM_UTF32_BE, 'utf32-be'),
|
||||
(codecs.BOM_UTF32_LE, 'utf32-le'),
|
||||
(codecs.BOM_UTF8, 'utf-8'),
|
||||
(codecs.BOM_UTF16, 'utf-16'),
|
||||
(codecs.BOM_UTF16_BE, 'utf-16-be'),
|
||||
(codecs.BOM_UTF16_LE, 'utf-16-le'),
|
||||
(codecs.BOM_UTF32, 'utf-32'),
|
||||
(codecs.BOM_UTF32_BE, 'utf-32-be'),
|
||||
(codecs.BOM_UTF32_LE, 'utf-32-le'),
|
||||
] # type: List[Tuple[bytes, Text]]
|
||||
|
||||
ENCODING_RE = re.compile(br'coding[:=]\s*([-\w.]+)')
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
util tests
|
||||
|
||||
"""
|
||||
import codecs
|
||||
import itertools
|
||||
import os
|
||||
import shutil
|
||||
|
@ -20,7 +21,7 @@ from mock import Mock, patch
|
|||
from pip._internal.exceptions import (
|
||||
HashMismatch, HashMissing, InstallationError, UnsupportedPythonVersion,
|
||||
)
|
||||
from pip._internal.utils.encoding import auto_decode
|
||||
from pip._internal.utils.encoding import BOMS, auto_decode
|
||||
from pip._internal.utils.glibc import check_glibc_version
|
||||
from pip._internal.utils.hashes import Hashes, MissingHashes
|
||||
from pip._internal.utils.misc import (
|
||||
|
@ -462,11 +463,20 @@ class TestHashes(object):
|
|||
class TestEncoding(object):
|
||||
"""Tests for pip._internal.utils.encoding"""
|
||||
|
||||
def test_auto_decode_utf16_le(self):
|
||||
def test_auto_decode_utf_16_le(self):
|
||||
data = (
|
||||
b'\xff\xfeD\x00j\x00a\x00n\x00g\x00o\x00=\x00'
|
||||
b'=\x001\x00.\x004\x00.\x002\x00'
|
||||
)
|
||||
assert data.startswith(codecs.BOM_UTF16_LE)
|
||||
assert auto_decode(data) == "Django==1.4.2"
|
||||
|
||||
def test_auto_decode_utf_16_be(self):
|
||||
data = (
|
||||
b'\xfe\xff\x00D\x00j\x00a\x00n\x00g\x00o\x00='
|
||||
b'\x00=\x001\x00.\x004\x00.\x002'
|
||||
)
|
||||
assert data.startswith(codecs.BOM_UTF16_BE)
|
||||
assert auto_decode(data) == "Django==1.4.2"
|
||||
|
||||
def test_auto_decode_no_bom(self):
|
||||
|
@ -486,6 +496,11 @@ class TestEncoding(object):
|
|||
ret = auto_decode(data.encode(sys.getdefaultencoding()))
|
||||
assert ret == data
|
||||
|
||||
@pytest.mark.parametrize('encoding', [encoding for bom, encoding in BOMS])
|
||||
def test_all_encodings_are_valid(self, encoding):
|
||||
# we really only care that there is no LookupError
|
||||
assert ''.encode(encoding).decode(encoding) == ''
|
||||
|
||||
|
||||
class TestTempDirectory(object):
|
||||
|
||||
|
|
Loading…
Reference in a new issue