mirror of https://github.com/pypa/pip
Merge pull request #6671 from cjerdonek/make-subprocess-error-non-ascii-cmd
Handle non-ascii commands in Python 2 in make_subprocess_output_error()
This commit is contained in:
commit
0d5a98390e
|
@ -15,7 +15,7 @@ from pip._vendor.urllib3.util import IS_PYOPENSSL
|
|||
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
|
||||
|
||||
if MYPY_CHECK_RUNNING:
|
||||
from typing import Tuple, Text
|
||||
from typing import Optional, Text, Tuple, Union
|
||||
|
||||
try:
|
||||
import _ssl # noqa
|
||||
|
@ -83,18 +83,29 @@ else:
|
|||
backslashreplace_decode = "backslashreplace_decode"
|
||||
|
||||
|
||||
def console_to_str(data):
|
||||
# type: (bytes) -> Text
|
||||
"""Return a string, safe for output, of subprocess output.
|
||||
|
||||
We assume the data is in the locale preferred encoding.
|
||||
If it won't decode properly, we warn the user but decode as
|
||||
best we can.
|
||||
|
||||
We also ensure that the output can be safely written to
|
||||
standard output without encoding errors.
|
||||
def str_to_display(data, desc=None):
|
||||
# type: (Union[bytes, Text], Optional[str]) -> Text
|
||||
"""
|
||||
For display or logging purposes, convert a bytes object (or text) to
|
||||
text (e.g. unicode in Python 2) safe for output.
|
||||
|
||||
:param desc: An optional phrase describing the input data, for use in
|
||||
the log message if a warning is logged. Defaults to "Bytes object".
|
||||
|
||||
This function should never error out and so can take a best effort
|
||||
approach. It is okay to be lossy if needed since the return value is
|
||||
just for display.
|
||||
|
||||
We assume the data is in the locale preferred encoding. If it won't
|
||||
decode properly, we warn the user but decode as best we can.
|
||||
|
||||
We also ensure that the output can be safely written to standard output
|
||||
without encoding errors.
|
||||
"""
|
||||
if isinstance(data, text_type):
|
||||
return data
|
||||
|
||||
# Otherwise, data is a bytes object (str in Python 2).
|
||||
# First, get the encoding we assume. This is the preferred
|
||||
# encoding for the locale, unless that is not found, or
|
||||
# it is ASCII, in which case assume UTF-8.
|
||||
|
@ -107,10 +118,10 @@ def console_to_str(data):
|
|||
try:
|
||||
decoded_data = data.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
logger.warning(
|
||||
"Subprocess output does not appear to be encoded as %s",
|
||||
encoding,
|
||||
)
|
||||
if desc is None:
|
||||
desc = 'Bytes object'
|
||||
msg_format = '{} does not appear to be encoded as %s'.format(desc)
|
||||
logger.warning(msg_format, encoding)
|
||||
decoded_data = data.decode(encoding, errors=backslashreplace_decode)
|
||||
|
||||
# Make sure we can print the output, by encoding it to the output
|
||||
|
@ -138,6 +149,13 @@ def console_to_str(data):
|
|||
return decoded_data
|
||||
|
||||
|
||||
def console_to_str(data):
|
||||
# type: (bytes) -> Text
|
||||
"""Return a string, safe for output, of subprocess output.
|
||||
"""
|
||||
return str_to_display(data, desc='Subprocess output')
|
||||
|
||||
|
||||
if sys.version_info >= (3,):
|
||||
def native_str(s, replace=False):
|
||||
# type: (str, bool) -> str
|
||||
|
|
|
@ -35,7 +35,7 @@ from pip._internal.locations import (
|
|||
write_delete_marker_file,
|
||||
)
|
||||
from pip._internal.utils.compat import (
|
||||
WINDOWS, console_to_str, expanduser, stdlib_pkgs,
|
||||
WINDOWS, console_to_str, expanduser, stdlib_pkgs, str_to_display,
|
||||
)
|
||||
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
|
||||
|
||||
|
@ -751,19 +751,25 @@ def make_subprocess_output_error(
|
|||
:param lines: A list of lines, each ending with a newline.
|
||||
"""
|
||||
command = format_command_args(cmd_args)
|
||||
# Convert `command` to text (unicode in Python 2) so we can use it as
|
||||
# an argument in the unicode format string below. This avoids
|
||||
# "UnicodeDecodeError: 'ascii' codec can't decode byte ..." in Python 2
|
||||
# when the formatted command contains a non-ascii character.
|
||||
command_display = str_to_display(command, desc='command bytes')
|
||||
|
||||
# We know the joined output value ends in a newline.
|
||||
output = ''.join(lines)
|
||||
msg = (
|
||||
# We need to mark this explicitly as a unicode string to avoid
|
||||
# "UnicodeEncodeError: 'ascii' codec can't encode character ..."
|
||||
# errors in Python 2 since e.g. `output` is a unicode string.
|
||||
# Use a unicode string to avoid "UnicodeEncodeError: 'ascii'
|
||||
# codec can't encode character ..." in Python 2 when a format
|
||||
# argument (e.g. `output`) has a non-ascii character.
|
||||
u'Command errored out with exit status {exit_status}:\n'
|
||||
' command: {command}\n'
|
||||
' command: {command_display}\n'
|
||||
' cwd: {cwd}\n'
|
||||
'Complete output ({line_count} lines):\n{output}{divider}'
|
||||
).format(
|
||||
exit_status=exit_status,
|
||||
command=command,
|
||||
command_display=command_display,
|
||||
cwd=cwd,
|
||||
line_count=len(lines),
|
||||
output=output,
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import locale
|
||||
import os
|
||||
|
||||
|
@ -5,7 +7,7 @@ import pytest
|
|||
|
||||
import pip._internal.utils.compat as pip_compat
|
||||
from pip._internal.utils.compat import (
|
||||
console_to_str, expanduser, get_path_uid, native_str,
|
||||
console_to_str, expanduser, get_path_uid, native_str, str_to_display,
|
||||
)
|
||||
|
||||
|
||||
|
@ -45,6 +47,58 @@ def test_get_path_uid_symlink_without_NOFOLLOW(tmpdir, monkeypatch):
|
|||
get_path_uid(fs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, expected', [
|
||||
('abc', u'abc'),
|
||||
# Test text (unicode in Python 2) input.
|
||||
(u'abc', u'abc'),
|
||||
# Test text input with non-ascii characters.
|
||||
(u'déf', u'déf'),
|
||||
])
|
||||
def test_str_to_display(data, expected):
|
||||
actual = str_to_display(data)
|
||||
assert actual == expected, (
|
||||
# Show the encoding for easier troubleshooting.
|
||||
'encoding: {!r}'.format(locale.getpreferredencoding())
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('data, encoding, expected', [
|
||||
# Test str input with non-ascii characters.
|
||||
('déf', 'utf-8', u'déf'),
|
||||
# Test bytes input with non-ascii characters:
|
||||
(u'déf'.encode('utf-8'), 'utf-8', u'déf'),
|
||||
# Test a Windows encoding.
|
||||
(u'déf'.encode('cp1252'), 'cp1252', u'déf'),
|
||||
# Test a Windows encoding with incompatibly encoded text.
|
||||
(u'déf'.encode('utf-8'), 'cp1252', u'dÃ©f'),
|
||||
])
|
||||
def test_str_to_display__encoding(monkeypatch, data, encoding, expected):
|
||||
monkeypatch.setattr(locale, 'getpreferredencoding', lambda: encoding)
|
||||
actual = str_to_display(data)
|
||||
assert actual == expected, (
|
||||
# Show the encoding for easier troubleshooting.
|
||||
'encoding: {!r}'.format(locale.getpreferredencoding())
|
||||
)
|
||||
|
||||
|
||||
def test_str_to_display__decode_error(monkeypatch, caplog):
|
||||
monkeypatch.setattr(locale, 'getpreferredencoding', lambda: 'utf-8')
|
||||
# Encode with an incompatible encoding.
|
||||
data = u'ab'.encode('utf-16')
|
||||
actual = str_to_display(data)
|
||||
|
||||
assert actual == u'\\xff\\xfea\x00b\x00', (
|
||||
# Show the encoding for easier troubleshooting.
|
||||
'encoding: {!r}'.format(locale.getpreferredencoding())
|
||||
)
|
||||
assert len(caplog.records) == 1
|
||||
record = caplog.records[0]
|
||||
assert record.levelname == 'WARNING'
|
||||
assert record.message == (
|
||||
'Bytes object does not appear to be encoded as utf-8'
|
||||
)
|
||||
|
||||
|
||||
def test_console_to_str(monkeypatch):
|
||||
some_bytes = b"a\xE9\xC3\xE9b"
|
||||
encodings = ('ascii', 'utf-8', 'iso-8859-1', 'iso-8859-5',
|
||||
|
|
|
@ -6,6 +6,7 @@ util tests
|
|||
"""
|
||||
import codecs
|
||||
import itertools
|
||||
import locale
|
||||
import os
|
||||
import shutil
|
||||
import stat
|
||||
|
@ -767,10 +768,38 @@ def test_make_subprocess_output_error():
|
|||
assert actual == expected, 'actual: {}'.format(actual)
|
||||
|
||||
|
||||
# This test is mainly important for checking unicode in Python 2.
|
||||
def test_make_subprocess_output_error__unicode():
|
||||
def test_make_subprocess_output_error__non_ascii_command_arg(monkeypatch):
|
||||
"""
|
||||
Test a line with non-ascii unicode characters.
|
||||
Test a command argument with a non-ascii character.
|
||||
"""
|
||||
cmd_args = ['foo', 'déf']
|
||||
if sys.version_info[0] == 2:
|
||||
# Check in Python 2 that the str (bytes object) with the non-ascii
|
||||
# character has the encoding we expect. (This comes from the source
|
||||
# code encoding at the top of the file.)
|
||||
assert cmd_args[1].decode('utf-8') == u'déf'
|
||||
|
||||
# We need to monkeypatch so the encoding will be correct on Windows.
|
||||
monkeypatch.setattr(locale, 'getpreferredencoding', lambda: 'utf-8')
|
||||
actual = make_subprocess_output_error(
|
||||
cmd_args=cmd_args,
|
||||
cwd='/path/to/cwd',
|
||||
lines=[],
|
||||
exit_status=1,
|
||||
)
|
||||
expected = dedent(u"""\
|
||||
Command errored out with exit status 1:
|
||||
command: foo 'déf'
|
||||
cwd: /path/to/cwd
|
||||
Complete output (0 lines):
|
||||
----------------------------------------""")
|
||||
assert actual == expected, u'actual: {}'.format(actual)
|
||||
|
||||
|
||||
# This test is mainly important for checking unicode in Python 2.
|
||||
def test_make_subprocess_output_error__non_ascii_line():
|
||||
"""
|
||||
Test a line with a non-ascii character.
|
||||
"""
|
||||
lines = [u'curly-quote: \u2018\n']
|
||||
actual = make_subprocess_output_error(
|
||||
|
|
Loading…
Reference in New Issue