Merge pull request #8223 from uranusjr/unicode-wheel

This commit is contained in:
Pradyun Gedam 2020-05-19 18:02:06 +05:30 committed by GitHub
commit 15f0863a65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 72 deletions

2
news/5712.bugfix Normal file
View File

@ -0,0 +1,2 @@
Correctly treat wheels contenting non-ASCII file contents so they can be
installed on Windows.

View File

@ -1,9 +1,6 @@
"""Support for installing and building the "wheel" binary package format.
"""
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
from __future__ import absolute_import
import collections
@ -24,7 +21,14 @@ from zipfile import ZipFile
from pip._vendor import pkg_resources
from pip._vendor.distlib.scripts import ScriptMaker
from pip._vendor.distlib.util import get_export_entry
from pip._vendor.six import StringIO
from pip._vendor.six import (
PY2,
StringIO,
ensure_str,
ensure_text,
itervalues,
text_type,
)
from pip._internal.exceptions import InstallationError
from pip._internal.locations import get_major_minor_version
@ -43,28 +47,35 @@ if not MYPY_CHECK_RUNNING:
from pip._internal.utils.typing import cast
else:
from email.message import Message
import typing # noqa F401
from typing import (
Dict, List, Optional, Sequence, Tuple, Any,
Iterable, Iterator, Callable, Set, IO, cast
Any,
Callable,
Dict,
IO,
Iterable,
Iterator,
List,
NewType,
Optional,
Sequence,
Set,
Tuple,
Union,
cast,
)
from pip._internal.models.scheme import Scheme
from pip._internal.utils.filesystem import NamedTemporaryFileResult
InstalledCSVRow = Tuple[str, ...]
RecordPath = NewType('RecordPath', text_type)
InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]]
logger = logging.getLogger(__name__)
def normpath(src, p):
# type: (str, str) -> str
return os.path.relpath(src, p).replace(os.path.sep, '/')
def rehash(path, blocksize=1 << 20):
# type: (str, int) -> Tuple[str, str]
# type: (text_type, int) -> Tuple[str, str]
"""Return (encoded_digest, length) for path using hashlib.sha256()"""
h, length = hash_file(path, blocksize)
digest = 'sha256=' + urlsafe_b64encode(
@ -79,14 +90,14 @@ def csv_io_kwargs(mode):
"""Return keyword arguments to properly open a CSV file
in the given mode.
"""
if sys.version_info.major < 3:
if PY2:
return {'mode': '{}b'.format(mode)}
else:
return {'mode': mode, 'newline': ''}
return {'mode': mode, 'newline': '', 'encoding': 'utf-8'}
def fix_script(path):
# type: (str) -> Optional[bool]
# type: (text_type) -> Optional[bool]
"""Replace #!python with #!/path/to/python
Return True if file was changed.
"""
@ -217,9 +228,12 @@ def message_about_scripts_not_on_PATH(scripts):
return "\n".join(msg_lines)
def sorted_outrows(outrows):
# type: (Iterable[InstalledCSVRow]) -> List[InstalledCSVRow]
"""Return the given rows of a RECORD file in sorted order.
def _normalized_outrows(outrows):
# type: (Iterable[InstalledCSVRow]) -> List[Tuple[str, str, str]]
"""Normalize the given rows of a RECORD file.
Items in each row are converted into str. Rows are then sorted to make
the value more predictable for tests.
Each row is a 3-tuple (path, hash, size) and corresponds to a record of
a RECORD file (see PEP 376 and PEP 427 for details). For the rows
@ -234,13 +248,35 @@ def sorted_outrows(outrows):
# coerce each element to a string to avoid a TypeError in this case.
# For additional background, see--
# https://github.com/pypa/pip/issues/5868
return sorted(outrows, key=lambda row: tuple(str(x) for x in row))
return sorted(
(ensure_str(record_path, encoding='utf-8'), hash_, str(size))
for record_path, hash_, size in outrows
)
def _record_to_fs_path(record_path):
# type: (RecordPath) -> text_type
return record_path
def _fs_to_record_path(path, relative_to=None):
# type: (text_type, Optional[text_type]) -> RecordPath
if relative_to is not None:
path = os.path.relpath(path, relative_to)
path = path.replace(os.path.sep, '/')
return cast('RecordPath', path)
def _parse_record_path(record_column):
# type: (str) -> RecordPath
p = ensure_text(record_column, encoding='utf-8')
return cast('RecordPath', p)
def get_csv_rows_for_installed(
old_csv_rows, # type: Iterable[List[str]]
installed, # type: Dict[str, str]
changed, # type: Set[str]
installed, # type: Dict[RecordPath, RecordPath]
changed, # type: Set[RecordPath]
generated, # type: List[str]
lib_dir, # type: str
):
@ -255,21 +291,20 @@ def get_csv_rows_for_installed(
logger.warning(
'RECORD line has more than three elements: {}'.format(row)
)
# Make a copy because we are mutating the row.
row = list(row)
old_path = row[0]
new_path = installed.pop(old_path, old_path)
row[0] = new_path
if new_path in changed:
digest, length = rehash(new_path)
row[1] = digest
row[2] = length
installed_rows.append(tuple(row))
old_record_path = _parse_record_path(row[0])
new_record_path = installed.pop(old_record_path, old_record_path)
if new_record_path in changed:
digest, length = rehash(_record_to_fs_path(new_record_path))
else:
digest = row[1] if len(row) > 1 else ''
length = row[2] if len(row) > 2 else ''
installed_rows.append((new_record_path, digest, length))
for f in generated:
path = _fs_to_record_path(f, lib_dir)
digest, length = rehash(f)
installed_rows.append((normpath(f, lib_dir), digest, str(length)))
for f in installed:
installed_rows.append((installed[f], '', ''))
installed_rows.append((path, digest, length))
for installed_record_path in itervalues(installed):
installed_rows.append((installed_record_path, '', ''))
return installed_rows
@ -338,8 +373,8 @@ def install_unpacked_wheel(
# installed = files copied from the wheel to the destination
# changed = files changed while installing (scripts #! line typically)
# generated = files newly generated during the install (script wrappers)
installed = {} # type: Dict[str, str]
changed = set()
installed = {} # type: Dict[RecordPath, RecordPath]
changed = set() # type: Set[RecordPath]
generated = [] # type: List[str]
# Compile all of the pyc files that we're going to be installing
@ -351,20 +386,20 @@ def install_unpacked_wheel(
logger.debug(stdout.getvalue())
def record_installed(srcfile, destfile, modified=False):
# type: (str, str, bool) -> None
# type: (text_type, text_type, bool) -> None
"""Map archive RECORD paths to installation RECORD paths."""
oldpath = normpath(srcfile, wheeldir)
newpath = normpath(destfile, lib_dir)
oldpath = _fs_to_record_path(srcfile, wheeldir)
newpath = _fs_to_record_path(destfile, lib_dir)
installed[oldpath] = newpath
if modified:
changed.add(destfile)
changed.add(_fs_to_record_path(destfile))
def clobber(
source, # type: str
dest, # type: str
source, # type: text_type
dest, # type: text_type
is_base, # type: bool
fixer=None, # type: Optional[Callable[[str], Any]]
filter=None # type: Optional[Callable[[str], bool]]
fixer=None, # type: Optional[Callable[[text_type], Any]]
filter=None # type: Optional[Callable[[text_type], bool]]
):
# type: (...) -> None
ensure_dir(dest) # common for the 'include' path
@ -423,7 +458,11 @@ def install_unpacked_wheel(
changed = fixer(destfile)
record_installed(srcfile, destfile, changed)
clobber(source, lib_dir, True)
clobber(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),
True,
)
dest_info_dir = os.path.join(lib_dir, info_dir)
@ -432,7 +471,7 @@ def install_unpacked_wheel(
console, gui = get_entrypoints(ep_file)
def is_entrypoint_wrapper(name):
# type: (str) -> bool
# type: (text_type) -> bool
# EP, EP.exe and EP-script.py are scripts generated for
# entry point EP by setuptools
if name.lower().endswith('.exe'):
@ -456,7 +495,13 @@ def install_unpacked_wheel(
filter = is_entrypoint_wrapper
source = os.path.join(wheeldir, datadir, subdir)
dest = getattr(scheme, subdir)
clobber(source, dest, False, fixer=fixer, filter=filter)
clobber(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(dest, encoding=sys.getfilesystemencoding()),
False,
fixer=fixer,
filter=filter,
)
maker = PipScriptMaker(None, scheme.scripts)
@ -606,16 +651,11 @@ def install_unpacked_wheel(
generated=generated,
lib_dir=lib_dir)
with _generate_file(record_path, **csv_io_kwargs('w')) as record_file:
# The type mypy infers for record_file using reveal_type
# is different for Python 3 (typing.IO[Any]) and
# Python 2 (typing.BinaryIO), leading us to explicitly
# cast to typing.IO[str] as a workaround
# for bad Python 2 behaviour
record_file_obj = cast('IO[str]', record_file)
writer = csv.writer(record_file_obj)
writer.writerows(sorted_outrows(rows)) # sort to simplify testing
# The type mypy infers for record_file is different for Python 3
# (typing.IO[Any]) and Python 2 (typing.BinaryIO). We explicitly
# cast to typing.IO[str] as a workaround.
writer = csv.writer(cast('IO[str]', record_file))
writer.writerows(_normalized_outrows(rows))
def install_wheel(

View File

@ -131,7 +131,7 @@ def get_prog():
# Retry every half second for up to 3 seconds
@retry(stop_max_delay=3000, wait_fixed=500)
def rmtree(dir, ignore_errors=False):
# type: (str, bool) -> None
# type: (Text, bool) -> None
shutil.rmtree(dir, ignore_errors=ignore_errors,
onerror=rmtree_errorhandler)
@ -876,7 +876,7 @@ def is_console_interactive():
def hash_file(path, blocksize=1 << 20):
# type: (str, int) -> Tuple[Any, int]
# type: (Text, int) -> Tuple[Any, int]
"""Return (hash, length) for path using hashlib.sha256()
"""

View File

@ -8,6 +8,7 @@ import tempfile
from contextlib import contextmanager
from pip._vendor.contextlib2 import ExitStack
from pip._vendor.six import ensure_text
from pip._internal.utils.misc import enum, rmtree
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
@ -193,7 +194,9 @@ class TempDirectory(object):
"""
self._deleted = True
if os.path.exists(self._path):
rmtree(self._path)
# Make sure to pass unicode on Python 2 to make the contents also
# use unicode, ensuring non-ASCII names and can be represented.
rmtree(ensure_text(self._path))
class AdjacentTempDirectory(TempDirectory):

View File

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import distutils
import glob
import os
@ -125,6 +127,36 @@ def test_basic_install_from_wheel_file(script, data):
result.stdout)
# Installation seems to work, but scripttest fails to check.
# I really don't care now since we're desupporting it soon anyway.
@skip_if_python2
def test_basic_install_from_unicode_wheel(script, data):
"""
Test installing from a wheel (that has a script)
"""
make_wheel(
'unicode_package',
'1.0',
extra_files={
'வணக்கம்/__init__.py': b'',
'வணக்கம்/નમસ્તે.py': b'',
},
).save_to_dir(script.scratch_path)
result = script.pip(
'install', 'unicode_package==1.0', '--no-index',
'--find-links', script.scratch_path,
)
dist_info_folder = script.site_packages / 'unicode_package-1.0.dist-info'
assert dist_info_folder in result.files_created, str(result)
file1 = script.site_packages.joinpath('வணக்கம்', '__init__.py')
assert file1 in result.files_created, str(result)
file2 = script.site_packages.joinpath('வணக்கம்', 'નમસ્તે.py')
assert file2 in result.files_created, str(result)
def test_install_from_wheel_with_headers(script, data):
"""
Test installing from a wheel file with headers

View File

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
"""Tests for wheel binary packages and .dist-info."""
import csv
import logging
@ -114,8 +116,8 @@ def test_raise_for_invalid_entrypoint_fail(entrypoint):
@pytest.mark.parametrize("outrows, expected", [
([
('', '', 'a'),
('', '', ''),
(u'', '', 'a'),
(u'', '', ''),
], [
('', '', ''),
('', '', 'a'),
@ -123,15 +125,23 @@ def test_raise_for_invalid_entrypoint_fail(entrypoint):
([
# Include an int to check avoiding the following error:
# > TypeError: '<' not supported between instances of 'str' and 'int'
('', '', 1),
('', '', ''),
(u'', '', 1),
(u'', '', ''),
], [
('', '', ''),
('', '', 1),
('', '', '1'),
]),
([
# Test the normalization correctly encode everything for csv.writer().
(u'😉', '', 1),
(u'', '', ''),
], [
('', '', ''),
('😉', '', '1'),
]),
])
def test_sorted_outrows(outrows, expected):
actual = wheel.sorted_outrows(outrows)
def test_normalized_outrows(outrows, expected):
actual = wheel._normalized_outrows(outrows)
assert actual == expected
@ -141,7 +151,7 @@ def call_get_csv_rows_for_installed(tmpdir, text):
# Test that an installed file appearing in RECORD has its filename
# updated in the new RECORD file.
installed = {'a': 'z'}
installed = {u'a': 'z'}
changed = set()
generated = []
lib_dir = '/lib/dir'
@ -180,9 +190,9 @@ def test_get_csv_rows_for_installed__long_lines(tmpdir, caplog):
outrows = call_get_csv_rows_for_installed(tmpdir, text)
expected = [
('z', 'b', 'c', 'd'),
('z', 'b', 'c'),
('e', 'f', 'g'),
('h', 'i', 'j', 'k'),
('h', 'i', 'j'),
]
assert outrows == expected