1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Merge pull request #8223 from uranusjr/unicode-wheel

This commit is contained in:
Pradyun Gedam 2020-05-19 18:02:06 +05:30 committed by GitHub
commit 15f0863a65
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 159 additions and 72 deletions

2
news/5712.bugfix Normal file
View file

@ -0,0 +1,2 @@
Correctly treat wheels containing non-ASCII file contents so they can be
installed on Windows.

View file

@ -1,9 +1,6 @@
"""Support for installing and building the "wheel" binary package format. """Support for installing and building the "wheel" binary package format.
""" """
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
@ -24,7 +21,14 @@ from zipfile import ZipFile
from pip._vendor import pkg_resources from pip._vendor import pkg_resources
from pip._vendor.distlib.scripts import ScriptMaker from pip._vendor.distlib.scripts import ScriptMaker
from pip._vendor.distlib.util import get_export_entry from pip._vendor.distlib.util import get_export_entry
from pip._vendor.six import StringIO from pip._vendor.six import (
PY2,
StringIO,
ensure_str,
ensure_text,
itervalues,
text_type,
)
from pip._internal.exceptions import InstallationError from pip._internal.exceptions import InstallationError
from pip._internal.locations import get_major_minor_version from pip._internal.locations import get_major_minor_version
@ -43,28 +47,35 @@ if not MYPY_CHECK_RUNNING:
from pip._internal.utils.typing import cast from pip._internal.utils.typing import cast
else: else:
from email.message import Message from email.message import Message
import typing # noqa F401
from typing import ( from typing import (
Dict, List, Optional, Sequence, Tuple, Any, Any,
Iterable, Iterator, Callable, Set, IO, cast Callable,
Dict,
IO,
Iterable,
Iterator,
List,
NewType,
Optional,
Sequence,
Set,
Tuple,
Union,
cast,
) )
from pip._internal.models.scheme import Scheme from pip._internal.models.scheme import Scheme
from pip._internal.utils.filesystem import NamedTemporaryFileResult from pip._internal.utils.filesystem import NamedTemporaryFileResult
InstalledCSVRow = Tuple[str, ...] RecordPath = NewType('RecordPath', text_type)
InstalledCSVRow = Tuple[RecordPath, str, Union[int, str]]
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def normpath(src, p):
# type: (str, str) -> str
return os.path.relpath(src, p).replace(os.path.sep, '/')
def rehash(path, blocksize=1 << 20): def rehash(path, blocksize=1 << 20):
# type: (str, int) -> Tuple[str, str] # type: (text_type, int) -> Tuple[str, str]
"""Return (encoded_digest, length) for path using hashlib.sha256()""" """Return (encoded_digest, length) for path using hashlib.sha256()"""
h, length = hash_file(path, blocksize) h, length = hash_file(path, blocksize)
digest = 'sha256=' + urlsafe_b64encode( digest = 'sha256=' + urlsafe_b64encode(
@ -79,14 +90,14 @@ def csv_io_kwargs(mode):
"""Return keyword arguments to properly open a CSV file """Return keyword arguments to properly open a CSV file
in the given mode. in the given mode.
""" """
if sys.version_info.major < 3: if PY2:
return {'mode': '{}b'.format(mode)} return {'mode': '{}b'.format(mode)}
else: else:
return {'mode': mode, 'newline': ''} return {'mode': mode, 'newline': '', 'encoding': 'utf-8'}
def fix_script(path): def fix_script(path):
# type: (str) -> Optional[bool] # type: (text_type) -> Optional[bool]
"""Replace #!python with #!/path/to/python """Replace #!python with #!/path/to/python
Return True if file was changed. Return True if file was changed.
""" """
@ -217,9 +228,12 @@ def message_about_scripts_not_on_PATH(scripts):
return "\n".join(msg_lines) return "\n".join(msg_lines)
def sorted_outrows(outrows): def _normalized_outrows(outrows):
# type: (Iterable[InstalledCSVRow]) -> List[InstalledCSVRow] # type: (Iterable[InstalledCSVRow]) -> List[Tuple[str, str, str]]
"""Return the given rows of a RECORD file in sorted order. """Normalize the given rows of a RECORD file.
Items in each row are converted into str. Rows are then sorted to make
the value more predictable for tests.
Each row is a 3-tuple (path, hash, size) and corresponds to a record of Each row is a 3-tuple (path, hash, size) and corresponds to a record of
a RECORD file (see PEP 376 and PEP 427 for details). For the rows a RECORD file (see PEP 376 and PEP 427 for details). For the rows
@ -234,13 +248,35 @@ def sorted_outrows(outrows):
# coerce each element to a string to avoid a TypeError in this case. # coerce each element to a string to avoid a TypeError in this case.
# For additional background, see-- # For additional background, see--
# https://github.com/pypa/pip/issues/5868 # https://github.com/pypa/pip/issues/5868
return sorted(outrows, key=lambda row: tuple(str(x) for x in row)) return sorted(
(ensure_str(record_path, encoding='utf-8'), hash_, str(size))
for record_path, hash_, size in outrows
)
def _record_to_fs_path(record_path):
    # type: (RecordPath) -> text_type
    """Return the filesystem path for the given RECORD path.

    The RECORD path is returned unchanged; this function only marks the
    point where a RecordPath is handed to filesystem operations.
    """
    return record_path
def _fs_to_record_path(path, relative_to=None):
    # type: (text_type, Optional[text_type]) -> RecordPath
    """Convert a filesystem path into a RECORD path.

    If relative_to is given, path is first made relative to it. The
    platform path separator is then replaced with '/', so the result
    looks the same regardless of the platform's separator.
    """
    if relative_to is not None:
        path = os.path.relpath(path, relative_to)
    path = path.replace(os.path.sep, '/')
    return cast('RecordPath', path)
def _parse_record_path(record_column):
    # type: (str) -> RecordPath
    """Turn the path column of a RECORD row into a RecordPath.

    The column value is passed through ensure_text() with UTF-8 as the
    encoding, and the result is cast to RecordPath for the type checker.
    """
    p = ensure_text(record_column, encoding='utf-8')
    return cast('RecordPath', p)
def get_csv_rows_for_installed( def get_csv_rows_for_installed(
old_csv_rows, # type: Iterable[List[str]] old_csv_rows, # type: Iterable[List[str]]
installed, # type: Dict[str, str] installed, # type: Dict[RecordPath, RecordPath]
changed, # type: Set[str] changed, # type: Set[RecordPath]
generated, # type: List[str] generated, # type: List[str]
lib_dir, # type: str lib_dir, # type: str
): ):
@ -255,21 +291,20 @@ def get_csv_rows_for_installed(
logger.warning( logger.warning(
'RECORD line has more than three elements: {}'.format(row) 'RECORD line has more than three elements: {}'.format(row)
) )
# Make a copy because we are mutating the row. old_record_path = _parse_record_path(row[0])
row = list(row) new_record_path = installed.pop(old_record_path, old_record_path)
old_path = row[0] if new_record_path in changed:
new_path = installed.pop(old_path, old_path) digest, length = rehash(_record_to_fs_path(new_record_path))
row[0] = new_path else:
if new_path in changed: digest = row[1] if len(row) > 1 else ''
digest, length = rehash(new_path) length = row[2] if len(row) > 2 else ''
row[1] = digest installed_rows.append((new_record_path, digest, length))
row[2] = length
installed_rows.append(tuple(row))
for f in generated: for f in generated:
path = _fs_to_record_path(f, lib_dir)
digest, length = rehash(f) digest, length = rehash(f)
installed_rows.append((normpath(f, lib_dir), digest, str(length))) installed_rows.append((path, digest, length))
for f in installed: for installed_record_path in itervalues(installed):
installed_rows.append((installed[f], '', '')) installed_rows.append((installed_record_path, '', ''))
return installed_rows return installed_rows
@ -338,8 +373,8 @@ def install_unpacked_wheel(
# installed = files copied from the wheel to the destination # installed = files copied from the wheel to the destination
# changed = files changed while installing (scripts #! line typically) # changed = files changed while installing (scripts #! line typically)
# generated = files newly generated during the install (script wrappers) # generated = files newly generated during the install (script wrappers)
installed = {} # type: Dict[str, str] installed = {} # type: Dict[RecordPath, RecordPath]
changed = set() changed = set() # type: Set[RecordPath]
generated = [] # type: List[str] generated = [] # type: List[str]
# Compile all of the pyc files that we're going to be installing # Compile all of the pyc files that we're going to be installing
@ -351,20 +386,20 @@ def install_unpacked_wheel(
logger.debug(stdout.getvalue()) logger.debug(stdout.getvalue())
def record_installed(srcfile, destfile, modified=False): def record_installed(srcfile, destfile, modified=False):
# type: (str, str, bool) -> None # type: (text_type, text_type, bool) -> None
"""Map archive RECORD paths to installation RECORD paths.""" """Map archive RECORD paths to installation RECORD paths."""
oldpath = normpath(srcfile, wheeldir) oldpath = _fs_to_record_path(srcfile, wheeldir)
newpath = normpath(destfile, lib_dir) newpath = _fs_to_record_path(destfile, lib_dir)
installed[oldpath] = newpath installed[oldpath] = newpath
if modified: if modified:
changed.add(destfile) changed.add(_fs_to_record_path(destfile))
def clobber( def clobber(
source, # type: str source, # type: text_type
dest, # type: str dest, # type: text_type
is_base, # type: bool is_base, # type: bool
fixer=None, # type: Optional[Callable[[str], Any]] fixer=None, # type: Optional[Callable[[text_type], Any]]
filter=None # type: Optional[Callable[[str], bool]] filter=None # type: Optional[Callable[[text_type], bool]]
): ):
# type: (...) -> None # type: (...) -> None
ensure_dir(dest) # common for the 'include' path ensure_dir(dest) # common for the 'include' path
@ -423,7 +458,11 @@ def install_unpacked_wheel(
changed = fixer(destfile) changed = fixer(destfile)
record_installed(srcfile, destfile, changed) record_installed(srcfile, destfile, changed)
clobber(source, lib_dir, True) clobber(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),
True,
)
dest_info_dir = os.path.join(lib_dir, info_dir) dest_info_dir = os.path.join(lib_dir, info_dir)
@ -432,7 +471,7 @@ def install_unpacked_wheel(
console, gui = get_entrypoints(ep_file) console, gui = get_entrypoints(ep_file)
def is_entrypoint_wrapper(name): def is_entrypoint_wrapper(name):
# type: (str) -> bool # type: (text_type) -> bool
# EP, EP.exe and EP-script.py are scripts generated for # EP, EP.exe and EP-script.py are scripts generated for
# entry point EP by setuptools # entry point EP by setuptools
if name.lower().endswith('.exe'): if name.lower().endswith('.exe'):
@ -456,7 +495,13 @@ def install_unpacked_wheel(
filter = is_entrypoint_wrapper filter = is_entrypoint_wrapper
source = os.path.join(wheeldir, datadir, subdir) source = os.path.join(wheeldir, datadir, subdir)
dest = getattr(scheme, subdir) dest = getattr(scheme, subdir)
clobber(source, dest, False, fixer=fixer, filter=filter) clobber(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(dest, encoding=sys.getfilesystemencoding()),
False,
fixer=fixer,
filter=filter,
)
maker = PipScriptMaker(None, scheme.scripts) maker = PipScriptMaker(None, scheme.scripts)
@ -606,16 +651,11 @@ def install_unpacked_wheel(
generated=generated, generated=generated,
lib_dir=lib_dir) lib_dir=lib_dir)
with _generate_file(record_path, **csv_io_kwargs('w')) as record_file: with _generate_file(record_path, **csv_io_kwargs('w')) as record_file:
# The type mypy infers for record_file is different for Python 3
# The type mypy infers for record_file using reveal_type # (typing.IO[Any]) and Python 2 (typing.BinaryIO). We explicitly
# is different for Python 3 (typing.IO[Any]) and # cast to typing.IO[str] as a workaround.
# Python 2 (typing.BinaryIO), leading us to explicitly writer = csv.writer(cast('IO[str]', record_file))
# cast to typing.IO[str] as a workaround writer.writerows(_normalized_outrows(rows))
# for bad Python 2 behaviour
record_file_obj = cast('IO[str]', record_file)
writer = csv.writer(record_file_obj)
writer.writerows(sorted_outrows(rows)) # sort to simplify testing
def install_wheel( def install_wheel(

View file

@ -131,7 +131,7 @@ def get_prog():
# Retry every half second for up to 3 seconds # Retry every half second for up to 3 seconds
@retry(stop_max_delay=3000, wait_fixed=500) @retry(stop_max_delay=3000, wait_fixed=500)
def rmtree(dir, ignore_errors=False): def rmtree(dir, ignore_errors=False):
# type: (str, bool) -> None # type: (Text, bool) -> None
shutil.rmtree(dir, ignore_errors=ignore_errors, shutil.rmtree(dir, ignore_errors=ignore_errors,
onerror=rmtree_errorhandler) onerror=rmtree_errorhandler)
@ -876,7 +876,7 @@ def is_console_interactive():
def hash_file(path, blocksize=1 << 20): def hash_file(path, blocksize=1 << 20):
# type: (str, int) -> Tuple[Any, int] # type: (Text, int) -> Tuple[Any, int]
"""Return (hash, length) for path using hashlib.sha256() """Return (hash, length) for path using hashlib.sha256()
""" """

View file

@ -8,6 +8,7 @@ import tempfile
from contextlib import contextmanager from contextlib import contextmanager
from pip._vendor.contextlib2 import ExitStack from pip._vendor.contextlib2 import ExitStack
from pip._vendor.six import ensure_text
from pip._internal.utils.misc import enum, rmtree from pip._internal.utils.misc import enum, rmtree
from pip._internal.utils.typing import MYPY_CHECK_RUNNING from pip._internal.utils.typing import MYPY_CHECK_RUNNING
@ -193,7 +194,9 @@ class TempDirectory(object):
""" """
self._deleted = True self._deleted = True
if os.path.exists(self._path): if os.path.exists(self._path):
rmtree(self._path) # Make sure to pass unicode on Python 2 to make the contents also
# use unicode, ensuring non-ASCII names can be represented.
rmtree(ensure_text(self._path))
class AdjacentTempDirectory(TempDirectory): class AdjacentTempDirectory(TempDirectory):

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import distutils import distutils
import glob import glob
import os import os
@ -125,6 +127,36 @@ def test_basic_install_from_wheel_file(script, data):
result.stdout) result.stdout)
# Installation seems to work, but scripttest fails to check.
# I really don't care now since we're desupporting it soon anyway.
@skip_if_python2
def test_basic_install_from_unicode_wheel(script, data):
    """
    Test installing from a wheel whose files have non-ASCII names
    """
    # Build a wheel containing a package directory and a module whose
    # names are entirely non-ASCII.
    make_wheel(
        'unicode_package',
        '1.0',
        extra_files={
            'வணக்கம்/__init__.py': b'',
            'வணக்கம்/નમસ્તે.py': b'',
        },
    ).save_to_dir(script.scratch_path)

    result = script.pip(
        'install', 'unicode_package==1.0', '--no-index',
        '--find-links', script.scratch_path,
    )
    dist_info_folder = script.site_packages / 'unicode_package-1.0.dist-info'
    assert dist_info_folder in result.files_created, str(result)

    # Both non-ASCII files must be reported as created in site-packages.
    file1 = script.site_packages.joinpath('வணக்கம்', '__init__.py')
    assert file1 in result.files_created, str(result)

    file2 = script.site_packages.joinpath('வணக்கம்', 'નમસ્તે.py')
    assert file2 in result.files_created, str(result)
def test_install_from_wheel_with_headers(script, data): def test_install_from_wheel_with_headers(script, data):
""" """
Test installing from a wheel file with headers Test installing from a wheel file with headers

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
"""Tests for wheel binary packages and .dist-info.""" """Tests for wheel binary packages and .dist-info."""
import csv import csv
import logging import logging
@ -114,8 +116,8 @@ def test_raise_for_invalid_entrypoint_fail(entrypoint):
@pytest.mark.parametrize("outrows, expected", [ @pytest.mark.parametrize("outrows, expected", [
([ ([
('', '', 'a'), (u'', '', 'a'),
('', '', ''), (u'', '', ''),
], [ ], [
('', '', ''), ('', '', ''),
('', '', 'a'), ('', '', 'a'),
@ -123,15 +125,23 @@ def test_raise_for_invalid_entrypoint_fail(entrypoint):
([ ([
# Include an int to check avoiding the following error: # Include an int to check avoiding the following error:
# > TypeError: '<' not supported between instances of 'str' and 'int' # > TypeError: '<' not supported between instances of 'str' and 'int'
('', '', 1), (u'', '', 1),
('', '', ''), (u'', '', ''),
], [ ], [
('', '', ''), ('', '', ''),
('', '', 1), ('', '', '1'),
]),
([
# Test that the normalization correctly encodes everything for csv.writer().
(u'😉', '', 1),
(u'', '', ''),
], [
('', '', ''),
('😉', '', '1'),
]), ]),
]) ])
def test_sorted_outrows(outrows, expected): def test_normalized_outrows(outrows, expected):
actual = wheel.sorted_outrows(outrows) actual = wheel._normalized_outrows(outrows)
assert actual == expected assert actual == expected
@ -141,7 +151,7 @@ def call_get_csv_rows_for_installed(tmpdir, text):
# Test that an installed file appearing in RECORD has its filename # Test that an installed file appearing in RECORD has its filename
# updated in the new RECORD file. # updated in the new RECORD file.
installed = {'a': 'z'} installed = {u'a': 'z'}
changed = set() changed = set()
generated = [] generated = []
lib_dir = '/lib/dir' lib_dir = '/lib/dir'
@ -180,9 +190,9 @@ def test_get_csv_rows_for_installed__long_lines(tmpdir, caplog):
outrows = call_get_csv_rows_for_installed(tmpdir, text) outrows = call_get_csv_rows_for_installed(tmpdir, text)
expected = [ expected = [
('z', 'b', 'c', 'd'), ('z', 'b', 'c'),
('e', 'f', 'g'), ('e', 'f', 'g'),
('h', 'i', 'j', 'k'), ('h', 'i', 'j'),
] ]
assert outrows == expected assert outrows == expected