1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00

Strip slash before Windows drive letter in path (#10116)

Functions like urllib.parse.urlsplit() parse a file:// URL created from
a non-UNC Windows absolute path with a leading slash in the path
component:

>>> from pathlib import Path
>>> from urllib.parse import urlsplit
>>> path = Path("C:/Users/VssAdministrator")
>>> parsed = urlsplit(path.as_uri())
>>> parsed.path
'/C:/Users/VssAdministrator'

This value unfortunately does not play well with path functions like
open(), so we perform some additional cleanup to strip that leading
slash.

This commit also contains some minor cleanup to unify how Windows is
detected, and how a file:// URL is fetched.
This commit is contained in:
Tzu-ping Chung 2021-07-22 15:28:43 +08:00 committed by GitHub
parent 76cd70ac42
commit f6b184ccaf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 11 deletions

3
news/10115.bugfix.rst Normal file
View file

@ -0,0 +1,3 @@
Strip leading slash from a ``file://`` URL built from a path with the Windows
drive notation. This fixes bugs where the ``file://`` URL cannot be correctly
used as requirement, constraint, or index URLs on Windows.

View file

@ -16,7 +16,7 @@ from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.encoding import auto_decode
from pip._internal.utils.urls import get_url_scheme, url_to_path
from pip._internal.utils.urls import get_url_scheme
if TYPE_CHECKING:
# NoReturn introduced in 3.6.2; imported only for type checking to maintain
@ -532,20 +532,16 @@ def get_file_content(url, session):
"""
scheme = get_url_scheme(url)
if scheme in ['http', 'https']:
# FIXME: catch some errors
# Pip has special support for file:// URLs (LocalFSAdapter).
if scheme in ['http', 'https', 'file']:
resp = session.get(url)
raise_for_status(resp)
return resp.url, resp.text
elif scheme == 'file':
url = url_to_path(url)
# Assume this is a bare path.
try:
with open(url, 'rb') as f:
content = auto_decode(f.read())
except OSError as exc:
raise InstallationError(
f'Could not open requirements file: {exc}'
)
raise InstallationError(f'Could not open requirements file: {exc}')
return url, content

View file

@ -1,9 +1,11 @@
import os
import sys
import string
import urllib.parse
import urllib.request
from typing import Optional
from .compat import WINDOWS
def get_url_scheme(url):
# type: (str) -> Optional[str]
@ -37,7 +39,7 @@ def url_to_path(url):
if not netloc or netloc == "localhost":
# According to RFC 8089, same as empty authority.
netloc = ""
elif sys.platform == "win32":
elif WINDOWS:
# If we have a UNC path, prepend UNC share notation.
netloc = "\\\\" + netloc
else:
@ -46,4 +48,18 @@ def url_to_path(url):
)
path = urllib.request.url2pathname(netloc + path)
# On Windows, urlsplit parses the path as something like "/C:/Users/foo".
# This creates issues for path-related functions like io.open(), so we try
# to detect and strip the leading slash.
if (
WINDOWS
and not netloc # Not UNC.
and len(path) >= 3
and path[0] == "/" # Leading slash to strip.
and path[1] in string.ascii_letters # Drive letter.
and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
):
path = path[1:]
return path

View file

@ -67,6 +67,22 @@ def parse_reqfile(
)
def test_read_file_url(tmp_path):
    """Check that a requirements file given by its POSIX-style path is parsed.

    A one-line requirements file containing ``foo`` is written under
    ``tmp_path`` and handed to ``parse_requirements`` via ``as_posix()``.
    Exactly one requirement is expected, and its ``comes_from`` string must
    point back at the file that was written.
    """
    req_file = tmp_path.joinpath("requirements.txt")
    req_file.write_text("foo")

    # NOTE(review): ``session`` is not a parameter of this test, so it is
    # looked up in the enclosing module -- confirm that is the intended object.
    parsed = list(parse_requirements(req_file.as_posix(), session))
    assert len(parsed) == 1, parsed

    entry = parsed[0]
    assert entry.requirement == "foo"

    # ``comes_from`` is laid out as "<-r/-c flag> <path> (line N)". The path
    # in the middle may have been normalized, so the fixed-width head and tail
    # are compared literally and the path is compared by file identity.
    origin = entry.comes_from
    head, middle, tail = origin[:3], origin[3:-9], origin[-9:]
    assert head == "-r "
    assert tail == " (line 1)"
    assert os.path.samefile(middle, str(req_file))
class TestPreprocess:
"""tests for `preprocess`"""