Strip slash before Windows drive letter in path (#10116)

Functions like urllib.parse.urlsplit() parse a file:// URL created from a non-UNC Windows absolute path with a leading slash in the path component:

    >>> from pathlib import Path
    >>> from urllib.parse import urlsplit
    >>> path = Path("C:/Users/VssAdministrator")
    >>> parsed = urlsplit(path.as_uri())
    >>> parsed.path
    '/C:/Users/VssAdministrator'

This value unfortunately does not play well with path functions like open(), so we perform some additional cleanup to strip that leading slash.

This commit also contains some minor cleanup to unify how Windows is detected, and how a file:// URL is fetched.
2023-12-13 21:30:23 +01:00 · 2021-07-22 15:28:43 +08:00 · 2021-07-22 15:28:43 +08:00 · f6b184ccaf
parent 76cd70ac42
commit f6b184ccaf
4 changed files with 42 additions and 11 deletions
--- a/news/10115.bugfix.rst
+++ b/news/10115.bugfix.rst
@ -0,0 +1,3 @@
+Strip leading slash from a ``file://`` URL built from a path with the Windows
+drive notation. This fixes bugs where the ``file://`` URL cannot be correctly
+used as requirement, constraint, or index URLs on Windows.
--- a/src/pip/_internal/req/req_file.py
+++ b/src/pip/_internal/req/req_file.py
@ -16,7 +16,7 @@ from pip._internal.models.search_scope import SearchScope
 from pip._internal.network.session import PipSession
 from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.encoding import auto_decode
-from pip._internal.utils.urls import get_url_scheme, url_to_path
+from pip._internal.utils.urls import get_url_scheme

 if TYPE_CHECKING:
    # NoReturn introduced in 3.6.2; imported only for type checking to maintain
@ -532,20 +532,16 @@ def get_file_content(url, session):
    """
    scheme = get_url_scheme(url)

-    if scheme in ['http', 'https']:
-        # FIXME: catch some errors
+    # Pip has special support for file:// URLs (LocalFSAdapter).
+    if scheme in ['http', 'https', 'file']:
        resp = session.get(url)
        raise_for_status(resp)
        return resp.url, resp.text

-    elif scheme == 'file':
-        url = url_to_path(url)
-
+    # Assume this is a bare path.
    try:
        with open(url, 'rb') as f:
            content = auto_decode(f.read())
    except OSError as exc:
-        raise InstallationError(
-            f'Could not open requirements file: {exc}'
-        )
+        raise InstallationError(f'Could not open requirements file: {exc}')
    return url, content
--- a/src/pip/_internal/utils/urls.py
+++ b/src/pip/_internal/utils/urls.py
@ -1,9 +1,11 @@
 import os
-import sys
+import string
 import urllib.parse
 import urllib.request
 from typing import Optional

+from .compat import WINDOWS
+

 def get_url_scheme(url):
    # type: (str) -> Optional[str]
@ -37,7 +39,7 @@ def url_to_path(url):
    if not netloc or netloc == "localhost":
        # According to RFC 8089, same as empty authority.
        netloc = ""
-    elif sys.platform == "win32":
+    elif WINDOWS:
        # If we have a UNC path, prepend UNC share notation.
        netloc = "\\\\" + netloc
    else:
@ -46,4 +48,18 @@ def url_to_path(url):
        )

    path = urllib.request.url2pathname(netloc + path)
+
+    # On Windows, urlsplit parses the path as something like "/C:/Users/foo".
+    # This creates issues for path-related functions like io.open(), so we try
+    # to detect and strip the leading slash.
+    if (
+        WINDOWS
+        and not netloc  # Not UNC.
+        and len(path) >= 3
+        and path[0] == "/"  # Leading slash to strip.
+        and path[1] in string.ascii_letters  # Drive letter.
+        and path[2:4] in (":", ":/")  # Colon + end of string, or colon + absolute path.
+    ):
+        path = path[1:]
+
    return path
--- a/tests/unit/test_req_file.py
+++ b/tests/unit/test_req_file.py
@ -67,6 +67,22 @@ def parse_reqfile(
        )


+def test_read_file_url(tmp_path):
+    reqs = tmp_path.joinpath("requirements.txt")
+    reqs.write_text("foo")
+    result = list(parse_requirements(reqs.as_posix(), session))
+
+    assert len(result) == 1, result
+    assert result[0].requirement == "foo"
+
+    # The comes_from value has three parts: -r or -c flag, path, and line.
+    # The path value in the middle needs some special logic due to our path
+    # normalization logic.
+    assert result[0].comes_from[:3] == "-r "
+    assert result[0].comes_from[-9:] == " (line 1)"
+    assert os.path.samefile(result[0].comes_from[3:-9], str(reqs))
+
+
 class TestPreprocess:
    """tests for `preprocess`"""