mirror of
https://github.com/pypa/pip
synced 2023-12-13 21:30:23 +01:00
When the `base_url` is a `[]` protected IPv6 address, the `_clean_link()` function converts `[` to `%5B` and `]` to `%5D`, which renders the `base_url` invalid. For example:

```
Starting new HTTP connection (1): fd00:0:0:236::100:8181
http://fd00:0:0:236::100:8181 "GET /os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt HTTP/1.1" 200 None
Setting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204)) extras to: ()
Looking in indexes: http://[fd00:0:0:236::100]:8181/simple
Collecting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204))
1 location(s) to search for versions of setuptools:
* http://[fd00:0:0:236::100]:8181/simple/setuptools/
Getting page http://[fd00:0:0:236::100]:8181/simple/setuptools/
http://fd00:0:0:236::100:8181 "GET /simple/setuptools/ HTTP/1.1" 200 376
Analyzing links from page http://[fd00:0:0:236::100]:8181/simple/setuptools/
_package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:236::100]:8181/simple/setuptools/)
_link_package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:236::100]:8181/simple/setuptools/)
Found link http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:236::100]:8181/simple/setuptools/), version: 40.6.3
Using version 40.6.3 (newest of versions: 40.6.3)
Could not install packages due to an EnvironmentError.
InvalidURL: Failed to parse: %5bfd00:0:0:236::100%5d:8181
```

This change uses the vendored `urllib` library to split the host part off of the URL before URL-quoting only the path part.

Fixes: #6285

Signed-off-by: Nicolas Bock <nicolasbock@gmail.com>
345 lines
12 KiB
Python
345 lines
12 KiB
Python
import logging
|
|
import os.path
|
|
|
|
import pytest
|
|
from mock import Mock
|
|
from pip._vendor import html5lib, requests
|
|
|
|
from pip._internal.download import PipSession
|
|
from pip._internal.index import (
|
|
Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
|
|
_find_name_version_sep, _get_html_page,
|
|
)
|
|
|
|
|
|
def test_sort_locations_file_expand_dir(data):
    """
    Check that a file:// find-links directory yields files, not URLs, when
    the locations are sorted with ``expand_dir=True``.
    """
    find_links = data.find_links
    finder = PackageFinder([find_links], [], session=PipSession())

    files, urls = finder._sort_locations([find_links], expand_dir=True)

    assert files and not urls, (
        "files and not urls should have been found at find-links url: %s" %
        find_links
    )
|
|
|
|
|
|
def test_sort_locations_file_not_find_link(data):
    """
    Check that a file:// url dir which is not a find-link is reported as a
    URL and is not expanded into files.
    """
    index_location = data.index_url("empty_with_pkg")
    finder = PackageFinder([], [], session=PipSession())

    files, urls = finder._sort_locations([index_location])

    assert urls and not files, "urls, but not files should have been found"
|
|
|
|
|
|
def test_sort_locations_non_existing_path():
    """
    Check that a path which does not exist yields neither files nor URLs.
    """
    missing_path = os.path.join('this', 'doesnt', 'exist')
    finder = PackageFinder([], [], session=PipSession())

    files, urls = finder._sort_locations([missing_path])

    assert not (urls or files), "nothing should have been found"
|
|
|
|
|
|
class TestLink(object):
    """Tests for the filename, extension and fragment accessors of Link."""

    def test_splitext(self):
        """splitext() separates the base name from the extension."""
        link = Link('http://yo/wheel.whl')
        assert link.splitext() == ('wheel', '.whl')

    @pytest.mark.parametrize(
        ("url", "expected"),
        [
            ("http://yo/wheel.whl", "wheel.whl"),
            ("http://yo/wheel", "wheel"),
            # A percent-encoded "+" is decoded in the filename.
            (
                "http://yo/myproject-1.0%2Bfoobar.0-py2.py3-none-any.whl",
                "myproject-1.0+foobar.0-py2.py3-none-any.whl",
            ),
        ],
    )
    def test_filename(self, url, expected):
        """filename is the last path component of the URL."""
        link = Link(url)
        assert link.filename == expected

    def test_no_ext(self):
        """A URL without an extension has an empty ext."""
        link = Link('http://yo/wheel')
        assert link.ext == ''

    def test_ext(self):
        """ext includes the leading dot."""
        link = Link('http://yo/wheel.whl')
        assert link.ext == '.whl'

    def test_ext_fragment(self):
        """A URL fragment does not affect ext."""
        link = Link('http://yo/wheel.whl#frag')
        assert link.ext == '.whl'

    def test_ext_query(self):
        """A query string does not affect ext."""
        link = Link('http://yo/wheel.whl?a=b')
        assert link.ext == '.whl'

    def test_is_wheel(self):
        """A .whl URL is recognised as a wheel."""
        link = Link('http://yo/wheel.whl')
        assert link.is_wheel

    def test_is_wheel_false(self):
        """A URL without the .whl extension is not a wheel."""
        link = Link('http://yo/not_a_wheel')
        assert not link.is_wheel

    def test_fragments(self):
        """egg= and subdirectory= are read from the fragment in any order."""
        # Only an egg fragment is present.
        link = Link('git+https://example.com/package#egg=eggname')
        assert link.egg_fragment == 'eggname'
        assert link.subdirectory_fragment is None

        # Egg first, subdirectory second.
        link = Link(
            'git+https://example.com/package#egg=eggname&subdirectory=subdir'
        )
        assert link.egg_fragment == 'eggname'
        assert link.subdirectory_fragment == 'subdir'

        # Subdirectory first, egg second.
        link = Link(
            'git+https://example.com/package#subdirectory=subdir&egg=eggname'
        )
        assert link.egg_fragment == 'eggname'
        assert link.subdirectory_fragment == 'subdir'
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("html", "url", "expected"),
    [
        # No <base> element: the page URL is expected back.
        (b"<html></html>", "https://example.com/", "https://example.com/"),
        # A single <base href=...> element supplies the expected URL.
        (
            b'<html><head>'
            b'<base href="https://foo.example.com/">'
            b'</head></html>',
            "https://example.com/",
            "https://foo.example.com/",
        ),
        # A <base> without href precedes the one that carries an href.
        (
            b'<html><head>'
            b'<base><base href="https://foo.example.com/">'
            b'</head></html>',
            "https://example.com/",
            "https://foo.example.com/",
        ),
    ],
)
def test_determine_base_url(html, url, expected):
    """
    Check the base URL determined for a parsed HTML document and page URL.
    """
    parsed = html5lib.parse(
        html,
        transport_encoding=None,
        namespaceHTMLElements=False,
    )
    base_url = _determine_base_url(parsed, url)
    assert base_url == expected
|
|
|
|
|
|
class MockLogger(object):
    """Minimal logger stand-in that records whether warning() was called."""

    def __init__(self):
        # Flipped to True by the first call to warning().
        self.called = False

    def warning(self, *_args, **_kwargs):
        """Record the call; the message and its arguments are ignored."""
        self.called = True
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("location", "trusted", "expected"),
    [
        ("http://pypi.org/something", [], True),
        ("https://pypi.org/something", [], False),
        ("git+http://pypi.org/something", [], True),
        ("git+https://pypi.org/something", [], False),
        ("git+ssh://git@pypi.org/something", [], False),
        ("http://localhost", [], False),
        ("http://127.0.0.1", [], False),
        ("http://example.com/something/", [], True),
        ("http://example.com/something/", ["example.com"], False),
        ("http://eXample.com/something/", ["example.cOm"], False),
    ],
)
def test_secure_origin(location, trusted, expected):
    """
    Check whether validating ``location`` triggers a logger warning, given
    the list of trusted hosts.
    """
    warning_recorder = MockLogger()
    package_finder = PackageFinder(
        [], [], session=[], trusted_hosts=trusted,
    )

    package_finder._validate_secure_origin(warning_recorder, location)

    assert warning_recorder.called == expected
|
|
|
|
|
|
def test_get_formatted_locations_basic_auth():
    """
    Test that basic authentication credentials defined in a URL are not
    included in the formatted output.
    """
    finder = PackageFinder(
        [],
        ['https://pypi.org/simple', 'https://user:pass@repo.domain.com'],
        session=[],
    )

    formatted = finder.get_formatted_locations()

    # The user name and the mask are shown; the password itself is not.
    for fragment in ('user', '****'):
        assert fragment in formatted
    assert 'pass' not in formatted
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("egg_info", "canonical_name", "expected"),
    [
        # Trivial.
        ("pip-18.0", "pip", 3),
        ("zope-interface-4.5.0", "zope-interface", 14),

        # Canonicalized name matches non-canonicalized egg info.
        # (pypa/pip#5870)
        ("Jinja2-2.10", "jinja2", 6),
        ("zope.interface-4.5.0", "zope-interface", 14),
        ("zope_interface-4.5.0", "zope-interface", 14),

        # Ambiguous names are resolved using the provided package name.
        ("foo-2-2", "foo", 3),
        ("foo-2-2", "foo-2", 5),

        # Collapsed characters in the egg info are detected.
        ("foo--bar-1.0", "foo-bar", 8),
        ("foo-_bar-1.0", "foo-bar", 8),

        # The package name must not end with a dash (PEP 508), so the
        # first dash is the separator, not the second.
        ("zope.interface--4.5.0", "zope-interface", 14),
        ("zope.interface--", "zope-interface", 14),

        # The version part is missing, but the split function does not
        # care.
        ("zope.interface-", "zope-interface", 14),
    ],
)
def test_find_name_version_sep(egg_info, canonical_name, expected):
    """
    Check the index of the name/version separator found in ``egg_info``.
    """
    separator_index = _find_name_version_sep(egg_info, canonical_name)
    assert separator_index == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("egg_info", "canonical_name"),
    [
        # A dash must follow the package name.
        ("zope.interface4.5.0", "zope-interface"),
        ("zope.interface.4.5.0", "zope-interface"),
        ("zope.interface.-4.5.0", "zope-interface"),
        ("zope.interface", "zope-interface"),
    ],
)
def test_find_name_version_sep_failure(egg_info, canonical_name):
    """
    Check that a ValueError with the expected message is raised when no
    separator can be found.
    """
    expected = "{} does not match {}".format(egg_info, canonical_name)

    with pytest.raises(ValueError) as excinfo:
        _find_name_version_sep(egg_info, canonical_name)

    assert str(excinfo.value) == expected
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("egg_info", "canonical_name", "expected"),
    [
        # Trivial.
        ("pip-18.0", "pip", "18.0"),
        ("zope-interface-4.5.0", "zope-interface", "4.5.0"),

        # Canonicalized name matches non-canonicalized egg info.
        # (pypa/pip#5870)
        ("Jinja2-2.10", "jinja2", "2.10"),
        ("zope.interface-4.5.0", "zope-interface", "4.5.0"),
        ("zope_interface-4.5.0", "zope-interface", "4.5.0"),

        # Ambiguous names are resolved using the provided package name.
        ("foo-2-2", "foo", "2-2"),
        ("foo-2-2", "foo-2", "2"),
        ("zope.interface--4.5.0", "zope-interface", "-4.5.0"),
        ("zope.interface--", "zope-interface", "-"),

        # Collapsed characters in the egg info are detected.
        ("foo--bar-1.0", "foo-bar", "1.0"),
        ("foo-_bar-1.0", "foo-bar", "1.0"),

        # Invalid.
        ("the-package-name-8.19", "does-not-match", None),
        ("zope.interface.-4.5.0", "zope.interface", None),
        ("zope.interface-", "zope-interface", None),
        ("zope.interface4.5.0", "zope-interface", None),
        ("zope.interface.4.5.0", "zope-interface", None),
        ("zope.interface.-4.5.0", "zope-interface", None),
        ("zope.interface", "zope-interface", None),
    ],
)
def test_egg_info_matches(egg_info, canonical_name, expected):
    """
    Check the version extracted from ``egg_info``; ``None`` is expected for
    the invalid cases.
    """
    assert _egg_info_matches(egg_info, canonical_name) == expected
|
|
|
|
|
|
def test_request_http_error(caplog):
    """
    Check that an HTTPError raised by the response makes _get_html_page
    return None and log that the URL is skipped.
    """
    caplog.set_level(logging.DEBUG)

    # The response fails only when its status is checked.
    response = Mock()
    response.raise_for_status.side_effect = requests.HTTPError('Http error')
    session = Mock(PipSession)
    session.get.return_value = response

    page = _get_html_page(Link('http://localhost'), session=session)

    assert page is None
    skip_message = (
        'Could not fetch URL http://localhost: Http error - skipping'
    )
    assert skip_message in caplog.text
|
|
|
|
|
|
def test_request_retries(caplog):
    """
    Check that a RetryError raised by session.get makes _get_html_page
    return None and log that the URL is skipped.
    """
    caplog.set_level(logging.DEBUG)

    # The session fails as soon as the page is requested.
    session = Mock(PipSession)
    session.get.side_effect = requests.exceptions.RetryError('Retry error')

    page = _get_html_page(Link('http://localhost'), session=session)

    assert page is None
    skip_message = (
        'Could not fetch URL http://localhost: Retry error - skipping'
    )
    assert skip_message in caplog.text
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with hostname and port. Port separator should not be quoted.
        (
            "https://localhost.localdomain:8181/path/with space/",
            "https://localhost.localdomain:8181/path/with%20space/",
        ),
        # URL that is already properly quoted. The quoting `%`
        # characters should not be quoted again.
        (
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
        ),
        # URL with IPv4 address and port.
        (
            "https://127.0.0.1:8181/path/with space/",
            "https://127.0.0.1:8181/path/with%20space/",
        ),
        # URL with IPv6 address and port. The `[]` brackets around the
        # IPv6 address should not be quoted.
        (
            "https://[fd00:0:0:236::100]:8181/path/with space/",
            "https://[fd00:0:0:236::100]:8181/path/with%20space/",
        ),
        # URL with query. The leading `?` should not be quoted.
        (
            "https://localhost.localdomain:8181/path/with/query?request=test",
            "https://localhost.localdomain:8181/path/with/query?request=test",
        ),
        # URL with colon in the path portion.
        (
            "https://localhost.localdomain:8181/path:/with:/colon",
            "https://localhost.localdomain:8181/path%3A/with%3A/colon",
        ),
        # URL with something that looks like a drive letter, but is
        # not. The `:` should be quoted.
        (
            "https://localhost.localdomain/T:/path/",
            "https://localhost.localdomain/T%3A/path/",
        ),
    ],
)
def test_clean_link(url, clean_url):
    """Check that ``url`` is cleaned into exactly ``clean_url``."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with Windows drive letter. The `:` after the drive
        # letter should not be quoted. The trailing `/` should be
        # removed.
        (
            "file:///T:/path/with spaces/",
            "file:///T:/path/with%20spaces",
        ),
    ],
)
@pytest.mark.skipif("sys.platform != 'win32'")
def test_clean_link_windows(url, clean_url):
    """Check the cleaning of a drive-letter file URL; runs on win32 only."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with Windows drive letter, running on non-windows
        # platform. The `:` after the drive should be quoted.
        (
            "file:///T:/path/with spaces/",
            "file:///T%3A/path/with%20spaces/",
        ),
    ],
)
@pytest.mark.skipif("sys.platform == 'win32'")
def test_clean_link_non_windows(url, clean_url):
    """Check the cleaning of a drive-letter file URL; skipped on win32."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url
|