1
1
Fork 0
mirror of https://github.com/pypa/pip synced 2023-12-13 21:30:23 +01:00
pip/tests/unit/test_index.py
Nicolas Bock 913757cb53
Do not clean base_url
When the `base_url` is a `[]` protected IPv6 address, the
`_clean_link()` function converts `[` to `%5B` and `]` to `%5D`, which
renders the `base_url` invalid. For example:

```
	Starting new HTTP connection (1): fd00:0:0:236::100:8181
	http://fd00:0:0:236::100:8181 "GET /os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt HTTP/1.1" 200 None
	Setting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204)) extras to: ()
	Looking in indexes: http://[fd00:0:0:236::100]:8181/simple
	Collecting setuptools==40.6.3 (from -c http://[fd00:0:0:236::100]:8181/os-releases/19.0.0.0b1/opensuse_leap-42.3-x86_64/requirements_absolute_requirements.txt (line 204))
	  1 location(s) to search for versions of setuptools:
	  * http://[fd00:0:0:236::100]:8181/simple/setuptools/
	  Getting page http://[fd00:0:0:236::100]:8181/simple/setuptools/
	  http://fd00:0:0:236::100:8181 "GET /simple/setuptools/ HTTP/1.1" 200 376
	  Analyzing links from page http://[fd00:0:0:236::100]:8181/simple/setuptools/
	    _package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:
	236::100]:8181/simple/setuptools/)
	    _link_package_versions: link = http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00
	:0:0:236::100]:8181/simple/setuptools/)
	    Found link http://%5bfd00:0:0:236::100%5d:8181/packages/opensuse_leap-42.3-x86_64/setuptools/setuptools-40.6.3-py2.py3-none-any.whl#md5=389d3cd088d7afec3a1133b1d8e15df0 (from http://[fd00:0:0:236::100]:8181/
	simple/setuptools/), version: 40.6.3
	  Using version 40.6.3 (newest of versions: 40.6.3)
        Could not install packages due to an EnvironmentError.
        InvalidURL: Failed to parse: %5bfd00:0:0:236::100%5d:8181
```

This change uses the vendored `urllib` library to split the host part
off of the url before URL quoting only the path part.

Fixes: #6285
Signed-off-by: Nicolas Bock <nicolasbock@gmail.com>
2019-04-07 05:57:03 -06:00

345 lines
12 KiB
Python

import logging
import os.path
import pytest
from mock import Mock
from pip._vendor import html5lib, requests
from pip._internal.download import PipSession
from pip._internal.index import (
Link, PackageFinder, _clean_link, _determine_base_url, _egg_info_matches,
_find_name_version_sep, _get_html_page,
)
def test_sort_locations_file_expand_dir(data):
    """
    Test that a file:// dir gets listdir run with expand_dir

    The finder is built with ``data.find_links`` as its only find-links
    location; sorting that same location with ``expand_dir=True`` must
    yield files and no URLs.
    """
    finder = PackageFinder([data.find_links], [], session=PipSession())
    files, urls = finder._sort_locations([data.find_links], expand_dir=True)
    assert files and not urls, (
        "files and not urls should have been found at find-links url: %s" %
        data.find_links
    )
def test_sort_locations_file_not_find_link(data):
    """
    Test that a file:// url dir that's not a find-link, doesn't get a listdir
    run

    The finder has no find-links configured, so the index URL passed to
    ``_sort_locations`` must come back as a URL and not as files.
    """
    finder = PackageFinder([], [], session=PipSession())
    files, urls = finder._sort_locations([data.index_url("empty_with_pkg")])
    assert urls and not files, "urls, but not files should have been found"
def test_sort_locations_non_existing_path():
    """
    Test that a non-existing path is ignored.

    A relative path that does not exist must produce neither files nor
    URLs.
    """
    finder = PackageFinder([], [], session=PipSession())
    files, urls = finder._sort_locations(
        [os.path.join('this', 'doesnt', 'exist')])
    assert not urls and not files, "nothing should have been found"
class TestLink(object):
    """Tests for the URL-derived properties of ``Link``."""

    def test_splitext(self):
        """``splitext()`` separates the base name from the extension."""
        assert ('wheel', '.whl') == Link('http://yo/wheel.whl').splitext()

    @pytest.mark.parametrize(
        ("url", "expected"),
        [
            ("http://yo/wheel.whl", "wheel.whl"),
            ("http://yo/wheel", "wheel"),
            # The percent-encoded "+" (%2B) is expected to be decoded.
            (
                "http://yo/myproject-1.0%2Bfoobar.0-py2.py3-none-any.whl",
                "myproject-1.0+foobar.0-py2.py3-none-any.whl",
            ),
        ],
    )
    def test_filename(self, url, expected):
        """``filename`` is the last path component of the URL."""
        assert Link(url).filename == expected

    def test_no_ext(self):
        """``ext`` is the empty string when the URL has no extension."""
        assert '' == Link('http://yo/wheel').ext

    def test_ext(self):
        """``ext`` includes the leading dot."""
        assert '.whl' == Link('http://yo/wheel.whl').ext

    def test_ext_fragment(self):
        """A URL fragment does not affect ``ext``."""
        assert '.whl' == Link('http://yo/wheel.whl#frag').ext

    def test_ext_query(self):
        """A query string does not affect ``ext``."""
        assert '.whl' == Link('http://yo/wheel.whl?a=b').ext

    def test_is_wheel(self):
        """A ``.whl`` URL is reported as a wheel."""
        assert Link('http://yo/wheel.whl').is_wheel

    def test_is_wheel_false(self):
        """A URL without the ``.whl`` extension is not a wheel."""
        assert not Link('http://yo/not_a_wheel').is_wheel

    def test_fragments(self):
        """``egg`` and ``subdirectory`` fragments parse in either order."""
        # Only an egg fragment: the subdirectory fragment is None.
        url = 'git+https://example.com/package#egg=eggname'
        assert 'eggname' == Link(url).egg_fragment
        assert None is Link(url).subdirectory_fragment

        # Egg first, subdirectory second.
        url = 'git+https://example.com/package#egg=eggname&subdirectory=subdir'
        assert 'eggname' == Link(url).egg_fragment
        assert 'subdir' == Link(url).subdirectory_fragment

        # Subdirectory first, egg second.
        url = 'git+https://example.com/package#subdirectory=subdir&egg=eggname'
        assert 'eggname' == Link(url).egg_fragment
        assert 'subdir' == Link(url).subdirectory_fragment
@pytest.mark.parametrize(
    ("html", "url", "expected"),
    [
        # No <base> tag: the page URL is expected back.
        (b"<html></html>", "https://example.com/", "https://example.com/"),
        # A <base href> is expected to override the page URL.
        (
            b"<html><head>"
            b"<base href=\"https://foo.example.com/\">"
            b"</head></html>",
            "https://example.com/",
            "https://foo.example.com/",
        ),
        # A <base> without href precedes one with href; the href is
        # still expected to be used.
        (
            b"<html><head>"
            b"<base><base href=\"https://foo.example.com/\">"
            b"</head></html>",
            "https://example.com/",
            "https://foo.example.com/",
        ),
    ],
)
def test_determine_base_url(html, url, expected):
    """
    Test that ``_determine_base_url`` returns the expected base URL for a
    document parsed from ``html`` and fetched from ``url``.
    """
    document = html5lib.parse(
        html, transport_encoding=None, namespaceHTMLElements=False,
    )
    assert _determine_base_url(document, url) == expected
class MockLogger(object):
    """Minimal logger stand-in that records whether ``warning`` was called."""

    def __init__(self):
        # Flipped to True by the first call to warning(); never reset.
        self.called = False

    def warning(self, *args, **kwargs):
        """Record that a warning was issued; the arguments are ignored."""
        self.called = True
@pytest.mark.parametrize(
    ("location", "trusted", "expected"),
    [
        ("http://pypi.org/something", [], True),
        ("https://pypi.org/something", [], False),
        ("git+http://pypi.org/something", [], True),
        ("git+https://pypi.org/something", [], False),
        ("git+ssh://git@pypi.org/something", [], False),
        ("http://localhost", [], False),
        ("http://127.0.0.1", [], False),
        ("http://example.com/something/", [], True),
        ("http://example.com/something/", ["example.com"], False),
        ("http://eXample.com/something/", ["example.cOm"], False),
    ],
)
def test_secure_origin(location, trusted, expected):
    """
    Test whether validating ``location`` as a secure origin emits a warning.

    ``expected`` is True when a warning should be logged for ``location``
    given the ``trusted`` host list, and False when none should be.
    """
    finder = PackageFinder([], [], session=[], trusted_hosts=trusted)
    logger = MockLogger()
    finder._validate_secure_origin(logger, location)
    assert logger.called == expected
def test_get_formatted_locations_basic_auth():
    """
    Test that basic authentication credentials defined in a URL
    are not included in the formatted output.

    The user name must still appear, the password must not, and a
    ``****`` mask must be present.
    """
    index_urls = [
        'https://pypi.org/simple',
        'https://user:pass@repo.domain.com',
    ]
    finder = PackageFinder([], index_urls, session=[])

    result = finder.get_formatted_locations()
    assert 'user' in result
    assert '****' in result
    assert 'pass' not in result
@pytest.mark.parametrize(
    ("egg_info", "canonical_name", "expected"),
    [
        # Trivial.
        ("pip-18.0", "pip", 3),
        ("zope-interface-4.5.0", "zope-interface", 14),

        # Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
        ("Jinja2-2.10", "jinja2", 6),
        ("zope.interface-4.5.0", "zope-interface", 14),
        ("zope_interface-4.5.0", "zope-interface", 14),

        # Should be smart enough to parse ambiguous names from the provided
        # package name.
        ("foo-2-2", "foo", 3),
        ("foo-2-2", "foo-2", 5),

        # Should be able to detect collapsed characters in the egg info.
        ("foo--bar-1.0", "foo-bar", 8),
        ("foo-_bar-1.0", "foo-bar", 8),

        # The package name must not end with a dash (PEP 508), so the first
        # dash would be the separator, not the second.
        ("zope.interface--4.5.0", "zope-interface", 14),
        ("zope.interface--", "zope-interface", 14),

        # The version part is missing, but the split function does not care.
        ("zope.interface-", "zope-interface", 14),
    ],
)
def test_find_name_version_sep(egg_info, canonical_name, expected):
    """
    Test that ``_find_name_version_sep`` returns the index in ``egg_info``
    of the dash separating the name from the version.
    """
    index = _find_name_version_sep(egg_info, canonical_name)
    assert index == expected
@pytest.mark.parametrize(
    ("egg_info", "canonical_name"),
    [
        # A dash must follow the package name.
        ("zope.interface4.5.0", "zope-interface"),
        ("zope.interface.4.5.0", "zope-interface"),
        ("zope.interface.-4.5.0", "zope-interface"),
        ("zope.interface", "zope-interface"),
    ],
)
def test_find_name_version_sep_failure(egg_info, canonical_name):
    """
    Test that ``_find_name_version_sep`` raises ``ValueError`` with a
    "<egg_info> does not match <canonical_name>" message when no dash
    follows the package name.
    """
    with pytest.raises(ValueError) as ctx:
        _find_name_version_sep(egg_info, canonical_name)
    # Checked after the context manager exits, once ctx.value is populated.
    message = "{} does not match {}".format(egg_info, canonical_name)
    assert str(ctx.value) == message
@pytest.mark.parametrize(
    ("egg_info", "canonical_name", "expected"),
    [
        # Trivial.
        ("pip-18.0", "pip", "18.0"),
        ("zope-interface-4.5.0", "zope-interface", "4.5.0"),

        # Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
        ("Jinja2-2.10", "jinja2", "2.10"),
        ("zope.interface-4.5.0", "zope-interface", "4.5.0"),
        ("zope_interface-4.5.0", "zope-interface", "4.5.0"),

        # Should be smart enough to parse ambiguous names from the provided
        # package name.
        ("foo-2-2", "foo", "2-2"),
        ("foo-2-2", "foo-2", "2"),
        ("zope.interface--4.5.0", "zope-interface", "-4.5.0"),
        ("zope.interface--", "zope-interface", "-"),

        # Should be able to detect collapsed characters in the egg info.
        ("foo--bar-1.0", "foo-bar", "1.0"),
        ("foo-_bar-1.0", "foo-bar", "1.0"),

        # Invalid.
        ("the-package-name-8.19", "does-not-match", None),
        ("zope.interface.-4.5.0", "zope.interface", None),
        ("zope.interface-", "zope-interface", None),
        ("zope.interface4.5.0", "zope-interface", None),
        ("zope.interface.4.5.0", "zope-interface", None),
        ("zope.interface.-4.5.0", "zope-interface", None),
        ("zope.interface", "zope-interface", None),
    ],
)
def test_egg_info_matches(egg_info, canonical_name, expected):
    """
    Test that ``_egg_info_matches`` returns the version part of
    ``egg_info`` for ``canonical_name``, or ``None`` for the invalid cases.
    """
    version = _egg_info_matches(egg_info, canonical_name)
    assert version == expected
def test_request_http_error(caplog):
    """
    Test that ``_get_html_page`` returns ``None`` and logs a "skipping"
    message when the response's ``raise_for_status`` raises ``HTTPError``.
    """
    caplog.set_level(logging.DEBUG)
    link = Link('http://localhost')

    # session.get() succeeds, but the response reports an HTTP error.
    session = Mock(PipSession)
    session.get.return_value = resp = Mock()
    resp.raise_for_status.side_effect = requests.HTTPError('Http error')

    assert _get_html_page(link, session=session) is None
    assert (
        'Could not fetch URL http://localhost: Http error - skipping'
        in caplog.text
    )
def test_request_retries(caplog):
    """
    Test that ``_get_html_page`` returns ``None`` and logs a "skipping"
    message when ``session.get`` itself raises ``RetryError``.
    """
    caplog.set_level(logging.DEBUG)
    link = Link('http://localhost')

    # The request never produces a response: get() raises directly.
    session = Mock(PipSession)
    session.get.side_effect = requests.exceptions.RetryError('Retry error')

    assert _get_html_page(link, session=session) is None
    assert (
        'Could not fetch URL http://localhost: Retry error - skipping'
        in caplog.text
    )
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with hostname and port. Port separator should not be quoted.
        ("https://localhost.localdomain:8181/path/with space/",
         "https://localhost.localdomain:8181/path/with%20space/"),
        # URL that is already properly quoted. The quoting `%`
        # characters should not be quoted again.
        ("https://localhost.localdomain:8181/path/with%20quoted%20space/",
         "https://localhost.localdomain:8181/path/with%20quoted%20space/"),
        # URL with IPv4 address and port.
        ("https://127.0.0.1:8181/path/with space/",
         "https://127.0.0.1:8181/path/with%20space/"),
        # URL with IPv6 address and port. The `[]` brackets around the
        # IPv6 address should not be quoted.
        ("https://[fd00:0:0:236::100]:8181/path/with space/",
         "https://[fd00:0:0:236::100]:8181/path/with%20space/"),
        # URL with query. The leading `?` should not be quoted.
        ("https://localhost.localdomain:8181/path/with/query?request=test",
         "https://localhost.localdomain:8181/path/with/query?request=test"),
        # URL with colon in the path portion.
        ("https://localhost.localdomain:8181/path:/with:/colon",
         "https://localhost.localdomain:8181/path%3A/with%3A/colon"),
        # URL with something that looks like a drive letter, but is
        # not. The `:` should be quoted.
        ("https://localhost.localdomain/T:/path/",
         "https://localhost.localdomain/T%3A/path/"),
    ],
)
def test_clean_link(url, clean_url):
    """
    Test that ``_clean_link`` quotes only the path portion of ``url``,
    leaving the host, port and query untouched.
    """
    assert _clean_link(url) == clean_url
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with Windows drive letter. The `:` after the drive
        # letter should not be quoted. The trailing `/` should be
        # removed.
        ("file:///T:/path/with spaces/",
         "file:///T:/path/with%20spaces"),
    ],
)
@pytest.mark.skipif("sys.platform != 'win32'", reason="Windows-only behavior")
def test_clean_link_windows(url, clean_url):
    """
    Test ``_clean_link`` on a ``file://`` URL with a drive letter.

    Skipped unless ``sys.platform`` is ``win32``.
    """
    assert _clean_link(url) == clean_url
@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with Windows drive letter, running on non-windows
        # platform. The `:` after the drive should be quoted.
        ("file:///T:/path/with spaces/",
         "file:///T%3A/path/with%20spaces/"),
    ],
)
@pytest.mark.skipif("sys.platform == 'win32'", reason="non-Windows behavior")
def test_clean_link_non_windows(url, clean_url):
    """
    Test ``_clean_link`` on a ``file://`` URL that looks like it has a
    drive letter.

    Skipped when ``sys.platform`` is ``win32``.
    """
    assert _clean_link(url) == clean_url