pip/tests/unit/test_index.py

859 lines
31 KiB
Python

import logging
import os.path
import pytest
from mock import Mock
from pip._vendor import html5lib, requests
from pip._vendor.packaging.specifiers import SpecifierSet
from pip._internal.download import PipSession
from pip._internal.index import (
CandidateEvaluator, FormatControl, HTMLPage, Link, LinkEvaluator,
PackageFinder, _check_link_requires_python, _clean_link,
_determine_base_url, _extract_version_from_fragment,
_find_name_version_sep, _get_html_page,
)
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.search_scope import SearchScope
from pip._internal.models.selection_prefs import SelectionPreferences
from pip._internal.models.target_python import TargetPython
from tests.lib import CURRENT_PY_VERSION_INFO, make_test_finder
@pytest.mark.parametrize('requires_python, expected', [
('== 3.6.4', False),
('== 3.6.5', True),
# Test an invalid Requires-Python value.
('invalid', True),
])
def test_check_link_requires_python(requires_python, expected):
version_info = (3, 6, 5)
link = Link('https://example.com', requires_python=requires_python)
actual = _check_link_requires_python(link, version_info)
assert actual == expected
def check_caplog(caplog, expected_level, expected_message):
assert len(caplog.records) == 1
record = caplog.records[0]
assert record.levelname == expected_level
assert record.message == expected_message
@pytest.mark.parametrize('ignore_requires_python, expected', [
(None, (
False, 'DEBUG',
"Link requires a different Python (3.6.5 not in: '== 3.6.4'): "
"https://example.com"
)),
(True, (
True, 'DEBUG',
"Ignoring failed Requires-Python check (3.6.5 not in: '== 3.6.4') "
"for link: https://example.com"
)),
])
def test_check_link_requires_python__incompatible_python(
caplog, ignore_requires_python, expected,
):
"""
Test an incompatible Python.
"""
expected_return, expected_level, expected_message = expected
link = Link('https://example.com', requires_python='== 3.6.4')
caplog.set_level(logging.DEBUG)
actual = _check_link_requires_python(
link, version_info=(3, 6, 5),
ignore_requires_python=ignore_requires_python,
)
assert actual == expected_return
check_caplog(caplog, expected_level, expected_message)
def test_check_link_requires_python__invalid_requires(caplog):
"""
Test the log message for an invalid Requires-Python.
"""
link = Link('https://example.com', requires_python='invalid')
caplog.set_level(logging.DEBUG)
actual = _check_link_requires_python(link, version_info=(3, 6, 5))
assert actual
expected_message = (
"Ignoring invalid Requires-Python ('invalid') for link: "
"https://example.com"
)
check_caplog(caplog, 'DEBUG', expected_message)
class TestLinkEvaluator:
@pytest.mark.parametrize(
'py_version_info,ignore_requires_python,expected', [
((3, 6, 5), None, (True, '1.12')),
# Test an incompatible Python.
((3, 6, 4), None, (False, None)),
# Test an incompatible Python with ignore_requires_python=True.
((3, 6, 4), True, (True, '1.12')),
],
)
def test_evaluate_link(
self, py_version_info, ignore_requires_python, expected,
):
target_python = TargetPython(py_version_info=py_version_info)
evaluator = LinkEvaluator(
project_name='twine',
canonical_name='twine',
formats={'source'},
target_python=target_python,
allow_yanked=True,
ignore_requires_python=ignore_requires_python,
)
link = Link(
'https://example.com/#egg=twine-1.12',
requires_python='== 3.6.5',
)
actual = evaluator.evaluate_link(link)
assert actual == expected
@pytest.mark.parametrize('yanked_reason, allow_yanked, expected', [
(None, True, (True, '1.12')),
(None, False, (True, '1.12')),
('', True, (True, '1.12')),
('', False, (False, 'yanked for reason: <none given>')),
('bad metadata', True, (True, '1.12')),
('bad metadata', False,
(False, 'yanked for reason: bad metadata')),
# Test a unicode string with a non-ascii character.
(u'curly quote: \u2018', True, (True, '1.12')),
(u'curly quote: \u2018', False,
(False, u'yanked for reason: curly quote: \u2018')),
])
def test_evaluate_link__allow_yanked(
self, yanked_reason, allow_yanked, expected,
):
target_python = TargetPython(py_version_info=(3, 6, 4))
evaluator = LinkEvaluator(
project_name='twine',
canonical_name='twine',
formats={'source'},
target_python=target_python,
allow_yanked=allow_yanked,
)
link = Link(
'https://example.com/#egg=twine-1.12',
yanked_reason=yanked_reason,
)
actual = evaluator.evaluate_link(link)
assert actual == expected
def test_evaluate_link__incompatible_wheel(self):
"""
Test an incompatible wheel.
"""
target_python = TargetPython(py_version_info=(3, 6, 4))
# Set the valid tags to an empty list to make sure nothing matches.
target_python._valid_tags = []
evaluator = LinkEvaluator(
project_name='sample',
canonical_name='sample',
formats={'binary'},
target_python=target_python,
allow_yanked=True,
)
link = Link('https://example.com/sample-1.0-py2.py3-none-any.whl')
actual = evaluator.evaluate_link(link)
expected = (
False, "none of the wheel's tags match: py2-none-any, py3-none-any"
)
assert actual == expected
class TestCandidateEvaluator:
def make_mock_candidate(self, version, yanked_reason=None):
url = 'https://example.com/pkg-{}.tar.gz'.format(version)
link = Link(url, yanked_reason=yanked_reason)
candidate = InstallationCandidate('mypackage', version, link)
return candidate
def test_make_found_candidates(self):
specifier = SpecifierSet('<= 1.11')
versions = ['1.10', '1.11', '1.12']
candidates = [
self.make_mock_candidate(version) for version in versions
]
evaluator = CandidateEvaluator()
found_candidates = evaluator.make_found_candidates(
candidates, specifier=specifier,
)
assert found_candidates._candidates == candidates
assert found_candidates._evaluator is evaluator
expected_applicable = candidates[:2]
assert [str(c.version) for c in expected_applicable] == [
'1.10',
'1.11',
]
assert found_candidates._applicable_candidates == expected_applicable
@pytest.mark.parametrize('yanked_reason, expected', [
# Test a non-yanked file.
(None, 0),
# Test a yanked file (has a lower value than non-yanked).
('bad metadata', -1),
])
def test_sort_key__is_yanked(self, yanked_reason, expected):
"""
Test the effect of is_yanked on _sort_key()'s return value.
"""
url = 'https://example.com/mypackage.tar.gz'
link = Link(url, yanked_reason=yanked_reason)
candidate = InstallationCandidate('mypackage', '1.0', link)
evaluator = CandidateEvaluator()
sort_value = evaluator._sort_key(candidate)
# Yanked / non-yanked is reflected in the first element of the tuple.
actual = sort_value[0]
assert actual == expected
def test_get_best_candidate__no_candidates(self):
"""
Test passing an empty list.
"""
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate([])
assert actual is None
def test_get_best_candidate__all_yanked(self, caplog):
"""
Test all candidates yanked.
"""
candidates = [
self.make_mock_candidate('1.0', yanked_reason='bad metadata #1'),
# Put the best candidate in the middle, to test sorting.
self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'),
self.make_mock_candidate('2.0', yanked_reason='bad metadata #2'),
]
expected_best = candidates[1]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates)
assert actual is expected_best
assert str(actual.version) == '3.0'
# Check the log messages.
assert len(caplog.records) == 1
record = caplog.records[0]
assert record.levelname == 'WARNING'
assert record.message == (
'The candidate selected for download or install is a yanked '
"version: 'mypackage' candidate "
'(version 3.0 at https://example.com/pkg-3.0.tar.gz)\n'
'Reason for being yanked: bad metadata #3'
)
@pytest.mark.parametrize('yanked_reason, expected_reason', [
# Test no reason given.
('', '<none given>'),
# Test a unicode string with a non-ascii character.
(u'curly quote: \u2018', u'curly quote: \u2018'),
])
def test_get_best_candidate__yanked_reason(
self, caplog, yanked_reason, expected_reason,
):
"""
Test the log message with various reason strings.
"""
candidates = [
self.make_mock_candidate('1.0', yanked_reason=yanked_reason),
]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates)
assert str(actual.version) == '1.0'
assert len(caplog.records) == 1
record = caplog.records[0]
assert record.levelname == 'WARNING'
expected_message = (
'The candidate selected for download or install is a yanked '
"version: 'mypackage' candidate "
'(version 1.0 at https://example.com/pkg-1.0.tar.gz)\n'
'Reason for being yanked: '
) + expected_reason
assert record.message == expected_message
def test_get_best_candidate__best_yanked_but_not_all(self, caplog):
"""
Test the best candidates being yanked, but not all.
"""
candidates = [
self.make_mock_candidate('4.0', yanked_reason='bad metadata #4'),
# Put the best candidate in the middle, to test sorting.
self.make_mock_candidate('2.0'),
self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'),
self.make_mock_candidate('1.0'),
]
expected_best = candidates[1]
evaluator = CandidateEvaluator()
actual = evaluator.get_best_candidate(candidates)
assert actual is expected_best
assert str(actual.version) == '2.0'
# Check the log messages.
assert len(caplog.records) == 0
class TestPackageFinder:
@pytest.mark.parametrize('allow_all_prereleases, prefer_binary', [
(False, False),
(False, True),
(True, False),
(True, True),
])
def test_create__candidate_evaluator(
self, allow_all_prereleases, prefer_binary,
):
"""
Test that the candidate_evaluator attribute is set correctly.
"""
selection_prefs = SelectionPreferences(
allow_yanked=True,
allow_all_prereleases=allow_all_prereleases,
prefer_binary=prefer_binary,
)
target_python = TargetPython(py_version_info=(3, 7, 3))
target_python._valid_tags = ['tag1', 'tag2']
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=selection_prefs,
session=PipSession(),
target_python=target_python,
)
evaluator = finder.candidate_evaluator
assert evaluator.allow_all_prereleases == allow_all_prereleases
assert evaluator._prefer_binary == prefer_binary
assert evaluator._supported_tags == ['tag1', 'tag2']
def test_create__target_python(self):
"""
Test that the _target_python attribute is set correctly.
"""
target_python = TargetPython(py_version_info=(3, 7, 3))
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=SelectionPreferences(allow_yanked=True),
session=PipSession(),
target_python=target_python,
)
actual_target_python = finder._target_python
# The target_python attribute should be set as is.
assert actual_target_python is target_python
# Check that the attributes weren't reset.
assert actual_target_python.py_version_info == (3, 7, 3)
def test_create__target_python_none(self):
"""
Test passing target_python=None.
"""
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=SelectionPreferences(allow_yanked=True),
session=PipSession(),
target_python=None,
)
# Spot-check the default TargetPython object.
actual_target_python = finder._target_python
assert actual_target_python._given_py_version_info is None
assert actual_target_python.py_version_info == CURRENT_PY_VERSION_INFO
@pytest.mark.parametrize('allow_yanked', [False, True])
def test_create__allow_yanked(self, allow_yanked):
"""
Test that the _allow_yanked attribute is set correctly.
"""
selection_prefs = SelectionPreferences(allow_yanked=allow_yanked)
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=selection_prefs,
session=PipSession(),
)
assert finder._allow_yanked == allow_yanked
@pytest.mark.parametrize('ignore_requires_python', [False, True])
def test_create__ignore_requires_python(self, ignore_requires_python):
"""
Test that the _ignore_requires_python attribute is set correctly.
"""
selection_prefs = SelectionPreferences(
allow_yanked=True,
ignore_requires_python=ignore_requires_python,
)
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=selection_prefs,
session=PipSession(),
)
assert finder._ignore_requires_python == ignore_requires_python
def test_create__format_control(self):
"""
Test that the format_control attribute is set correctly.
"""
format_control = FormatControl(set(), {':all:'})
selection_prefs = SelectionPreferences(
allow_yanked=True,
format_control=format_control,
)
finder = PackageFinder.create(
search_scope=SearchScope([], []),
selection_prefs=selection_prefs,
session=PipSession(),
)
actual_format_control = finder.format_control
assert actual_format_control is format_control
# Check that the attributes weren't reset.
assert actual_format_control.only_binary == {':all:'}
def test_add_trusted_host(self):
# Leave a gap to test how the ordering is affected.
trusted_hosts = ['host1', 'host3']
session = PipSession(insecure_hosts=trusted_hosts)
finder = make_test_finder(
session=session,
trusted_hosts=trusted_hosts,
)
insecure_adapter = session._insecure_adapter
prefix2 = 'https://host2/'
prefix3 = 'https://host3/'
# Confirm some initial conditions as a baseline.
assert finder.trusted_hosts == ['host1', 'host3']
assert session.adapters[prefix3] is insecure_adapter
assert prefix2 not in session.adapters
# Test adding a new host.
finder.add_trusted_host('host2')
assert finder.trusted_hosts == ['host1', 'host3', 'host2']
# Check that prefix3 is still present.
assert session.adapters[prefix3] is insecure_adapter
assert session.adapters[prefix2] is insecure_adapter
# Test that adding the same host doesn't create a duplicate.
finder.add_trusted_host('host3')
assert finder.trusted_hosts == ['host1', 'host3', 'host2'], (
'actual: {}'.format(finder.trusted_hosts)
)
def test_add_trusted_host__logging(self, caplog):
"""
Test logging when add_trusted_host() is called.
"""
trusted_hosts = ['host1']
session = PipSession(insecure_hosts=trusted_hosts)
finder = make_test_finder(
session=session,
trusted_hosts=trusted_hosts,
)
with caplog.at_level(logging.INFO):
# Test adding an existing host.
finder.add_trusted_host('host1', source='somewhere')
finder.add_trusted_host('host2')
# Test calling add_trusted_host() on the same host twice.
finder.add_trusted_host('host2')
actual = [(r.levelname, r.message) for r in caplog.records]
expected = [
('INFO', "adding trusted host: 'host1' (from somewhere)"),
('INFO', "adding trusted host: 'host2'"),
('INFO', "adding trusted host: 'host2'"),
]
assert actual == expected
def test_iter_secure_origins(self):
trusted_hosts = ['host1', 'host2']
finder = make_test_finder(trusted_hosts=trusted_hosts)
actual = list(finder.iter_secure_origins())
assert len(actual) == 8
# Spot-check that SECURE_ORIGINS is included.
assert actual[0] == ('https', '*', '*')
assert actual[-2:] == [
('*', 'host1', '*'),
('*', 'host2', '*'),
]
def test_iter_secure_origins__none_trusted_hosts(self):
"""
Test iter_secure_origins() after passing trusted_hosts=None.
"""
# Use PackageFinder.create() rather than make_test_finder()
# to make sure we're really passing trusted_hosts=None.
search_scope = SearchScope([], [])
selection_prefs = SelectionPreferences(
allow_yanked=True,
)
finder = PackageFinder.create(
search_scope=search_scope,
selection_prefs=selection_prefs,
trusted_hosts=None,
session=object(),
)
actual = list(finder.iter_secure_origins())
assert len(actual) == 6
# Spot-check that SECURE_ORIGINS is included.
assert actual[0] == ('https', '*', '*')
@pytest.mark.parametrize(
'allow_yanked, ignore_requires_python, only_binary, expected_formats',
[
(False, False, {}, frozenset({'binary', 'source'})),
# Test allow_yanked=True.
(True, False, {}, frozenset({'binary', 'source'})),
# Test ignore_requires_python=True.
(False, True, {}, frozenset({'binary', 'source'})),
# Test a non-trivial only_binary.
(False, False, {'twine'}, frozenset({'binary'})),
]
)
def test_make_link_evaluator(
self, allow_yanked, ignore_requires_python, only_binary,
expected_formats,
):
# Create a test TargetPython that we can check for.
target_python = TargetPython(py_version_info=(3, 7))
format_control = FormatControl(set(), only_binary)
finder = PackageFinder(
candidate_evaluator=CandidateEvaluator(),
search_scope=SearchScope([], []),
session=PipSession(),
target_python=target_python,
allow_yanked=allow_yanked,
format_control=format_control,
ignore_requires_python=ignore_requires_python,
)
# Pass a project_name that will be different from canonical_name.
link_evaluator = finder.make_link_evaluator('Twine')
assert link_evaluator.project_name == 'Twine'
assert link_evaluator._canonical_name == 'twine'
assert link_evaluator._allow_yanked == allow_yanked
assert link_evaluator._ignore_requires_python == ignore_requires_python
assert link_evaluator._formats == expected_formats
# Test the _target_python attribute.
actual_target_python = link_evaluator._target_python
# The target_python attribute should be set as is.
assert actual_target_python is target_python
# For good measure, check that the attributes weren't reset.
assert actual_target_python._given_py_version_info == (3, 7)
assert actual_target_python.py_version_info == (3, 7, 0)
def test_sort_locations_file_expand_dir(data):
"""
Test that a file:// dir gets listdir run with expand_dir
"""
finder = make_test_finder(find_links=[data.find_links])
files, urls = finder._sort_locations([data.find_links], expand_dir=True)
assert files and not urls, (
"files and not urls should have been found at find-links url: %s" %
data.find_links
)
def test_sort_locations_file_not_find_link(data):
"""
Test that a file:// url dir that's not a find-link, doesn't get a listdir
run
"""
finder = make_test_finder()
files, urls = finder._sort_locations([data.index_url("empty_with_pkg")])
assert urls and not files, "urls, but not files should have been found"
def test_sort_locations_non_existing_path():
"""
Test that a non-existing path is ignored.
"""
finder = make_test_finder()
files, urls = finder._sort_locations(
[os.path.join('this', 'doesnt', 'exist')])
assert not urls and not files, "nothing should have been found"
@pytest.mark.parametrize(
("html", "url", "expected"),
[
(b"<html></html>", "https://example.com/", "https://example.com/"),
(
b"<html><head>"
b"<base href=\"https://foo.example.com/\">"
b"</head></html>",
"https://example.com/",
"https://foo.example.com/",
),
(
b"<html><head>"
b"<base><base href=\"https://foo.example.com/\">"
b"</head></html>",
"https://example.com/",
"https://foo.example.com/",
),
],
)
def test_determine_base_url(html, url, expected):
document = html5lib.parse(
html, transport_encoding=None, namespaceHTMLElements=False,
)
assert _determine_base_url(document, url) == expected
class MockLogger(object):
def __init__(self):
self.called = False
def warning(self, *args, **kwargs):
self.called = True
@pytest.mark.parametrize(
("location", "trusted", "expected"),
[
("http://pypi.org/something", [], True),
("https://pypi.org/something", [], False),
("git+http://pypi.org/something", [], True),
("git+https://pypi.org/something", [], False),
("git+ssh://git@pypi.org/something", [], False),
("http://localhost", [], False),
("http://127.0.0.1", [], False),
("http://example.com/something/", [], True),
("http://example.com/something/", ["example.com"], False),
("http://eXample.com/something/", ["example.cOm"], False),
],
)
def test_secure_origin(location, trusted, expected):
finder = make_test_finder(trusted_hosts=trusted)
logger = MockLogger()
finder._validate_secure_origin(logger, location)
assert logger.called == expected
@pytest.mark.parametrize(
("fragment", "canonical_name", "expected"),
[
# Trivial.
("pip-18.0", "pip", 3),
("zope-interface-4.5.0", "zope-interface", 14),
# Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
("Jinja2-2.10", "jinja2", 6),
("zope.interface-4.5.0", "zope-interface", 14),
("zope_interface-4.5.0", "zope-interface", 14),
# Should be smart enough to parse ambiguous names from the provided
# package name.
("foo-2-2", "foo", 3),
("foo-2-2", "foo-2", 5),
# Should be able to detect collapsed characters in the egg info.
("foo--bar-1.0", "foo-bar", 8),
("foo-_bar-1.0", "foo-bar", 8),
# The package name must not ends with a dash (PEP 508), so the first
# dash would be the separator, not the second.
("zope.interface--4.5.0", "zope-interface", 14),
("zope.interface--", "zope-interface", 14),
# The version part is missing, but the split function does not care.
("zope.interface-", "zope-interface", 14),
],
)
def test_find_name_version_sep(fragment, canonical_name, expected):
index = _find_name_version_sep(fragment, canonical_name)
assert index == expected
@pytest.mark.parametrize(
("fragment", "canonical_name"),
[
# A dash must follow the package name.
("zope.interface4.5.0", "zope-interface"),
("zope.interface.4.5.0", "zope-interface"),
("zope.interface.-4.5.0", "zope-interface"),
("zope.interface", "zope-interface"),
],
)
def test_find_name_version_sep_failure(fragment, canonical_name):
with pytest.raises(ValueError) as ctx:
_find_name_version_sep(fragment, canonical_name)
message = "{} does not match {}".format(fragment, canonical_name)
assert str(ctx.value) == message
@pytest.mark.parametrize(
("fragment", "canonical_name", "expected"),
[
# Trivial.
("pip-18.0", "pip", "18.0"),
("zope-interface-4.5.0", "zope-interface", "4.5.0"),
# Canonicalized name match non-canonicalized egg info. (pypa/pip#5870)
("Jinja2-2.10", "jinja2", "2.10"),
("zope.interface-4.5.0", "zope-interface", "4.5.0"),
("zope_interface-4.5.0", "zope-interface", "4.5.0"),
# Should be smart enough to parse ambiguous names from the provided
# package name.
("foo-2-2", "foo", "2-2"),
("foo-2-2", "foo-2", "2"),
("zope.interface--4.5.0", "zope-interface", "-4.5.0"),
("zope.interface--", "zope-interface", "-"),
# Should be able to detect collapsed characters in the egg info.
("foo--bar-1.0", "foo-bar", "1.0"),
("foo-_bar-1.0", "foo-bar", "1.0"),
# Invalid.
("the-package-name-8.19", "does-not-match", None),
("zope.interface.-4.5.0", "zope.interface", None),
("zope.interface-", "zope-interface", None),
("zope.interface4.5.0", "zope-interface", None),
("zope.interface.4.5.0", "zope-interface", None),
("zope.interface.-4.5.0", "zope-interface", None),
("zope.interface", "zope-interface", None),
],
)
def test_extract_version_from_fragment(fragment, canonical_name, expected):
version = _extract_version_from_fragment(fragment, canonical_name)
assert version == expected
def test_request_http_error(caplog):
caplog.set_level(logging.DEBUG)
link = Link('http://localhost')
session = Mock(PipSession)
session.get.return_value = resp = Mock()
resp.raise_for_status.side_effect = requests.HTTPError('Http error')
assert _get_html_page(link, session=session) is None
assert (
'Could not fetch URL http://localhost: Http error - skipping'
in caplog.text
)
def test_request_retries(caplog):
caplog.set_level(logging.DEBUG)
link = Link('http://localhost')
session = Mock(PipSession)
session.get.side_effect = requests.exceptions.RetryError('Retry error')
assert _get_html_page(link, session=session) is None
assert (
'Could not fetch URL http://localhost: Retry error - skipping'
in caplog.text
)
@pytest.mark.parametrize(
("url", "clean_url"),
[
# URL with hostname and port. Port separator should not be quoted.
("https://localhost.localdomain:8181/path/with space/",
"https://localhost.localdomain:8181/path/with%20space/"),
# URL that is already properly quoted. The quoting `%`
# characters should not be quoted again.
("https://localhost.localdomain:8181/path/with%20quoted%20space/",
"https://localhost.localdomain:8181/path/with%20quoted%20space/"),
# URL with IPv4 address and port.
("https://127.0.0.1:8181/path/with space/",
"https://127.0.0.1:8181/path/with%20space/"),
# URL with IPv6 address and port. The `[]` brackets around the
# IPv6 address should not be quoted.
("https://[fd00:0:0:236::100]:8181/path/with space/",
"https://[fd00:0:0:236::100]:8181/path/with%20space/"),
# URL with query. The leading `?` should not be quoted.
("https://localhost.localdomain:8181/path/with/query?request=test",
"https://localhost.localdomain:8181/path/with/query?request=test"),
# URL with colon in the path portion.
("https://localhost.localdomain:8181/path:/with:/colon",
"https://localhost.localdomain:8181/path%3A/with%3A/colon"),
# URL with something that looks like a drive letter, but is
# not. The `:` should be quoted.
("https://localhost.localdomain/T:/path/",
"https://localhost.localdomain/T%3A/path/"),
# VCS URL containing revision string.
("git+ssh://example.com/path to/repo.git@1.0#egg=my-package-1.0",
"git+ssh://example.com/path%20to/repo.git@1.0#egg=my-package-1.0")
]
)
def test_clean_link(url, clean_url):
assert(_clean_link(url) == clean_url)
@pytest.mark.parametrize(
("url", "clean_url"),
[
# URL with Windows drive letter. The `:` after the drive
# letter should not be quoted. The trailing `/` should be
# removed.
("file:///T:/path/with spaces/",
"file:///T:/path/with%20spaces")
]
)
@pytest.mark.skipif("sys.platform != 'win32'")
def test_clean_link_windows(url, clean_url):
assert(_clean_link(url) == clean_url)
@pytest.mark.parametrize(
("url", "clean_url"),
[
# URL with Windows drive letter, running on non-windows
# platform. The `:` after the drive should be quoted.
("file:///T:/path/with spaces/",
"file:///T%3A/path/with%20spaces/")
]
)
@pytest.mark.skipif("sys.platform == 'win32'")
def test_clean_link_non_windows(url, clean_url):
assert(_clean_link(url) == clean_url)
class TestHTMLPage:
@pytest.mark.parametrize(
('anchor_html, expected'),
[
# Test not present.
('<a href="/pkg1-1.0.tar.gz"></a>', None),
# Test present with no value.
('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', ''),
# Test the empty string.
('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ''),
# Test a non-empty string.
('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', 'error'),
# Test a value with an escaped character.
('<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt 1"></a>',
'version < 1'),
# Test a yanked reason with a non-ascii character.
(u'<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018"></a>',
u'curlyquote \u2018'),
]
)
def test_iter_links__yanked_reason(self, anchor_html, expected):
html = (
# Mark this as a unicode string for Python 2 since anchor_html
# can contain non-ascii.
u'<html><head><meta charset="utf-8"><head>'
'<body>{}</body></html>'
).format(anchor_html)
html_bytes = html.encode('utf-8')
page = HTMLPage(html_bytes, url='https://example.com/simple/')
links = list(page.iter_links())
link, = links
actual = link.yanked_reason
assert actual == expected