pip/tests/unit/test_collector.py

import logging
import os.path
from textwrap import dedent

import mock
import pytest
from mock import Mock, patch
from pip._vendor import html5lib, requests
from pip._vendor.six.moves.urllib import request as urllib_request

from pip._internal.collector import (
    HTMLPage,
    LinkCollector,
    _clean_link,
    _determine_base_url,
    _get_html_page,
    _get_html_response,
    _NotHTML,
    _NotHTTP,
    group_locations,
)
from pip._internal.download import PipSession
from pip._internal.models.index import PyPI
from pip._internal.models.link import Link
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from tests.lib import make_test_search_scope

if MYPY_CHECK_RUNNING:
    from typing import List, Optional


@pytest.mark.parametrize(
    "url",
    [
        "ftp://python.org/python-3.7.1.zip",
        "file:///opt/data/pip-18.0.tar.gz",
    ],
)
def test_get_html_response_archive_to_naive_scheme(url):
    """
    An archive-looking URL whose scheme cannot be "poked" without
    downloading the data should make `_get_html_response()` raise
    `_NotHTTP`.
    """
    fake_session = mock.Mock(PipSession)

    with pytest.raises(_NotHTTP):
        _get_html_response(url, session=fake_session)


@pytest.mark.parametrize(
    "url, content_type",
    [
        ("http://python.org/python-3.7.1.zip", "application/zip"),
        ("https://pypi.org/pip-18.0.tar.gz", "application/gzip"),
    ],
)
def test_get_html_response_archive_to_http_scheme(url, content_type):
    """
    For an archive-looking URL with an HTTP(S) scheme,
    `_get_html_response()` should send a HEAD request and raise `_NotHTML`
    when the reported Content-Type is not HTML.
    """
    # Attributes of the fake response handed back by ``session.head()``.
    head_response_spec = {
        "request.method": "HEAD",
        "headers": {"Content-Type": content_type},
    }
    session = mock.Mock(PipSession)
    session.head.return_value = mock.Mock(**head_response_spec)

    with pytest.raises(_NotHTML) as excinfo:
        _get_html_response(url, session=session)

    expected_head_call = mock.call.head(url, allow_redirects=True)
    session.assert_has_calls([expected_head_call])
    # The exception carries the offending content type and request method.
    assert excinfo.value.args == (content_type, "HEAD")


@pytest.mark.parametrize(
    "url",
    [
        "http://python.org/python-3.7.1.zip",
        "https://pypi.org/pip-18.0.tar.gz",
    ],
)
def test_get_html_response_archive_to_http_scheme_is_html(url):
    """
    When the HEAD request for an archive-looking URL answers with
    text/html, `_get_html_response()` should go on to GET the page.
    """
    head_response_spec = {
        "request.method": "HEAD",
        "headers": {"Content-Type": "text/html"},
    }
    get_response = mock.Mock(headers={"Content-Type": "text/html"})

    session = mock.Mock(PipSession)
    session.head.return_value = mock.Mock(**head_response_spec)
    session.get.return_value = get_response

    resp = _get_html_response(url, session=session)

    # The full, ordered conversation expected on the session object.
    request_headers = {"Accept": "text/html", "Cache-Control": "max-age=0"}
    expected_calls = [
        mock.call.head(url, allow_redirects=True),
        mock.call.head().raise_for_status(),
        mock.call.get(url, headers=request_headers),
        mock.call.get().raise_for_status(),
    ]

    assert resp is not None
    assert session.mock_calls == expected_calls


@pytest.mark.parametrize(
    "url",
    [
        "https://pypi.org/simple/pip",
        "https://pypi.org/simple/pip/",
        "https://python.org/sitemap.xml",
    ],
)
def test_get_html_response_no_head(url):
    """
    A URL that does not look like an archive should be fetched by
    `_get_html_response()` with a single GET request and no HEAD request.
    """
    # Use a mock for the headers mapping so that the lookup performed on it
    # is recorded and can be asserted below.
    headers_spec = {"get.return_value": "text/html"}
    fake_headers = mock.Mock(**headers_spec)

    session = mock.Mock(PipSession)
    session.get.return_value = mock.Mock(headers=fake_headers)

    resp = _get_html_response(url, session=session)

    request_headers = {"Accept": "text/html", "Cache-Control": "max-age=0"}
    expected_get_calls = [
        mock.call(url, headers=request_headers),
        mock.call().raise_for_status(),
        mock.call().headers.get("Content-Type", ""),
    ]

    assert resp is not None
    assert session.head.call_count == 0
    assert session.get.mock_calls == expected_get_calls


def test_get_html_response_dont_log_clear_text_password(caplog):
    """
    The DEBUG message logged by `_get_html_response()` should show the
    index URL with its password redacted.
    """
    # Use a mock for the headers mapping so the Content-Type lookup works.
    headers_spec = {"get.return_value": "text/html"}
    fake_headers = mock.Mock(**headers_spec)

    session = mock.Mock(PipSession)
    session.get.return_value = mock.Mock(headers=fake_headers)

    caplog.set_level(logging.DEBUG)

    index_url = "https://user:my_password@example.com/simple/"
    resp = _get_html_response(index_url, session=session)

    assert resp is not None

    records = caplog.records
    assert len(records) == 1
    log_record = records[0]
    assert log_record.levelname == 'DEBUG'
    assert log_record.message.splitlines() == [
        "Getting page https://user:****@example.com/simple/",
    ]


@pytest.mark.parametrize(
    ("html", "url", "expected"),
    [
        # No <base> element: the page URL is used.
        (b"<html></html>", "https://example.com/", "https://example.com/"),
        # A <base> element with an href.
        (
            b'<html><head>'
            b'<base href="https://foo.example.com/">'
            b'</head></html>',
            "https://example.com/",
            "https://foo.example.com/",
        ),
        # A <base> element without an href, followed by one with an href.
        (
            b'<html><head>'
            b'<base><base href="https://foo.example.com/">'
            b'</head></html>',
            "https://example.com/",
            "https://foo.example.com/",
        ),
    ],
)
def test_determine_base_url(html, url, expected):
    """
    `_determine_base_url()` should return the expected base URL for the
    parsed document and the URL of the page.
    """
    parsed = html5lib.parse(
        html,
        transport_encoding=None,
        namespaceHTMLElements=False,
    )
    base_url = _determine_base_url(parsed, url)
    assert base_url == expected


@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # Hostname plus port: the port separator must stay unquoted.
        (
            "https://localhost.localdomain:8181/path/with space/",
            "https://localhost.localdomain:8181/path/with%20space/",
        ),
        # Already properly quoted URL: the `%` characters must not be
        # quoted a second time.
        (
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
        ),
        # IPv4 address plus port.
        (
            "https://127.0.0.1:8181/path/with space/",
            "https://127.0.0.1:8181/path/with%20space/",
        ),
        # IPv6 address plus port: the `[]` brackets around the address
        # must stay unquoted.
        (
            "https://[fd00:0:0:236::100]:8181/path/with space/",
            "https://[fd00:0:0:236::100]:8181/path/with%20space/",
        ),
        # URL with a query: the leading `?` must stay unquoted.
        (
            "https://localhost.localdomain:8181/path/with/query?request=test",
            "https://localhost.localdomain:8181/path/with/query?request=test",
        ),
        # Colons inside the path portion.
        (
            "https://localhost.localdomain:8181/path:/with:/colon",
            "https://localhost.localdomain:8181/path%3A/with%3A/colon",
        ),
        # Something that looks like a drive letter but is not one: the
        # `:` must be quoted.
        (
            "https://localhost.localdomain/T:/path/",
            "https://localhost.localdomain/T%3A/path/",
        ),
        # VCS URL containing a revision string.
        (
            "git+ssh://example.com/path to/repo.git@1.0#egg=my-package-1.0",
            "git+ssh://example.com/path%20to/repo.git@1.0#egg=my-package-1.0",
        ),
        # Windows drive letter, running on Windows: the `:` after the
        # drive letter must stay unquoted and the trailing `/` is removed.
        pytest.param(
            "file:///T:/path/with spaces/",
            "file:///T:/path/with%20spaces",
            marks=pytest.mark.skipif("sys.platform != 'win32'"),
        ),
        # Windows drive letter, running on a non-Windows platform: the
        # `:` after the drive letter must be quoted.
        pytest.param(
            "file:///T:/path/with spaces/",
            "file:///T%3A/path/with%20spaces/",
            marks=pytest.mark.skipif("sys.platform == 'win32'"),
        ),
    ]
)
def test_clean_link(url, clean_url):
    """`_clean_link()` should turn each URL into its expected clean form."""
    cleaned = _clean_link(url)
    assert cleaned == clean_url


class TestHTMLPage(object):
    """Tests for the links produced by `HTMLPage.iter_links()`."""

    @pytest.mark.parametrize(
        'anchor_html, expected',
        [
            # Test not present.
            ('<a href="/pkg1-1.0.tar.gz"></a>', None),
            # Test present with no value.
            ('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', ''),
            # Test the empty string.
            ('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ''),
            # Test a non-empty string.
            ('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', 'error'),
            # Test a value with an escaped character.
            ('<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt 1"></a>',
                'version < 1'),
            # Test a yanked reason with a non-ascii character.
            (u'<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018"></a>',
                u'curlyquote \u2018'),
        ]
    )
    def test_iter_links__yanked_reason(self, anchor_html, expected):
        """
        The single link parsed from `anchor_html` should expose the value of
        the anchor's `data-yanked` attribute as `yanked_reason`.
        """
        html = (
            # Mark this as a unicode string for Python 2 since anchor_html
            # can contain non-ascii.
            u'<html><head><meta charset="utf-8"></head>'
            '<body>{}</body></html>'
        ).format(anchor_html)
        html_bytes = html.encode('utf-8')
        page = HTMLPage(html_bytes, url='https://example.com/simple/')
        links = list(page.iter_links())
        # Unpacking also checks that exactly one link was produced.
        link, = links
        actual = link.yanked_reason
        assert actual == expected


def test_request_http_error(caplog):
    """
    When the response's `raise_for_status()` raises an HTTP error,
    `_get_html_page()` should return None and log that the URL is skipped.
    """
    caplog.set_level(logging.DEBUG)

    http_error = requests.HTTPError('Http error')
    response = Mock()
    response.raise_for_status.side_effect = http_error

    session = Mock(PipSession)
    session.get.return_value = response

    page = _get_html_page(Link('http://localhost'), session=session)

    assert page is None
    expected = 'Could not fetch URL http://localhost: Http error - skipping'
    assert expected in caplog.text


def test_request_retries(caplog):
    """
    When the session's `get()` raises a retry error, `_get_html_page()`
    should return None and log that the URL is skipped.
    """
    caplog.set_level(logging.DEBUG)

    retry_error = requests.exceptions.RetryError('Retry error')
    session = Mock(PipSession)
    session.get.side_effect = retry_error

    page = _get_html_page(Link('http://localhost'), session=session)

    assert page is None
    expected = 'Could not fetch URL http://localhost: Retry error - skipping'
    assert expected in caplog.text


@pytest.mark.parametrize(
    "url, vcs_scheme",
    [
        ("svn+http://pypi.org/something", "svn"),
        ("git+https://github.com/pypa/pip.git", "git"),
    ],
)
def test_get_html_page_invalid_scheme(caplog, url, vcs_scheme):
    """`_get_html_page()` should refuse a URL with a VCS scheme.

    For such a URL no page is returned and a single DEBUG record naming
    the VCS scheme and the URL is logged.
    """
    fake_session = mock.Mock(PipSession)

    with caplog.at_level(logging.DEBUG):
        page = _get_html_page(Link(url), session=fake_session)

    expected_record = (
        "pip._internal.collector",
        logging.DEBUG,
        "Cannot look at {} URL {}".format(vcs_scheme, url),
    )

    assert page is None
    assert caplog.record_tuples == [expected_record]


def test_get_html_page_directory_append_index(tmpdir):
    """For a directory URL, `_get_html_page()` should request "index.html".
    """
    dirpath = tmpdir.mkdir("something")
    url_path = urllib_request.pathname2url(dirpath).lstrip("/")
    dir_url = "file:///{}".format(url_path)
    expected_url = "{}/index.html".format(dir_url.rstrip("/"))

    session = mock.Mock(PipSession)
    response_getter = "pip._internal.collector._get_html_response"
    with mock.patch(response_getter) as mock_func:
        _get_html_page(Link(dir_url), session=session)

    # The recorded calls remain available after the patch is undone.
    assert mock_func.mock_calls == [
        mock.call(expected_url, session=session),
    ]


def test_group_locations__file_expand_dir(data):
    """
    With expand_dir, the find-links location should yield files and no urls.
    """
    locations = [data.find_links]
    files, urls = group_locations(locations, expand_dir=True)
    assert files and not urls, (
        "files and not urls should have been found at find-links url: %s" %
        data.find_links
    )


def test_group_locations__file_not_find_link(data):
    """
    Without expand_dir, an index url location should yield urls and no
    files.
    """
    locations = [data.index_url("empty_with_pkg")]
    files, urls = group_locations(locations)
    assert urls and not files, "urls, but not files should have been found"


def test_group_locations__non_existing_path():
    """
    A path that does not exist should yield neither files nor urls.
    """
    missing_path = os.path.join('this', 'doesnt', 'exist')
    files, urls = group_locations([missing_path])
    assert not urls and not files, "nothing should have been found"


def make_fake_html_page(url):
    """
    Build an HTMLPage for `url` whose body holds a single anchor pointing
    at "/abc-1.0.tar.gz#md5=000000000", with empty headers.
    """
    markup = dedent(u"""\
    <html><head><meta name="api-version" value="2" /></head>
    <body>
    <a href="/abc-1.0.tar.gz#md5=000000000">abc-1.0.tar.gz</a>
    </body></html>
    """)
    return HTMLPage(markup.encode('utf-8'), url=url, headers={})


def make_test_link_collector(
    find_links=None,  # type: Optional[List[str]]
):
    # type: (...) -> LinkCollector
    """
    Build a LinkCollector for tests, using a new PipSession and a search
    scope made of the given find-links and the PyPI simple index URL.
    """
    return LinkCollector(
        session=PipSession(),
        search_scope=make_test_search_scope(
            find_links=find_links,
            index_urls=[PyPI.simple_url],
        ),
    )


def check_links_include(links, names):
    """
    Assert that, for every name in `names`, at least one of the given Link
    objects has a URL ending with that name.
    """
    for name in names:
        found = any(link.url.endswith(name) for link in links)
        assert found, 'name {!r} not among links: {}'.format(name, links)


class TestLinkCollector(object):
    """Tests for `LinkCollector`."""

    @patch('pip._internal.collector._get_html_response')
    def test_collect_links(self, mock_get_html_response, data):
        """
        `collect_links()` should request the project's index page once and
        return the links gathered from the find-links location and from
        that page.
        """
        page_url = 'https://pypi.org/simple/twine/'
        mock_get_html_response.return_value = make_fake_html_page(page_url)

        collector = make_test_link_collector(find_links=[data.find_links])
        collected = collector.collect_links('twine')

        mock_get_html_response.assert_called_once_with(
            page_url, session=collector.session,
        )

        # Spot-check the CollectedLinks return value.
        found_files = collected.files
        assert len(found_files) > 20
        check_links_include(found_files, names=['simple-1.0.tar.gz'])

        found_find_links = collected.find_links
        assert len(found_find_links) == 1
        check_links_include(found_find_links, names=['packages'])

        pages = collected.pages
        assert list(pages) == [page_url]
        page_links = pages[page_url]
        assert len(page_links) == 1
        only_link = page_links[0]
        assert only_link.url == (
            'https://pypi.org/abc-1.0.tar.gz#md5=000000000'
        )
Move LinkCollector to a new collector.py module. 2019-09-14 02:51:02 +02:00
			`import logging`
			`import os.path`
			`from textwrap import dedent`

			`import mock`
			`import pytest`
			`from mock import Mock, patch`
			`from pip._vendor import html5lib, requests`
			`from pip._vendor.six.moves.urllib import request as urllib_request`

			`from pip._internal.collector import (`
			`HTMLPage,`
			`LinkCollector,`
			`_clean_link,`
			`_determine_base_url,`
			`_get_html_page,`
			`_get_html_response,`
			`_NotHTML,`
			`_NotHTTP,`
			`group_locations,`
			`)`
			`from pip._internal.download import PipSession`
			`from pip._internal.models.index import PyPI`
			`from pip._internal.models.link import Link`
			`from pip._internal.utils.typing import MYPY_CHECK_RUNNING`
			`from tests.lib import make_test_search_scope`

			`if MYPY_CHECK_RUNNING:`
			`from typing import List, Optional`


			`@pytest.mark.parametrize(`
			`"url",`
			`[`
			`"ftp://python.org/python-3.7.1.zip",`
			`"file:///opt/data/pip-18.0.tar.gz",`
			`],`
			`)`
			`def test_get_html_response_archive_to_naive_scheme(url):`
			`"""`
			`_get_html_response()` should error on an archive-like URL if the scheme
			`does not allow "poking" without getting data.`
			`"""`
			`with pytest.raises(_NotHTTP):`
			`_get_html_response(url, session=mock.Mock(PipSession))`


			`@pytest.mark.parametrize(`
			`"url, content_type",`
			`[`
			`("http://python.org/python-3.7.1.zip", "application/zip"),`
			`("https://pypi.org/pip-18.0.tar.gz", "application/gzip"),`
			`],`
			`)`
			`def test_get_html_response_archive_to_http_scheme(url, content_type):`
			`"""`
			`_get_html_response()` should send a HEAD request on an archive-like URL
			if the scheme supports it, and raise `_NotHTML` if the response isn't HTML.
			`"""`
			`session = mock.Mock(PipSession)`
			`session.head.return_value = mock.Mock(**{`
			`"request.method": "HEAD",`
			`"headers": {"Content-Type": content_type},`
			`})`

			`with pytest.raises(_NotHTML) as ctx:`
			`_get_html_response(url, session=session)`

			`session.assert_has_calls([`
			`mock.call.head(url, allow_redirects=True),`
			`])`
			`assert ctx.value.args == (content_type, "HEAD")`


			`@pytest.mark.parametrize(`
			`"url",`
			`[`
			`"http://python.org/python-3.7.1.zip",`
			`"https://pypi.org/pip-18.0.tar.gz",`
			`],`
			`)`
			`def test_get_html_response_archive_to_http_scheme_is_html(url):`
			`"""`
			`_get_html_response()` should work with archive-like URLs if the HEAD
			`request is responded with text/html.`
			`"""`
			`session = mock.Mock(PipSession)`
			`session.head.return_value = mock.Mock(**{`
			`"request.method": "HEAD",`
			`"headers": {"Content-Type": "text/html"},`
			`})`
			`session.get.return_value = mock.Mock(headers={"Content-Type": "text/html"})`

			`resp = _get_html_response(url, session=session)`

			`assert resp is not None`
			`assert session.mock_calls == [`
			`mock.call.head(url, allow_redirects=True),`
			`mock.call.head().raise_for_status(),`
			`mock.call.get(url, headers={`
			`"Accept": "text/html", "Cache-Control": "max-age=0",`
			`}),`
			`mock.call.get().raise_for_status(),`
			`]`


			`@pytest.mark.parametrize(`
			`"url",`
			`[`
			`"https://pypi.org/simple/pip",`
			`"https://pypi.org/simple/pip/",`
			`"https://python.org/sitemap.xml",`
			`],`
			`)`
			`def test_get_html_response_no_head(url):`
			`"""`
			`_get_html_response()` shouldn't send a HEAD request if the URL does not
			`look like an archive, only the GET request that retrieves data.`
			`"""`
			`session = mock.Mock(PipSession)`

			`# Mock the headers dict to ensure it is accessed.`
			`session.get.return_value = mock.Mock(headers=mock.Mock(**{`
			`"get.return_value": "text/html",`
			`}))`

			`resp = _get_html_response(url, session=session)`

			`assert resp is not None`
			`assert session.head.call_count == 0`
			`assert session.get.mock_calls == [`
			`mock.call(url, headers={`
			`"Accept": "text/html", "Cache-Control": "max-age=0",`
			`}),`
			`mock.call().raise_for_status(),`
			`mock.call().headers.get("Content-Type", ""),`
			`]`


			`def test_get_html_response_dont_log_clear_text_password(caplog):`
			`"""`
			`_get_html_response()` should redact the password from the index URL
			`in its DEBUG log message.`
			`"""`
			`session = mock.Mock(PipSession)`

			`# Mock the headers dict to ensure it is accessed.`
			`session.get.return_value = mock.Mock(headers=mock.Mock(**{`
			`"get.return_value": "text/html",`
			`}))`

			`caplog.set_level(logging.DEBUG)`

			`resp = _get_html_response(`
			`"https://user:my_password@example.com/simple/", session=session`
			`)`

			`assert resp is not None`

			`assert len(caplog.records) == 1`
			`record = caplog.records[0]`
			`assert record.levelname == 'DEBUG'`
			`assert record.message.splitlines() == [`
			`"Getting page https://user:****@example.com/simple/",`
			`]`


			`@pytest.mark.parametrize(`
			`("html", "url", "expected"),`
			`[`
			`(b"<html></html>", "https://example.com/", "https://example.com/"),`
			`(`
			`b"<html><head>"`
			`b"<base href=\"https://foo.example.com/\">"`
			`b"</head></html>",`
			`"https://example.com/",`
			`"https://foo.example.com/",`
			`),`
			`(`
			`b"<html><head>"`
			`b"<base><base href=\"https://foo.example.com/\">"`
			`b"</head></html>",`
			`"https://example.com/",`
			`"https://foo.example.com/",`
			`),`
			`],`
			`)`
			`def test_determine_base_url(html, url, expected):`
			`document = html5lib.parse(`
			`html, transport_encoding=None, namespaceHTMLElements=False,`
			`)`
			`assert _determine_base_url(document, url) == expected`


			`@pytest.mark.parametrize(`
			`("url", "clean_url"),`
			`[`
			`# URL with hostname and port. Port separator should not be quoted.`
			`("https://localhost.localdomain:8181/path/with space/",`
			`"https://localhost.localdomain:8181/path/with%20space/"),`
			# URL that is already properly quoted. The quoting `%`
			`# characters should not be quoted again.`
			`("https://localhost.localdomain:8181/path/with%20quoted%20space/",`
			`"https://localhost.localdomain:8181/path/with%20quoted%20space/"),`
			`# URL with IPv4 address and port.`
			`("https://127.0.0.1:8181/path/with space/",`
			`"https://127.0.0.1:8181/path/with%20space/"),`
			# URL with IPv6 address and port. The `[]` brackets around the
			`# IPv6 address should not be quoted.`
			`("https://[fd00:0:0:236::100]:8181/path/with space/",`
			`"https://[fd00:0:0:236::100]:8181/path/with%20space/"),`
			# URL with query. The leading `?` should not be quoted.
			`("https://localhost.localdomain:8181/path/with/query?request=test",`
			`"https://localhost.localdomain:8181/path/with/query?request=test"),`
			`# URL with colon in the path portion.`
			`("https://localhost.localdomain:8181/path:/with:/colon",`
			`"https://localhost.localdomain:8181/path%3A/with%3A/colon"),`
			`# URL with something that looks like a drive letter, but is`
			# not. The `:` should be quoted.
			`("https://localhost.localdomain/T:/path/",`
			`"https://localhost.localdomain/T%3A/path/"),`
			`# VCS URL containing revision string.`
			`("git+ssh://example.com/path to/repo.git@1.0#egg=my-package-1.0",`
			`"git+ssh://example.com/path%20to/repo.git@1.0#egg=my-package-1.0"),`
			# URL with Windows drive letter. The `:` after the drive
			# letter should not be quoted. The trailing `/` should be
			`# removed.`
			`pytest.param(`
			`"file:///T:/path/with spaces/",`
			`"file:///T:/path/with%20spaces",`
			`marks=pytest.mark.skipif("sys.platform != 'win32'"),`
			`),`
			`# URL with Windows drive letter, running on non-windows`
			# platform. The `:` after the drive should be quoted.
			`pytest.param(`
			`"file:///T:/path/with spaces/",`
			`"file:///T%3A/path/with%20spaces/",`
			`marks=pytest.mark.skipif("sys.platform == 'win32'"),`
			`),`
			`]`
			`)`
			`def test_clean_link(url, clean_url):`
			`assert(_clean_link(url) == clean_url)`


			`class TestHTMLPage:`

			`@pytest.mark.parametrize(`
			`('anchor_html, expected'),`
			`[`
			`# Test not present.`
			`('<a href="/pkg1-1.0.tar.gz"></a>', None),`
			`# Test present with no value.`
			`('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', ''),`
			`# Test the empty string.`
			`('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ''),`
			`# Test a non-empty string.`
			`('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', 'error'),`
			`# Test a value with an escaped character.`
			`('<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt 1"></a>',`
			`'version < 1'),`
			`# Test a yanked reason with a non-ascii character.`
			`(u'<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018"></a>',`
			`u'curlyquote \u2018'),`
			`]`
			`)`
			`def test_iter_links__yanked_reason(self, anchor_html, expected):`
			`html = (`
			`# Mark this as a unicode string for Python 2 since anchor_html`
			`# can contain non-ascii.`
			`u'<html><head><meta charset="utf-8"><head>'`
			`'<body>{}</body></html>'`
			`).format(anchor_html)`
			`html_bytes = html.encode('utf-8')`
			`page = HTMLPage(html_bytes, url='https://example.com/simple/')`
			`links = list(page.iter_links())`
			`link, = links`
			`actual = link.yanked_reason`
			`assert actual == expected`


			`def test_request_http_error(caplog):`
			`caplog.set_level(logging.DEBUG)`
			`link = Link('http://localhost')`
			`session = Mock(PipSession)`
			`session.get.return_value = resp = Mock()`
			`resp.raise_for_status.side_effect = requests.HTTPError('Http error')`
			`assert _get_html_page(link, session=session) is None`
			`assert (`
			`'Could not fetch URL http://localhost: Http error - skipping'`
			`in caplog.text`
			`)`


			`def test_request_retries(caplog):`
			`caplog.set_level(logging.DEBUG)`
			`link = Link('http://localhost')`
			`session = Mock(PipSession)`
			`session.get.side_effect = requests.exceptions.RetryError('Retry error')`
			`assert _get_html_page(link, session=session) is None`
			`assert (`
			`'Could not fetch URL http://localhost: Retry error - skipping'`
			`in caplog.text`
			`)`


			`@pytest.mark.parametrize(`
			`"url, vcs_scheme",`
			`[`
			`("svn+http://pypi.org/something", "svn"),`
			`("git+https://github.com/pypa/pip.git", "git"),`
			`],`
			`)`
			`def test_get_html_page_invalid_scheme(caplog, url, vcs_scheme):`
			"""`_get_html_page()` should error if an invalid scheme is given.

			`Only file:, http:, https:, and ftp: are allowed.`
			`"""`
			`with caplog.at_level(logging.DEBUG):`
			`page = _get_html_page(Link(url), session=mock.Mock(PipSession))`

			`assert page is None`
			`assert caplog.record_tuples == [`
			`(`
			`"pip._internal.collector",`
			`logging.DEBUG,`
			`"Cannot look at {} URL {}".format(vcs_scheme, url),`
			`),`
			`]`


			`def test_get_html_page_directory_append_index(tmpdir):`
			"""`_get_html_page()` should append "index.html" to a directory URL.
			`"""`
			`dirpath = tmpdir.mkdir("something")`
			`dir_url = "file:///{}".format(`
			`urllib_request.pathname2url(dirpath).lstrip("/"),`
			`)`

			`session = mock.Mock(PipSession)`
			`with mock.patch("pip._internal.collector._get_html_response") as mock_func:`
			`_get_html_page(Link(dir_url), session=session)`
			`assert mock_func.mock_calls == [`
			`mock.call(`
			`"{}/index.html".format(dir_url.rstrip("/")),`
			`session=session,`
			`),`
			`]`


			`def test_group_locations__file_expand_dir(data):`
			`"""`
			`Test that a file:// dir gets listdir run with expand_dir`
			`"""`
			`files, urls = group_locations([data.find_links], expand_dir=True)`
			`assert files and not urls, (`
			`"files and not urls should have been found at find-links url: %s" %`
			`data.find_links`
			`)`


			`def test_group_locations__file_not_find_link(data):`
			`"""`
			`Test that a file:// url dir that's not a find-link, doesn't get a listdir`
			`run`
			`"""`
			`files, urls = group_locations([data.index_url("empty_with_pkg")])`
			`assert urls and not files, "urls, but not files should have been found"`


			`def test_group_locations__non_existing_path():`
			`"""`
			`Test that a non-existing path is ignored.`
			`"""`
			`files, urls = group_locations([os.path.join('this', 'doesnt', 'exist')])`
			`assert not urls and not files, "nothing should have been found"`


			`def make_fake_html_page(url):`
			`html = dedent(u"""\`
			`<html><head><meta name="api-version" value="2" /></head>`
			`<body>`
			`<a href="/abc-1.0.tar.gz#md5=000000000">abc-1.0.tar.gz</a>`
			`</body></html>`
			`""")`
			`content = html.encode('utf-8')`
			`headers = {}`
			`return HTMLPage(content, url=url, headers=headers)`


			`def make_test_link_collector(`
			`find_links=None, # type: Optional[List[str]]`
			`):`
			`# type: (...) -> LinkCollector`
			`"""`
			`Create a LinkCollector object for testing purposes.`
			`"""`
			`session = PipSession()`
			`search_scope = make_test_search_scope(`
			`find_links=find_links,`
			`index_urls=[PyPI.simple_url],`
			`)`

			`return LinkCollector(`
			`session=session,`
			`search_scope=search_scope,`
			`)`


			`def check_links_include(links, names):`
			`"""`
			`Assert that the given list of Link objects includes, for each of the`
			`given names, a link whose URL has a base name matching that name.`
			`"""`
			`for name in names:`
			`assert any(link.url.endswith(name) for link in links), (`
			`'name {!r} not among links: {}'.format(name, links)`
			`)`


			`class TestLinkCollector(object):`

			`@patch('pip._internal.collector._get_html_response')`
			`def test_collect_links(self, mock_get_html_response, data):`
			`expected_url = 'https://pypi.org/simple/twine/'`

			`fake_page = make_fake_html_page(expected_url)`
			`mock_get_html_response.return_value = fake_page`

			`link_collector = make_test_link_collector(`
			`find_links=[data.find_links]`
			`)`
			`actual = link_collector.collect_links('twine')`

			`mock_get_html_response.assert_called_once_with(`
			`expected_url, session=link_collector.session,`
			`)`

			`# Spot-check the CollectedLinks return value.`
			`assert len(actual.files) > 20`
			`check_links_include(actual.files, names=['simple-1.0.tar.gz'])`

			`assert len(actual.find_links) == 1`
			`check_links_include(actual.find_links, names=['packages'])`

			`actual_pages = actual.pages`
			`assert list(actual_pages) == [expected_url]`
			`actual_page_links = actual_pages[expected_url]`
			`assert len(actual_page_links) == 1`
			`assert actual_page_links[0].url == (`
			`'https://pypi.org/abc-1.0.tar.gz#md5=000000000'`
			`)`