import itertools
import json
import logging
import os
import re
import uuid
from pathlib import Path
from textwrap import dedent
from typing import Dict, List, Optional, Tuple
from unittest import mock

import pytest
from pip._vendor import requests
from pip._vendor.packaging.requirements import Requirement

from pip._internal.exceptions import NetworkConnectionError
from pip._internal.index.collector import (
    IndexContent,
    LinkCollector,
    _get_index_content,
    _get_simple_response,
    _make_index_content,
    _NotAPIContent,
    _NotHTTP,
    parse_links,
)
from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.index import PyPI
from pip._internal.models.link import (
    Link,
    LinkHash,
    _clean_url_path,
    _ensure_quoted_url,
)
from pip._internal.network.session import PipSession
from tests.lib import TestData, make_test_link_collector

ACCEPT = ", ".join(
    [
        "application/vnd.pypi.simple.v1+json",
        "application/vnd.pypi.simple.v1+html; q=0.1",
        "text/html; q=0.01",
    ]
)
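# The q-values above express falling preference for content negotiation: the
# JSON Simple API is preferred, with the HTML flavors as fallbacks.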


@pytest.mark.parametrize(
    "url",
    [
        "ftp://python.org/python-3.7.1.zip",
        "file:///opt/data/pip-18.0.tar.gz",
    ],
)
def test_get_simple_response_archive_to_naive_scheme(url: str) -> None:
    """
    `_get_simple_response()` should error on an archive-like URL if the scheme
    does not allow "poking" without getting data.
    """
    with pytest.raises(_NotHTTP):
        _get_simple_response(url, session=mock.Mock(PipSession))


@pytest.mark.parametrize(
    "url, content_type",
    [
        ("http://python.org/python-3.7.1.zip", "application/zip"),
        ("https://pypi.org/pip-18.0.tar.gz", "application/gzip"),
    ],
)
@mock.patch("pip._internal.index.collector.raise_for_status")
def test_get_simple_response_archive_to_http_scheme(
    mock_raise_for_status: mock.Mock, url: str, content_type: str
) -> None:
"""
|
2022-06-25 20:45:45 +02:00
|
|
|
`_get_simple_response()` should send a HEAD request on an archive-like URL
|
|
|
|
if the scheme supports it, and raise `_NotAPIContent` if the response isn't HTML.
|
2019-09-14 02:51:02 +02:00
|
|
|
"""
    session = mock.Mock(PipSession)
    session.head.return_value = mock.Mock(
        **{
            "request.method": "HEAD",
            "headers": {"Content-Type": content_type},
        }
    )

    with pytest.raises(_NotAPIContent) as ctx:
        _get_simple_response(url, session=session)

    session.assert_has_calls(
        [
            mock.call.head(url, allow_redirects=True),
        ]
    )
    mock_raise_for_status.assert_called_once_with(session.head.return_value)
    assert ctx.value.args == (content_type, "HEAD")


@pytest.mark.parametrize(
    "url",
    [
        ("ftp://python.org/python-3.7.1.zip"),
        ("file:///opt/data/pip-18.0.tar.gz"),
    ],
)
def test_get_index_content_invalid_content_type_archive(
    caplog: pytest.LogCaptureFixture, url: str
) -> None:
"""`_get_index_content()` should warn if an archive URL is not HTML
|
2020-05-23 20:47:04 +02:00
|
|
|
and therefore cannot be used for a HEAD request.
|
|
|
|
"""
    caplog.set_level(logging.WARNING)
    link = Link(url)

    session = mock.Mock(PipSession)

    assert _get_index_content(link, session=session) is None
    assert (
        "pip._internal.index.collector",
        logging.WARNING,
        "Skipping page {} because it looks like an archive, and cannot "
        "be checked by a HTTP HEAD request.".format(url),
    ) in caplog.record_tuples


@pytest.mark.parametrize(
    "url",
    [
        "http://python.org/python-3.7.1.zip",
        "https://pypi.org/pip-18.0.tar.gz",
    ],
)
@mock.patch("pip._internal.index.collector.raise_for_status")
def test_get_simple_response_archive_to_http_scheme_is_html(
    mock_raise_for_status: mock.Mock, url: str
) -> None:
"""
|
2022-06-25 20:45:45 +02:00
|
|
|
`_get_simple_response()` should work with archive-like URLs if the HEAD
|
2019-09-14 02:51:02 +02:00
|
|
|
request is responded with text/html.
|
|
|
|
"""
    session = mock.Mock(PipSession)
    session.head.return_value = mock.Mock(
        **{
            "request.method": "HEAD",
            "headers": {"Content-Type": "text/html"},
        }
    )
    session.get.return_value = mock.Mock(headers={"Content-Type": "text/html"})

    resp = _get_simple_response(url, session=session)

    assert resp is not None
    assert session.mock_calls == [
        mock.call.head(url, allow_redirects=True),
        mock.call.get(
            url,
            headers={
                "Accept": ACCEPT,
                "Cache-Control": "max-age=0",
            },
        ),
    ]
    assert mock_raise_for_status.mock_calls == [
        mock.call(session.head.return_value),
        mock.call(resp),
    ]


@pytest.mark.parametrize(
    "url",
    [
        "https://pypi.org/simple/pip",
        "https://pypi.org/simple/pip/",
        "https://python.org/sitemap.xml",
    ],
)
@mock.patch("pip._internal.index.collector.raise_for_status")
def test_get_simple_response_no_head(
    mock_raise_for_status: mock.Mock, url: str
) -> None:
"""
|
2022-06-25 20:45:45 +02:00
|
|
|
`_get_simple_response()` shouldn't send a HEAD request if the URL does not
|
2019-09-14 02:51:02 +02:00
|
|
|
look like an archive, only the GET request that retrieves data.
|
|
|
|
"""
    session = mock.Mock(PipSession)

    # Mock the headers dict to ensure it is accessed.
    session.get.return_value = mock.Mock(
        headers=mock.Mock(
            **{
                "get.return_value": "text/html",
            }
        )
    )

    resp = _get_simple_response(url, session=session)

    assert resp is not None
    assert session.head.call_count == 0
    assert session.get.mock_calls == [
        mock.call(
            url,
            headers={
                "Accept": ACCEPT,
                "Cache-Control": "max-age=0",
            },
        ),
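        # The Content-Type header ends up being read twice by the collector,
        # which is why the same call is recorded twice below.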
        mock.call().headers.get("Content-Type", "Unknown"),
        mock.call().headers.get("Content-Type", "Unknown"),
    ]
    mock_raise_for_status.assert_called_once_with(resp)


@mock.patch("pip._internal.index.collector.raise_for_status")
def test_get_simple_response_dont_log_clear_text_password(
    mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture
) -> None:
    """
    `_get_simple_response()` should redact the password from the index URL
    in its DEBUG log message.
    """
    session = mock.Mock(PipSession)

    # Mock the headers dict to ensure it is accessed.
    session.get.return_value = mock.Mock(
        headers=mock.Mock(
            **{
                "get.return_value": "text/html",
            }
        )
    )

    caplog.set_level(logging.DEBUG)

    resp = _get_simple_response(
        "https://user:my_password@example.com/simple/", session=session
    )

    assert resp is not None
    mock_raise_for_status.assert_called_once_with(resp)

    assert len(caplog.records) == 2
    record = caplog.records[0]
    assert record.levelname == "DEBUG"
    assert record.message.splitlines() == [
        "Getting page https://user:****@example.com/simple/",
    ]
    record = caplog.records[1]
    assert record.levelname == "DEBUG"
    assert record.message.splitlines() == [
        "Fetched page https://user:****@example.com/simple/ as text/html",
    ]


@pytest.mark.parametrize(
    ("path", "expected"),
    [
        # Test a character that needs quoting.
        ("a b", "a%20b"),
        # Test an unquoted "@".
        ("a @ b", "a%20@%20b"),
        # Test multiple unquoted "@".
        ("a @ @ b", "a%20@%20@%20b"),
        # Test a quoted "@".
        ("a %40 b", "a%20%40%20b"),
        # Test a quoted "@" before an unquoted "@".
        ("a %40b@ c", "a%20%40b@%20c"),
        # Test a quoted "@" after an unquoted "@".
        ("a @b%40 c", "a%20@b%40%20c"),
        # Test alternating quoted and unquoted "@".
        ("a %40@b %40@c %40", "a%20%40@b%20%40@c%20%40"),
        # Test an unquoted "/".
        ("a / b", "a%20/%20b"),
        # Test multiple unquoted "/".
        ("a / / b", "a%20/%20/%20b"),
        # Test a quoted "/".
        ("a %2F b", "a%20%2F%20b"),
        # Test a quoted "/" before an unquoted "/".
        ("a %2Fb/ c", "a%20%2Fb/%20c"),
        # Test a quoted "/" after an unquoted "/".
        ("a /b%2F c", "a%20/b%2F%20c"),
        # Test alternating quoted and unquoted "/".
        ("a %2F/b %2F/c %2F", "a%20%2F/b%20%2F/c%20%2F"),
        # Test normalizing non-reserved quoted characters "[" and "]"
        ("a %5b %5d b", "a%20%5B%20%5D%20b"),
        # Test normalizing a reserved quoted "/"
        ("a %2f b", "a%20%2F%20b"),
    ],
)
@pytest.mark.parametrize("is_local_path", [True, False])
def test_clean_url_path(path: str, expected: str, is_local_path: bool) -> None:
    assert _clean_url_path(path, is_local_path=is_local_path) == expected


@pytest.mark.parametrize(
    ("path", "expected"),
    [
        # Test a VCS path with a Windows drive letter and revision.
        pytest.param(
            "/T:/with space/repo.git@1.0",
            "///T:/with%20space/repo.git@1.0",
            marks=pytest.mark.skipif("sys.platform != 'win32'"),
        ),
        # Test a VCS path with a Windows drive letter and revision,
        # running on non-windows platform.
        pytest.param(
            "/T:/with space/repo.git@1.0",
            "/T%3A/with%20space/repo.git@1.0",
            marks=pytest.mark.skipif("sys.platform == 'win32'"),
        ),
    ],
)
def test_clean_url_path_with_local_path(path: str, expected: str) -> None:
    actual = _clean_url_path(path, is_local_path=True)
    assert actual == expected


@pytest.mark.parametrize(
    ("url", "clean_url"),
    [
        # URL with hostname and port. Port separator should not be quoted.
        (
            "https://localhost.localdomain:8181/path/with space/",
            "https://localhost.localdomain:8181/path/with%20space/",
        ),
        # URL that is already properly quoted. The quoting `%`
        # characters should not be quoted again.
        (
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
            "https://localhost.localdomain:8181/path/with%20quoted%20space/",
        ),
        # URL with IPv4 address and port.
        (
            "https://127.0.0.1:8181/path/with space/",
            "https://127.0.0.1:8181/path/with%20space/",
        ),
        # URL with IPv6 address and port. The `[]` brackets around the
        # IPv6 address should not be quoted.
        (
            "https://[fd00:0:0:236::100]:8181/path/with space/",
            "https://[fd00:0:0:236::100]:8181/path/with%20space/",
        ),
        # URL with query. The leading `?` should not be quoted.
        (
            "https://localhost.localdomain:8181/path/with/query?request=test",
            "https://localhost.localdomain:8181/path/with/query?request=test",
        ),
        # URL with colon in the path portion.
        (
            "https://localhost.localdomain:8181/path:/with:/colon",
            "https://localhost.localdomain:8181/path%3A/with%3A/colon",
        ),
        # URL with something that looks like a drive letter, but is
        # not. The `:` should be quoted.
        (
            "https://localhost.localdomain/T:/path/",
            "https://localhost.localdomain/T%3A/path/",
        ),
        # URL with a quoted "/" in the path portion.
        (
            "https://example.com/access%2Ftoken/path/",
            "https://example.com/access%2Ftoken/path/",
        ),
        # VCS URL containing revision string.
        (
            "git+ssh://example.com/path to/repo.git@1.0#egg=my-package-1.0",
            "git+ssh://example.com/path%20to/repo.git@1.0#egg=my-package-1.0",
        ),
        # VCS URL with a quoted "#" in the revision string.
        (
            "git+https://example.com/repo.git@hash%23symbol#egg=my-package-1.0",
            "git+https://example.com/repo.git@hash%23symbol#egg=my-package-1.0",
        ),
        # VCS URL with a quoted "@" in the revision string.
        (
            "git+https://example.com/repo.git@at%40 space#egg=my-package-1.0",
            "git+https://example.com/repo.git@at%40%20space#egg=my-package-1.0",
        ),
        # URL with Windows drive letter. The `:` after the drive
        # letter should not be quoted. The trailing `/` should be
        # removed.
        pytest.param(
            "file:///T:/path/with spaces/",
            "file:///T:/path/with%20spaces",
            marks=pytest.mark.skipif("sys.platform != 'win32'"),
        ),
        # URL with Windows drive letter, running on non-windows
        # platform. The `:` after the drive should be quoted.
        pytest.param(
            "file:///T:/path/with spaces/",
            "file:///T%3A/path/with%20spaces/",
            marks=pytest.mark.skipif("sys.platform == 'win32'"),
        ),
        # Test a VCS URL with a Windows drive letter and revision.
        pytest.param(
            "git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0",
            "git+file:///T:/with%20space/repo.git@1.0#egg=my-package-1.0",
            marks=pytest.mark.skipif("sys.platform != 'win32'"),
        ),
        # Test a VCS URL with a Windows drive letter and revision,
        # running on non-windows platform.
        pytest.param(
            "git+file:///T:/with space/repo.git@1.0#egg=my-package-1.0",
            "git+file:/T%3A/with%20space/repo.git@1.0#egg=my-package-1.0",
            marks=pytest.mark.skipif("sys.platform == 'win32'"),
        ),
    ],
)
def test_ensure_quoted_url(url: str, clean_url: str) -> None:
    assert _ensure_quoted_url(url) == clean_url


def _test_parse_links_data_attribute(
    anchor_html: str, attr: str, expected: Optional[str]
) -> Link:
    html = (
        "<!DOCTYPE html>"
        '<html><head><meta charset="utf-8"><head>'
        "<body>{}</body></html>"
    ).format(anchor_html)
    html_bytes = html.encode("utf-8")
    page = IndexContent(
        html_bytes,
        "text/html",
        encoding=None,
        # parse_links() is cached by url, so we inject a random uuid to ensure
        # the page content isn't cached.
        url=f"https://example.com/simple-{uuid.uuid4()}/",
    )
    links = list(parse_links(page))
    (link,) = links
    actual = getattr(link, attr)
    assert actual == expected
    return link


@pytest.mark.parametrize(
    "anchor_html, expected",
    [
        # Test not present.
        ('<a href="/pkg-1.0.tar.gz"></a>', None),
        # Test present with no value.
        ('<a href="/pkg-1.0.tar.gz" data-requires-python></a>', None),
        # Test a value with an escaped character.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python="&gt;=3.6"></a>',
            ">=3.6",
        ),
        # Test requires python is unescaped once.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python="&amp;gt;=3.6"></a>',
            "&gt;=3.6",
        ),
    ],
)
def test_parse_links__requires_python(
    anchor_html: str, expected: Optional[str]
) -> None:
    _test_parse_links_data_attribute(anchor_html, "requires_python", expected)


# TODO: this test generates its own examples to validate the json client implementation
# instead of sharing those examples with the html client testing. We expect this won't
# hide any bugs because operations like resolving PEP 658 metadata should use the same
# code for both types of indices, but it might be nice to explicitly have all our tests
# in test_download.py execute over both html and json indices with
# a pytest.mark.parametrize decorator to ensure nothing slips through the cracks.
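# A hypothetical sketch of what that could look like (the test name and
# parametrization below are illustrative, not existing code):
#
#     @pytest.mark.parametrize("content_type", [
#         "text/html",
#         "application/vnd.pypi.simple.v1+json",
#     ])
#     def test_index_roundtrip(content_type: str) -> None:
#         ...  # build an IndexContent with content_type; assert on parse_links()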
def test_parse_links_json() -> None:
    json_bytes = json.dumps(
        {
            "meta": {"api-version": "1.0"},
            "name": "holygrail",
            "files": [
                {
                    "filename": "holygrail-1.0.tar.gz",
                    "url": "https://example.com/files/holygrail-1.0.tar.gz",
                    "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
                    "requires-python": ">=3.7",
                    "yanked": "Had a vulnerability",
                },
                {
                    "filename": "holygrail-1.0-py3-none-any.whl",
                    "url": "/files/holygrail-1.0-py3-none-any.whl",
                    "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
                    "requires-python": ">=3.7",
                    "dist-info-metadata": False,
                },
                # Same as above, but parsing dist-info-metadata.
                {
                    "filename": "holygrail-1.0-py3-none-any.whl",
                    "url": "/files/holygrail-1.0-py3-none-any.whl",
                    "hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
                    "requires-python": ">=3.7",
                    "dist-info-metadata": "sha512=aabdd41",
                },
            ],
        }
    ).encode("utf8")
    page = IndexContent(
        json_bytes,
        "application/vnd.pypi.simple.v1+json",
        encoding=None,
        # parse_links() is cached by url, so we inject a random uuid to ensure
        # the page content isn't cached.
        url=f"https://example.com/simple-{uuid.uuid4()}/",
    )
    links = list(parse_links(page))

    assert links == [
        Link(
            "https://example.com/files/holygrail-1.0.tar.gz",
            comes_from=page.url,
            requires_python=">=3.7",
            yanked_reason="Had a vulnerability",
            hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
        ),
        Link(
            "https://example.com/files/holygrail-1.0-py3-none-any.whl",
            comes_from=page.url,
            requires_python=">=3.7",
            yanked_reason=None,
            hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
        ),
        Link(
            "https://example.com/files/holygrail-1.0-py3-none-any.whl",
            comes_from=page.url,
            requires_python=">=3.7",
            yanked_reason=None,
            hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
            dist_info_metadata="sha512=aabdd41",
        ),
    ]

    # Ensure the metadata info can be parsed into the correct link.
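    # Per PEP 658, the metadata file lives at the distribution's URL with
    # ".metadata" appended, and the attribute value carries its hash.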
    metadata_link = links[2].metadata_link()
    assert metadata_link is not None
    assert (
        metadata_link.url
        == "https://example.com/files/holygrail-1.0-py3-none-any.whl.metadata"
    )
    assert metadata_link._hashes == {"sha512": "aabdd41"}


@pytest.mark.parametrize(
    "anchor_html, expected",
    [
        # Test not present.
        ('<a href="/pkg1-1.0.tar.gz"></a>', None),
        # Test present with no value.
        ('<a href="/pkg2-1.0.tar.gz" data-yanked></a>', None),
        # Test the empty string.
        ('<a href="/pkg3-1.0.tar.gz" data-yanked=""></a>', ""),
        # Test a non-empty string.
        ('<a href="/pkg4-1.0.tar.gz" data-yanked="error"></a>', "error"),
        # Test a value with an escaped character.
        (
            '<a href="/pkg4-1.0.tar.gz" data-yanked="version &lt; 1"></a>',
            "version < 1",
        ),
        # Test a yanked reason with a non-ascii character.
        (
            '<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018"></a>',
            "curlyquote \u2018",
        ),
        # Test yanked reason is unescaped once.
        (
            '<a href="/pkg-1.0.tar.gz" data-yanked="version &amp;lt; 1"></a>',
            "version &lt; 1",
        ),
    ],
)
def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -> None:
    _test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)


# Requirement objects do not == each other unless they point to the same instance!
_pkg1_requirement = Requirement("pkg1==1.0")


@pytest.mark.parametrize(
    "anchor_html, expected, hashes",
    [
        # Test not present.
        (
            '<a href="/pkg1-1.0.tar.gz"></a>',
            None,
            {},
        ),
        # Test with value "true".
        (
            '<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="true"></a>',
            "true",
            {},
        ),
        # Test with a provided hash value.
        (
            '<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>',  # noqa: E501
            "sha256=aa113592bbe",
            {},
        ),
        # Test with a provided hash value for both the requirement as well as metadata.
        (
            '<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-dist-info-metadata="sha256=aa113592bbe"></a>',  # noqa: E501
            "sha256=aa113592bbe",
            {"sha512": "abc132409cb"},
        ),
    ],
)
def test_parse_links__dist_info_metadata(
    anchor_html: str,
    expected: Optional[str],
    hashes: Dict[str, str],
) -> None:
    link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
    assert link._hashes == hashes


def test_parse_links_caches_same_page_by_url() -> None:
    html = (
        "<!DOCTYPE html>"
        '<html><head><meta charset="utf-8"><head>'
        '<body><a href="/pkg1-1.0.tar.gz"></a></body></html>'
    )
    html_bytes = html.encode("utf-8")

    url = "https://example.com/simple/"

    page_1 = IndexContent(
        html_bytes,
        "text/html",
        encoding=None,
        url=url,
    )
    # Make a second page with zero content, to ensure that it's not accessed,
    # because the page was cached by url.
    page_2 = IndexContent(
        b"",
        "text/html",
        encoding=None,
        url=url,
    )
    # Make a third page which represents an index url, which should not be
    # cached, even for the same url. We modify the page content slightly to
    # verify that the result is not cached.
    page_3 = IndexContent(
        re.sub(b"pkg1", b"pkg2", html_bytes),
        "text/html",
        encoding=None,
        url=url,
        cache_link_parsing=False,
    )

    parsed_links_1 = list(parse_links(page_1))
    assert len(parsed_links_1) == 1
    assert "pkg1" in parsed_links_1[0].url

    parsed_links_2 = list(parse_links(page_2))
    assert parsed_links_2 == parsed_links_1

    parsed_links_3 = list(parse_links(page_3))
    assert len(parsed_links_3) == 1
    assert parsed_links_3 != parsed_links_1
    assert "pkg2" in parsed_links_3[0].url


@mock.patch("pip._internal.index.collector.raise_for_status")
def test_request_http_error(
    mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture
) -> None:
    caplog.set_level(logging.DEBUG)
    link = Link("http://localhost")
    session = mock.Mock(PipSession)
    session.get.return_value = mock.Mock()
    mock_raise_for_status.side_effect = NetworkConnectionError("Http error")
    assert _get_index_content(link, session=session) is None
    assert "Could not fetch URL http://localhost: Http error - skipping" in caplog.text


def test_request_retries(caplog: pytest.LogCaptureFixture) -> None:
    caplog.set_level(logging.DEBUG)
    link = Link("http://localhost")
    session = mock.Mock(PipSession)
    session.get.side_effect = requests.exceptions.RetryError("Retry error")
    assert _get_index_content(link, session=session) is None
    assert "Could not fetch URL http://localhost: Retry error - skipping" in caplog.text


def test_make_index_content() -> None:
    headers = {"Content-Type": "text/html; charset=UTF-8"}
    response = mock.Mock(
        content=b"<content>",
        url="https://example.com/index.html",
        headers=headers,
    )

    actual = _make_index_content(response)
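    # The encoding should be picked up from the "charset" parameter of the
    # Content-Type header.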
    assert actual.content == b"<content>"
    assert actual.encoding == "UTF-8"
    assert actual.url == "https://example.com/index.html"


@pytest.mark.parametrize(
    "url, vcs_scheme",
    [
        ("svn+http://pypi.org/something", "svn"),
        ("git+https://github.com/pypa/pip.git", "git"),
    ],
)
def test_get_index_content_invalid_scheme(
    caplog: pytest.LogCaptureFixture, url: str, vcs_scheme: str
) -> None:
    """`_get_index_content()` should warn and return None if a URL with an
    unsupported scheme is given.

    Only file:, http:, https:, and ftp: are allowed.
    """
    with caplog.at_level(logging.WARNING):
        page = _get_index_content(Link(url), session=mock.Mock(PipSession))

    assert page is None
    assert caplog.record_tuples == [
        (
            "pip._internal.index.collector",
            logging.WARNING,
            "Cannot look at {} URL {} because it does not support "
            "lookup as web pages.".format(vcs_scheme, url),
        ),
    ]


@pytest.mark.parametrize(
    "content_type",
    [
        "application/xhtml+xml",
        "application/json",
    ],
)
@mock.patch("pip._internal.index.collector.raise_for_status")
def test_get_index_content_invalid_content_type(
    mock_raise_for_status: mock.Mock,
    caplog: pytest.LogCaptureFixture,
    content_type: str,
) -> None:
    """`_get_index_content()` should warn if an invalid content-type is given.
    Only the Simple API content types and text/html are allowed.
    """
    caplog.set_level(logging.DEBUG)
    url = "https://pypi.org/simple/pip"
    link = Link(url)

    session = mock.Mock(PipSession)
    session.get.return_value = mock.Mock(
        **{
            "request.method": "GET",
            "headers": {"Content-Type": content_type},
        }
    )
    assert _get_index_content(link, session=session) is None
    mock_raise_for_status.assert_called_once_with(session.get.return_value)
    assert (
        "pip._internal.index.collector",
        logging.WARNING,
        "Skipping page {} because the GET request got Content-Type: {}. "
        "The only supported Content-Types are application/vnd.pypi.simple.v1+json, "
        "application/vnd.pypi.simple.v1+html, and text/html".format(url, content_type),
    ) in caplog.record_tuples


def make_fake_html_response(url: str) -> mock.Mock:
    """
    Create a fake requests.Response object.
    """
    html = dedent(
        """\
    <html><head><meta name="api-version" value="2" /></head>
    <body>
    <a href="/abc-1.0.tar.gz#md5=000000000">abc-1.0.tar.gz</a>
    </body></html>
    """
    )
    content = html.encode("utf-8")
    return mock.Mock(content=content, url=url, headers={"Content-Type": "text/html"})


def test_get_index_content_directory_append_index(tmpdir: Path) -> None:
    """`_get_index_content()` should append "index.html" to a directory URL."""
    dirpath = tmpdir / "something"
    dirpath.mkdir()
    dir_url = dirpath.as_uri()
    expected_url = "{}/index.html".format(dir_url.rstrip("/"))

    session = mock.Mock(PipSession)
    fake_response = make_fake_html_response(expected_url)
    mock_func = mock.patch("pip._internal.index.collector._get_simple_response")
    with mock_func as mock_func:
        mock_func.return_value = fake_response
        actual = _get_index_content(Link(dir_url), session=session)
        assert mock_func.mock_calls == [
            mock.call(expected_url, session=session),
        ], f"actual calls: {mock_func.mock_calls}"

        assert actual is not None
        assert actual.content == fake_response.content
        assert actual.encoding is None
        assert actual.url == expected_url


def test_collect_sources__file_expand_dir(data: TestData) -> None:
    """
    Test that a file:// dir from --find-links becomes a _FlatDirectorySource.
    """
    collector = LinkCollector.create(
        session=mock.Mock(is_secure_origin=None),  # Shouldn't be used.
        options=mock.Mock(
            index_url="ignored-by-no-index",
            extra_index_urls=[],
            no_index=True,
            find_links=[data.find_links],
        ),
    )
    sources = collector.collect_sources(
        # Shouldn't be used.
        project_name=None,  # type: ignore[arg-type]
        candidates_from_page=None,  # type: ignore[arg-type]
    )
    assert (
        not sources.index_urls
        and len(sources.find_links) == 1
        and isinstance(sources.find_links[0], _FlatDirectorySource)
    ), (
        "Directory source should have been found "
        f"at find-links url: {data.find_links}"
    )


def test_collect_sources__file_not_find_link(data: TestData) -> None:
    """
    Test that a file:// dir from --index-url doesn't become a
    _FlatDirectorySource.
    """
    collector = LinkCollector.create(
        session=mock.Mock(is_secure_origin=None),  # Shouldn't be used.
        options=mock.Mock(
            index_url=data.index_url("empty_with_pkg"),
            extra_index_urls=[],
            no_index=False,
            find_links=[],
        ),
    )
    sources = collector.collect_sources(
        project_name="",
        # Shouldn't be used.
        candidates_from_page=None,  # type: ignore[arg-type]
    )
    assert (
        not sources.find_links
        and len(sources.index_urls) == 1
        and isinstance(sources.index_urls[0], _IndexDirectorySource)
    ), "Directory specified as index should be treated as a page"


def test_collect_sources__non_existing_path() -> None:
    """
    Test that a non-existing path is ignored.
    """
    collector = LinkCollector.create(
        session=mock.Mock(is_secure_origin=None),  # Shouldn't be used.
        options=mock.Mock(
            index_url="ignored-by-no-index",
            extra_index_urls=[],
            no_index=True,
            find_links=[os.path.join("this", "doesnt", "exist")],
        ),
    )
    sources = collector.collect_sources(
        # Shouldn't be used.
        project_name=None,  # type: ignore[arg-type]
        candidates_from_page=None,  # type: ignore[arg-type]
    )
    assert not sources.index_urls and sources.find_links == [
        None
    ], "Nothing should have been found"


def check_links_include(links: List[Link], names: List[str]) -> None:
    """
    Assert that the given list of Link objects includes, for each of the
    given names, a link whose URL has a base name matching that name.
    """
    for name in names:
        assert any(
            link.url.endswith(name) for link in links
        ), f"name {name!r} not among links: {links}"


class TestLinkCollector:
    @mock.patch("pip._internal.index.collector._get_simple_response")
    def test_fetch_response(self, mock_get_simple_response: mock.Mock) -> None:
        url = "https://pypi.org/simple/twine/"

        fake_response = make_fake_html_response(url)
        mock_get_simple_response.return_value = fake_response

        location = Link(url, cache_link_parsing=False)
        link_collector = make_test_link_collector()
        actual = link_collector.fetch_response(location)

        assert actual is not None
        assert actual.content == fake_response.content
        assert actual.encoding is None
        assert actual.url == url
        assert actual.cache_link_parsing == location.cache_link_parsing

        # Also check that the right session object was passed to
        # _get_simple_response().
        mock_get_simple_response.assert_called_once_with(
            url,
            session=link_collector.session,
        )

    def test_collect_sources(
        self, caplog: pytest.LogCaptureFixture, data: TestData
    ) -> None:
        caplog.set_level(logging.DEBUG)

        link_collector = make_test_link_collector(
            find_links=[data.find_links],
            # Include two copies of the URL to check that the second one
            # is skipped.
            index_urls=[PyPI.simple_url, PyPI.simple_url],
        )
        collected_sources = link_collector.collect_sources(
            "twine",
            candidates_from_page=lambda link: [
                InstallationCandidate("twine", "1.0", link)
            ],
        )

        files_it = itertools.chain.from_iterable(
            source.file_links()
            for sources in collected_sources
            for source in sources
            if source is not None
        )
        pages_it = itertools.chain.from_iterable(
            source.page_candidates()
            for sources in collected_sources
            for source in sources
            if source is not None
        )
        files = list(files_it)
        pages = list(pages_it)

        # Spot-check the returned sources.
        assert len(files) > 20
        check_links_include(files, names=["simple-1.0.tar.gz"])

        assert [page.link for page in pages] == [Link("https://pypi.org/simple/twine/")]
        # Check that index URLs are marked as *un*cacheable.
        assert not pages[0].link.cache_link_parsing

        expected_message = dedent(
            """\
        1 location(s) to search for versions of twine:
        * https://pypi.org/simple/twine/"""
        )
        assert caplog.record_tuples == [
            ("pip._internal.index.collector", logging.DEBUG, expected_message),
        ]


@pytest.mark.parametrize(
    "find_links, no_index, suppress_no_index, expected",
    [
        (["link1"], False, False, (["link1"], ["default_url", "url1", "url2"])),
        (["link1"], False, True, (["link1"], ["default_url", "url1", "url2"])),
        (["link1"], True, False, (["link1"], [])),
        # Passing suppress_no_index=True suppresses no_index=True.
        (["link1"], True, True, (["link1"], ["default_url", "url1", "url2"])),
        # Test options.find_links=False.
        (False, False, False, ([], ["default_url", "url1", "url2"])),
    ],
)
def test_link_collector_create(
    find_links: List[str],
    no_index: bool,
    suppress_no_index: bool,
    expected: Tuple[List[str], List[str]],
) -> None:
    """
    :param expected: the expected (find_links, index_urls) values.
    """
    expected_find_links, expected_index_urls = expected
    session = PipSession()
    options = mock.Mock(
        find_links=find_links,
        index_url="default_url",
        extra_index_urls=["url1", "url2"],
        no_index=no_index,
    )
    link_collector = LinkCollector.create(
        session,
        options=options,
        suppress_no_index=suppress_no_index,
    )

    assert link_collector.session is session

    search_scope = link_collector.search_scope
    assert search_scope.find_links == expected_find_links
    assert search_scope.index_urls == expected_index_urls


@mock.patch("os.path.expanduser")
def test_link_collector_create_find_links_expansion(
    mock_expanduser: mock.Mock, tmpdir: Path
) -> None:
    """
    Test "~" expansion in --find-links paths.
    """

    # This is a mock version of expanduser() that expands "~" to the tmpdir.
    def expand_path(path: str) -> str:
        if path.startswith("~/"):
            path = os.path.join(tmpdir, path[2:])
        return path

    mock_expanduser.side_effect = expand_path

    session = PipSession()
    options = mock.Mock(
        find_links=["~/temp1", "~/temp2"],
        index_url="default_url",
        extra_index_urls=[],
        no_index=False,
    )
    # Only create temp2 and not temp1 to test that "~" expansion only occurs
    # when the directory exists.
    temp2_dir = os.path.join(tmpdir, "temp2")
    os.mkdir(temp2_dir)

    link_collector = LinkCollector.create(session, options=options)

    search_scope = link_collector.search_scope
    # Only ~/temp2 gets expanded. Also, the path is normalized when expanded.
    expected_temp2_dir = os.path.normcase(temp2_dir)
    assert search_scope.find_links == ["~/temp1", expected_temp2_dir]
    assert search_scope.index_urls == ["default_url"]


@pytest.mark.parametrize(
    "url, result",
    [
        (
            "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe",
            LinkHash("sha256", "aa113592bbe"),
        ),
        (
            "https://pypi.org/pip-18.0.tar.gz#sha256=aa113592bbe&subdirectory=setup",
            LinkHash("sha256", "aa113592bbe"),
        ),
        (
            "https://pypi.org/pip-18.0.tar.gz#subdirectory=setup&sha256=aa113592bbe",
            LinkHash("sha256", "aa113592bbe"),
        ),
        # "xsha256" is not a valid algorithm, so we discard it.
        ("https://pypi.org/pip-18.0.tar.gz#xsha256=aa113592bbe", None),
        # Discard empty hash.
        ("https://pypi.org/pip-18.0.tar.gz#sha256=", None),
        (
            "https://pypi.org/pip-18.0.tar.gz#md5=aa113592bbe",
            LinkHash("md5", "aa113592bbe"),
        ),
        ("https://pypi.org/pip-18.0.tar.gz", None),
        # We don't recognize the "sha500" algorithm, so we discard it.
        ("https://pypi.org/pip-18.0.tar.gz#sha500=aa113592bbe", None),
    ],
)
def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
    assert LinkHash.split_hash_name_and_value(url) == result