feat(pip/_internal/*): Use custom raise_for_status method

2020-05-03 22:18:24 +05:30 · 2020-05-03 22:18:24 +05:30 · 8c267e6e39
parent 9ec3cfad20
commit 8c267e6e39
13 changed files with 134 additions and 32 deletions
--- a/news/5380.feature
+++ b/news/5380.feature
@ -0,0 +1 @@
+Refine error messages to avoid showing Python tracebacks when an HTTP error occurs.
--- a/src/pip/_internal/cli/base_command.py
+++ b/src/pip/_internal/cli/base_command.py
@ -26,6 +26,7 @@ from pip._internal.exceptions import (
    BadCommand,
    CommandError,
    InstallationError,
+    NetworkConnectionError,
    PreviousBuildDirError,
    SubProcessError,
    UninstallationError,
@ -221,7 +222,7 @@ class Command(CommandContextMixIn):

            return PREVIOUS_BUILD_DIR_ERROR
        except (InstallationError, UninstallationError, BadCommand,
-                SubProcessError) as exc:
+                SubProcessError, NetworkConnectionError) as exc:
            logger.critical(str(exc))
            logger.debug('Exception information:', exc_info=True)

--- a/src/pip/_internal/exceptions.py
+++ b/src/pip/_internal/exceptions.py
@ -99,6 +99,23 @@ class PreviousBuildDirError(PipError):
    """Raised when there's a previous conflicting build directory"""


+class NetworkConnectionError(PipError):
+    """HTTP connection error"""
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize NetworkConnectionError with  `request` and `response`
+        objects.
+        """
+        response = kwargs.pop('response', None)
+        self.response = response
+        self.request = kwargs.pop('request', None)
+        if (response is not None and not self.request and
+                hasattr(response, 'request')):
+            self.request = self.response.request
+        super(NetworkConnectionError, self).__init__(*args, **kwargs)
+
+
 class InvalidWheelFilename(InstallationError):
    """Invalid wheel filename."""

--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@ -13,12 +13,14 @@ from collections import OrderedDict

 from pip._vendor import html5lib, requests
 from pip._vendor.distlib.compat import unescape
-from pip._vendor.requests.exceptions import HTTPError, RetryError, SSLError
+from pip._vendor.requests.exceptions import RetryError, SSLError
 from pip._vendor.six.moves.urllib import parse as urllib_parse
 from pip._vendor.six.moves.urllib import request as urllib_request

+from pip._internal.exceptions import NetworkConnectionError
 from pip._internal.models.link import Link
 from pip._internal.models.search_scope import SearchScope
+from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.filetypes import ARCHIVE_EXTENSIONS
 from pip._internal.utils.misc import pairwise, redact_auth_from_url
 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
@ -123,7 +125,7 @@ def _ensure_html_response(url, session):
        raise _NotHTTP()

    resp = session.head(url, allow_redirects=True)
-    resp.raise_for_status()
+    raise_for_status(resp)

    _ensure_html_header(resp)

@ -167,7 +169,7 @@ def _get_html_response(url, session):
            "Cache-Control": "max-age=0",
        },
    )
-    resp.raise_for_status()
+    raise_for_status(resp)

    # The check for archives above only works if the url ends with
    # something that looks like an archive. However that is not a
@ -462,7 +464,7 @@ def _get_html_page(link, session=None):
            'The only supported Content-Type is text/html',
            link, exc.request_desc, exc.content_type,
        )
-    except HTTPError as exc:
+    except NetworkConnectionError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
--- a/src/pip/_internal/network/download.py
+++ b/src/pip/_internal/network/download.py
@ -5,13 +5,13 @@ import logging
 import mimetypes
 import os

-from pip._vendor import requests
 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE

 from pip._internal.cli.progress_bars import DownloadProgressProvider
+from pip._internal.exceptions import NetworkConnectionError
 from pip._internal.models.index import PyPI
 from pip._internal.network.cache import is_from_cache
-from pip._internal.network.utils import HEADERS, response_chunks
+from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks
 from pip._internal.utils.misc import (
    format_size,
    redact_auth_from_url,
@ -133,7 +133,7 @@ def _http_get_download(session, link):
    # type: (PipSession, Link) -> Response
    target_url = link.url.split('#', 1)[0]
    resp = session.get(target_url, headers=HEADERS, stream=True)
-    resp.raise_for_status()
+    raise_for_status(resp)
    return resp


@ -164,7 +164,7 @@ class Downloader(object):
        # type: (Link) -> Download
        try:
            resp = _http_get_download(self._session, link)
-        except requests.HTTPError as e:
+        except NetworkConnectionError as e:
            logger.critical(
                "HTTP error %s while getting %s", e.response.status_code, link
            )
--- a/src/pip/_internal/network/utils.py
+++ b/src/pip/_internal/network/utils.py
@ -1,5 +1,6 @@
 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response

+from pip._internal.exceptions import NetworkConnectionError
 from pip._internal.utils.typing import MYPY_CHECK_RUNNING

 if MYPY_CHECK_RUNNING:
@ -27,6 +28,33 @@ if MYPY_CHECK_RUNNING:
 HEADERS = {'Accept-Encoding': 'identity'}  # type: Dict[str, str]


+def raise_for_status(resp):
+    # type: (Response) -> None
+    http_error_msg = u''
+    if isinstance(resp.reason, bytes):
+        # We attempt to decode utf-8 first because some servers
+        # choose to localize their reason strings. If the string
+        # isn't utf-8, we fall back to iso-8859-1 for all other
+        # encodings.
+        try:
+            reason = resp.reason.decode('utf-8')
+        except UnicodeDecodeError:
+            reason = resp.reason.decode('iso-8859-1')
+    else:
+        reason = resp.reason
+
+    if 400 <= resp.status_code < 500:
+        http_error_msg = u'%s Client Error: %s for url: %s' % (
+            resp.status_code, reason, resp.url)
+
+    elif 500 <= resp.status_code < 600:
+        http_error_msg = u'%s Server Error: %s for url: %s' % (
+            resp.status_code, reason, resp.url)
+
+    if http_error_msg:
+        raise NetworkConnectionError(http_error_msg, response=resp)
+
+
 def response_chunks(response, chunk_size=CONTENT_CHUNK_SIZE):
    # type: (Response, int) -> Iterator[bytes]
    """Given a requests Response, provide the data chunks.
--- a/src/pip/_internal/network/xmlrpc.py
+++ b/src/pip/_internal/network/xmlrpc.py
@ -3,18 +3,20 @@

 import logging

-from pip._vendor import requests
 # NOTE: XMLRPC Client is not annotated in typeshed as on 2017-07-17, which is
 #       why we ignore the type on this import
 from pip._vendor.six.moves import xmlrpc_client  # type: ignore
 from pip._vendor.six.moves.urllib import parse as urllib_parse

+from pip._internal.exceptions import NetworkConnectionError
+from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.typing import MYPY_CHECK_RUNNING

 if MYPY_CHECK_RUNNING:
    from typing import Dict
    from pip._internal.network.session import PipSession

+
 logger = logging.getLogger(__name__)


@ -38,10 +40,10 @@ class PipXmlrpcTransport(xmlrpc_client.Transport):
            headers = {'Content-Type': 'text/xml'}
            response = self._session.post(url, data=request_body,
                                          headers=headers, stream=True)
-            response.raise_for_status()
+            raise_for_status(response)
            self.verbose = verbose
            return self.parse_response(response.raw)
-        except requests.HTTPError as exc:
+        except NetworkConnectionError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@ -9,7 +9,6 @@ import mimetypes
 import os
 import shutil

-from pip._vendor import requests
 from pip._vendor.six import PY2

 from pip._internal.distributions import (
@ -21,6 +20,7 @@ from pip._internal.exceptions import (
    HashMismatch,
    HashUnpinned,
    InstallationError,
+    NetworkConnectionError,
    PreviousBuildDirError,
    VcsHashUnsupported,
 )
@ -468,7 +468,7 @@ class RequirementPreparer(object):
                    link, req.source_dir, self.downloader, download_dir,
                    hashes=self._get_linked_req_hashes(req)
                )
-            except requests.HTTPError as exc:
+            except NetworkConnectionError as exc:
                raise InstallationError(
                    'Could not install requirement {} because of HTTP '
                    'error {} for URL {}'.format(req, exc, link)
--- a/src/pip/_internal/req/req_file.py
+++ b/src/pip/_internal/req/req_file.py
@ -18,6 +18,7 @@ from pip._internal.exceptions import (
    RequirementsFileParseError,
 )
 from pip._internal.models.search_scope import SearchScope
+from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.encoding import auto_decode
 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
 from pip._internal.utils.urls import get_url_scheme
@ -551,7 +552,7 @@ def get_file_content(url, session, comes_from=None):
    if scheme in ['http', 'https']:
        # FIXME: catch some errors
        resp = session.get(url)
-        resp.raise_for_status()
+        raise_for_status(resp)
        return resp.url, resp.text

    elif scheme == 'file':
--- a/tests/lib/requests_mocks.py
+++ b/tests/lib/requests_mocks.py
@ -31,9 +31,6 @@ class MockResponse(object):
        self.headers = {'Content-Length': len(contents)}
        self.history = []

-    def raise_for_status(self):
-        pass
-

 class MockConnection(object):

--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@ -11,6 +11,7 @@ from mock import Mock, patch
 from pip._vendor import html5lib, requests
 from pip._vendor.six.moves.urllib import request as urllib_request

+from pip._internal.exceptions import NetworkConnectionError
 from pip._internal.index.collector import (
    HTMLPage,
    LinkCollector,
@ -55,7 +56,9 @@ def test_get_html_response_archive_to_naive_scheme(url):
        ("https://pypi.org/pip-18.0.tar.gz", "application/gzip"),
    ],
 )
-def test_get_html_response_archive_to_http_scheme(url, content_type):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_get_html_response_archive_to_http_scheme(mock_raise_for_status, url,
+                                                  content_type):
    """
    `_get_html_response()` should send a HEAD request on an archive-like URL
    if the scheme supports it, and raise `_NotHTML` if the response isn't HTML.
@ -72,6 +75,7 @@ def test_get_html_response_archive_to_http_scheme(url, content_type):
    session.assert_has_calls([
        mock.call.head(url, allow_redirects=True),
    ])
+    mock_raise_for_status.assert_called_once_with(session.head.return_value)
    assert ctx.value.args == (content_type, "HEAD")


@ -107,7 +111,10 @@ def test_get_html_page_invalid_content_type_archive(caplog, url):
        "https://pypi.org/pip-18.0.tar.gz",
    ],
 )
-def test_get_html_response_archive_to_http_scheme_is_html(url):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_get_html_response_archive_to_http_scheme_is_html(
+    mock_raise_for_status, url
+):
    """
    `_get_html_response()` should work with archive-like URLs if the HEAD
    request is responded with text/html.
@ -124,11 +131,13 @@ def test_get_html_response_archive_to_http_scheme_is_html(url):
    assert resp is not None
    assert session.mock_calls == [
        mock.call.head(url, allow_redirects=True),
-        mock.call.head().raise_for_status(),
        mock.call.get(url, headers={
            "Accept": "text/html", "Cache-Control": "max-age=0",
        }),
-        mock.call.get().raise_for_status(),
+    ]
+    assert mock_raise_for_status.mock_calls == [
+        mock.call(session.head.return_value),
+        mock.call(resp)
    ]


@ -140,7 +149,8 @@ def test_get_html_response_archive_to_http_scheme_is_html(url):
        "https://python.org/sitemap.xml",
    ],
 )
-def test_get_html_response_no_head(url):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_get_html_response_no_head(mock_raise_for_status, url):
    """
    `_get_html_response()` shouldn't send a HEAD request if the URL does not
    look like an archive, only the GET request that retrieves data.
@ -160,12 +170,14 @@ def test_get_html_response_no_head(url):
        mock.call(url, headers={
            "Accept": "text/html", "Cache-Control": "max-age=0",
        }),
-        mock.call().raise_for_status(),
        mock.call().headers.get("Content-Type", ""),
    ]
+    mock_raise_for_status.assert_called_once_with(resp)


-def test_get_html_response_dont_log_clear_text_password(caplog):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_get_html_response_dont_log_clear_text_password(mock_raise_for_status,
+                                                        caplog):
    """
    `_get_html_response()` should redact the password from the index URL
    in its DEBUG log message.
@ -184,6 +196,7 @@ def test_get_html_response_dont_log_clear_text_password(caplog):
    )

    assert resp is not None
+    mock_raise_for_status.assert_called_once_with(resp)

    assert len(caplog.records) == 1
    record = caplog.records[0]
@ -438,12 +451,13 @@ def test_parse_links_caches_same_page_by_url():
    assert 'pkg2' in parsed_links_3[0].url


-def test_request_http_error(caplog):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_request_http_error(mock_raise_for_status, caplog):
    caplog.set_level(logging.DEBUG)
    link = Link('http://localhost')
    session = Mock(PipSession)
-    session.get.return_value = resp = Mock()
-    resp.raise_for_status.side_effect = requests.HTTPError('Http error')
+    session.get.return_value = Mock()
+    mock_raise_for_status.side_effect = NetworkConnectionError('Http error')
    assert _get_html_page(link, session=session) is None
    assert (
        'Could not fetch URL http://localhost: Http error - skipping'
@ -510,7 +524,9 @@ def test_get_html_page_invalid_scheme(caplog, url, vcs_scheme):
        "application/json",
    ],
 )
-def test_get_html_page_invalid_content_type(caplog, content_type):
+@mock.patch("pip._internal.index.collector.raise_for_status")
+def test_get_html_page_invalid_content_type(mock_raise_for_status,
+                                            caplog, content_type):
    """`_get_html_page()` should warn if an invalid content-type is given.
    Only text/html is allowed.
    """
@ -523,8 +539,8 @@ def test_get_html_page_invalid_content_type(caplog, content_type):
        "request.method": "GET",
        "headers": {"Content-Type": content_type},
    })
-
    assert _get_html_page(link, session=session) is None
+    mock_raise_for_status.assert_called_once_with(session.get.return_value)
    assert ('pip._internal.index.collector',
            logging.WARNING,
            'Skipping page {} because the GET request got Content-Type: {}.'
--- a/tests/unit/test_network_utils.py
+++ b/tests/unit/test_network_utils.py
@ -0,0 +1,34 @@
+import pytest
+
+from pip._internal.exceptions import NetworkConnectionError
+from pip._internal.network.utils import raise_for_status
+from tests.lib.requests_mocks import MockResponse
+
+
+@pytest.mark.parametrize(("status_code", "error_type"), [
+    (401, "Client Error"),
+    (501, "Server Error"),
+])
+def test_raise_for_status_raises_exception(status_code, error_type):
+    contents = b'downloaded'
+    resp = MockResponse(contents)
+    resp.status_code = status_code
+    resp.url = "http://www.example.com/whatever.tgz"
+    resp.reason = "Network Error"
+    with pytest.raises(NetworkConnectionError) as exc:
+        raise_for_status(resp)
+        assert str(exc.info) == (
+            "{} {}: Network Error for url:"
+            " http://www.example.com/whatever.tgz".format(
+                status_code, error_type)
+        )
+
+
+def test_raise_for_status_does_not_raises_exception():
+    contents = b'downloaded'
+    resp = MockResponse(contents)
+    resp.status_code = 201
+    resp.url = "http://www.example.com/whatever.tgz"
+    resp.reason = "No error"
+    return_value = raise_for_status(resp)
+    assert return_value is None
--- a/tests/unit/test_operations_prepare.py
+++ b/tests/unit/test_operations_prepare.py
@ -4,7 +4,7 @@ from shutil import rmtree
 from tempfile import mkdtemp

 import pytest
-from mock import Mock
+from mock import Mock, patch

 from pip._internal.exceptions import HashMismatch
 from pip._internal.models.link import Link
@ -58,7 +58,9 @@ def test_unpack_url_with_urllib_response_without_content_type(data):
        rmtree(temp_dir)


-def test_download_http_url__no_directory_traversal(tmpdir):
+@patch("pip._internal.network.download.raise_for_status")
+def test_download_http_url__no_directory_traversal(mock_raise_for_status,
+                                                   tmpdir):
    """
    Test that directory traversal doesn't happen on download when the
    Content-Disposition header contains a filename with a ".." path part.
@ -90,6 +92,7 @@ def test_download_http_url__no_directory_traversal(tmpdir):
    # The file should be downloaded to download_dir.
    actual = os.listdir(download_dir)
    assert actual == ['out_dir_file']
+    mock_raise_for_status.assert_called_once_with(resp)


@pytest.fixture
				`@ -0,0 +1 @@`
				`Refine error messages to avoid showing Python tracebacks when an HTTP error occurs.`