pip/tests/unit/test_download.py

import hashlib
import os
import sys
from io import BytesIO
from shutil import copy, rmtree
from tempfile import mkdtemp

import pytest
from mock import Mock, patch
from pip._vendor.six.moves.urllib import request as urllib_request

import pip
from pip._internal.download import (
    CI_ENVIRONMENT_VARIABLES, PipSession, SafeFileCache, path_to_url,
    unpack_file_url, unpack_http_url, url_to_path,
)
from pip._internal.exceptions import HashMismatch
from pip._internal.models.link import Link
from pip._internal.utils.hashes import Hashes
from tests.lib import create_file


@pytest.fixture(scope="function")
def cache_tmpdir(tmpdir):
    cache_dir = tmpdir.join("cache")
    cache_dir.makedirs()
    yield cache_dir


def test_unpack_http_url_with_urllib_response_without_content_type(data):
    """
    It should download and unpack files even if no Content-Type header exists
    """
    _real_session = PipSession()

    def _fake_session_get(*args, **kwargs):
        resp = _real_session.get(*args, **kwargs)
        del resp.headers["Content-Type"]
        return resp

    session = Mock()
    session.get = _fake_session_get

    uri = path_to_url(data.packages.join("simple-1.0.tar.gz"))
    link = Link(uri)
    temp_dir = mkdtemp()
    try:
        unpack_http_url(
            link,
            temp_dir,
            download_dir=None,
            session=session,
        )
        assert set(os.listdir(temp_dir)) == {
            'PKG-INFO', 'setup.cfg', 'setup.py', 'simple', 'simple.egg-info'
        }
    finally:
        rmtree(temp_dir)


def get_user_agent():
    return PipSession().headers["User-Agent"]


def test_user_agent():
    user_agent = get_user_agent()

    assert user_agent.startswith("pip/%s" % pip.__version__)


@pytest.mark.parametrize('name, expected_like_ci', [
    ('BUILD_BUILDID', True),
    ('BUILD_ID', True),
    ('CI', True),
    # Test a prefix substring of one of the variable names we use.
    ('BUILD', False),
])
def test_user_agent__ci(monkeypatch, name, expected_like_ci):
    # Delete the variable names we use to check for CI to prevent the
    # detection from always returning True in case the tests are being run
    # under actual CI.  It is okay to depend on CI_ENVIRONMENT_VARIABLES
    # here (part of the code under test) because this setup step can only
    # prevent false test failures.  It can't cause a false test passage.
    for ci_name in CI_ENVIRONMENT_VARIABLES:
        monkeypatch.delenv(ci_name, raising=False)

    # Confirm the baseline before setting the environment variable.
    user_agent = get_user_agent()
    assert '"ci":null' in user_agent
    assert '"ci":true' not in user_agent

    monkeypatch.setenv(name, 'true')
    user_agent = get_user_agent()
    assert ('"ci":true' in user_agent) == expected_like_ci
    assert ('"ci":null' in user_agent) == (not expected_like_ci)


def test_user_agent_user_data(monkeypatch):
    monkeypatch.setenv("PIP_USER_AGENT_USER_DATA", "some_string")
    assert "some_string" in PipSession().headers["User-Agent"]


class FakeStream(object):

    def __init__(self, contents):
        self._io = BytesIO(contents)

    def read(self, size, decode_content=None):
        return self._io.read(size)

    def stream(self, size, decode_content=None):
        yield self._io.read(size)


class MockResponse(object):

    def __init__(self, contents):
        self.raw = FakeStream(contents)

    def raise_for_status(self):
        pass


@patch('pip._internal.download.unpack_file')
def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
    """
    If already-downloaded file has bad checksum, re-download.
    """
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b'downloaded'
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    session = Mock()
    session.get = Mock()
    response = session.get.return_value = MockResponse(contents)
    response.headers = {'content-type': 'application/x-tar'}
    response.url = base_url

    download_dir = mkdtemp()
    try:
        downloaded_file = os.path.join(download_dir, 'somepackage.tgz')
        create_file(downloaded_file, 'some contents')

        unpack_http_url(
            link,
            'location',
            download_dir=download_dir,
            session=session,
            hashes=Hashes({'sha1': [download_hash.hexdigest()]})
        )

        # despite existence of downloaded file with bad hash, downloaded again
        session.get.assert_called_once_with(
            'http://www.example.com/somepackage.tgz',
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        # cached file is replaced with newly downloaded file
        with open(downloaded_file) as fh:
            assert fh.read() == 'downloaded'

    finally:
        rmtree(download_dir)


@pytest.mark.skipif("sys.platform == 'win32'")
def test_path_to_url_unix():
    assert path_to_url('/tmp/file') == 'file:///tmp/file'
    path = os.path.join(os.getcwd(), 'file')
    assert path_to_url('file') == 'file://' + urllib_request.pathname2url(path)


@pytest.mark.skipif("sys.platform != 'win32'")
def test_path_to_url_win():
    assert path_to_url('c:/tmp/file') == 'file:///C:/tmp/file'
    assert path_to_url('c:\\tmp\\file') == 'file:///C:/tmp/file'
    assert path_to_url(r'\\unc\as\path') == 'file://unc/as/path'
    path = os.path.join(os.getcwd(), 'file')
    assert path_to_url('file') == 'file:' + urllib_request.pathname2url(path)


@pytest.mark.parametrize("url,win_expected,non_win_expected", [
    ('file:tmp', 'tmp', 'tmp'),
    ('file:c:/path/to/file', r'C:\path\to\file', 'c:/path/to/file'),
    ('file:/path/to/file', r'\path\to\file', '/path/to/file'),
    ('file://localhost/tmp/file', r'\tmp\file', '/tmp/file'),
    ('file://localhost/c:/tmp/file', r'C:\tmp\file', '/c:/tmp/file'),
    ('file://somehost/tmp/file', r'\\somehost\tmp\file', None),
    ('file:///tmp/file', r'\tmp\file', '/tmp/file'),
    ('file:///c:/tmp/file', r'C:\tmp\file', '/c:/tmp/file'),
])
def test_url_to_path(url, win_expected, non_win_expected):
    if sys.platform == 'win32':
        expected_path = win_expected
    else:
        expected_path = non_win_expected

    if expected_path is None:
        with pytest.raises(ValueError):
            url_to_path(url)
    else:
        assert url_to_path(url) == expected_path


@pytest.mark.skipif("sys.platform != 'win32'")
def test_url_to_path_path_to_url_symmetry_win():
    path = r'C:\tmp\file'
    assert url_to_path(path_to_url(path)) == path

    unc_path = r'\\unc\share\path'
    assert url_to_path(path_to_url(unc_path)) == unc_path


class Test_unpack_file_url(object):

    def prep(self, tmpdir, data):
        self.build_dir = tmpdir.join('build')
        self.download_dir = tmpdir.join('download')
        os.mkdir(self.build_dir)
        os.mkdir(self.download_dir)
        self.dist_file = "simple-1.0.tar.gz"
        self.dist_file2 = "simple-2.0.tar.gz"
        self.dist_path = data.packages.join(self.dist_file)
        self.dist_path2 = data.packages.join(self.dist_file2)
        self.dist_url = Link(path_to_url(self.dist_path))
        self.dist_url2 = Link(path_to_url(self.dist_path2))

    def test_unpack_file_url_no_download(self, tmpdir, data):
        self.prep(tmpdir, data)
        unpack_file_url(self.dist_url, self.build_dir)
        assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
        assert not os.path.isfile(
            os.path.join(self.download_dir, self.dist_file))

    def test_unpack_file_url_and_download(self, tmpdir, data):
        self.prep(tmpdir, data)
        unpack_file_url(self.dist_url, self.build_dir,
                        download_dir=self.download_dir)
        assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
        assert os.path.isfile(os.path.join(self.download_dir, self.dist_file))

    def test_unpack_file_url_download_already_exists(self, tmpdir,
                                                     data, monkeypatch):
        self.prep(tmpdir, data)
        # add in previous download (copy simple-2.0 as simple-1.0)
        # so we can tell it didn't get overwritten
        dest_file = os.path.join(self.download_dir, self.dist_file)
        copy(self.dist_path2, dest_file)
        with open(self.dist_path2, 'rb') as f:
            dist_path2_md5 = hashlib.md5(f.read()).hexdigest()

        unpack_file_url(self.dist_url, self.build_dir,
                        download_dir=self.download_dir)
        # our hash should be the same, i.e. not overwritten by simple-1.0 hash
        with open(dest_file, 'rb') as f:
            assert dist_path2_md5 == hashlib.md5(f.read()).hexdigest()

    def test_unpack_file_url_bad_hash(self, tmpdir, data,
                                      monkeypatch):
        """
        Test when the file url hash fragment is wrong
        """
        self.prep(tmpdir, data)
        self.dist_url.url = "%s#md5=bogus" % self.dist_url.url
        with pytest.raises(HashMismatch):
            unpack_file_url(self.dist_url,
                            self.build_dir,
                            hashes=Hashes({'md5': ['bogus']}))

    def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
                                               monkeypatch):
        """
        Test when existing download has different hash from the file url
        fragment
        """
        self.prep(tmpdir, data)

        # add in previous download (copy simple-2.0 as simple-1.0 so it's wrong
        # hash)
        dest_file = os.path.join(self.download_dir, self.dist_file)
        copy(self.dist_path2, dest_file)

        with open(self.dist_path, 'rb') as f:
            dist_path_md5 = hashlib.md5(f.read()).hexdigest()
        with open(dest_file, 'rb') as f:
            dist_path2_md5 = hashlib.md5(f.read()).hexdigest()

        assert dist_path_md5 != dist_path2_md5

        self.dist_url.url = "%s#md5=%s" % (
            self.dist_url.url,
            dist_path_md5
        )
        unpack_file_url(self.dist_url, self.build_dir,
                        download_dir=self.download_dir,
                        hashes=Hashes({'md5': [dist_path_md5]}))

        # confirm hash is for simple1-1.0
        # the previous bad download has been removed
        with open(dest_file, 'rb') as f:
            assert hashlib.md5(f.read()).hexdigest() == dist_path_md5

    def test_unpack_file_url_thats_a_dir(self, tmpdir, data):
        self.prep(tmpdir, data)
        dist_path = data.packages.join("FSPkg")
        dist_url = Link(path_to_url(dist_path))
        unpack_file_url(dist_url, self.build_dir,
                        download_dir=self.download_dir)
        assert os.path.isdir(os.path.join(self.build_dir, 'fspkg'))


class TestSafeFileCache:
    """
    The no_perms test are useless on Windows since SafeFileCache uses
    pip._internal.utils.filesystem.check_path_owner which is based on
    os.geteuid which is absent on Windows.
    """

    def test_cache_roundtrip(self, cache_tmpdir):

        cache = SafeFileCache(cache_tmpdir)
        assert cache.get("test key") is None
        cache.set("test key", b"a test string")
        assert cache.get("test key") == b"a test string"
        cache.delete("test key")
        assert cache.get("test key") is None

    @pytest.mark.skipif("sys.platform == 'win32'")
    def test_safe_get_no_perms(self, cache_tmpdir, monkeypatch):
        os.chmod(cache_tmpdir, 000)

        monkeypatch.setattr(os.path, "exists", lambda x: True)

        cache = SafeFileCache(cache_tmpdir)
        cache.get("foo")

    @pytest.mark.skipif("sys.platform == 'win32'")
    def test_safe_set_no_perms(self, cache_tmpdir):
        os.chmod(cache_tmpdir, 000)

        cache = SafeFileCache(cache_tmpdir)
        cache.set("foo", b"bar")

    @pytest.mark.skipif("sys.platform == 'win32'")
    def test_safe_delete_no_perms(self, cache_tmpdir):
        os.chmod(cache_tmpdir, 000)

        cache = SafeFileCache(cache_tmpdir)
        cache.delete("foo")


class TestPipSession:

    def test_cache_defaults_off(self):
        session = PipSession()

        assert not hasattr(session.adapters["http://"], "cache")
        assert not hasattr(session.adapters["https://"], "cache")

    def test_cache_is_enabled(self, tmpdir):
        session = PipSession(cache=tmpdir.join("test-cache"))

        assert hasattr(session.adapters["https://"], "cache")

        assert (session.adapters["https://"].cache.directory ==
                tmpdir.join("test-cache"))

    def test_http_cache_is_not_enabled(self, tmpdir):
        session = PipSession(cache=tmpdir.join("test-cache"))

        assert not hasattr(session.adapters["http://"], "cache")

    def test_insecure_host_cache_is_not_enabled(self, tmpdir):
        session = PipSession(
            cache=tmpdir.join("test-cache"),
            insecure_hosts=["example.com"],
        )

        assert not hasattr(session.adapters["https://example.com/"], "cache")