Begin moving WheelCache to pip.cache

This commit is contained in:
Pradyun S. Gedam 2017-06-15 18:34:05 +05:30
parent 28384267f6
commit fe5b7c9c9a
8 changed files with 139 additions and 117 deletions

71
pip/cache.py Normal file
View File

@ -0,0 +1,71 @@
"""Cache Management
"""
import os
import errno
import logging
from pip._vendor.packaging.utils import canonicalize_name
import pip.index
from pip.compat import expanduser
from pip.download import path_to_url
from pip.wheel import Wheel, InvalidWheelFilename
from pip.utils.cache import get_cache_path_for_link
logger = logging.getLogger(__name__)
class WheelCache(object):
    """Looks up previously built wheels stored under a cache directory."""

    def __init__(self, cache_dir, format_control):
        """Set up the wheel cache.

        :param cache_dir: The root of the cache. A falsy value is stored
            as None, in which case lookups return the link unchanged.
        :param format_control: A pip.index.FormatControl object to limit
            binaries being read from the cache.
        """
        if cache_dir:
            self._cache_dir = expanduser(cache_dir)
        else:
            self._cache_dir = None
        self._format_control = format_control

    def cached_wheel(self, link, package_name):
        """Return a link to a cached wheel for ``link`` if one is usable.

        The original ``link`` is handed back untouched whenever the cache
        cannot be consulted or holds nothing suitable.

        :param link: The link being installed from.
        :param package_name: Name of the package ``link`` belongs to.
        """
        # Each guard below means "the cache does not apply to this link".
        if not self._cache_dir:
            return link
        if not link:
            return link
        if link.is_wheel or not link.is_artifact:
            return link
        if not package_name:
            return link

        normalized = canonicalize_name(package_name)
        allowed = pip.index.fmt_ctl_formats(self._format_control, normalized)
        if "binary" not in allowed:
            return link

        wheel_dir = get_cache_path_for_link(self._cache_dir, link)
        try:
            filenames = os.listdir(wheel_dir)
        except OSError as exc:
            # A missing cache directory just means a cache miss; any
            # other OS error is propagated.
            if exc.errno not in (errno.ENOENT, errno.ENOTDIR):
                raise
            return link

        ranked = []
        for filename in filenames:
            try:
                parsed = Wheel(filename)
            except InvalidWheelFilename:
                continue
            # Unsupported wheels were built for a different python/arch/etc.
            if parsed.supported():
                ranked.append((parsed.support_index_min(), filename))

        if not ranked:
            return link

        # The smallest (support index, filename) pair is the one chosen.
        best_filename = min(ranked)[1]
        wheel_path = os.path.join(wheel_dir, best_filename)
        return pip.index.Link(path_to_url(wheel_path))

View File

@ -4,9 +4,9 @@ import sys
import pip
from pip.basecommand import Command
from pip.cache import WheelCache
from pip.compat import stdlib_pkgs
from pip.operations.freeze import freeze
from pip.wheel import WheelCache
DEV_PKGS = {'pip', 'setuptools', 'distribute', 'wheel'}

View File

@ -8,6 +8,7 @@ import shutil
from pip import cmdoptions
from pip.basecommand import RequirementCommand
from pip.cache import WheelCache
from pip.exceptions import (
CommandError, InstallationError, PreviousBuildDirError
)
@ -17,7 +18,7 @@ from pip.status_codes import ERROR
from pip.utils import ensure_dir, get_installed_version
from pip.utils.filesystem import check_path_owner
from pip.utils.temp_dir import TempDirectory
from pip.wheel import WheelBuilder, WheelCache
from pip.wheel import WheelBuilder
try:
import wheel

View File

@ -6,11 +6,12 @@ import os
from pip import cmdoptions
from pip.basecommand import RequirementCommand
from pip.cache import WheelCache
from pip.exceptions import CommandError, PreviousBuildDirError
from pip.req import RequirementSet
from pip.utils import import_or_raise
from pip.utils.temp_dir import TempDirectory
from pip.wheel import WheelBuilder, WheelCache
from pip.wheel import WheelBuilder
logger = logging.getLogger(__name__)

46
pip/utils/cache.py Normal file
View File

@ -0,0 +1,46 @@
"""Helpers for caches
"""
import os.path
import hashlib
def get_cache_path_for_link(cache_dir, link):
    """
    Compute the directory in which wheels built from ``link`` are cached.

    One sdist can yield several wheels, so each link gets a directory of
    its own; cache lookups then inspect that directory for hits.

    Only things with plausible version numbers are inserted into the
    cache, to avoid contaminating it with builds that were not unique.
    For example ./package might be installed dozens of times and always
    build version 0.0; caching that wheel would mean reusing it even
    after the source has been edited.

    :param cache_dir: The cache_dir being used by pip.
    :param link: The link of the sdist for which this will cache wheels.
    """
    # The cache key is a URL built for the purpose rather than the link's
    # full URL: the fragment may carry other items we do not care about,
    # so only the hash (when present) is re-attached.
    pieces = [link.url_without_fragment]
    if not (link.hash_name is None or link.hash is None):
        pieces.append("=".join([link.hash_name, link.hash]))

    # sha224 gives a shorter digest than sha256; the reduced strength
    # makes no practical difference for a cache key.
    digest = hashlib.sha224("#".join(pieces).encode()).hexdigest()

    # Nest a few directory levels so that no single directory collects a
    # huge number of entries, which some filesystems handle poorly.
    nested = (digest[0:2], digest[2:4], digest[4:6], digest[6:])

    return os.path.join(cache_dir, "wheels", *nested)

View File

@ -6,10 +6,8 @@ from __future__ import absolute_import
import compileall
import copy
import csv
import errno
import hashlib
import logging
import os
import os.path
import re
import shutil
@ -25,15 +23,15 @@ from pip._vendor.distlib.scripts import ScriptMaker
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.six import StringIO
import pip
import pip.index
from pip import pep425tags
from pip.compat import expanduser
from pip.download import path_to_url, unpack_url
from pip.exceptions import (
InstallationError, InvalidWheelFilename, UnsupportedWheel
)
from pip.locations import PIP_DELETE_MARKER_FILENAME, distutils_scheme
from pip.utils import call_subprocess, captured_stdout, ensure_dir, read_chunks
from pip.utils.cache import get_cache_path_for_link
from pip.utils.logging import indent_log
from pip.utils.setuptools_build import SETUPTOOLS_SHIM
from pip.utils.temp_dir import TempDirectory
@ -47,104 +45,6 @@ VERSION_COMPATIBLE = (1, 0)
logger = logging.getLogger(__name__)
class WheelCache(object):
    """Holds the settings needed to look up cached wheels."""

    def __init__(self, cache_dir, format_control):
        """Set up the wheel cache.

        :param cache_dir: The root of the cache. A falsy value is stored
            as None.
        :param format_control: A pip.index.FormatControl object to limit
            binaries being read from the cache.
        """
        if cache_dir:
            self._cache_dir = expanduser(cache_dir)
        else:
            self._cache_dir = None
        self._format_control = format_control

    def cached_wheel(self, link, package_name):
        """Delegate to the module-level ``cached_wheel`` function.

        The stored cache directory and format control are passed along
        with ``link`` and ``package_name``.
        """
        lookup_args = (
            self._cache_dir,
            link,
            self._format_control,
            package_name,
        )
        return cached_wheel(*lookup_args)
def _cache_for_link(cache_dir, link):
    """
    Compute the directory in which wheels built from ``link`` are cached.

    One sdist can yield several wheels, so each link gets a directory of
    its own; cache lookups then inspect that directory for hits.

    Only things with plausible version numbers are inserted into the
    cache, to avoid contaminating it with builds that were not unique.
    For example ./package might be installed dozens of times and always
    build version 0.0; caching that wheel would mean reusing it even
    after the source has been edited.

    :param cache_dir: The cache_dir being used by pip.
    :param link: The link of the sdist for which this will cache wheels.
    """
    # Build a dedicated URL for the cache key instead of using the link's
    # full URL: the fragment may hold other items that are irrelevant
    # here, so only the hash (when present) is re-attached.
    url_bits = [link.url_without_fragment]
    if not (link.hash_name is None or link.hash is None):
        url_bits.append("=".join([link.hash_name, link.hash]))
    cache_key = "#".join(url_bits)

    # sha224 gives a shorter digest than sha256; the reduced strength
    # makes no practical difference for a cache key.
    hex_digest = hashlib.sha224(cache_key.encode()).hexdigest()

    # Spread entries over nested directories so that no single level
    # accumulates too many sub directories for the filesystem.
    levels = [hex_digest[i:i + 2] for i in (0, 2, 4)]
    levels.append(hex_digest[6:])

    return os.path.join(cache_dir, "wheels", *levels)
def cached_wheel(cache_dir, link, format_control, package_name):
    """Return a link to a cached wheel for ``link`` if one is usable.

    The original ``link`` is handed back untouched whenever the cache
    cannot be consulted or holds nothing suitable.

    :param cache_dir: The root of the cache; falsy disables the lookup.
    :param link: The link being installed from.
    :param format_control: Passed to pip.index.fmt_ctl_formats to decide
        whether binaries are allowed for this package.
    :param package_name: Name of the package ``link`` belongs to.
    """
    # Short-circuit evaluation keeps the checks in their original order,
    # so ``link`` attributes are only touched once ``link`` is truthy.
    cache_not_applicable = (
        not cache_dir or
        not link or
        link.is_wheel or
        not link.is_artifact or
        not package_name
    )
    if cache_not_applicable:
        return link

    normalized = canonicalize_name(package_name)
    if "binary" not in pip.index.fmt_ctl_formats(format_control, normalized):
        return link

    wheel_dir = _cache_for_link(cache_dir, link)
    try:
        filenames = os.listdir(wheel_dir)
    except OSError as exc:
        # A missing cache directory just means a cache miss; any other
        # OS error is propagated.
        if exc.errno not in (errno.ENOENT, errno.ENOTDIR):
            raise
        return link

    ranked = []
    for filename in filenames:
        try:
            parsed = Wheel(filename)
        except InvalidWheelFilename:
            continue
        # Unsupported wheels were built for a different python/arch/etc.
        if parsed.supported():
            ranked.append((parsed.support_index_min(), filename))

    if not ranked:
        return link

    # The smallest (support index, filename) pair is the one chosen.
    best_filename = min(ranked)[1]
    return pip.index.Link(path_to_url(os.path.join(wheel_dir, best_filename)))
def rehash(path, algo='sha256', blocksize=1 << 20):
"""Return (hash, length) for path using hashlib.new(algo)"""
h = hashlib.new(algo)
@ -872,7 +772,9 @@ class WheelBuilder(object):
python_tag = None
if autobuilding:
python_tag = pep425tags.implementation_tag
output_dir = _cache_for_link(self._cache_root, req.link)
output_dir = get_cache_path_for_link(
self._cache_root, req.link
)
try:
ensure_dir(output_dir)
except OSError as e:

12
tests/unit/test_cache.py Normal file
View File

@ -0,0 +1,12 @@
from pip.cache import WheelCache
class TestWheelCache:
    """Unit tests for how WheelCache stores its cache directory."""

    def test_expands_path(self):
        """A ``~`` prefixed cache directory is stored in expanded form."""
        # Imported locally because the only module-level import here is
        # WheelCache; a bare ``expanduser`` would raise NameError.
        from pip.compat import expanduser

        wc = WheelCache("~/.foo/", None)
        assert wc._cache_dir == expanduser("~/.foo/")

    def test_falsey_path_none(self):
        """A falsy cache directory is stored as None."""
        wc = WheelCache(False, None)
        assert wc._cache_dir is None

View File

@ -366,14 +366,3 @@ class TestWheelBuilder(object):
wb.build()
assert "due to already being wheel" in caplog.text
assert mock_build_one.mock_calls == []
class TestWheelCache:
    """Checks on the cache directory stored by wheel.WheelCache."""

    def test_expands_path(self):
        """A ``~`` prefixed cache directory is stored in expanded form."""
        cache = wheel.WheelCache("~/.foo/", None)
        stored_dir = cache._cache_dir
        assert stored_dir == expanduser("~/.foo/")

    def test_falsey_path_none(self):
        """A falsy cache directory is stored as None."""
        cache = wheel.WheelCache(False, None)
        stored_dir = cache._cache_dir
        assert stored_dir is None