Fix parallel pip cache downloads causing crash (#12364)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
This commit is contained in:
Itamar Turner-Trauring 2023-10-18 18:14:22 -04:00 committed by GitHub
parent 8a0f77c171
commit 5e7cc16c3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 4 deletions

1
news/12361.bugfix.rst Normal file
View File

@ -0,0 +1 @@
Fix bug where installing the same package at the same time with multiple pip processes could fail.

View File

@ -33,6 +33,18 @@ class SafeFileCache(SeparateBodyBaseCache):
"""
A file based cache which is safe to use even when the target directory may
not be accessible or writable.
There is a race condition when two processes try to write and/or read the
same entry at the same time, since each entry consists of two separate
files (https://github.com/psf/cachecontrol/issues/324). We therefore have
additional logic that makes sure that both files are present before
returning an entry; this fixes the read side of the race condition.
For the write side, we assume that the server will only ever return the
same data for the same URL, which ought to be the case for files pip is
downloading. PyPI does not have a mechanism to swap out a wheel for
another wheel, for example. If this assumption is not true, the
CacheControl issue will need to be fixed.
"""
def __init__(self, directory: str) -> None:
@ -49,9 +61,13 @@ class SafeFileCache(SeparateBodyBaseCache):
return os.path.join(self.directory, *parts)
def get(self, key: str) -> Optional[bytes]:
path = self._get_cache_path(key)
# The cache entry is only valid if both metadata and body exist.
metadata_path = self._get_cache_path(key)
body_path = metadata_path + ".body"
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
return None
with suppressed_cache_errors():
with open(path, "rb") as f:
with open(metadata_path, "rb") as f:
return f.read()
def _write(self, path: str, data: bytes) -> None:
@ -77,9 +93,13 @@ class SafeFileCache(SeparateBodyBaseCache):
os.remove(path + ".body")
def get_body(self, key: str) -> Optional[BinaryIO]:
path = self._get_cache_path(key) + ".body"
# The cache entry is only valid if both metadata and body exist.
metadata_path = self._get_cache_path(key)
body_path = metadata_path + ".body"
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
return None
with suppressed_cache_errors():
return open(path, "rb")
return open(body_path, "rb")
def set_body(self, key: str, body: bytes) -> None:
path = self._get_cache_path(key) + ".body"

View File

@ -27,6 +27,11 @@ class TestSafeFileCache:
cache = SafeFileCache(os.fspath(cache_tmpdir))
assert cache.get("test key") is None
cache.set("test key", b"a test string")
# Body hasn't been stored yet, so the entry isn't valid yet
assert cache.get("test key") is None
# With a body, the cache entry is valid:
cache.set_body("test key", b"body")
assert cache.get("test key") == b"a test string"
cache.delete("test key")
assert cache.get("test key") is None
@ -35,6 +40,12 @@ class TestSafeFileCache:
cache = SafeFileCache(os.fspath(cache_tmpdir))
assert cache.get_body("test key") is None
cache.set_body("test key", b"a test string")
# Metadata isn't available, so the entry isn't valid yet (this
# shouldn't happen, but just in case)
assert cache.get_body("test key") is None
# With metadata, the cache entry is valid:
cache.set("test key", b"metadata")
body = cache.get_body("test key")
assert body is not None
with body: