diff --git a/news/12361.bugfix.rst b/news/12361.bugfix.rst new file mode 100644 index 000000000..59575e80b --- /dev/null +++ b/news/12361.bugfix.rst @@ -0,0 +1 @@ +Fix bug where installing the same package at the same time with multiple pip processes could fail. diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index a4d136205..4d0fb545d 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -33,6 +33,18 @@ class SafeFileCache(SeparateBodyBaseCache): """ A file based cache which is safe to use even when the target directory may not be accessible or writable. + + There is a race condition when two processes try to write and/or read the + same entry at the same time, since each entry consists of two separate + files (https://github.com/psf/cachecontrol/issues/324). We therefore have + additional logic that makes sure that both files to be present before + returning an entry; this fixes the read side of the race condition. + + For the write side, we assume that the server will only ever return the + same data for the same URL, which ought to be the case for files pip is + downloading. PyPI does not have a mechanism to swap out a wheel for + another wheel, for example. If this assumption is not true, the + CacheControl issue will need to be fixed. """ def __init__(self, directory: str) -> None: @@ -49,9 +61,13 @@ class SafeFileCache(SeparateBodyBaseCache): return os.path.join(self.directory, *parts) def get(self, key: str) -> Optional[bytes]: - path = self._get_cache_path(key) + # The cache entry is only valid if both metadata and body exist. + metadata_path = self._get_cache_path(key) + body_path = metadata_path + ".body" + if not (os.path.exists(metadata_path) and os.path.exists(body_path)): + return None with suppressed_cache_errors(): - with open(path, "rb") as f: + with open(metadata_path, "rb") as f: return f.read() def _write(self, path: str, data: bytes) -> None: @@ -77,9 +93,13 @@ class SafeFileCache(SeparateBodyBaseCache): os.remove(path + ".body") def get_body(self, key: str) -> Optional[BinaryIO]: - path = self._get_cache_path(key) + ".body" + # The cache entry is only valid if both metadata and body exist. + metadata_path = self._get_cache_path(key) + body_path = metadata_path + ".body" + if not (os.path.exists(metadata_path) and os.path.exists(body_path)): + return None with suppressed_cache_errors(): - return open(path, "rb") + return open(body_path, "rb") def set_body(self, key: str, body: bytes) -> None: path = self._get_cache_path(key) + ".body" diff --git a/tests/unit/test_network_cache.py b/tests/unit/test_network_cache.py index aa849f3b0..6a816b300 100644 --- a/tests/unit/test_network_cache.py +++ b/tests/unit/test_network_cache.py @@ -27,6 +27,11 @@ class TestSafeFileCache: cache = SafeFileCache(os.fspath(cache_tmpdir)) assert cache.get("test key") is None cache.set("test key", b"a test string") + # Body hasn't been stored yet, so the entry isn't valid yet + assert cache.get("test key") is None + + # With a body, the cache entry is valid: + cache.set_body("test key", b"body") assert cache.get("test key") == b"a test string" cache.delete("test key") assert cache.get("test key") is None @@ -35,6 +40,12 @@ class TestSafeFileCache: cache = SafeFileCache(os.fspath(cache_tmpdir)) assert cache.get_body("test key") is None cache.set_body("test key", b"a test string") + # Metadata isn't available, so the entry isn't valid yet (this + # shouldn't happen, but just in case) + assert cache.get_body("test key") is None + + # With metadata, the cache entry is valid: + cache.set("test key", b"metadata") body = cache.get_body("test key") assert body is not None with body: