This commit is contained in:
Joey Ballentine 2023-11-29 11:50:53 -07:00 committed by GitHub
commit 658059a6d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 116 additions and 2 deletions

View File

@ -855,6 +855,64 @@ We are using `freeze`_ here which outputs installed packages in requirements for
reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze'])
Since pip's progress bar gets hidden when running in a subprocess, you can use
the ``--progress-bar=json`` option for easily parsable progress information::
import json
import subprocess
import sys

# Marker pip puts at the start of every machine-readable progress line.
PROGRESS_PREFIX = "Progress:"

python_path = sys.executable
command = [
    python_path,
    "-m",
    "pip",
    "install",
    "numpy",
    "opencv-python",
    "scipy",
    "--progress-bar=json",
]

# stderr is merged into stdout so a single pipe carries everything pip prints.
# The context manager closes the pipe and waits for pip to exit.
with subprocess.Popen(
    command,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
) as process:
    # Iterating the pipe yields one line at a time until pip closes it.
    for raw_line in process.stdout:
        # Be lenient when decoding: pip's regular output is not guaranteed
        # to be valid UTF-8 on every platform.
        line = raw_line.decode("utf-8", errors="replace").strip()
        # Only lines that *start* with the marker are progress reports; any
        # other line that merely mentions "Progress:" is ordinary output.
        if not line.startswith(PROGRESS_PREFIX):
            continue
        try:
            parsed = json.loads(line[len(PROGRESS_PREFIX):])
        except json.JSONDecodeError:
            # Not a well-formed progress payload; ignore it and keep reading.
            continue
        current, total = parsed["current"], parsed["total"]
        # ``total`` is null when the download size is unknown.
        if total is not None and total > 0:
            percent = current / total * 100
            print(f"Download at: {percent}%")
While downloading, pip itself writes lines of the following form to its output as it
processes download chunks (updates are rate-limited, so not every chunk produces a line):
``Progress: {"current": ######, "total": ######}``
Here, ``Progress:`` indicates it is download progress. The rest of the message is JSON
with the ``current`` number of bytes downloaded and ``total`` .whl size as key/value pairs.
Note: ``total`` may be null if the size of the package cannot be determined. In that case
it stays null for the entire duration of that download; otherwise it is never null.
This can be used to build your own progress bar, or report progress in other ways.
In the code example above, we just print the current parsed status of the download.
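This feature is intended for use when pip is invoked in a subprocess: if pip's standard
output is a terminal, pip logs a warning and falls back to the normal progress bar.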
NOTE: For this to work properly, you may need to run python with the ``-u`` flag
to ensure that the output is unbuffered.
NOTE: Relying on the exact form of pip's output is unsupported, and so should not be used in
production applications unless you are willing to adapt when pip's output changes.
If you don't want to use pip's command line functionality, but are rather
trying to implement code that works with Python packages, their metadata, or
PyPI, then you should consider other, supported, packages that offer this type

1
news/11508.feature.rst Normal file
View File

@ -0,0 +1 @@
Add a new ``json`` choice for ``--progress-bar`` that emits machine-readable (JSON) download progress.

View File

@ -226,9 +226,12 @@ progress_bar: Callable[..., Option] = partial(
"--progress-bar",
dest="progress_bar",
type="choice",
choices=["on", "off"],
choices=["on", "off", "json"],
default="on",
help="Specify whether the progress bar should be used [on, off] (default: on)",
help=(
"Specify whether the progress bar should be used"
" [on, off, json] (default: on)"
),
)
log: Callable[..., Option] = partial(

View File

@ -1,4 +1,7 @@
import functools
import json
import logging
import sys
from typing import Callable, Generator, Iterable, Iterator, Optional, Tuple
from pip._vendor.rich.progress import (
@ -14,8 +17,11 @@ from pip._vendor.rich.progress import (
TransferSpeedColumn,
)
from pip._internal.cli.spinners import RateLimiter
from pip._internal.utils.logging import get_indentation
logger = logging.getLogger(__name__)
DownloadProgressRenderer = Callable[[Iterable[bytes]], Iterator[bytes]]
@ -55,6 +61,37 @@ def _rich_progress_bar(
progress.update(task_id, advance=len(chunk))
class _MachineReadableProgress:
    """Iterator wrapper that reports download progress as JSON lines on stdout.

    Each chunk pulled from the wrapped iterable is passed through unchanged.
    As chunks go by, a line of the form
    ``Progress: {"current": <bytes so far>, "total": <size or null>}``
    is written to ``sys.stdout`` and flushed. Updates are throttled by a
    ``RateLimiter``, except that the update which completes the download
    (``current == total``) is always written, so consumers can rely on
    seeing the final state when the size is known.

    :param iterable: The chunks of the download being consumed.
    :param size: Total number of bytes expected, or ``None`` if unknown.
    :param min_update_interval_seconds: Interval passed to ``RateLimiter``
        to throttle how often progress lines are written.
    """

    def __init__(
        self,
        iterable: Iterable[bytes],
        size: Optional[int],
        # Copying the default from spinners.py
        min_update_interval_seconds: float = 0.125,
    ) -> None:
        self._iterable = iter(iterable)
        self._size = size
        self._progress = 0
        self._rate_limiter = RateLimiter(min_update_interval_seconds)

    def __iter__(self) -> Iterator[bytes]:
        return self

    def __next__(self) -> bytes:
        """Return the next chunk, writing a progress line when one is due.

        :raises StopIteration: When the wrapped iterable is exhausted.
        """
        chunk = next(self._iterable)
        self._progress += len(chunk)

        # Never let throttling swallow the update that completes the
        # download; otherwise a consumer might never see current == total.
        finished = self._size is not None and self._progress == self._size
        if finished or self._rate_limiter.ready():
            progress_info = {
                "current": self._progress,
                "total": self._size,
            }
            sys.stdout.write(f"Progress: {json.dumps(progress_info)}\n")
            # Flush so a parent process reading the pipe sees it promptly.
            sys.stdout.flush()
            self._rate_limiter.reset()
        return chunk
def get_download_progress_renderer(
*, bar_type: str, size: Optional[int] = None
) -> DownloadProgressRenderer:
@ -64,5 +101,20 @@ def get_download_progress_renderer(
"""
if bar_type == "on":
return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size)
elif bar_type == "json":
# We don't want regular users to use this progress_bar type
# so only use if not a TTY
if sys.stdout.isatty():
logger.warning(
"""Using json progress bar type outside a subprocess is not recommended.
Using normal progress bar instead."""
)
return functools.partial(_rich_progress_bar, bar_type="on", size=size)
# Mimic log level
if logger.getEffectiveLevel() <= logging.INFO:
return functools.partial(_MachineReadableProgress, size=size)
return iter
else:
return iter # no-op, when passed an iterator