Refactor and make mypy happy

This commit is contained in:
Nguyễn Gia Phong 2020-06-19 13:59:00 +07:00
parent 4d33136241
commit 17a2984587
1 changed files with 175 additions and 75 deletions

250
lazip.py
View File

@ -14,17 +14,19 @@
# GNU Lesser General Public License for more details. # GNU Lesser General Public License for more details.
# #
# You should have received a copy of the GNU Lesser General Public License # You should have received a copy of the GNU Lesser General Public License
# along with palace. If not, see <https://www.gnu.org/licenses/>. # along with lazip. If not, see <https://www.gnu.org/licenses/>.
"""Lazy ZIP over HTTP""" """Lazy ZIP over HTTP"""
__version__ = '0.0.2' __version__ = '0.0.3'
__all__ = ['Lazip'] __all__ = ['Filazy', 'Lazip']
from abc import abstractmethod
from bisect import bisect_left, bisect_right from bisect import bisect_left, bisect_right
from contextlib import contextmanager from contextlib import contextmanager
from io import UnsupportedOperation
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from typing import Any, Dict, Iterator, List, Optional, Tuple from typing import IO, Dict, Iterator, List, Optional, Tuple
from zipfile import BadZipFile, ZipFile from zipfile import BadZipFile, ZipFile
from requests import Session from requests import Session
@ -41,45 +43,166 @@ def init_range(stop: int, size: int) -> Iterator[Tuple[int, int]]:
yield 0, stop-1 yield 0, stop-1
class Lazip: class ReadOnlyBinaryIOWrapper(IO[bytes]):
"""File-like object mapped to a ZIP file over HTTP. """Wrapper for a read-only binary I/O."""
This uses HTTP range requests to lazily fetch the file's content, file: IO[bytes]
which is supposed to be fed to ZipFile. length: int
"""
def __init__(self, session: Session, url: str, @property
chunk_size: int = CONTENT_CHUNK_SIZE) -> None: def mode(self) -> str:
head = session.head(url) """Opening mode, which is always w+b."""
head.raise_for_status() return self.file.mode
assert head.status_code == 200
self.session, self.url, self.chunk_size = session, url, chunk_size
self.length = int(head.headers['Content-Length'])
self.file = NamedTemporaryFile()
self.file.truncate(self.length)
self.left: List[int] = []
self.right: List[int] = []
self.check_zip('bytes' in head.headers.get('Accept-Ranges', 'none'))
def __enter__(self) -> 'Lazip':
self.file.__enter__()
return self
def __exit__(self, *exc: Any) -> Optional[bool]:
return self.file.__exit__(*exc)
@property @property
def name(self) -> str: def name(self) -> str:
"""File name.""" """File name."""
return self.file.name return self.file.name
def close(self) -> None:
"""Close the file."""
self.file.close()
@property
def closed(self) -> bool:
"""Whether the file is closed."""
return self.file.closed
def fileno(self) -> int:
"""Return the underlying file descriptor (an integer)."""
return self.file.fileno()
def flush(self) -> None:
"""Flush the underlying file."""
self.file.flush()
def isatty(self) -> bool:
"""Return whether the underlying file is connected to a TTY."""
return self.file.isatty()
def read(self, size: int = -1) -> bytes:
"""Read up to size bytes from the object and return them.
As a convenience, if size is unspecified or -1,
all bytes until EOF are returned. Fewer than
size bytes may be returned if EOF is reached.
"""
start = self.tell()
stop = start + size if 0 <= size <= self.length-start else self.length
self.ensure(start, stop-1)
return self.file.read(size)
def readable(self) -> bool:
"""Return whether the underlying file is readable."""
return self.file.readable()
def readline(self, limit):
raise UnsupportedOperation
def readlines(self, hint):
raise UnsupportedOperation
def seek(self, offset: int, whence: int = 0) -> int:
"""Change stream position and return the new absolute position.
Seek to offset relative to the position indicated by whence:
* 0: Start of stream (the default). offset should be >= 0;
* 1: Current position - offset may be negative;
* 2: End of stream - offset is usually negative.
"""
return self.file.seek(offset, whence)
def seekable(self) -> bool: def seekable(self) -> bool:
"""Return whether random access is supported, which is True.""" """Return whether random access is supported, which is True."""
return True return self.file.seekable()
def tell(self) -> int:
"""Return the current position."""
return self.file.tell()
def truncate(self, size: Optional[int] = None) -> int:
"""Resize the stream to the given size in bytes.
If size is unspecified resize to the current position.
The current stream position isn't changed.
Return the new file size.
"""
return self.file.truncate(size)
def writable(self) -> bool:
"""Return False."""
return False
def write(self, s):
raise UnsupportedOperation
def writelines(self, lines):
raise UnsupportedOperation
def __next__(self):
raise UnsupportedOperation
def __iter__(self):
raise UnsupportedOperation
def __enter__(self) -> 'ReadOnlyBinaryIOWrapper':
self.file.__enter__()
return self
def __exit__(self, *exc) -> Optional[bool]:
return self.file.__exit__(*exc)
@abstractmethod
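def ensure(self, start: int, end: int) -> None:
"""Ensure the data from start to end (inclusive) is in the underlying file.
This method is called by read before reading from the underlying file.
If it seeks, it must restore the original stream position before returning.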
"""
class Filazy(ReadOnlyBinaryIOWrapper):
"""Read-only file-like object mapped to a file over HTTP.
This uses HTTP range requests to lazily fetch the file's content.
At the end of initialization, __post_init__ will be called.
Parameters:
session (Session): Requests session
url (str): HTTP URL to the file
chunk_size (int): Download chunk size
Attributes:
session (Session): Requests session
url (str): HTTP URL to the file
chunk_size (int): Download chunk size
left (List[int]): Left endpoints of downloaded intervals
right (List[int]): Right endpoints of downloaded intervals
accept_ranges (bool): Whether range requests are supported
"""
def __init__(self, session: Session, url: str,
chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
response = session.head(url)
response.raise_for_status()
assert response.status_code == 200
headers = response.headers
self.session, self.url, self.chunk_size = session, url, chunk_size
self.length = int(headers['Content-Length'])
self.file = NamedTemporaryFile()
self.truncate(self.length)
self.left: List[int] = []
self.right: List[int] = []
self.accept_ranges = 'bytes' in headers.get('Accept-Ranges', 'none')
with self.stay(): self.__post_init__()
def __post_init__(self) -> None:
pass
@contextmanager @contextmanager
def stay(self) -> Iterator[None]: def stay(self) -> Iterator[None]:
"""Return a context manager keeping the position. """Return a context manager that keeps the stream position.
At the end of the block, seek back to original position. At the end of the block, seek back to original position.
""" """
@ -89,23 +212,6 @@ class Lazip:
finally: finally:
self.seek(pos) self.seek(pos)
def check_zip(self, range_request: bool) -> None:
"""Check and download until the file is a valid ZIP."""
if not range_request:
end = self.length - 1
self.download(0, end)
self.left, self.right = [0], [end]
return
for start, end in init_range(self.length, self.chunk_size):
self.download(start, end)
with self.stay():
try:
ZipFile(self) # type: ignore
except BadZipFile:
pass
else:
break
def stream_response(self, start: int, end: int, def stream_response(self, start: int, end: int,
base_headers: Dict[str, str] = {}) -> Response: base_headers: Dict[str, str] = {}) -> Response:
"""Return HTTP response to a range request from start to end.""" """Return HTTP response to a range request from start to end."""
@ -116,7 +222,7 @@ class Lazip:
left: int, right: int) -> Iterator[Tuple[int, int]]: left: int, right: int) -> Iterator[Tuple[int, int]]:
"""Return an iterator of intervals to be fetched. """Return an iterator of intervals to be fetched.
Args: Parameters:
start (int): Start of needed interval start (int): Start of needed interval
end (int): End of needed interval end (int): End of needed interval
left (int): Index of first overlapping downloaded data left (int): Index of first overlapping downloaded data
@ -131,7 +237,7 @@ class Lazip:
if i <= end: yield i, end if i <= end: yield i, end
self.left[left:right], self.right[left:right] = [start], [end] self.left[left:right], self.right[left:right] = [start], [end]
def download(self, start: int, end: int) -> None: def ensure(self, start: int, end: int) -> None:
"""Download bytes from start to end inclusively.""" """Download bytes from start to end inclusively."""
with self.stay(): with self.stay():
i, j = bisect_left(self.right, start), bisect_right(self.left, end) i, j = bisect_left(self.right, start), bisect_right(self.left, end)
@ -143,32 +249,26 @@ class Lazip:
decode_content=False): decode_content=False):
self.file.write(chunk) self.file.write(chunk)
def read(self, size: int = -1) -> bytes:
"""Read up to size bytes from the object and return them.
As a convenience, if size is unspecified or -1, class Lazip(Filazy):
all bytes until EOF are returned. Fewer than """Read-only file-like object mapped to a ZIP file over HTTP.
size bytes may be returned if EOF is reached.
"""
start = self.tell()
stop = start + size if 0 <= size <= self.length-start else self.length
self.download(start, stop-1)
return self.file.read(size)
def seek(self, offset: int, whence: int = 0) -> int: This uses HTTP range requests to lazily fetch the file's content,
"""Change stream position and return the new absolute position. which is supposed to be fed to ZipFile.
"""
Seek to offset relative position indicated by whence: def __post_init__(self) -> None:
* 0: Start of stream (the default). pos should be >= 0; """Check and download until the file is a valid ZIP."""
* 1: Current position - pos may be negative; if not self.accept_ranges:
* 2: End of stream - pos usually negative. end = self.length - 1
""" self.ensure(0, end)
return self.file.seek(offset, whence) self.left, self.right = [0], [end]
return
def tell(self) -> int: for start, end in init_range(self.length, self.chunk_size):
"""Return the current position.""" try:
return self.file.tell() try:
ZipFile(self)
def close(self) -> None: except BadZipFile:
"""Close the file.""" pass
self.file.close() else:
break