Refactor and make mypy happy
This commit is contained in:
parent
4d33136241
commit
17a2984587
250
lazip.py
250
lazip.py
|
@ -14,17 +14,19 @@
|
||||||
# GNU Lesser General Public License for more details.
|
# GNU Lesser General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Lesser General Public License
|
# You should have received a copy of the GNU Lesser General Public License
|
||||||
# along with palace. If not, see <https://www.gnu.org/licenses/>.
|
# along with lazip. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
"""Lazy ZIP over HTTP"""
|
"""Lazy ZIP over HTTP"""
|
||||||
|
|
||||||
__version__ = '0.0.2'
|
__version__ = '0.0.3'
|
||||||
__all__ = ['Lazip']
|
__all__ = ['Filazy', 'Lazip']
|
||||||
|
|
||||||
|
from abc import abstractmethod
|
||||||
from bisect import bisect_left, bisect_right
|
from bisect import bisect_left, bisect_right
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from io import UnsupportedOperation
|
||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
from typing import IO, Dict, Iterator, List, Optional, Tuple
|
||||||
from zipfile import BadZipFile, ZipFile
|
from zipfile import BadZipFile, ZipFile
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
@ -41,45 +43,166 @@ def init_range(stop: int, size: int) -> Iterator[Tuple[int, int]]:
|
||||||
yield 0, stop-1
|
yield 0, stop-1
|
||||||
|
|
||||||
|
|
||||||
class Lazip:
|
class ReadOnlyBinaryIOWrapper(IO[bytes]):
|
||||||
"""File-like object mapped to a ZIP file over HTTP.
|
"""Wrapper for a read-only binary I/O."""
|
||||||
|
|
||||||
This uses HTTP range requests to lazily fetch the file's content,
|
file: IO[bytes]
|
||||||
which is supposed to be fed to ZipFile.
|
length: int
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, session: Session, url: str,
|
@property
|
||||||
chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
|
def mode(self) -> str:
|
||||||
head = session.head(url)
|
"""Opening mode, which is always w+b."""
|
||||||
head.raise_for_status()
|
return self.file.mode
|
||||||
assert head.status_code == 200
|
|
||||||
self.session, self.url, self.chunk_size = session, url, chunk_size
|
|
||||||
self.length = int(head.headers['Content-Length'])
|
|
||||||
self.file = NamedTemporaryFile()
|
|
||||||
self.file.truncate(self.length)
|
|
||||||
self.left: List[int] = []
|
|
||||||
self.right: List[int] = []
|
|
||||||
self.check_zip('bytes' in head.headers.get('Accept-Ranges', 'none'))
|
|
||||||
|
|
||||||
def __enter__(self) -> 'Lazip':
|
|
||||||
self.file.__enter__()
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *exc: Any) -> Optional[bool]:
|
|
||||||
return self.file.__exit__(*exc)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
"""File name."""
|
"""File name."""
|
||||||
return self.file.name
|
return self.file.name
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Close the file."""
|
||||||
|
self.file.close()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def closed(self) -> bool:
|
||||||
|
"""Whether the file is closed."""
|
||||||
|
return self.file.closed
|
||||||
|
|
||||||
|
def fileno(self) -> int:
|
||||||
|
"""Return the underlying file descriptor (an integer)."""
|
||||||
|
return self.file.fileno()
|
||||||
|
|
||||||
|
def flush(self) -> None:
|
||||||
|
"""Flush the underlying file."""
|
||||||
|
self.file.flush()
|
||||||
|
|
||||||
|
def isatty(self) -> bool:
|
||||||
|
"""Return False."""
|
||||||
|
return self.file.isatty()
|
||||||
|
|
||||||
|
def read(self, size: int = -1) -> bytes:
|
||||||
|
"""Read up to size bytes from the object and return them.
|
||||||
|
|
||||||
|
As a convenience, if size is unspecified or -1,
|
||||||
|
all bytes until EOF are returned. Fewer than
|
||||||
|
size bytes may be returned if EOF is reached.
|
||||||
|
"""
|
||||||
|
start = self.tell()
|
||||||
|
stop = start + size if 0 <= size <= self.length-start else self.length
|
||||||
|
self.ensure(start, stop-1)
|
||||||
|
return self.file.read(size)
|
||||||
|
|
||||||
|
def readable(self) -> bool:
|
||||||
|
"""Return True."""
|
||||||
|
return self.file.readable()
|
||||||
|
|
||||||
|
def readline(self, limit):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def readlines(self, hint):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def seek(self, offset: int, whence: int = 0) -> int:
|
||||||
|
"""Change stream position and return the new absolute position.
|
||||||
|
|
||||||
|
Seek to offset relative position indicated by whence:
|
||||||
|
* 0: Start of stream (the default). pos should be >= 0;
|
||||||
|
* 1: Current position - pos may be negative;
|
||||||
|
* 2: End of stream - pos usually negative.
|
||||||
|
"""
|
||||||
|
return self.file.seek(offset, whence)
|
||||||
|
|
||||||
def seekable(self) -> bool:
|
def seekable(self) -> bool:
|
||||||
"""Return whether random access is supported, which is True."""
|
"""Return whether random access is supported, which is True."""
|
||||||
return True
|
return self.file.seekable()
|
||||||
|
|
||||||
|
def tell(self) -> int:
|
||||||
|
"""Return the current position."""
|
||||||
|
return self.file.tell()
|
||||||
|
|
||||||
|
def truncate(self, size: Optional[int] = None) -> int:
|
||||||
|
"""Resize the stream to the given size in bytes.
|
||||||
|
|
||||||
|
If size is unspecified resize to the current position.
|
||||||
|
The current stream position isn't changed.
|
||||||
|
|
||||||
|
Return the new file size.
|
||||||
|
"""
|
||||||
|
return self.file.truncate(size)
|
||||||
|
|
||||||
|
def writable(self) -> bool:
|
||||||
|
"""Return False."""
|
||||||
|
return False
|
||||||
|
|
||||||
|
def write(self, s):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def writelines(self, lines):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def __next__(self):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
raise UnsupportedOperation
|
||||||
|
|
||||||
|
def __enter__(self) -> 'ReadOnlyBinaryIOWrapper':
|
||||||
|
self.file.__enter__()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *exc) -> Optional[bool]:
|
||||||
|
return self.file.__exit__(*exc)
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def ensure(self, start: int, end: int) -> None:
|
||||||
|
"""Ensure the data from start to end inclusively.
|
||||||
|
|
||||||
|
This method must return to the original position
|
||||||
|
if seek is called.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class Filazy(ReadOnlyBinaryIOWrapper):
|
||||||
|
"""Read-only file-like object mapped to a file over HTTP.
|
||||||
|
|
||||||
|
This uses HTTP range requests to lazily fetch the file's content.
|
||||||
|
At the end of initialization, __post_init__ will be called.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
session (Session): Requests session
|
||||||
|
url (str): HTTP URL to the file
|
||||||
|
chunk_size (int): Download chunk size
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
session (Session): Requests session
|
||||||
|
url (str): HTTP URL to the file
|
||||||
|
chunk_size (int): Download chunk size
|
||||||
|
left (List[int]): Left endpoints of downloaded intervals
|
||||||
|
right (List[int]): Right endpoints of downloaded intervals
|
||||||
|
accept_ranges (bool): Whether range requests are supported
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, session: Session, url: str,
|
||||||
|
chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
|
||||||
|
response = session.head(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
assert response.status_code == 200
|
||||||
|
headers = response.headers
|
||||||
|
self.session, self.url, self.chunk_size = session, url, chunk_size
|
||||||
|
self.length = int(headers['Content-Length'])
|
||||||
|
self.file = NamedTemporaryFile()
|
||||||
|
self.truncate(self.length)
|
||||||
|
self.left: List[int] = []
|
||||||
|
self.right: List[int] = []
|
||||||
|
self.accept_ranges = 'bytes' in headers.get('Accept-Ranges', 'none')
|
||||||
|
with self.stay(): self.__post_init__()
|
||||||
|
|
||||||
|
def __post_init__(self) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def stay(self) -> Iterator[None]:
|
def stay(self) -> Iterator[None]:
|
||||||
"""Return a context manager keeping the position.
|
"""Return a context manager that keeps the stream position.
|
||||||
|
|
||||||
At the end of the block, seek back to original position.
|
At the end of the block, seek back to original position.
|
||||||
"""
|
"""
|
||||||
|
@ -89,23 +212,6 @@ class Lazip:
|
||||||
finally:
|
finally:
|
||||||
self.seek(pos)
|
self.seek(pos)
|
||||||
|
|
||||||
def check_zip(self, range_request: bool) -> None:
|
|
||||||
"""Check and download until the file is a valid ZIP."""
|
|
||||||
if not range_request:
|
|
||||||
end = self.length - 1
|
|
||||||
self.download(0, end)
|
|
||||||
self.left, self.right = [0], [end]
|
|
||||||
return
|
|
||||||
for start, end in init_range(self.length, self.chunk_size):
|
|
||||||
self.download(start, end)
|
|
||||||
with self.stay():
|
|
||||||
try:
|
|
||||||
ZipFile(self) # type: ignore
|
|
||||||
except BadZipFile:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
|
|
||||||
def stream_response(self, start: int, end: int,
|
def stream_response(self, start: int, end: int,
|
||||||
base_headers: Dict[str, str] = {}) -> Response:
|
base_headers: Dict[str, str] = {}) -> Response:
|
||||||
"""Return HTTP response to a range request from start to end."""
|
"""Return HTTP response to a range request from start to end."""
|
||||||
|
@ -116,7 +222,7 @@ class Lazip:
|
||||||
left: int, right: int) -> Iterator[Tuple[int, int]]:
|
left: int, right: int) -> Iterator[Tuple[int, int]]:
|
||||||
"""Return an iterator of intervals to be fetched.
|
"""Return an iterator of intervals to be fetched.
|
||||||
|
|
||||||
Args:
|
Parameters:
|
||||||
start (int): Start of needed interval
|
start (int): Start of needed interval
|
||||||
end (int): End of needed interval
|
end (int): End of needed interval
|
||||||
left (int): Index of first overlapping downloaded data
|
left (int): Index of first overlapping downloaded data
|
||||||
|
@ -131,7 +237,7 @@ class Lazip:
|
||||||
if i <= end: yield i, end
|
if i <= end: yield i, end
|
||||||
self.left[left:right], self.right[left:right] = [start], [end]
|
self.left[left:right], self.right[left:right] = [start], [end]
|
||||||
|
|
||||||
def download(self, start: int, end: int) -> None:
|
def ensure(self, start: int, end: int) -> None:
|
||||||
"""Download bytes from start to end inclusively."""
|
"""Download bytes from start to end inclusively."""
|
||||||
with self.stay():
|
with self.stay():
|
||||||
i, j = bisect_left(self.right, start), bisect_right(self.left, end)
|
i, j = bisect_left(self.right, start), bisect_right(self.left, end)
|
||||||
|
@ -143,32 +249,26 @@ class Lazip:
|
||||||
decode_content=False):
|
decode_content=False):
|
||||||
self.file.write(chunk)
|
self.file.write(chunk)
|
||||||
|
|
||||||
def read(self, size: int = -1) -> bytes:
|
|
||||||
"""Read up to size bytes from the object and return them.
|
|
||||||
|
|
||||||
As a convenience, if size is unspecified or -1,
|
class Lazip(Filazy):
|
||||||
all bytes until EOF are returned. Fewer than
|
"""Read-only file-like object mapped to a ZIP file over HTTP.
|
||||||
size bytes may be returned if EOF is reached.
|
|
||||||
"""
|
|
||||||
start = self.tell()
|
|
||||||
stop = start + size if 0 <= size <= self.length-start else self.length
|
|
||||||
self.download(start, stop-1)
|
|
||||||
return self.file.read(size)
|
|
||||||
|
|
||||||
def seek(self, offset: int, whence: int = 0) -> int:
|
This uses HTTP range requests to lazily fetch the file's content,
|
||||||
"""Change stream position and return the new absolute position.
|
which is supposed to be fed to ZipFile.
|
||||||
|
"""
|
||||||
|
|
||||||
Seek to offset relative position indicated by whence:
|
def __post_init__(self) -> None:
|
||||||
* 0: Start of stream (the default). pos should be >= 0;
|
"""Check and download until the file is a valid ZIP."""
|
||||||
* 1: Current position - pos may be negative;
|
if not self.accept_ranges:
|
||||||
* 2: End of stream - pos usually negative.
|
end = self.length - 1
|
||||||
"""
|
self.ensure(0, end)
|
||||||
return self.file.seek(offset, whence)
|
self.left, self.right = [0], [end]
|
||||||
|
return
|
||||||
def tell(self) -> int:
|
for start, end in init_range(self.length, self.chunk_size):
|
||||||
"""Return the current position."""
|
self.ensure(start, end)
|
||||||
return self.file.tell()
|
try:
|
||||||
|
ZipFile(self)
|
||||||
def close(self) -> None:
|
except BadZipFile:
|
||||||
"""Close the file."""
|
pass
|
||||||
self.file.close()
|
else:
|
||||||
|
break
|
||||||
|
|
Loading…
Reference in New Issue