diff --git a/lazip.py b/lazip.py
index eb8f87b..a96428b 100644
--- a/lazip.py
+++ b/lazip.py
@@ -14,17 +14,19 @@
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with palace. If not, see .
+# along with lazip. If not, see <https://www.gnu.org/licenses/>.
"""Lazy ZIP over HTTP"""
-__version__ = '0.0.2'
-__all__ = ['Lazip']
+__version__ = '0.0.3'
+__all__ = ['Filazy', 'Lazip']
+from abc import abstractmethod
from bisect import bisect_left, bisect_right
from contextlib import contextmanager
+from io import UnsupportedOperation
from tempfile import NamedTemporaryFile
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import IO, Dict, Iterator, List, Optional, Tuple
from zipfile import BadZipFile, ZipFile
from requests import Session
@@ -41,45 +43,166 @@ def init_range(stop: int, size: int) -> Iterator[Tuple[int, int]]:
yield 0, stop-1
-class Lazip:
- """File-like object mapped to a ZIP file over HTTP.
+class ReadOnlyBinaryIOWrapper(IO[bytes]):
+ """Wrapper for a read-only binary I/O."""
- This uses HTTP range requests to lazily fetch the file's content,
- which is supposed to be fed to ZipFile.
- """
+ file: IO[bytes]
+ length: int
- def __init__(self, session: Session, url: str,
- chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
- head = session.head(url)
- head.raise_for_status()
- assert head.status_code == 200
- self.session, self.url, self.chunk_size = session, url, chunk_size
- self.length = int(head.headers['Content-Length'])
- self.file = NamedTemporaryFile()
- self.file.truncate(self.length)
- self.left: List[int] = []
- self.right: List[int] = []
- self.check_zip('bytes' in head.headers.get('Accept-Ranges', 'none'))
-
- def __enter__(self) -> 'Lazip':
- self.file.__enter__()
- return self
-
- def __exit__(self, *exc: Any) -> Optional[bool]:
- return self.file.__exit__(*exc)
+ @property
+ def mode(self) -> str:
+ """Opening mode of the wrapped file (w+b for a temporary file)."""
+ return self.file.mode
@property
def name(self) -> str:
"""File name."""
return self.file.name
+ def close(self) -> None:
+ """Close the file."""
+ self.file.close()
+
+ @property
+ def closed(self) -> bool:
+ """Whether the file is closed."""
+ return self.file.closed
+
+ def fileno(self) -> int:
+ """Return the underlying file descriptor (an integer)."""
+ return self.file.fileno()
+
+ def flush(self) -> None:
+ """Flush the underlying file."""
+ self.file.flush()
+
+ def isatty(self) -> bool:
+ """Return whether the underlying file is connected to a TTY."""
+ return self.file.isatty()
+
+ def read(self, size: int = -1) -> bytes:
+ """Read up to size bytes from the object and return them.
+
+ As a convenience, if size is unspecified or -1,
+ all bytes until EOF are returned. Fewer than
+ size bytes may be returned if EOF is reached.
+ """
+ start = self.tell()
+ stop = start + size if 0 <= size <= self.length-start else self.length
+ if stop > start: self.ensure(start, stop-1)  # skip empty ranges
+ return self.file.read(size)
+
+ def readable(self) -> bool:
+ """Return whether the underlying file is readable."""
+ return self.file.readable()
+
+ def readline(self, limit: int = -1) -> bytes:
+ raise UnsupportedOperation  # line-oriented reads are not supported
+
+ def readlines(self, hint: int = -1) -> List[bytes]:
+ raise UnsupportedOperation  # line-oriented reads are not supported
+
+ def seek(self, offset: int, whence: int = 0) -> int:
+ """Change stream position and return the new absolute position.
+
+ Seek to offset relative to the position indicated by whence:
+ * 0: Start of stream (the default). offset should be >= 0;
+ * 1: Current position - offset may be negative;
+ * 2: End of stream - offset usually negative.
+ """
+ return self.file.seek(offset, whence)
+
def seekable(self) -> bool:
"""Return whether random access is supported, which is True."""
- return True
+ return self.file.seekable()
+
+ def tell(self) -> int:
+ """Return the current position."""
+ return self.file.tell()
+
+ def truncate(self, size: Optional[int] = None) -> int:
+ """Resize the stream to the given size in bytes.
+
+ If size is unspecified resize to the current position.
+ The current stream position isn't changed.
+
+ Return the new file size.
+ """
+ return self.file.truncate(size)
+
+ def writable(self) -> bool:
+ """Return False."""
+ return False
+
+ def write(self, s):
+ raise UnsupportedOperation
+
+ def writelines(self, lines):
+ raise UnsupportedOperation
+
+ def __next__(self):
+ raise UnsupportedOperation
+
+ def __iter__(self):
+ raise UnsupportedOperation
+
+ def __enter__(self) -> 'ReadOnlyBinaryIOWrapper':
+ self.file.__enter__()
+ return self
+
+ def __exit__(self, *exc) -> Optional[bool]:
+ return self.file.__exit__(*exc)
+
+ @abstractmethod
+ def ensure(self, start: int, end: int) -> None:
+ """Ensure the data from start to end (inclusive) is available.
+
+ Implementations that seek must restore the original
+ stream position before returning.
+ """
+
+
+class Filazy(ReadOnlyBinaryIOWrapper):
+ """Read-only file-like object mapped to a file over HTTP.
+
+ This uses HTTP range requests to lazily fetch the file's content.
+ At the end of initialization, __post_init__ will be called.
+
+ Parameters:
+ session (Session): Requests session
+ url (str): HTTP URL to the file
+ chunk_size (int): Download chunk size
+
+ Attributes:
+ session (Session): Requests session
+ url (str): HTTP URL to the file
+ chunk_size (int): Download chunk size
+ left (List[int]): Left endpoints of downloaded intervals
+ right (List[int]): Right endpoints of downloaded intervals
+ accept_ranges (bool): Whether range requests are supported
+ """
+
+ def __init__(self, session: Session, url: str,
+ chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
+ response = session.head(url)
+ response.raise_for_status()
+ assert response.status_code == 200
+ headers = response.headers
+ self.session, self.url, self.chunk_size = session, url, chunk_size
+ self.length = int(headers['Content-Length'])
+ self.file = NamedTemporaryFile()
+ self.truncate(self.length)
+ self.left: List[int] = []
+ self.right: List[int] = []
+ self.accept_ranges = 'bytes' in headers.get('Accept-Ranges', 'none')
+ with self.stay(): self.__post_init__()
+
+ def __post_init__(self) -> None:
+ pass
@contextmanager
def stay(self) -> Iterator[None]:
- """Return a context manager keeping the position.
+ """Return a context manager that keeps the stream position.
At the end of the block, seek back to original position.
"""
@@ -89,23 +212,6 @@ class Lazip:
finally:
self.seek(pos)
- def check_zip(self, range_request: bool) -> None:
- """Check and download until the file is a valid ZIP."""
- if not range_request:
- end = self.length - 1
- self.download(0, end)
- self.left, self.right = [0], [end]
- return
- for start, end in init_range(self.length, self.chunk_size):
- self.download(start, end)
- with self.stay():
- try:
- ZipFile(self) # type: ignore
- except BadZipFile:
- pass
- else:
- break
-
def stream_response(self, start: int, end: int,
base_headers: Dict[str, str] = {}) -> Response:
"""Return HTTP response to a range request from start to end."""
@@ -116,7 +222,7 @@ class Lazip:
left: int, right: int) -> Iterator[Tuple[int, int]]:
"""Return an iterator of intervals to be fetched.
- Args:
+ Parameters:
start (int): Start of needed interval
end (int): End of needed interval
left (int): Index of first overlapping downloaded data
@@ -131,7 +237,7 @@ class Lazip:
if i <= end: yield i, end
self.left[left:right], self.right[left:right] = [start], [end]
- def download(self, start: int, end: int) -> None:
+ def ensure(self, start: int, end: int) -> None:
"""Download bytes from start to end inclusively."""
with self.stay():
i, j = bisect_left(self.right, start), bisect_right(self.left, end)
@@ -143,32 +249,26 @@ class Lazip:
decode_content=False):
self.file.write(chunk)
- def read(self, size: int = -1) -> bytes:
- """Read up to size bytes from the object and return them.
- As a convenience, if size is unspecified or -1,
- all bytes until EOF are returned. Fewer than
- size bytes may be returned if EOF is reached.
- """
- start = self.tell()
- stop = start + size if 0 <= size <= self.length-start else self.length
- self.download(start, stop-1)
- return self.file.read(size)
+class Lazip(Filazy):
+ """Read-only file-like object mapped to a ZIP file over HTTP.
- def seek(self, offset: int, whence: int = 0) -> int:
- """Change stream position and return the new absolute position.
+ This uses HTTP range requests to lazily fetch the file's content,
+ which is supposed to be fed to ZipFile.
+ """
- Seek to offset relative position indicated by whence:
- * 0: Start of stream (the default). pos should be >= 0;
- * 1: Current position - pos may be negative;
- * 2: End of stream - pos usually negative.
- """
- return self.file.seek(offset, whence)
-
- def tell(self) -> int:
- """Return the current possition."""
- return self.file.tell()
-
- def close(self) -> None:
- """Close the file."""
- self.file.close()
+ def __post_init__(self) -> None:
+ """Check and download until the file is a valid ZIP."""
+ if not self.accept_ranges:
+ end = self.length - 1
+ self.ensure(0, end)
+ self.left, self.right = [0], [end]
+ return
+ for start, end in init_range(self.length, self.chunk_size):
+ self.ensure(start, end)
+ try:
+ ZipFile(self)
+ except BadZipFile:
+ pass
+ else:
+ break