scripts/pack: add zstd support

This commit is contained in:
Andrei Alexeyev 2021-03-28 14:43:02 +03:00
parent 6f5e0716eb
commit 91639d1c43
No known key found for this signature in database
GPG key ID: 72D26128040B9690
4 changed files with 113 additions and 16 deletions

3
.gitmodules vendored
View file

@ -13,3 +13,6 @@
path = external/basis_universal
url = https://github.com/taisei-project/basis_universal.git
branch = taisei
[submodule "external/python-zipfile-zstd"]
path = external/python-zipfile-zstd
url = https://github.com/taisei-project/python-zipfile-zstd

1
external/python-zipfile-zstd vendored Submodule

@ -0,0 +1 @@
Subproject commit 14c76000f499701e82f55bf9c7b6cc37836e9ec1

View file

@ -3,16 +3,24 @@
import os
import sys
import re
import zstandard
import zlib
import shutil
from datetime import (
datetime,
)
import zipfile_zstd
from zipfile import (
ZipFile,
ZipInfo,
ZIP_DEFLATED,
ZIP_STORED,
ZIP_ZSTANDARD,
ZSTANDARD_VERSION,
ZipFile,
ZipInfo,
compressor_names,
)
from pathlib import Path
@ -24,9 +32,85 @@ from taiseilib.common import (
write_depfile,
)
zstd_decompressor = zstandard.ZstdDecompressor()
def write_zst_file(zf, zst_path, arcname):
    '''
    Store an already zstd-compressed file in the archive without recompressing it.

    zf is the ZipFile being written, zst_path is the path object of the
    compressed file on disk, and arcname is the name the entry receives
    inside the archive.

    zipfile offers no public way to add pre-compressed data, so the entry
    header and payload are written by hand through ZipFile's private state.
    '''
    log_file(zst_path, arcname, ZIP_ZSTANDARD)

    with zst_path.open('rb') as src:
        # Seeking to the end reports the size of the compressed payload.
        packed_size = src.seek(0, 2)
        src.seek(0, 0)

        info = ZipInfo.from_file(str(zst_path), arcname=arcname)
        info.compress_type = ZIP_ZSTANDARD
        info.create_version = ZSTANDARD_VERSION
        info.extract_version = ZSTANDARD_VERSION
        info.compress_size = packed_size

        if not info.external_attr:
            info.external_attr = 0o600 << 16  # permissions: ?rw-------

        # The entry needs the CRC-32 of the uncompressed data, which forces
        # a full decompression pass. The uncompressed size is counted in the
        # same pass rather than trusted from the zstd frame headers.
        raw_size = 0
        checksum = 0
        for piece in zstd_decompressor.read_to_iter(src):
            raw_size += len(piece)
            checksum = zlib.crc32(piece, checksum)
        info.file_size = raw_size
        info.CRC = checksum

        archive_fp = zf.fp

        # Presumably start_dir is the offset where the next entry belongs.
        if zf._seekable:
            archive_fp.seek(zf.start_dir)

        info.header_offset = archive_fp.tell()
        zf._writecheck(info)
        zf._didModify = True

        # zip64 is never requested for the local header.
        # NOTE(review): very large entries may be rejected here - confirm.
        archive_fp.write(info.FileHeader(False))
        zf._writing = True

        try:
            # Copy the compressed bytes verbatim as the entry payload.
            src.seek(0, 0)
            shutil.copyfileobj(src, archive_fp)
            assert src.tell() == info.compress_size

            # Register the entry with the archive's bookkeeping.
            zf.filelist.append(info)
            zf.NameToInfo[info.filename] = info
            zf.start_dir = archive_fp.tell()
        finally:
            zf._writing = False
def log_file(path, arcname, comp_type=None):
    '''
    Print one line describing an archive entry and the file it comes from.

    Entries whose archive name ends with '/' are labelled 'dir'. Any other
    entry is labelled with the name registered for comp_type in
    compressor_names, or '???' when there is none.
    '''
    is_dir_entry = str(arcname).endswith('/')
    label = 'dir' if is_dir_entry else compressor_names.get(comp_type, '???')
    print('% 12s' % label, '|', arcname, '<--', str(path))
def pack(args):
nocompress_file = args.directory / '.nocompress'
if 1:
comp_type = ZIP_ZSTANDARD
comp_level = 20
else:
comp_type = ZIP_DEFLATED
comp_level = 9
try:
nocompress = list(map(re.compile, filter(None, nocompress_file.read_text().strip().split('\n'))))
except FileNotFoundError:
@ -35,9 +119,11 @@ def pack(args):
zkwargs = {}
if (sys.version_info.major, sys.version_info.minor) >= (3, 7):
zkwargs['compresslevel'] = 9
zkwargs['compresslevel'] = comp_level
with ZipFile(str(args.output), 'w', ZIP_DEFLATED, **zkwargs) as zf:
dependencies = []
with ZipFile(str(args.output), 'w', comp_type, **zkwargs) as zf:
for path in sorted(args.directory.glob('**/*')):
if path.name[0] == '.' or any(path.match(x) for x in args.exclude):
continue
@ -48,23 +134,29 @@ def pack(args):
zi = ZipInfo(str(relpath) + "/", datetime.fromtimestamp(path.stat().st_mtime).timetuple())
zi.compress_type = ZIP_STORED
zi.external_attr = 0o40755 << 16 # drwxr-xr-x
log_file(path, zi.filename)
zf.writestr(zi, '')
else:
ctype = ZIP_DEFLATED
dependencies.append(path)
for pattern in nocompress:
if pattern.match(str(relpath)):
ctype = ZIP_STORED
break
if path.suffix == '.zst':
write_zst_file(zf, path, str(relpath.with_suffix('')))
else:
ctype = comp_type
zf.write(str(path), str(relpath), compress_type=ctype)
for pattern in nocompress:
if pattern.match(str(relpath)):
ctype = ZIP_STORED
break
if args.depfile is not None:
write_depfile(args.depfile, args.output,
[args.directory.resolve() / x for x in zf.namelist()] +
[str(Path(__file__).resolve())] +
list(filter(None, [nocompress_file]))
)
log_file(path, relpath, ctype)
zf.write(str(path), str(relpath), compress_type=ctype)
if args.depfile is not None:
if nocompress_file is not None:
dependencies.append(nocompress_file)
dependencies.append(Path(__file__).resolve())
write_depfile(args.depfile, args.output, dependencies)
def main(args):

1
scripts/zipfile_zstd Symbolic link
View file

@ -0,0 +1 @@
../external/python-zipfile-zstd/zipfile_zstd