Update mmdb download to check the checksum at start

Théophile Diot 2023-05-16 18:52:29 -04:00
parent 4378f18cc8
commit b58798746d
No known key found for this signature in database
GPG Key ID: E752C80DB72BB014
5 changed files with 144 additions and 95 deletions

View File

@@ -2,6 +2,7 @@
from datetime import date
from gzip import decompress
from hashlib import sha1
from os import _exit, getenv
from pathlib import Path
from sys import exit as sys_exit, path as sys_path
@@ -24,62 +25,91 @@ from jobs import cache_file, cache_hash, file_hash, is_cached_file
logger = setup_logger("JOBS.mmdb-asn", getenv("LOG_LEVEL", "INFO"))
status = 0
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
try:
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/asn.mmdb", "month", db):
logger.info("asn.mmdb is already in cache, skipping download...")
_exit(0)
dl_mmdb = True
tmp_path = "/var/tmp/bunkerweb/asn.mmdb"
new_hash = None
# Compute the mmdb URL
mmdb_url = f"https://download.db-ip.com/free/dbip-asn-lite-{date.today().strftime('%Y-%m')}.mmdb.gz"
# Don't go further if the cache matches the latest version
if Path("/var/tmp/bunkerweb/asn.mmdb").exists():
response = get("https://db-ip.com/db/download/ip-to-asn-lite")
# Download the mmdb file and save it to tmp
logger.info(f"Downloading mmdb file from url {mmdb_url} ...")
file_content = b""
with get(mmdb_url, stream=True) as resp:
resp.raise_for_status()
for chunk in resp.iter_content(chunk_size=4 * 1024):
if chunk:
file_content += chunk
if response.status_code == 200:
_sha1 = sha1()
with open("/var/tmp/bunkerweb/asn.mmdb", "rb") as f:
while True:
data = f.read(1024)
if not data:
break
_sha1.update(data)
try:
assert file_content
except AssertionError:
logger.error(f"Error while downloading mmdb file from {mmdb_url}")
_exit(2)
if response.content.decode().find(_sha1.hexdigest()) != -1:
logger.info(
"asn.mmdb is already the latest version, skipping download..."
)
dl_mmdb = False
tmp_path = "/var/tmp/bunkerweb/asn.mmdb"
else:
logger.warning(
"Unable to check if asn.mmdb is the latest version, downloading it anyway..."
)
# Decompress it
logger.info("Decompressing mmdb file ...")
Path(f"/var/tmp/bunkerweb/asn.mmdb").write_bytes(decompress(file_content))
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
if dl_mmdb:
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/asn.mmdb", "month", db):
logger.info("asn.mmdb is already in cache, skipping download...")
_exit(0)
# Compute the mmdb URL
mmdb_url = f"https://download.db-ip.com/free/dbip-asn-lite-{date.today().strftime('%Y-%m')}.mmdb.gz"
# Download the mmdb file and save it to tmp
logger.info(f"Downloading mmdb file from url {mmdb_url} ...")
file_content = b""
with get(mmdb_url, stream=True) as resp:
resp.raise_for_status()
for chunk in resp.iter_content(chunk_size=4 * 1024):
if chunk:
file_content += chunk
try:
assert file_content
except AssertionError:
logger.error(f"Error while downloading mmdb file from {mmdb_url}")
_exit(2)
# Decompress it
logger.info("Decompressing mmdb file ...")
Path(tmp_path).write_bytes(decompress(file_content))
# Check if file has changed
new_hash = file_hash(tmp_path)
old_hash = cache_hash("/var/cache/bunkerweb/asn.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
# Try to load it
logger.info("Checking if mmdb file is valid ...")
with open_database("/var/tmp/bunkerweb/asn.mmdb") as reader:
with open_database(tmp_path or "/var/cache/bunkerweb/asn.mmdb") as reader:
pass
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/asn.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/asn.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
# Move it to cache folder
logger.info("Moving mmdb file to cache ...")
cached, err = cache_file(
"/var/tmp/bunkerweb/asn.mmdb", "/var/cache/bunkerweb/asn.mmdb", new_hash, db
)
cached, err = cache_file(tmp_path, "/var/cache/bunkerweb/asn.mmdb", new_hash, db)
if not cached:
logger.error(f"Error while caching mmdb file : {err}")
_exit(2)
# Success
logger.info(f"Downloaded new mmdb from {mmdb_url}")
if dl_mmdb:
logger.info(f"Downloaded new mmdb from {mmdb_url}")
status = 1
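
The core addition in this job is the early freshness check: before hitting the download URL, the job hashes the local /var/tmp/bunkerweb/asn.mmdb with SHA-1 and searches for that digest in the body of https://db-ip.com/db/download/ip-to-asn-lite, the assumption being that the page lists the checksum of the current monthly release. A minimal standalone sketch of that check, using the path and URL from the diff above (the helper name is illustrative, not part of the codebase):

from hashlib import sha1
from pathlib import Path

from requests import get


def is_latest_mmdb(local_path: str, page_url: str) -> bool:
    """Return True when the SHA-1 of local_path appears on the db-ip download page."""
    mmdb = Path(local_path)
    if not mmdb.exists():
        # Nothing to compare against, a download is needed
        return False

    response = get(page_url)
    if response.status_code != 200:
        # Can't verify; the job falls back to downloading in this case
        return False

    # Hash the local file in 1 KiB chunks, as the job does
    _sha1 = sha1()
    with mmdb.open("rb") as f:
        for chunk in iter(lambda: f.read(1024), b""):
            _sha1.update(chunk)

    # The digest is looked up as a plain substring of the page content
    return _sha1.hexdigest() in response.text


if __name__ == "__main__":
    if is_latest_mmdb(
        "/var/tmp/bunkerweb/asn.mmdb",
        "https://db-ip.com/db/download/ip-to-asn-lite",
    ):
        print("asn.mmdb is already the latest version, skipping download...")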

View File

@@ -2,6 +2,7 @@
from datetime import date
from gzip import decompress
from hashlib import sha1
from os import _exit, getenv
from pathlib import Path
from sys import exit as sys_exit, path as sys_path
@@ -26,86 +27,91 @@ logger = setup_logger("JOBS.mmdb-country", getenv("LOG_LEVEL", "INFO"))
status = 0
try:
# Only download mmdb if the country blacklist or whitelist is enabled
dl_mmdb = False
# Multisite case
if getenv("MULTISITE", "no") == "yes":
for first_server in getenv("SERVER_NAME", "").split(" "):
if getenv(
f"{first_server}_BLACKLIST_COUNTRY", getenv("BLACKLIST_COUNTRY")
) or getenv(
f"{first_server}_WHITELIST_COUNTRY", getenv("WHITELIST_COUNTRY")
):
dl_mmdb = True
break
# Singlesite case
elif getenv("BLACKLIST_COUNTRY") or getenv("WHITELIST_COUNTRY"):
dl_mmdb = True
dl_mmdb = True
tmp_path = "/var/tmp/bunkerweb/country.mmdb"
new_hash = None
if not dl_mmdb:
logger.info(
"Country blacklist or whitelist is not enabled, skipping download..."
)
_exit(0)
# Don't go further if the cache matches the latest version
if Path("/var/tmp/bunkerweb/country.mmdb").exists():
response = get("https://db-ip.com/db/download/ip-to-country-lite")
if response.status_code == 200:
_sha1 = sha1()
with open("/var/tmp/bunkerweb/country.mmdb", "rb") as f:
while True:
data = f.read(1024)
if not data:
break
_sha1.update(data)
if response.content.decode().find(_sha1.hexdigest()) != -1:
logger.info(
"country.mmdb is already the latest version, skipping download..."
)
dl_mmdb = False
tmp_path = "/var/tmp/bunkerweb/country.mmdb"
else:
logger.warning(
"Unable to check if country.mmdb is the latest version, downloading it anyway..."
)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/country.mmdb", "month", db):
logger.info("country.mmdb is already in cache, skipping download...")
_exit(0)
if dl_mmdb:
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/country.mmdb", "month", db):
logger.info("country.mmdb is already in cache, skipping download...")
_exit(0)
# Compute the mmdb URL
mmdb_url = f"https://download.db-ip.com/free/dbip-country-lite-{date.today().strftime('%Y-%m')}.mmdb.gz"
# Compute the mmdb URL
mmdb_url = f"https://download.db-ip.com/free/dbip-country-lite-{date.today().strftime('%Y-%m')}.mmdb.gz"
# Download the mmdb file and save it to tmp
logger.info(f"Downloading mmdb file from url {mmdb_url} ...")
file_content = b""
with get(mmdb_url, stream=True) as resp:
resp.raise_for_status()
for chunk in resp.iter_content(chunk_size=4 * 1024):
if chunk:
file_content += chunk
# Download the mmdb file and save it to tmp
logger.info(f"Downloading mmdb file from url {mmdb_url} ...")
file_content = b""
with get(mmdb_url, stream=True) as resp:
resp.raise_for_status()
for chunk in resp.iter_content(chunk_size=4 * 1024):
if chunk:
file_content += chunk
try:
assert file_content
except AssertionError:
logger.error(f"Error while downloading mmdb file from {mmdb_url}")
_exit(2)
try:
assert file_content
except AssertionError:
logger.error(f"Error while downloading mmdb file from {mmdb_url}")
_exit(2)
# Decompress it
logger.info("Decompressing mmdb file ...")
Path(f"/var/tmp/bunkerweb/country.mmdb").write_bytes(decompress(file_content))
# Decompress it
logger.info("Decompressing mmdb file ...")
Path(tmp_path).write_bytes(decompress(file_content))
# Check if file has changed
new_hash = file_hash(tmp_path)
old_hash = cache_hash("/var/cache/bunkerweb/country.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
# Try to load it
logger.info("Checking if mmdb file is valid ...")
with open_database("/var/tmp/bunkerweb/country.mmdb") as reader:
with open_database(tmp_path or "/var/cache/bunkerweb/country.mmdb") as reader:
pass
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/country.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/country.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
# Move it to cache folder
logger.info("Moving mmdb file to cache ...")
cached, err = cache_file(
"/var/tmp/bunkerweb/country.mmdb",
"/var/cache/bunkerweb/country.mmdb",
new_hash,
db,
tmp_path, "/var/cache/bunkerweb/country.mmdb", new_hash, db
)
if not cached:
logger.error(f"Error while caching mmdb file : {err}")
_exit(2)
# Success
logger.info(f"Downloaded new mmdb from {mmdb_url}")
if dl_mmdb:
logger.info(f"Downloaded new mmdb from {mmdb_url}")
status = 1

View File

@@ -135,7 +135,12 @@ def cache_hash(cache: str, db=None) -> Optional[str]:
def cache_file(
file: str, cache: str, _hash: str, db=None, *, service_id: Optional[str] = None
file: str,
cache: str,
_hash: Optional[str],
db=None,
*,
service_id: Optional[str] = None,
) -> Tuple[bool, str]:
ret, err = True, "success"
try:
@@ -143,6 +148,9 @@ def cache_file
Path(cache).write_bytes(content)
Path(file).unlink()
if not _hash:
_hash = file_hash(cache)
if db:
with lock:
err = db.update_job_cache(
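
To support the case where no download happened (dl_mmdb is False and new_hash stays None), cache_file now takes an Optional hash and recomputes it from the cached file before the job cache is updated. A simplified sketch of that fallback, with the database update reduced to a comment (the real helper also takes db, the lock and service_id):

from pathlib import Path
from typing import Optional, Tuple

from jobs import file_hash  # same helper the jobs above import


def cache_file_sketch(file: str, cache: str, _hash: Optional[str]) -> Tuple[bool, str]:
    """Move the tmp file into the cache and make sure a checksum is recorded."""
    try:
        content = Path(file).read_bytes()
        Path(cache).write_bytes(content)
        Path(file).unlink()

        # New behaviour: if the caller did not pass a hash (no fresh download),
        # compute it from the file that was just written to the cache
        if not _hash:
            _hash = file_hash(cache)

        # ... the db.update_job_cache(...) call from the diff would follow here,
        # storing _hash as the cached file's checksum ...
        return True, "success"
    except Exception as e:
        return False, str(e)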

View File

@@ -68,6 +68,11 @@ RUN apk add --no-cache bash libgcc libstdc++ openssl && \
chmod 660 /usr/share/bunkerweb/INTEGRATION && \
chown root:scheduler /usr/share/bunkerweb/INTEGRATION
COPY --chown=root:scheduler src/bw/misc/asn.mmdb /var/tmp/bunkerweb/asn.mmdb
COPY --chown=root:scheduler src/bw/misc/country.mmdb /var/tmp/bunkerweb/country.mmdb
RUN chmod 770 /var/tmp/bunkerweb/asn.mmdb /var/tmp/bunkerweb/country.mmdb
# Fix CVEs
RUN apk add "libcrypto3>=3.0.8-r4" "libssl3>=3.0.8-r4"

View File

@@ -1,3 +1,3 @@
schedule==1.2.0
certbot==2.6.0
maxminddb==2.3.0
maxminddb==2.3.0