Update jobs so they now also check and save the cache in the db

Théophile Diot 2023-04-21 11:00:18 +02:00
parent 63b1fb947a
commit 08f3d93ab5
No known key found for this signature in database
GPG Key ID: E752C80DB72BB014
12 changed files with 139 additions and 182 deletions
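In short: the cache helpers in jobs.py now take an optional db handle, and each update job passes its Database instance instead of calling db.update_job_cache itself. A minimal sketch of the updated helper signatures as they appear in the diff below (bodies elided, not the exact implementation):

    from typing import Optional, Tuple

    def is_cached_file(file: str, expire: str, db=None) -> bool:
        # Freshness check: falls back to the db cache entry when the .md sidecar is missing
        ...

    def cache_hash(cache: str, db=None) -> Optional[str]:
        # Checksum of the cached file, read from the .md sidecar or the db row
        ...

    def cache_file(
        file: str, cache: str, _hash: str, db=None, *, service_id: Optional[str] = None
    ) -> Tuple[bool, str]:
        # Copies the file into the cache and, when db is given, also calls db.update_job_cache()
        ...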

View File

@ -6,7 +6,6 @@ from os import _exit, getenv
from pathlib import Path
from re import IGNORECASE, compile as re_compile
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
from typing import Tuple
@ -84,7 +83,6 @@ try:
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Create directories if they don't exist
Path("/var/cache/bunkerweb/blacklist").mkdir(parents=True, exist_ok=True)
@ -108,7 +106,9 @@ try:
}
all_fresh = True
for kind in kinds_fresh:
if not is_cached_file(f"/var/cache/bunkerweb/blacklist/{kind}.list", "hour"):
if not is_cached_file(
f"/var/cache/bunkerweb/blacklist/{kind}.list", "hour", db
):
kinds_fresh[kind] = False
all_fresh = False
logger.info(
@ -172,7 +172,7 @@ try:
logger.info(f"Downloaded {i} bad {kind}")
# Check if file has changed
new_hash = file_hash(f"/var/tmp/bunkerweb/blacklist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/blacklist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/blacklist/{kind}.list", db)
if new_hash == old_hash:
logger.info(
f"New file {kind}.list is identical to cache file, reload is not needed",
@ -186,25 +186,12 @@ try:
f"/var/tmp/bunkerweb/blacklist/{kind}.list",
f"/var/cache/bunkerweb/blacklist/{kind}.list",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching blacklist : {err}")
status = 2
else:
# Update db
with lock:
err = db.update_job_cache(
"blacklist-download",
None,
f"{kind}.list",
content,
checksum=new_hash,
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
status = 1
except:
status = 2
logger.error(

View File

@ -47,7 +47,6 @@ try:
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Create directory if it doesn't exist
Path("/var/cache/bunkerweb/bunkernet").mkdir(parents=True, exist_ok=True)
@ -64,7 +63,7 @@ try:
_exit(2)
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/bunkernet/ip.list", "day"):
if is_cached_file("/var/cache/bunkerweb/bunkernet/ip.list", "day", db):
logger.info(
"BunkerNet list is already in cache, skipping download...",
)
@ -111,7 +110,7 @@ try:
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/bunkernet-ip.list")
old_hash = cache_hash("/var/cache/bunkerweb/bunkernet/ip.list")
old_hash = cache_hash("/var/cache/bunkerweb/bunkernet/ip.list", db)
if new_hash == old_hash:
logger.info(
"New file is identical to cache file, reload is not needed",
@ -123,24 +122,12 @@ try:
"/var/tmp/bunkerweb/bunkernet-ip.list",
"/var/cache/bunkerweb/bunkernet/ip.list",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching BunkerNet data : {err}")
_exit(2)
# Update db
with lock:
err = db.update_job_cache(
"bunkernet-data",
None,
"ip.list",
content,
checksum=new_hash,
)
if err:
logger.warning(f"Couldn't update db ip.list cache: {err}")
logger.info("Successfully saved BunkerNet data")
status = 1

View File

@ -81,30 +81,28 @@ def check_cert(cert_path, key_path, first_server: Optional[str] = None) -> bool:
if old_hash != key_hash:
copy(key_path, key_cache_path.replace(".hash", ""))
with open(key_path, "r") as f:
with lock:
err = db.update_job_cache(
"custom-cert",
first_server,
key_cache_path.replace(".hash", "").split("/")[-1],
f.read().encode("utf-8"),
checksum=key_hash,
)
with lock:
err = db.update_job_cache(
"custom-cert",
first_server,
key_cache_path.replace(".hash", "").split("/")[-1],
Path(key_path).read_bytes(),
checksum=key_hash,
)
if err:
logger.warning(
f"Couldn't update db cache for {key_path.replace('/', '_')}.hash: {err}"
)
with open(cert_path, "r") as f:
with lock:
err = db.update_job_cache(
"custom-cert",
first_server,
cert_cache_path.replace(".hash", "").split("/")[-1],
f.read().encode("utf-8"),
checksum=cert_hash,
)
with lock:
err = db.update_job_cache(
"custom-cert",
first_server,
cert_cache_path.replace(".hash", "").split("/")[-1],
Path(cert_path).read_bytes(),
checksum=cert_hash,
)
if err:
logger.warning(

View File

@ -6,7 +6,6 @@ from os import _exit, getenv
from pathlib import Path
from re import IGNORECASE, compile as re_compile
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
from typing import Tuple
@ -84,7 +83,6 @@ try:
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Create directories if they don't exist
Path("/var/cache/bunkerweb/greylist").mkdir(parents=True, exist_ok=True)
@ -103,7 +101,7 @@ try:
}
all_fresh = True
for kind in kinds_fresh:
if not is_cached_file(f"/var/cache/bunkerweb/greylist/{kind}.list", "hour"):
if not is_cached_file(f"/var/cache/bunkerweb/greylist/{kind}.list", "hour", db):
kinds_fresh[kind] = False
all_fresh = False
logger.info(
@ -156,7 +154,7 @@ try:
logger.info(f"Downloaded {i} grey {kind}")
# Check if file has changed
new_hash = file_hash(f"/var/tmp/bunkerweb/greylist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/greylist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/greylist/{kind}.list", db)
if new_hash == old_hash:
logger.info(
f"New file {kind}.list is identical to cache file, reload is not needed",
@ -170,25 +168,12 @@ try:
f"/var/tmp/bunkerweb/greylist/{kind}.list",
f"/var/cache/bunkerweb/greylist/{kind}.list",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching greylist : {err}")
status = 2
else:
# Update db
with lock:
err = db.update_job_cache(
"greylist-download",
None,
f"{kind}.list",
content,
checksum=new_hash,
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
status = 1
except:
status = 2
logger.error(

View File

@ -5,7 +5,6 @@ from gzip import decompress
from os import _exit, getenv
from pathlib import Path
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
sys_path.extend(
@ -25,10 +24,14 @@ from jobs import cache_file, cache_hash, file_hash, is_cached_file
logger = setup_logger("JOBS.mmdb-asn", getenv("LOG_LEVEL", "INFO"))
status = 0
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
try:
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/asn.mmdb", "month"):
if is_cached_file("/var/cache/bunkerweb/asn.mmdb", "month", db):
logger.info("asn.mmdb is already in cache, skipping download...")
_exit(0)
@ -52,8 +55,7 @@ try:
# Decompress it
logger.info("Decompressing mmdb file ...")
file_content = decompress(file_content)
Path(f"/var/tmp/bunkerweb/asn.mmdb").write_bytes(file_content)
Path(f"/var/tmp/bunkerweb/asn.mmdb").write_bytes(decompress(file_content))
# Try to load it
logger.info("Checking if mmdb file is valid ...")
@ -62,7 +64,7 @@ try:
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/asn.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/asn.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/asn.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
@ -70,27 +72,12 @@ try:
# Move it to cache folder
logger.info("Moving mmdb file to cache ...")
cached, err = cache_file(
"/var/tmp/bunkerweb/asn.mmdb", "/var/cache/bunkerweb/asn.mmdb", new_hash
"/var/tmp/bunkerweb/asn.mmdb", "/var/cache/bunkerweb/asn.mmdb", new_hash, db
)
if not cached:
logger.error(f"Error while caching mmdb file : {err}")
_exit(2)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Update db
with lock:
err = db.update_job_cache(
"mmdb-asn", None, "asn.mmdb", file_content, checksum=new_hash
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
# Success
logger.info(f"Downloaded new mmdb from {mmdb_url}")

View File

@ -5,7 +5,6 @@ from gzip import decompress
from os import _exit, getenv
from pathlib import Path
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
sys_path.extend(
@ -25,10 +24,14 @@ from jobs import cache_file, cache_hash, file_hash, is_cached_file
logger = setup_logger("JOBS.mmdb-country", getenv("LOG_LEVEL", "INFO"))
status = 0
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
try:
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/country.mmdb", "month"):
if is_cached_file("/var/cache/bunkerweb/country.mmdb", "month", db):
logger.info("country.mmdb is already in cache, skipping download...")
_exit(0)
@ -52,8 +55,7 @@ try:
# Decompress it
logger.info("Decompressing mmdb file ...")
file_content = decompress(file_content)
Path(f"/var/tmp/bunkerweb/country.mmdb").write_bytes(file_content)
Path(f"/var/tmp/bunkerweb/country.mmdb").write_bytes(decompress(file_content))
# Try to load it
logger.info("Checking if mmdb file is valid ...")
@ -62,7 +64,7 @@ try:
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/country.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/country.mmdb")
old_hash = cache_hash("/var/cache/bunkerweb/country.mmdb", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
@ -70,27 +72,15 @@ try:
# Move it to cache folder
logger.info("Moving mmdb file to cache ...")
cached, err = cache_file(
"/var/tmp/bunkerweb/country.mmdb", "/var/cache/bunkerweb/country.mmdb", new_hash
"/var/tmp/bunkerweb/country.mmdb",
"/var/cache/bunkerweb/country.mmdb",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching mmdb file : {err}")
_exit(2)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Update db
with lock:
err = db.update_job_cache(
"mmdb-country", None, "country.mmdb", file_content, checksum=new_hash
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
# Success
logger.info(f"Downloaded new mmdb from {mmdb_url}")

View File

@ -99,17 +99,15 @@ try:
)
if Path(f"/etc/letsencrypt/live/{first_server}/cert.pem").exists():
cert = Path(
f"/etc/letsencrypt/live/{first_server}/cert.pem"
).read_bytes()
# Update db
with lock:
err = db.update_job_cache(
"certbot-new",
first_server,
"cert.pem",
cert,
Path(
f"/etc/letsencrypt/live/{first_server}/cert.pem"
).read_bytes(),
)
if err:
@ -139,17 +137,15 @@ try:
)
if Path(f"/etc/letsencrypt/live/{first_server}/cert.pem").exists():
cert = Path(
f"/etc/letsencrypt/live/{first_server}/cert.pem"
).read_bytes()
# Update db
with lock:
err = db.update_job_cache(
"certbot-new",
first_server,
"cert.pem",
cert,
Path(
f"/etc/letsencrypt/live/{first_server}/cert.pem"
).read_bytes(),
)
if err:

View File

@ -67,8 +67,13 @@ try:
# Create directory if it doesn't exist
Path("/var/cache/bunkerweb/realip").mkdir(parents=True, exist_ok=True)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
# Don't go further if the cache is fresh
if is_cached_file("/var/cache/bunkerweb/realip/combined.list", "hour"):
if is_cached_file("/var/cache/bunkerweb/realip/combined.list", "hour", db):
logger.info("RealIP list is already in cache, skipping download...")
_exit(0)
@ -106,7 +111,7 @@ try:
# Check if file has changed
new_hash = file_hash("/var/tmp/bunkerweb/realip-combined.list")
old_hash = cache_hash("/var/cache/bunkerweb/realip/combined.list")
old_hash = cache_hash("/var/cache/bunkerweb/realip/combined.list", db)
if new_hash == old_hash:
logger.info("New file is identical to cache file, reload is not needed")
_exit(0)
@ -116,30 +121,12 @@ try:
"/var/tmp/bunkerweb/realip-combined.list",
"/var/cache/bunkerweb/realip/combined.list",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching list : {err}")
_exit(2)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Update db
with lock:
err = db.update_job_cache(
"realip-download",
None,
"combined.list",
content,
checksum=new_hash,
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
logger.info(f"Downloaded {i} trusted IP/net")
status = 1

View File

@ -42,21 +42,23 @@ def generate_cert(first_server, days, subj):
return False, 2
# Update db
key_data = Path(f"/var/cache/bunkerweb/selfsigned/{first_server}.key").read_bytes()
with lock:
err = db.update_job_cache(
"self-signed", first_server, f"{first_server}.key", key_data
"self-signed",
first_server,
f"{first_server}.key",
Path(f"/var/cache/bunkerweb/selfsigned/{first_server}.key").read_bytes(),
)
if err:
logger.warning(f"Couldn't update db cache for {first_server}.key file: {err}")
pem_data = Path(f"/var/cache/bunkerweb/selfsigned/{first_server}.pem").read_bytes()
with lock:
err = db.update_job_cache(
"self-signed", first_server, f"{first_server}.pem", pem_data
"self-signed",
first_server,
f"{first_server}.pem",
Path(f"/var/cache/bunkerweb/selfsigned/{first_server}.pem").read_bytes(),
)
if err:

View File

@ -6,7 +6,6 @@ from os import _exit, getenv
from pathlib import Path
from re import IGNORECASE, compile as re_compile
from sys import exit as sys_exit, path as sys_path
from threading import Lock
from traceback import format_exc
from typing import Tuple
@ -80,6 +79,11 @@ try:
logger.info("Whitelist is not activated, skipping downloads...")
_exit(0)
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
# Create directories if they don't exist
Path("/var/cache/bunkerweb/whitelist").mkdir(parents=True, exist_ok=True)
Path("/var/tmp/bunkerweb/whitelist").mkdir(parents=True, exist_ok=True)
@ -97,7 +101,9 @@ try:
}
all_fresh = True
for kind in kinds_fresh:
if not is_cached_file(f"/var/cache/bunkerweb/whitelist/{kind}.list", "hour"):
if not is_cached_file(
f"/var/cache/bunkerweb/whitelist/{kind}.list", "hour", db
):
kinds_fresh[kind] = False
all_fresh = False
logger.info(
@ -150,7 +156,7 @@ try:
logger.info(f"Downloaded {i} good {kind}")
# Check if file has changed
new_hash = file_hash(f"/var/tmp/bunkerweb/whitelist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/whitelist/{kind}.list")
old_hash = cache_hash(f"/var/cache/bunkerweb/whitelist/{kind}.list", db)
if new_hash == old_hash:
logger.info(
f"New file {kind}.list is identical to cache file, reload is not needed",
@ -164,30 +170,12 @@ try:
f"/var/tmp/bunkerweb/whitelist/{kind}.list",
f"/var/cache/bunkerweb/whitelist/{kind}.list",
new_hash,
db,
)
if not cached:
logger.error(f"Error while caching whitelist : {err}")
status = 2
else:
db = Database(
logger,
sqlalchemy_string=getenv("DATABASE_URI", None),
)
lock = Lock()
# Update db
with lock:
err = db.update_job_cache(
"whitelist-download",
None,
f"{kind}.list",
content,
checksum=new_hash,
)
if err:
logger.warning(f"Couldn't update db cache: {err}")
status = 1
except:
status = 2
logger.error(

View File

@ -1501,12 +1501,21 @@ class Database:
)
}
def get_job_cache_file(self, job_name: str, file_name: str) -> Optional[Any]:
def get_job_cache_file(
self, job_name: str, file_name: str, *, with_data: bool = True
) -> Optional[Any]:
"""Get job cache file."""
with self.__db_session() as session:
if with_data:
return (
session.query(Jobs_cache)
.with_entities(Jobs_cache.data)
.filter_by(job_name=job_name, file_name=file_name)
.first()
)
return (
session.query(Jobs_cache)
.with_entities(Jobs_cache.data)
.with_entities(Jobs_cache.last_update, Jobs_cache.checksum)
.filter_by(job_name=job_name, file_name=file_name)
.first()
)
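The new with_data keyword lets callers fetch only the cache metadata (last_update, checksum) without pulling the stored blob, which is what the jobs helpers below rely on. A hypothetical call, with job and file names made up for illustration:

    # Only last_update and checksum are selected, not the cached data itself
    meta = db.get_job_cache_file("blacklist-download", "IP.list", with_data=False)
    if meta:
        print(meta.last_update, meta.checksum)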

View File

@ -1,11 +1,16 @@
from contextlib import suppress
from datetime import datetime
from hashlib import sha512
from inspect import getsourcefile
from json import dumps, loads
from pathlib import Path
from shutil import copy
from sys import _getframe
from threading import Lock
from traceback import format_exc
from typing import Optional, Tuple
lock = Lock()
"""
{
@ -15,13 +20,24 @@ from traceback import format_exc
"""
def is_cached_file(file, expire):
def is_cached_file(file: str, expire: str, db=None) -> bool:
is_cached = False
try:
if not Path(f"{file}.md").is_file():
return False
if not db:
return False
cached_file = db.get_job_cache_file(
getsourcefile(_getframe(1)).replace(".py", ""),
file.split("/")[-1],
with_data=False,
)
if not cached_file:
return False
cached_time = cached_file.last_update
else:
cached_time = loads(Path(f"{file}.md").read_text())["date"]
cached_time = loads(Path(f"{file}.md").read_text())["date"]
current_time = datetime.now().timestamp()
if current_time < cached_time:
return False
@ -37,7 +53,7 @@ def is_cached_file(file, expire):
return is_cached
def file_hash(file):
def file_hash(file: str) -> str:
_sha512 = sha512()
with open(file, "rb") as f:
while True:
@ -48,19 +64,44 @@ def file_hash(file):
return _sha512.hexdigest()
def cache_hash(cache):
def cache_hash(cache: str, db=None) -> Optional[str]:
with suppress(BaseException):
return loads(Path(f"{cache}.md").read_text())["checksum"]
return loads(Path(f"{cache}.md").read_text()).get("checksum", None)
if db:
cached_file = db.get_job_cache_file(
getsourcefile(_getframe(1)).replace(".py", ""),
cache.split("/")[-1],
with_data=False,
)
if cached_file:
return cached_file.checksum
return None
def cache_file(file, cache, _hash):
def cache_file(
file: str, cache: str, _hash: str, db=None, *, service_id: Optional[str] = None
) -> Tuple[bool, str]:
ret, err = True, "success"
try:
copy(file, cache)
content = Path(file).read_bytes()
Path(cache).write_bytes(content)
Path(file).unlink()
md = {"date": datetime.timestamp(datetime.now()), "checksum": _hash}
md = {"date": datetime.now().timestamp(), "checksum": _hash}
Path(f"{cache}.md").write_text(dumps(md))
if db:
with lock:
err = db.update_job_cache(
getsourcefile(_getframe(1)).replace(".py", ""),
service_id,
cache.split("/")[-1],
content,
checksum=_hash,
)
if err:
ret = False
except:
return False, f"exception :\n{format_exc()}"
return ret, err
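Taken together, a job now follows this pattern. A simplified sketch, assuming the Database class, a configured logger, and the jobs helpers shown above are importable; the example paths are illustrative, not taken from the repo:

    from os import getenv

    db = Database(logger, sqlalchemy_string=getenv("DATABASE_URI", None))

    # Freshness check now also consults the db when the on-disk .md file is missing
    if is_cached_file("/var/cache/bunkerweb/example/data.list", "hour", db):
        logger.info("Cache is fresh, skipping download...")
    else:
        new_hash = file_hash("/var/tmp/bunkerweb/example/data.list")
        if new_hash != cache_hash("/var/cache/bunkerweb/example/data.list", db):
            # cache_file() now writes the cache entry to the db as well
            cached, err = cache_file(
                "/var/tmp/bunkerweb/example/data.list",
                "/var/cache/bunkerweb/example/data.list",
                new_hash,
                db,
            )
            if not cached:
                logger.error(f"Error while caching example list : {err}")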