mirror of
https://github.com/HelloZeroNet/ZeroNet.git
synced 2023-12-14 04:33:03 +01:00
255 lines
8.8 KiB
Python
255 lines
8.8 KiB
Python
import os, re, shutil, json, time, sqlite3
|
|
import gevent.event
|
|
from Db import Db
|
|
|
|
class SiteStorage:
    """File storage and optional SQL cache for a single site.

    Every file access goes through getPath(), which maps a site-relative
    "inner path" into the site's data directory ("data/<site address>")
    and rejects paths that would leave it.
    """

    def __init__(self, site, allow_create=True):
        """Bind the storage to `site` and make sure its data directory exists.

        site         -- site object; its `address` and `log` attributes are read here
        allow_create -- create the data directory when it is missing; when
                        False a missing directory raises Exception
        """
        self.site = site
        self.directory = "data/%s" % self.site.address  # Site data directory
        self.log = site.log
        self.db = None  # Db class
        # NOTE(review): nothing visible in this class ever sets db_checked to
        # True, so openDb(check=True) re-checks the tables on every call -- confirm intent
        self.db_checked = False  # Checked db tables since startup
        self.event_db_busy = None  # Gevent AsyncResult if db is working on rebuild
        self.has_db = self.isFile("dbschema.json")  # The site has schema

        if not os.path.isdir(self.directory):
            if allow_create:
                os.mkdir(self.directory)  # Create directory if not found
            else:
                raise Exception("Directory not exists: %s" % self.directory)

    def openDb(self, check=True):
        """Open the database described by dbschema.json and store it in self.db.

        check -- when True, rebuild the database first if its file is missing
                 or empty, and rebuild again (keeping the file) when
                 Db.checkTables() reports changed tables
        """
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if check:
            if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0:  # Not exists or empty
                self.rebuildDb()
        self.db = Db(schema, db_path)
        if check and not self.db_checked:
            changed_tables = self.db.checkTables()
            if changed_tables:
                self.rebuildDb(delete_db=False)  # Todo only update the changed table datas

    def closeDb(self):
        """Close the database if one is open."""
        if self.db:
            self.db.close()

    def getDb(self):
        """Return the Db object, opening it first if the site has a schema.

        Returns None when the site has no dbschema.json and no db is open.
        """
        if not self.db and self.has_db:
            self.openDb()
        return self.db

    def rebuildDb(self, delete_db=True):
        """Rebuild the SQL cache from the json files listed in the site's content files.

        delete_db -- delete the existing database file before rebuilding

        While the rebuild runs, self.event_db_busy holds an AsyncResult that
        query() waits on. It is always set and cleared on exit, even when the
        rebuild raises, so waiting queries can never be blocked forever; the
        exception itself still propagates to the caller.
        """
        busy_event = gevent.event.AsyncResult()
        self.event_db_busy = busy_event
        try:
            schema = self.loadJson("dbschema.json")
            db_path = self.getPath(schema["db_file"])
            if os.path.isfile(db_path) and delete_db:
                if self.db:
                    self.db.close()  # Close db if open
                self.log.info("Deleting %s" % db_path)
                try:
                    os.unlink(db_path)
                except Exception as err:
                    # Best effort: the rebuild continues on top of the old file
                    self.log.error("Delete error: %s" % err)
            self.openDb(check=False)
            self.log.info("Creating tables...")
            self.db.checkTables()
            self.log.info("Importing data...")
            cur = self.db.getCursor()
            cur.execute("BEGIN")
            cur.logging = False
            found = 0
            started = time.time()
            for content_inner_path, content in self.site.content_manager.contents.items():
                content_path = self.getPath(content_inner_path)
                if os.path.isfile(content_path):
                    if self.db.loadJson(content_path, cur=cur):
                        found += 1
                else:  # Missing content.json file
                    self.log.error("[MISSING] %s" % content_inner_path)
                for file_relative_path in content["files"].keys():
                    if not file_relative_path.endswith(".json"):
                        continue  # We are only interested in json files
                    # Path is relative to the content.json that lists it
                    file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path
                    file_inner_path = file_inner_path.strip("/")  # Strip leading /
                    file_path = self.getPath(file_inner_path)
                    if os.path.isfile(file_path):
                        if self.db.loadJson(file_path, cur=cur):
                            found += 1
                    else:
                        self.log.error("[MISSING] %s" % file_inner_path)
            cur.execute("END")
            self.log.info("Imported %s data file in %ss" % (found, time.time() - started))
        finally:
            busy_event.set(True)  # Event done, notify waiters
            self.event_db_busy = None  # Clear event

    def query(self, query, params=None):
        """Execute a sql query, rebuilding the database once on DatabaseError.

        Waits for a running rebuild to finish before executing. Only an
        exception whose class is exactly named "DatabaseError" triggers the
        rebuild and retry; subclasses are re-raised unchanged.
        """
        if self.event_db_busy:  # Db not ready for queries
            self.log.debug("Wating for db...")
            self.event_db_busy.get()  # Wait for event
        try:
            res = self.getDb().execute(query, params)
        except sqlite3.DatabaseError as err:
            if err.__class__.__name__ == "DatabaseError":
                self.log.error("Database error: %s, query: %s, try to rebuilding it..." % (err, query))
                self.rebuildDb()
                res = self.db.cur.execute(query, params)
            else:
                raise  # Bare raise keeps the original traceback
        return res

    def open(self, inner_path, mode="rb"):
        """Return an open file object for the site file `inner_path`."""
        return open(self.getPath(inner_path), mode)

    def read(self, inner_path, mode="r"):
        """Return the whole content of the site file `inner_path`."""
        with open(self.getPath(inner_path), mode) as file_obj:
            return file_obj.read()

    def write(self, inner_path, content):
        """Write `content` to the site file `inner_path` and update the SQL cache.

        content -- a string, or a file-like object (anything with a `read`
                   attribute) whose data is copied to disk
        """
        file_path = self.getPath(inner_path)
        # Write file
        with open(file_path, "wb") as out_file:
            if hasattr(content, 'read'):  # File-like object
                shutil.copyfileobj(content, out_file)  # Write buff to disk
            else:  # Simple string
                out_file.write(content)
        del content

        # Update Sql cache
        if inner_path == "dbschema.json":
            self.has_db = self.isFile("dbschema.json")
            self.getDb().checkTables()  # Check if any table schema changed
        elif inner_path != "content.json" and inner_path.endswith(".json") and self.has_db:  # Load json file to db
            self.log.debug("Loading json file to db: %s" % inner_path)
            self.getDb().loadJson(file_path)

    def loadJson(self, inner_path):
        """Load and parse the json file `inner_path`."""
        with self.open(inner_path) as file_obj:
            return json.load(file_obj)

    def getSize(self, inner_path):
        """Return the size of the site file `inner_path` in bytes."""
        return os.path.getsize(self.getPath(inner_path))

    def isFile(self, inner_path):
        """Return True if `inner_path` exists and is a regular file."""
        return os.path.isfile(self.getPath(inner_path))

    def isDir(self, inner_path):
        """Return True if `inner_path` exists and is a directory."""
        return os.path.isdir(self.getPath(inner_path))

    def getPath(self, inner_path):
        """Security check, then return the filesystem path of a site file.

        Backslashes are normalised to "/" and a leading site directory is
        stripped before the path is re-anchored in the site directory.
        Raises Exception when the result contains ".." or does not resolve
        to a location under the site directory.
        """
        inner_path = inner_path.replace("\\", "/")  # Windows separator fix
        inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path)  # Remove site directory if begins with it
        file_path = self.directory + "/" + inner_path
        allowed_dir = os.path.abspath(self.directory)  # Only files within this directory allowed
        file_dir = os.path.dirname(os.path.abspath(file_path))
        if ".." in file_path or not file_dir.startswith(allowed_dir):
            raise Exception("File not allowed: %s" % file_path)
        return file_path

    def verifyFiles(self, quick_check=False):
        """Verify every file listed in the site's content files.

        quick_check -- compare only the file size with the recorded "size"
                       instead of calling content_manager.verifyFile()

        Returns the list of inner paths that are missing or failed the check.
        """
        bad_files = []
        if not self.site.content_manager.contents.get("content.json"):  # No content.json, download it first
            self.site.needFile("content.json", update=True)  # Force update to fix corrupt file
            self.site.content_manager.loadContent()  # Reload content.json
        for content_inner_path, content in self.site.content_manager.contents.items():
            if not os.path.isfile(self.getPath(content_inner_path)):  # Missing content.json file
                self.log.error("[MISSING] %s" % content_inner_path)
                bad_files.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                # Path is relative to the content.json that lists it
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if not os.path.isfile(file_path):
                    self.log.error("[MISSING] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
                    continue

                if quick_check:
                    ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"]
                else:
                    # Close the handle as soon as the verification returns
                    with open(file_path, "rb") as file_obj:
                        ok = self.site.content_manager.verifyFile(file_inner_path, file_obj)

                if not ok:
                    self.log.error("[ERROR] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
            self.log.debug("%s verified: %s files, quick_check: %s, bad files: %s" % (content_inner_path, len(content["files"]), quick_check, bad_files))

        return bad_files

    def checkFiles(self, quick_check=True):
        """Verify the site files and count each failure in site.bad_files.

        Every path reported by verifyFiles() gets its own counter in
        self.site.bad_files incremented by one.
        """
        self.log.debug("Checking files... Quick:%s" % quick_check)
        bad_files = self.verifyFiles(quick_check)
        if bad_files:
            for bad_file in bad_files:
                # Look up this file's own counter, not a fixed "bad_file" key
                self.site.bad_files[bad_file] = self.site.bad_files.get(bad_file, 0) + 1

    def deleteFiles(self):
        """Delete the db file, every file listed in the content files and empty dirs.

        Returns True when the site data directory is gone afterwards, False
        when something unlisted remained in it.
        """
        if self.has_db:
            self.log.debug("Deleting db file...")
            self.closeDb()
            try:
                schema = self.loadJson("dbschema.json")
                db_path = self.getPath(schema["db_file"])
                if os.path.isfile(db_path):
                    os.unlink(db_path)
            except Exception as err:
                # Best effort: keep deleting the remaining site files
                self.log.error("Db file delete error: %s" % err)

        self.log.debug("Deleting files from content.json...")
        inner_paths = []  # Get filenames
        for content_inner_path, content in self.site.content_manager.contents.items():
            inner_paths.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                # Path is relative to the content.json that lists it
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path
                inner_paths.append(file_inner_path)

        for inner_path in inner_paths:
            path = self.getPath(inner_path)
            if os.path.isfile(path):
                os.unlink(path)

        self.log.debug("Deleting empty dirs...")
        # Bottom-up walk, so children are handled before their parents
        for root, dir_names, _file_names in os.walk(self.directory, topdown=False):
            for dir_name in dir_names:
                path = os.path.join(root, dir_name)
                if os.path.isdir(path) and os.listdir(path) == []:
                    os.removedirs(path)  # Also prunes parents that became empty
                    self.log.debug("Removing %s" % path)
        if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
            os.removedirs(self.directory)  # Remove sites directory if empty

        if os.path.isdir(self.directory):
            self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
            return False  # Some files not deleted
        else:
            self.log.debug("Site data directory deleted: %s..." % self.directory)
            return True  # All clean
|