# ZeroNet/src/Site/SiteStorage.py

import os, re, shutil, json, time, sqlite3
import gevent.event
from Db import Db
from Debug import Debug
from Config import config
class SiteStorage:
    def __init__(self, site, allow_create=True):
        self.site = site
        self.directory = "%s/%s" % (config.data_dir, self.site.address)  # Site data directory
        self.log = site.log
        self.db = None  # Db class
        self.db_checked = False  # Checked db tables since startup
        self.event_db_busy = None  # Gevent AsyncResult if db is working on rebuild
        self.has_db = self.isFile("dbschema.json")  # The site has a database schema
        if not os.path.isdir(self.directory):
            if allow_create:
                os.mkdir(self.directory)  # Create directory if not found
            else:
                raise Exception("Directory does not exist: %s" % self.directory)
    # Load db from dbschema.json
    def openDb(self, check=True):
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if check:
            if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0:  # Db file missing or empty
                self.rebuildDb()
        self.db = Db(schema, db_path)
        if check and not self.db_checked:
            changed_tables = self.db.checkTables()
            if changed_tables:
                self.rebuildDb(delete_db=False)  # TODO: only update the data of the changed tables
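    # A minimal sketch of what dbschema.json might look like. Only "db_file" is read
    # directly in this class; the other keys ("db_name", "tables", ...) are assumptions
    # about what the Db class consumes and are shown for illustration only:
    #   {
    #     "db_name": "Example site",
    #     "db_file": "data/example.db",
    #     "tables": { ... }
    #   }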
    def closeDb(self):
        if self.db:
            self.db.close()
        self.event_db_busy = None
        self.db = None
    # Return db class
    def getDb(self):
        if not self.db:
            self.log.debug("No database, waiting for dbschema.json...")
            self.site.needFile("dbschema.json", priority=1)
            self.has_db = self.isFile("dbschema.json")  # Recheck if dbschema.json exists
            if self.has_db:
                self.openDb()
        return self.db
    # Rebuild sql cache
    def rebuildDb(self, delete_db=True):
        self.event_db_busy = gevent.event.AsyncResult()
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if os.path.isfile(db_path) and delete_db:
            if self.db:
                self.db.close()  # Close db if open
            self.log.info("Deleting %s" % db_path)
            try:
                os.unlink(db_path)
            except Exception, err:
                self.log.error("Delete error: %s" % err)
        self.openDb(check=False)
        self.log.info("Creating tables...")
        self.db.checkTables()
        self.log.info("Importing data...")
        cur = self.db.getCursor()
        cur.execute("BEGIN")
        cur.logging = False
        found = 0
        s = time.time()
        for content_inner_path, content in self.site.content_manager.contents.items():
            content_path = self.getPath(content_inner_path)
            if os.path.isfile(content_path):
                if self.db.loadJson(content_path, cur=cur):
                    found += 1
            else:
                self.log.error("[MISSING] %s" % content_inner_path)  # Missing content.json file
            for file_relative_path in content["files"].keys():
                if not file_relative_path.endswith(".json"):
                    continue  # We are only interested in json files
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if os.path.isfile(file_path):
                    if self.db.loadJson(file_path, cur=cur):
                        found += 1
                else:
                    self.log.error("[MISSING] %s" % file_inner_path)
        cur.execute("END")
        self.log.info("Imported %s data files in %ss" % (found, time.time() - s))
        self.event_db_busy.set(True)  # Event done, notify waiters
        self.event_db_busy = None  # Clear event
    # Execute sql query or rebuild on dberror
    def query(self, query, params=None):
        if self.event_db_busy:  # Db not ready for queries
            self.log.debug("Waiting for db...")
            self.event_db_busy.get()  # Wait for event
        try:
            res = self.getDb().execute(query, params)
        except sqlite3.DatabaseError, err:
            if err.__class__.__name__ == "DatabaseError":
                self.log.error("Database error: %s, query: %s, trying to rebuild it..." % (err, query))
                self.rebuildDb()
                res = self.db.cur.execute(query, params)
            else:
                raise err
        return res
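    # Illustrative use of query() with a parameterized statement; the "json" table
    # and its "file_name" column are assumptions about the generated schema, shown
    # only to demonstrate the call shape:
    #   res = storage.query("SELECT * FROM json WHERE file_name = ?", ["data.json"])
    #   rows = res.fetchall()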
    # Open file object
    def open(self, inner_path, mode="rb"):
        return open(self.getPath(inner_path), mode)
    # Read file content
    def read(self, inner_path, mode="r"):
        return open(self.getPath(inner_path), mode).read()
    # Write content to file
    def write(self, inner_path, content):
        file_path = self.getPath(inner_path)
        # Create dir if it does not exist
        file_dir = os.path.dirname(file_path)
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)
        # Write file
        if hasattr(content, 'read'):  # File-like object
            with open(file_path, "wb") as file:
                shutil.copyfileobj(content, file)  # Write buff to disk
        else:  # Simple string
            with open(file_path, "wb") as file:
                file.write(content)
        del content
        self.onUpdated(inner_path)
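    # Illustrative calls to write(), showing both accepted content types; "storage"
    # and the inner paths are hypothetical:
    #   storage.write("data/log.txt", "hello")                      # Raw string
    #   storage.write("data/upload.bin", open("local.bin", "rb"))   # File-like object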
    # Site content updated
    def onUpdated(self, inner_path):
        file_path = self.getPath(inner_path)
        # Update Sql cache
        if inner_path == "dbschema.json":
            self.has_db = self.isFile("dbschema.json")
            self.getDb().checkTables()  # Check if any table schema changed
        elif inner_path.endswith(".json") and self.has_db:  # Load json file to db
            self.log.debug("Loading json file to db: %s" % inner_path)
            try:
                self.getDb().loadJson(file_path)
            except Exception, err:
                self.log.error("Json %s load error: %s" % (inner_path, Debug.formatException(err)))
                self.closeDb()
    # Load and parse json file
    def loadJson(self, inner_path):
        with self.open(inner_path) as file:
            return json.load(file)
    # Write formatted json file
    def writeJson(self, inner_path, data):
        content = json.dumps(data, indent=2, sort_keys=True)

        # Make it a little more compact by removing unnecessary white space
        def compact_list(match):
            return "[ " + match.group(1).strip() + " ]"

        def compact_dict(match):
            return "{ " + match.group(1).strip() + " }"

        content = re.sub(r"\[([^,\{\[]{10,100}?)\]", compact_list, content, flags=re.DOTALL)
        content = re.sub(r"\{([^,\[\{]{10,100}?)\}", compact_dict, content, flags=re.DOTALL)
        # Write to disk
        self.write(inner_path, content)
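    # Example of the compaction effect (hypothetical data): bracketed spans of
    # 10-100 characters with no comma and no nesting are folded onto one line, so
    #   "color": [
    #     "blue"
    #   ],
    #   "box": {
    #     "size": 100
    #   }
    # becomes
    #   "color": [ "blue" ],
    #   "box": { "size": 100 }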
    # Get file size
    def getSize(self, inner_path):
        return os.path.getsize(self.getPath(inner_path))
    # File exists
    def isFile(self, inner_path):
        return os.path.isfile(self.getPath(inner_path))
    # Dir exists
    def isDir(self, inner_path):
        return os.path.isdir(self.getPath(inner_path))
    # Security check and return path of site's file
    def getPath(self, inner_path):
        inner_path = inner_path.replace("\\", "/")  # Windows separator fix
        inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path)  # Remove site directory if it begins with it
        file_path = self.directory + "/" + inner_path
        allowed_dir = os.path.abspath(self.directory)  # Only files within this directory allowed
        if ".." in file_path or not os.path.dirname(os.path.abspath(file_path)).startswith(allowed_dir):
            raise Exception("File not allowed: %s" % file_path)
        return file_path
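    # Illustrative behaviour of the path check above ("storage" is hypothetical):
    #   storage.getPath("data/users/content.json")   # -> "<data_dir>/<address>/data/users/content.json"
    #   storage.getPath("../other_site/secret.txt")  # -> raises Exception("File not allowed: ...")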
    # Verify all files sha512sum using content.json
    def verifyFiles(self, quick_check=False):  # quick_check: compare file sizes only, skip hashing
        bad_files = []
        if not self.site.content_manager.contents.get("content.json"):  # No content.json, download it first
            self.site.needFile("content.json", update=True)  # Force update to fix corrupt file
            self.site.content_manager.loadContent()  # Reload content.json
        for content_inner_path, content in self.site.content_manager.contents.items():
            if not os.path.isfile(self.getPath(content_inner_path)):  # Missing content.json file
                self.log.debug("[MISSING] %s" % content_inner_path)
                bad_files.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if not os.path.isfile(file_path):
                    self.log.debug("[MISSING] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
                    continue
                if quick_check:
                    ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"]
                else:
                    ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb"))
                if not ok:
                    self.log.debug("[CHANGED] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
            self.log.debug("%s verified: %s files, quick_check: %s, bad files: %s" % (content_inner_path, len(content["files"]), quick_check, bad_files))
        return bad_files
    # Check and try to fix site files integrity
    def checkFiles(self, quick_check=True):
        s = time.time()
        bad_files = self.verifyFiles(quick_check)
        if bad_files:
            for bad_file in bad_files:
                self.site.bad_files[bad_file] = self.site.bad_files.get(bad_file, 0) + 1  # Count how many times the file was found bad
        self.log.debug("Checked files in %.2fs... Quick:%s" % (time.time() - s, quick_check))
    # Delete all the site's files
    def deleteFiles(self):
        if self.has_db:
            self.log.debug("Deleting db file...")
            self.closeDb()
            try:
                schema = self.loadJson("dbschema.json")
                db_path = self.getPath(schema["db_file"])
                if os.path.isfile(db_path):
                    os.unlink(db_path)
            except Exception, err:
                self.log.error("Db file delete error: %s" % err)
        self.log.debug("Deleting files from content.json...")
        files = []  # Get filenames
        for content_inner_path, content in self.site.content_manager.contents.items():
            files.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                files.append(file_inner_path)
        for inner_path in files:
            path = self.getPath(inner_path)
            if os.path.isfile(path):
                os.unlink(path)
        self.log.debug("Deleting empty dirs...")
        for root, dirs, files in os.walk(self.directory, topdown=False):
            for dir in dirs:
                path = os.path.join(root, dir)
                if os.path.isdir(path) and os.listdir(path) == []:
                    os.removedirs(path)
                    self.log.debug("Removing %s" % path)
        if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
            os.removedirs(self.directory)  # Remove site directory if empty
        if os.path.isdir(self.directory):
            self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
            return False  # Some files not deleted
        else:
            self.log.debug("Site data directory deleted: %s..." % self.directory)
            return True  # All clean
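
# A minimal usage sketch (not part of the original module). It assumes a Site
# instance is available from ZeroNet's Site module and that the site's content
# has already been downloaded; the address below is a placeholder:
#   from Site import Site
#   site = Site("1EU1tbG9oC1A8jCRXwMV3EFBHS4RGM3HMo")
#   storage = SiteStorage(site)
#   storage.writeJson("data/test.json", {"hello": "world"})
#   print storage.loadJson("data/test.json")
#   bad_files = storage.verifyFiles(quick_check=True)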