ZeroNet/src/Site/SiteStorage.py


import os, re, shutil, json, time, sqlite3
import gevent.event
from Db import Db
from Debug import Debug
from Config import config


class SiteStorage:
    def __init__(self, site, allow_create=True):
        self.site = site
        self.directory = "%s/%s" % (config.data_dir, self.site.address)  # Site data directory
        self.log = site.log
        self.db = None  # Db class
        self.db_checked = False  # Checked db tables since startup
        self.event_db_busy = None  # Gevent AsyncResult if db is working on rebuild
        self.has_db = self.isFile("dbschema.json")  # The site has a db schema
        if not os.path.isdir(self.directory):
            if allow_create:
                os.mkdir(self.directory)  # Create directory if not found
            else:
                raise Exception("Directory does not exist: %s" % self.directory)

    # Load db from dbschema.json
    def openDb(self, check=True):
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if check:
            if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0:  # Does not exist or empty
                self.rebuildDb()
        self.db = Db(schema, db_path)
        if check and not self.db_checked:
            changed_tables = self.db.checkTables()
            if changed_tables: self.rebuildDb(delete_db=False)  # TODO: only update data of the changed tables

    def closeDb(self):
        if self.db: self.db.close()
        self.event_db_busy = None
        self.db = None

    # Return db class
    def getDb(self):
        if not self.db:
            self.log.debug("No database, waiting for dbschema.json...")
            self.site.needFile("dbschema.json", priority=1)
            self.has_db = self.isFile("dbschema.json")  # Recheck if dbschema exists
            if self.has_db: self.openDb()
        return self.db
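
    # Illustrative sketch, not part of the original module: callers go through
    # getDb() instead of touching self.db, so the schema download and lazy open
    # are handled transparently. "site" is assumed to be an already-loaded Site
    # instance, and Db.execute() is assumed to return a standard sqlite3 cursor.
    #
    #   storage = SiteStorage(site)
    #   db = storage.getDb()  # Waits for dbschema.json if not downloaded yet
    #   if db:
    #       rows = db.execute("SELECT * FROM json LIMIT 10").fetchall()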

    # Rebuild sql cache
    def rebuildDb(self, delete_db=True):
        self.event_db_busy = gevent.event.AsyncResult()
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if os.path.isfile(db_path) and delete_db:
            if self.db: self.db.close()  # Close db if open
            self.log.info("Deleting %s" % db_path)
            try:
                os.unlink(db_path)
            except Exception, err:
                self.log.error("Delete error: %s" % err)
        self.openDb(check=False)
        self.log.info("Creating tables...")
        self.db.checkTables()
        self.log.info("Importing data...")
        cur = self.db.getCursor()
        cur.execute("BEGIN")
        cur.logging = False
        found = 0
        s = time.time()
        for content_inner_path, content in self.site.content_manager.contents.items():
            content_path = self.getPath(content_inner_path)
            if os.path.isfile(content_path):  # Content.json exists, import it
                if self.db.loadJson(content_path, cur=cur): found += 1
            else:
                self.log.error("[MISSING] %s" % content_inner_path)
            for file_relative_path in content["files"].keys():
                if not file_relative_path.endswith(".json"): continue  # We are only interested in json files
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if os.path.isfile(file_path):
                    if self.db.loadJson(file_path, cur=cur): found += 1
                else:
                    self.log.error("[MISSING] %s" % file_inner_path)
        cur.execute("END")
        self.log.info("Imported %s data files in %ss" % (found, time.time() - s))
        self.event_db_busy.set(True)  # Event done, notify waiters
        self.event_db_busy = None  # Clear event
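
    # Illustrative sketch (assumption, not in the original file): while rebuildDb()
    # is importing, concurrent callers of query() block on event_db_busy, so a
    # background rebuild from another greenlet could look like:
    #
    #   import gevent
    #   gevent.spawn(storage.rebuildDb)  # Rebuild in the background
    #   rows = storage.query("SELECT COUNT(*) AS num FROM json")  # Waits until the rebuild finishes
    #
    # The "json" table name is assumed from the stock ZeroNet dbschema.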

    # Execute sql query or rebuild on dberror
    def query(self, query, params=None):
        if self.event_db_busy:  # Db not ready for queries
            self.log.debug("Waiting for db...")
            self.event_db_busy.get()  # Wait for event
        try:
            res = self.getDb().execute(query, params)
        except sqlite3.DatabaseError, err:
            if err.__class__.__name__ == "DatabaseError":
                self.log.error("Database error: %s, query: %s, trying to rebuild it..." % (err, query))
                self.rebuildDb()
                res = self.db.cur.execute(query, params)
            else:
                raise err
        return res
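
    # Illustrative usage sketch (not part of the original file; the "json" table
    # and its "file_name" column are assumed from the stock ZeroNet dbschema):
    #
    #   for row in storage.query("SELECT * FROM json LIMIT 5"):
    #       print row["file_name"]
    #
    # If the underlying db file is corrupted, query() rebuilds it once and
    # retries the statement instead of raising sqlite3.DatabaseError.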

    # Open file object
    def open(self, inner_path, mode="rb"):
        return open(self.getPath(inner_path), mode)

    # Read file content
    def read(self, inner_path, mode="r"):
        return open(self.getPath(inner_path), mode).read()

    # Write content to file
    def write(self, inner_path, content):
        file_path = self.getPath(inner_path)
        # Create dir if not exists
        file_dir = os.path.dirname(file_path)
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)
        # Write file
        if hasattr(content, 'read'):  # File-like object
            with open(file_path, "wb") as file:
                shutil.copyfileobj(content, file)  # Write buff to disk
        else:  # Simple string
            with open(file_path, "wb") as file:
                file.write(content)
        del content
        self.onUpdated(inner_path)

    # Site content updated
    def onUpdated(self, inner_path):
        file_path = self.getPath(inner_path)
        # Update Sql cache
        if inner_path == "dbschema.json":
            self.has_db = self.isFile("dbschema.json")
            self.getDb().checkTables()  # Check if any table schema changed
        elif inner_path.endswith(".json") and self.has_db:  # Load json file to db
            self.log.debug("Loading json file to db: %s" % inner_path)
            try:
                self.getDb().loadJson(file_path)
            except Exception, err:
                self.log.error("Json %s load error: %s" % (inner_path, Debug.formatException(err)))
                self.closeDb()

    # Load and parse json file
    def loadJson(self, inner_path):
        with self.open(inner_path) as file:
            return json.load(file)

    # Write formatted json file
    def writeJson(self, inner_path, data):
        content = json.dumps(data, indent=2, sort_keys=True)

        # Make it a little more compact by removing unnecessary white space
        def compact_list(match):
            return "[ " + match.group(1).strip() + " ]"

        def compact_dict(match):
            return "{ " + match.group(1).strip() + " }"

        content = re.sub(r"\[([^,\{\[]{10,100}?)\]", compact_list, content, flags=re.DOTALL)
        content = re.sub(r"\{([^,\[\{]{10,100}?)\}", compact_dict, content, flags=re.DOTALL)
        # Write to disk
        self.write(inner_path, content)
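
    # Illustrative sketch of the compaction above (not in the original file):
    # only lists/dicts whose expanded form contains no commas, braces or
    # brackets are folded back onto one line, so
    #
    #   storage.writeJson("data/test.json", {"files": ["index.html"]})
    #
    # ends up on disk roughly as:
    #
    #   {
    #     "files": [ "index.html" ]
    #   }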

    # Get file size
    def getSize(self, inner_path):
        return os.path.getsize(self.getPath(inner_path))

    # File exists
    def isFile(self, inner_path):
        return os.path.isfile(self.getPath(inner_path))

    # Dir exists
    def isDir(self, inner_path):
        return os.path.isdir(self.getPath(inner_path))

    # Security check and return path of site's file
    def getPath(self, inner_path):
        inner_path = inner_path.replace("\\", "/")  # Windows separator fix
        inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path)  # Remove site directory if begins with it
        file_path = self.directory + "/" + inner_path
        allowed_dir = os.path.abspath(self.directory)  # Only files within this directory allowed
        if ".." in file_path or not os.path.dirname(os.path.abspath(file_path)).startswith(allowed_dir):
            raise Exception("File not allowed: %s" % file_path)
        return file_path
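
    # Illustrative sketch (not in the original file): getPath() is the single
    # choke point that keeps callers inside the site's data directory, e.g.
    #
    #   storage.getPath("index.html")     # -> "data/1SiteAddr.../index.html"
    #   storage.getPath("../users.json")  # raises Exception("File not allowed: ...")
    #
    # The site address above is a placeholder; any path containing ".." or
    # resolving outside the data dir is rejected.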

    # Verify all files sha512sum using content.json
    def verifyFiles(self, quick_check=False):  # quick_check: compare file sizes only, skip hash verification
        bad_files = []
        if not self.site.content_manager.contents.get("content.json"):  # No content.json, download it first
            self.site.needFile("content.json", update=True)  # Force update to fix corrupt file
            self.site.content_manager.loadContent()  # Reload content.json
        for content_inner_path, content in self.site.content_manager.contents.items():
            if not os.path.isfile(self.getPath(content_inner_path)):  # Missing content.json file
                self.log.debug("[MISSING] %s" % content_inner_path)
                bad_files.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if not os.path.isfile(file_path):
                    self.log.debug("[MISSING] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
                    continue
                if quick_check:
                    ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"]
                else:
                    ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb"))
                if not ok:
                    self.log.debug("[CHANGED] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
            self.log.debug("%s verified: %s files, quick_check: %s, bad files: %s" % (content_inner_path, len(content["files"]), quick_check, bad_files))
        return bad_files

    # Check and try to fix site files integrity
    def checkFiles(self, quick_check=True):
        s = time.time()
        bad_files = self.verifyFiles(quick_check)
        if bad_files:
            for bad_file in bad_files:
                self.site.bad_files[bad_file] = self.site.bad_files.get(bad_file, 0) + 1  # Track retry count per file
        self.log.debug("Checked files in %.2fs... Quick:%s" % (time.time() - s, quick_check))

    # Delete all the site's files
    def deleteFiles(self):
        if self.has_db:
            self.log.debug("Deleting db file...")
            self.closeDb()
            try:
                schema = self.loadJson("dbschema.json")
                db_path = self.getPath(schema["db_file"])
                if os.path.isfile(db_path): os.unlink(db_path)
            except Exception, err:
                self.log.error("Db file delete error: %s" % err)
        self.log.debug("Deleting files from content.json...")
        files = []  # Get filenames
        for content_inner_path, content in self.site.content_manager.contents.items():
            files.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path  # Relative to content.json
                files.append(file_inner_path)
        for inner_path in files:
            path = self.getPath(inner_path)
            if os.path.isfile(path): os.unlink(path)
        self.log.debug("Deleting empty dirs...")
        for root, dirs, files in os.walk(self.directory, topdown=False):
            for dir in dirs:
                path = os.path.join(root, dir)
                if os.path.isdir(path) and os.listdir(path) == []:
                    os.removedirs(path)
                    self.log.debug("Removing %s" % path)
        if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
            os.removedirs(self.directory)  # Remove sites directory if empty
        if os.path.isdir(self.directory):
            self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
            return False  # Some files not deleted
        else:
            self.log.debug("Site data directory deleted: %s..." % self.directory)
            return True  # All clean
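

# Illustrative end-to-end sketch (not part of the original module; "site" is
# assumed to be a Site instance created elsewhere in ZeroNet):
#
#   storage = SiteStorage(site)
#   storage.writeJson("data/test.json", {"files": ["index.html"]})
#   storage.checkFiles(quick_check=True)  # Size-only integrity pass, bad files go to site.bad_files
#   if storage.isFile("dbschema.json"):
#       rows = storage.query("SELECT * FROM json LIMIT 1")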