ZeroNet/src/Site/SiteStorage.py

313 lines
13 KiB
Python

import os
import re
import shutil
import json
import time
import sys
import sqlite3
import gevent.event
from Db import Db
from Debug import Debug
from Config import config
class SiteStorage:
def __init__(self, site, allow_create=True):
self.site = site
self.directory = "%s/%s" % (config.data_dir, self.site.address) # Site data diretory
self.allowed_dir = os.path.abspath(self.directory) # Only serve/modify file within this dir
self.log = site.log
self.db = None # Db class
self.db_checked = False # Checked db tables since startup
self.event_db_busy = None # Gevent AsyncResult if db is working on rebuild
self.has_db = self.isFile("dbschema.json") # The site has schema
if not os.path.isdir(self.directory):
if allow_create:
os.mkdir(self.directory) # Create directory if not found
else:
raise Exception("Directory not exists: %s" % self.directory)
# Load db from dbschema.json
def openDb(self, check=True):
schema = self.loadJson("dbschema.json")
db_path = self.getPath(schema["db_file"])
if check:
if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0: # Not exist or null
self.rebuildDb()
if not self.db:
self.db = Db(schema, db_path)
if check and not self.db_checked:
changed_tables = self.db.checkTables()
if changed_tables:
self.rebuildDb(delete_db=False) # Todo only update the changed table datas
def closeDb(self):
if self.db:
self.db.close()
self.event_db_busy = None
self.db = None
# Return db class
def getDb(self):
if not self.db:
self.log.debug("No database, waiting for dbschema.json...")
self.site.needFile("dbschema.json", priority=1)
self.has_db = self.isFile("dbschema.json") # Recheck if dbschema exist
if self.has_db:
self.openDb()
return self.db
# Rebuild sql cache
def rebuildDb(self, delete_db=True):
self.has_db = self.isFile("dbschema.json")
if not self.has_db:
return False
self.event_db_busy = gevent.event.AsyncResult()
schema = self.loadJson("dbschema.json")
db_path = self.getPath(schema["db_file"])
if os.path.isfile(db_path) and delete_db:
if self.db:
self.db.close() # Close db if open
self.log.info("Deleting %s" % db_path)
try:
os.unlink(db_path)
except Exception, err:
self.log.error("Delete error: %s" % err)
self.openDb(check=False)
self.log.info("Creating tables...")
self.db.checkTables()
self.log.info("Importing data...")
cur = self.db.getCursor()
cur.execute("BEGIN")
cur.logging = False
found = 0
s = time.time()
for content_inner_path, content in self.site.content_manager.contents.items():
content_path = self.getPath(content_inner_path)
if os.path.isfile(content_path): # Missing content.json file
if self.db.loadJson(content_path, cur=cur):
found += 1
else:
self.log.error("[MISSING] %s" % content_inner_path)
for file_relative_path in content["files"].keys():
if not file_relative_path.endswith(".json"):
continue # We only interesed in json files
content_inner_path_dir = self.site.content_manager.toDir(content_inner_path) # Content.json dir relative to site
file_inner_path = content_inner_path_dir + file_relative_path # File Relative to site dir
file_inner_path = file_inner_path.strip("/") # Strip leading /
file_path = self.getPath(file_inner_path)
if os.path.isfile(file_path):
if self.db.loadJson(file_path, cur=cur):
found += 1
else:
self.log.error("[MISSING] %s" % file_inner_path)
cur.execute("END")
self.log.info("Imported %s data file in %ss" % (found, time.time() - s))
self.event_db_busy.set(True) # Event done, notify waiters
self.event_db_busy = None # Clear event
# Execute sql query or rebuild on dberror
def query(self, query, params=None):
if self.event_db_busy: # Db not ready for queries
self.log.debug("Wating for db...")
self.event_db_busy.get() # Wait for event
try:
res = self.getDb().execute(query, params)
except sqlite3.DatabaseError, err:
if err.__class__.__name__ == "DatabaseError":
self.log.error("Database error: %s, query: %s, try to rebuilding it..." % (err, query))
self.rebuildDb()
res = self.db.cur.execute(query, params)
else:
raise err
return res
# Open file object
def open(self, inner_path, mode="rb"):
return open(self.getPath(inner_path), mode)
# Open file object
def read(self, inner_path, mode="r"):
return open(self.getPath(inner_path), mode).read()
# Write content to file
def write(self, inner_path, content):
file_path = self.getPath(inner_path)
# Create dir if not exist
file_dir = os.path.dirname(file_path)
if not os.path.isdir(file_dir):
os.makedirs(file_dir)
# Write file
if hasattr(content, 'read'): # File-like object
with open(file_path, "wb") as file:
shutil.copyfileobj(content, file) # Write buff to disk
else: # Simple string
with open(file_path, "wb") as file:
file.write(content)
del content
self.onUpdated(inner_path)
# Remove file from filesystem
def delete(self, inner_path):
file_path = self.getPath(inner_path)
os.unlink(file_path)
# Site content updated
def onUpdated(self, inner_path):
file_path = self.getPath(inner_path)
# Update Sql cache
if inner_path == "dbschema.json":
self.has_db = self.isFile("dbschema.json")
self.getDb().checkTables() # Check if any if table schema changed
elif inner_path.endswith(".json") and self.has_db: # Load json file to db
self.log.debug("Loading json file to db: %s" % inner_path)
try:
self.getDb().loadJson(file_path)
except Exception, err:
self.log.error("Json %s load error: %s" % (inner_path, Debug.formatException(err)))
self.closeDb()
# Load and parse json file
def loadJson(self, inner_path):
with self.open(inner_path) as file:
return json.load(file)
# Write formatted json file
def writeJson(self, inner_path, data):
content = json.dumps(data, indent=1, sort_keys=True)
# Make it a little more compact by removing unnecessary white space
def compact_list(match):
return "[ " + match.group(1).strip() + " ]"
def compact_dict(match):
return "{ " + match.group(1).strip() + " }"
content = re.sub("\[([^,\{\[]{10,100}?)\]", compact_list, content, flags=re.DOTALL)
content = re.sub("\{([^,\[\{]{10,100}?)\}", compact_dict, content, flags=re.DOTALL)
# Write to disk
self.write(inner_path, content)
# Get file size
def getSize(self, inner_path):
path = self.getPath(inner_path)
if os.path.isfile(path):
return os.path.getsize(path)
else:
return 0
# File exist
def isFile(self, inner_path):
return os.path.isfile(self.getPath(inner_path))
# Dir exist
def isDir(self, inner_path):
return os.path.isdir(self.getPath(inner_path))
# Security check and return path of site's file
def getPath(self, inner_path):
inner_path = inner_path.replace("\\", "/") # Windows separator fix
inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path) # Remove site directory if begins with it
file_path = u"%s/%s" % (self.directory, inner_path)
file_abspath = os.path.dirname(os.path.abspath(file_path))
if ".." in file_path or not file_abspath.startswith(self.allowed_dir):
raise Exception(u"File not allowed: %s" % file_path)
return file_path
# Get site dir relative path
def getInnerPath(self, path):
inner_path = re.sub("^%s/" % re.escape(self.directory), "", path)
return inner_path
# Verify all files sha512sum using content.json
def verifyFiles(self, quick_check=False): # Fast = using file size
bad_files = []
if not self.site.content_manager.contents.get("content.json"): # No content.json, download it first
self.site.needFile("content.json", update=True) # Force update to fix corrupt file
self.site.content_manager.loadContent() # Reload content.json
for content_inner_path, content in self.site.content_manager.contents.items():
if not os.path.isfile(self.getPath(content_inner_path)): # Missing content.json file
self.log.debug("[MISSING] %s" % content_inner_path)
bad_files.append(content_inner_path)
for file_relative_path in content["files"].keys():
file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path # Relative to site dir
file_inner_path = file_inner_path.strip("/") # Strip leading /
file_path = self.getPath(file_inner_path)
if not os.path.isfile(file_path):
self.log.debug("[MISSING] %s" % file_inner_path)
bad_files.append(file_inner_path)
continue
if quick_check:
ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"]
else:
ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb"))
if not ok:
self.log.debug("[CHANGED] %s" % file_inner_path)
bad_files.append(file_inner_path)
self.log.debug(
"%s verified: %s files, quick_check: %s, bad files: %s" %
(content_inner_path, len(content["files"]), quick_check, bad_files)
)
return bad_files
# Check and try to fix site files integrity
def checkFiles(self, quick_check=True):
s = time.time()
bad_files = self.verifyFiles(quick_check)
if bad_files:
for bad_file in bad_files:
self.site.bad_files[bad_file] = self.site.bad_files.get("bad_file", 0) + 1
self.log.debug("Checked files in %.2fs... Quick:%s" % (time.time() - s, quick_check))
# Delete site's all file
def deleteFiles(self):
if self.has_db:
self.log.debug("Deleting db file...")
self.closeDb()
try:
schema = self.loadJson("dbschema.json")
db_path = self.getPath(schema["db_file"])
if os.path.isfile(db_path):
os.unlink(db_path)
except Exception, err:
self.log.error("Db file delete error: %s" % err)
self.log.debug("Deleting files from content.json...")
files = [] # Get filenames
for content_inner_path, content in self.site.content_manager.contents.items():
files.append(content_inner_path)
for file_relative_path in content["files"].keys():
file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path # Relative to site dir
files.append(file_inner_path)
for inner_path in files:
path = self.getPath(inner_path)
if os.path.isfile(path):
os.unlink(path)
self.log.debug("Deleting empty dirs...")
for root, dirs, files in os.walk(self.directory, topdown=False):
for dir in dirs:
path = os.path.join(root, dir)
if os.path.isdir(path) and os.listdir(path) == []:
os.removedirs(path)
self.log.debug("Removing %s" % path)
if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
os.removedirs(self.directory) # Remove sites directory if empty
if os.path.isdir(self.directory):
self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
return False # Some files not deleted
else:
self.log.debug("Site data directory deleted: %s..." % self.directory)
return True # All clean