2015-03-19 21:19:14 +01:00
|
|
|
import os, re, shutil, json, time, sqlite3
|
|
|
|
import gevent.event
|
|
|
|
from Db import Db
|
|
|
|
|
|
|
|
class SiteStorage:
    """File and SQL-cache access for one site's data directory.

    All file access goes through getPath(), which maps an inner (site
    relative) path to a path below ``data/<site address>`` and rejects
    paths that would leave that directory.
    """

    def __init__(self, site, allow_create=True):
        """Bind the storage to `site` and make sure its directory exists.

        site         -- object providing `address` and `log`; other methods
                        also use `content_manager`, `bad_files`, `needFile`
        allow_create -- create the site directory when it is missing;
                        when False a missing directory raises Exception
        """
        self.site = site
        self.directory = "data/%s" % self.site.address  # Site data directory
        self.log = site.log
        self.db = None  # Db class
        # NOTE(review): nothing in this class ever sets db_checked to True, so
        # openDb(check=True) re-checks the tables on every call -- confirm intent.
        self.db_checked = False  # Checked db tables since startup
        self.event_db_busy = None  # Gevent AsyncResult if db is working on rebuild
        self.has_db = self.isFile("dbschema.json")  # The site has schema

        if not os.path.isdir(self.directory):
            if allow_create:
                os.mkdir(self.directory)  # Create directory if not found
            else:
                raise Exception("Directory not exists: %s" % self.directory)

    def openDb(self, check=True):
        """Load dbschema.json and open the db file it names into self.db.

        With check=True a missing or zero-byte db file is rebuilt first, and
        the tables are compared to the schema; any reported change triggers
        a rebuild that keeps the existing db file.
        """
        schema = self.loadJson("dbschema.json")
        db_path = self.getPath(schema["db_file"])
        if check:
            if not os.path.isfile(db_path) or os.path.getsize(db_path) == 0:  # Not exists or empty
                self.rebuildDb()
        self.db = Db(schema, db_path)
        if check and not self.db_checked:
            changed_tables = self.db.checkTables()
            if changed_tables:
                self.rebuildDb(delete_db=False)  # Todo: only update the changed tables' data

    def closeDb(self):
        """Close the db if it is open and forget the handle."""
        if self.db:
            self.db.close()
        self.db = None

    def getDb(self):
        """Return the Db object, opening it first if the site has a schema.

        Returns None when no db is open and the site has no dbschema.json.
        """
        if not self.db and self.has_db:
            self.openDb()
        return self.db

    def rebuildDb(self, delete_db=True):
        """Rebuild the sql cache from the site's json files.

        delete_db -- remove the existing db file before re-importing

        While the rebuild runs, self.event_db_busy holds an AsyncResult that
        query() waits on. It is released in a `finally` so that a failed
        rebuild propagates its exception instead of leaving every later
        query() blocked forever.
        """
        busy_event = gevent.event.AsyncResult()
        self.event_db_busy = busy_event
        try:
            schema = self.loadJson("dbschema.json")
            db_path = self.getPath(schema["db_file"])
            if os.path.isfile(db_path) and delete_db:
                if self.db:
                    self.db.close()  # Close db if open
                self.log.info("Deleting %s" % db_path)
                try:
                    os.unlink(db_path)
                except Exception as err:
                    # Best effort: keep going and re-import into the existing file
                    self.log.error("Delete error: %s" % err)
            self.openDb(check=False)
            self.log.info("Creating tables...")
            self.db.checkTables()
            self.log.info("Importing data...")
            cur = self.db.getCursor()
            cur.execute("BEGIN")
            cur.logging = False
            found = 0
            started = time.time()
            for content_inner_path, content in self.site.content_manager.contents.items():
                content_path = self.getPath(content_inner_path)
                if os.path.isfile(content_path):  # content.json is present: import it
                    if self.db.loadJson(content_path, cur=cur):
                        found += 1
                else:
                    self.log.error("[MISSING] %s" % content_inner_path)
                for file_relative_path in content["files"].keys():
                    if not file_relative_path.endswith(".json"):
                        continue  # We are only interested in json files
                    # Path is relative to the content.json that lists it
                    file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path
                    file_inner_path = file_inner_path.strip("/")  # Strip leading /
                    file_path = self.getPath(file_inner_path)
                    if os.path.isfile(file_path):
                        if self.db.loadJson(file_path, cur=cur):
                            found += 1
                    else:
                        self.log.error("[MISSING] %s" % file_inner_path)
            cur.execute("END")
            self.log.info("Imported %s data file in %ss" % (found, time.time() - started))
        finally:
            busy_event.set(True)  # Event done, notify waiters
            self.event_db_busy = None  # Clear event

    def query(self, query, params=None):
        """Execute an sql query; rebuild the db and retry once on DatabaseError.

        Waits for a running rebuild to finish first. Only an exception whose
        class is exactly named "DatabaseError" triggers the rebuild; subclasses
        of sqlite3.DatabaseError are re-raised unchanged.
        """
        if self.event_db_busy:  # Db not ready for queries
            self.log.debug("Wating for db...")
            self.event_db_busy.get()  # Wait for event
        try:
            res = self.getDb().execute(query, params)
        except sqlite3.DatabaseError as err:
            if err.__class__.__name__ != "DatabaseError":
                raise  # Bare raise keeps the original traceback
            self.log.error("Database error: %s, query: %s, try to rebuilding it..." % (err, query))
            self.rebuildDb()
            res = self.db.cur.execute(query, params)
        return res

    def open(self, inner_path, mode="rb"):
        """Open a site file and return the file object (caller closes it)."""
        return open(self.getPath(inner_path), mode)

    def read(self, inner_path, mode="r"):
        """Return the whole content of a site file; the file is closed before returning."""
        with open(self.getPath(inner_path), mode) as source:
            return source.read()

    def write(self, inner_path, content):
        """Write `content` to a site file, then run onUpdated() for it.

        content -- either an object with a `read` attribute (copied to disk
                   with shutil.copyfileobj) or a value passed to file.write()
                   on a file opened in "wb" mode
        Missing parent directories are created.
        """
        file_path = self.getPath(inner_path)
        # Create dir if not exists
        file_dir = os.path.dirname(file_path)
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)
        # Write file
        with open(file_path, "wb") as target:
            if hasattr(content, 'read'):  # File-like object
                shutil.copyfileobj(content, target)  # Write buff to disk
            else:  # Simple string
                target.write(content)
        del content
        self.onUpdated(inner_path)

    def onUpdated(self, inner_path):
        """React to a changed site file by updating the sql cache.

        dbschema.json -> refresh has_db and check the table schema;
        any other *.json except the root content.json -> load it into the db
        (only when the site has a schema).
        """
        file_path = self.getPath(inner_path)
        # Update Sql cache
        if inner_path == "dbschema.json":
            self.has_db = self.isFile("dbschema.json")
            self.getDb().checkTables()  # Check if any table schema changed
        elif inner_path != "content.json" and inner_path.endswith(".json") and self.has_db:  # Load json file to db
            self.log.debug("Loading json file to db: %s" % inner_path)
            self.getDb().loadJson(file_path)

    def loadJson(self, inner_path):
        """Load and parse a json file from the site directory."""
        with self.open(inner_path) as source:
            return json.load(source)

    def getSize(self, inner_path):
        """Return the size of a site file in bytes."""
        return os.path.getsize(self.getPath(inner_path))

    def isFile(self, inner_path):
        """Return True if the inner path is an existing file."""
        return os.path.isfile(self.getPath(inner_path))

    def isDir(self, inner_path):
        """Return True if the inner path is an existing directory."""
        return os.path.isdir(self.getPath(inner_path))

    def getPath(self, inner_path):
        """Security check and return the path of a site's file.

        Backslashes are normalised to "/" and a leading site directory prefix
        is removed before the path is joined to self.directory.

        Raises Exception("File not allowed: ...") when the result contains
        ".." or its parent directory is not below the site directory.
        """
        inner_path = inner_path.replace("\\", "/")  # Windows separator fix
        inner_path = re.sub("^%s/" % re.escape(self.directory), "", inner_path)  # Remove site directory if begins with it
        file_path = self.directory + "/" + inner_path
        allowed_dir = os.path.abspath(self.directory)  # Only files within this directory allowed
        # NOTE(review): the ".." test is a plain substring match, so a name such
        # as "a..b" is rejected as well -- kept as-is because callers may rely on it.
        if ".." in file_path or not os.path.dirname(os.path.abspath(file_path)).startswith(allowed_dir):
            raise Exception("File not allowed: %s" % file_path)
        return file_path

    def verifyFiles(self, quick_check=False):
        """Verify all files listed in the loaded content.json files.

        quick_check -- compare only the file size with the listed "size";
                       otherwise each file is opened and handed to
                       content_manager.verifyFile()

        Returns the list of inner paths that are missing or failed the check.
        """
        bad_files = []
        if not self.site.content_manager.contents.get("content.json"):  # No content.json, download it first
            self.site.needFile("content.json", update=True)  # Force update to fix corrupt file
            self.site.content_manager.loadContent()  # Reload content.json
        for content_inner_path, content in self.site.content_manager.contents.items():
            if not os.path.isfile(self.getPath(content_inner_path)):  # Missing content.json file
                self.log.debug("[MISSING] %s" % content_inner_path)
                bad_files.append(content_inner_path)
            for file_relative_path, file_info in content["files"].items():
                # Path is relative to the content.json that lists it
                file_inner_path = self.site.content_manager.toDir(content_inner_path) + file_relative_path
                file_inner_path = file_inner_path.strip("/")  # Strip leading /
                file_path = self.getPath(file_inner_path)
                if not os.path.isfile(file_path):
                    self.log.debug("[MISSING] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
                    continue

                if quick_check:
                    ok = os.path.getsize(file_path) == file_info["size"]
                else:
                    # Close the handle ourselves instead of leaving it to the garbage collector
                    with open(file_path, "rb") as candidate:
                        ok = self.site.content_manager.verifyFile(file_inner_path, candidate)

                if not ok:
                    self.log.debug("[CHANGED] %s" % file_inner_path)
                    bad_files.append(file_inner_path)
            self.log.debug("%s verified: %s files, quick_check: %s, bad files: %s" % (content_inner_path, len(content["files"]), quick_check, bad_files))

        return bad_files

    def checkFiles(self, quick_check=True):
        """Verify the site files and count each failure in site.bad_files.

        Every bad file's counter is incremented by one (starting from 0 for a
        file not seen before).
        """
        started = time.time()
        for bad_file in self.verifyFiles(quick_check):
            # Look up the counter of this file (not of the literal key "bad_file")
            self.site.bad_files[bad_file] = self.site.bad_files.get(bad_file, 0) + 1
        self.log.debug("Checked files in %.2fs... Quick:%s" % (time.time() - started, quick_check))

    def deleteFiles(self):
        """Delete every file of the site that is listed in its content.json files.

        Also removes the db file (when the site has a schema), then empty
        sub-directories, then the site directory itself if it ended up empty.

        Returns True when the site directory is gone, False when some
        unlisted file remained in it.
        """
        if self.has_db:
            self.log.debug("Deleting db file...")
            self.closeDb()
            try:
                schema = self.loadJson("dbschema.json")
                db_path = self.getPath(schema["db_file"])
                if os.path.isfile(db_path):
                    os.unlink(db_path)
            except Exception as err:
                # Best effort: the listed files are still removed below
                self.log.error("Db file delete error: %s" % err)

        self.log.debug("Deleting files from content.json...")
        inner_paths = []  # Get filenames
        for content_inner_path, content in self.site.content_manager.contents.items():
            inner_paths.append(content_inner_path)
            for file_relative_path in content["files"].keys():
                # Path is relative to the content.json that lists it
                inner_paths.append(self.site.content_manager.toDir(content_inner_path) + file_relative_path)

        for inner_path in inner_paths:
            path = self.getPath(inner_path)
            if os.path.isfile(path):
                os.unlink(path)

        self.log.debug("Deleting empty dirs...")
        # Bottom-up walk: children are handled before their parent, so os.rmdir
        # is enough. os.removedirs would also prune empty *ancestors* of the
        # site directory (e.g. the shared data directory).
        for root, dir_names, _file_names in os.walk(self.directory, topdown=False):
            for dir_name in dir_names:
                path = os.path.join(root, dir_name)
                if os.path.isdir(path) and os.listdir(path) == []:
                    os.rmdir(path)
                    self.log.debug("Removing %s" % path)
        if os.path.isdir(self.directory) and os.listdir(self.directory) == []:
            os.rmdir(self.directory)  # Remove site's directory if empty

        if os.path.isdir(self.directory):
            self.log.debug("Some unknown file remained in site data dir: %s..." % self.directory)
            return False  # Some files not deleted
        else:
            self.log.debug("Site data directory deleted: %s..." % self.directory)
            return True  # All clean