import sqlite3
import json
import time
import logging
import re
import os
import atexit
import threading
import sys
import weakref
import errno

import gevent

from Debug import Debug
from .DbCursor import DbCursor
from util import SafeRe
from util import helper
from util import ThreadPool
from Config import config

thread_pool_db = ThreadPool.ThreadPool(config.threads_db)
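
# Blocking sqlite work below funnels through this pool (see the
# @thread_pool_db.wrap decorator on commit()), so a commit running in a worker
# thread does not stall the gevent loop. A minimal sketch of the same pattern,
# assuming only that ThreadPool.wrap re-dispatches the call to a pool thread:
#
#     pool = ThreadPool.ThreadPool(1)
#
#     @pool.wrap
#     def slowQuery(conn):
#         return conn.execute("SELECT 1").fetchone()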

next_db_id = 0
opened_dbs = []


# Close idle databases to save some memory
def dbCleanup():
    while 1:
        time.sleep(60 * 5)
        for db in opened_dbs[:]:
            idle = time.time() - db.last_query_time
            if idle > 60 * 5 and db.close_idle:
                db.close("Cleanup")


def dbCommitCheck():
    while 1:
        time.sleep(5)
        for db in opened_dbs[:]:
            if not db.need_commit:
                continue

            success = db.commit("Interval")
            if success:
                db.need_commit = False

            time.sleep(0.1)
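
# Writers that want batched durability set db.need_commit = True instead of
# committing themselves; dbCommitCheck() then flushes each flagged database at
# most once per 5-second pass. A hedged usage sketch (assuming `db` is an
# already-connected Db instance):
#
#     db.execute("UPDATE keyvalue SET value = ? WHERE key = ?", (1, "x"))
#     db.need_commit = True  # picked up by dbCommitCheck within ~5s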


def dbCloseAll():
    for db in opened_dbs[:]:
        db.close("Close all")


gevent.spawn(dbCleanup)
gevent.spawn(dbCommitCheck)
atexit.register(dbCloseAll)


class DbTableError(Exception):
    def __init__(self, message, table):
        super().__init__(message)
        self.table = table


class Db(object):

    def __init__(self, schema, db_path, close_idle=False):
        global next_db_id
        self.db_path = db_path
        self.db_dir = os.path.dirname(db_path) + "/"
        self.schema = schema
        self.schema["version"] = self.schema.get("version", 1)
        self.conn = None
        self.cur = None
        self.cursors = weakref.WeakSet()
        self.id = next_db_id
        next_db_id += 1
        self.progress_sleeping = False
        self.commiting = False
        self.log = logging.getLogger("Db#%s:%s" % (self.id, schema["db_name"]))
        self.table_names = None
        self.collect_stats = False
        self.foreign_keys = False
        self.need_commit = False
        self.query_stats = {}
        self.db_keyvalues = {}
        self.delayed_queue = []
        self.delayed_queue_thread = None
        self.close_idle = close_idle
        self.last_query_time = time.time()
        self.last_sleep_time = time.time()
        self.num_execute_since_sleep = 0
        self.lock = ThreadPool.Lock()
        self.connect_lock = ThreadPool.Lock()
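
    # Shape of the `schema` dict, reconstructed from how it is consumed in
    # checkTables() and updateJson(); a hedged example, not a complete spec:
    #
    #     {
    #         "db_name": "example",            # used for the logger name
    #         "version": 2,                    # layout of the json table (1-3)
    #         "tables": {
    #             "comment": {
    #                 "cols": [["body", "TEXT"], ["json_id", "INTEGER"]],
    #                 "indexes": ["CREATE INDEX comment_json_id ON comment(json_id)"],
    #                 "schema_changed": 1      # bump to force a table rebuild
    #             }
    #         },
    #         "maps": {".+/data.json": {"to_table": ["comment"]}}
    #     }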

    def __repr__(self):
        return "<Db#%s:%s close_idle:%s>" % (id(self), self.db_path, self.close_idle)

    def connect(self):
        self.connect_lock.acquire(True)
        try:
            if self.conn:
                self.log.debug("Already connected, connection ignored")
                return

            if self not in opened_dbs:
                opened_dbs.append(self)
            s = time.time()
            try:  # Directory may not exist yet
                os.makedirs(self.db_dir)
                self.log.debug("Created Db path: %s" % self.db_dir)
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise err
            if not os.path.isfile(self.db_path):
                self.log.debug("Db file does not exist yet: %s" % self.db_path)
            self.conn = sqlite3.connect(self.db_path, isolation_level="DEFERRED", check_same_thread=False)
            self.conn.row_factory = sqlite3.Row
            self.conn.set_progress_handler(self.progress, 5000000)
            self.conn.execute('PRAGMA journal_mode=WAL')
            if self.foreign_keys:
                self.conn.execute("PRAGMA foreign_keys = ON")
            self.cur = self.getCursor()

            self.log.debug(
                "Connected to %s in %.3fs (opened: %s, sqlite version: %s)..." %
                (self.db_path, time.time() - s, len(opened_dbs), sqlite3.version)
            )
            self.log.debug("Connect by thread: %s" % threading.current_thread().ident)
            self.log.debug("Connect called by %s" % Debug.formatStack())
        finally:
            self.connect_lock.release()

    def getConn(self):
        if not self.conn:
            self.connect()
        return self.conn

    def progress(self, *args, **kwargs):
        self.progress_sleeping = True
        time.sleep(0.001)
        self.progress_sleeping = False
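
    # Registered in connect() via conn.set_progress_handler(self.progress,
    # 5000000): sqlite invokes it every ~5M virtual machine instructions, and
    # the brief sleep (gevent-patched in ZeroNet) yields to other greenlets so
    # one long query cannot monopolize the loop. commit() checks
    # progress_sleeping to avoid committing while a statement is mid-flight.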

    # Execute query using dbcursor
    def execute(self, query, params=None):
        if not self.conn:
            self.connect()
        return self.cur.execute(query, params)

    @thread_pool_db.wrap
    def commit(self, reason="Unknown"):
        if self.progress_sleeping:
            self.log.debug("Commit ignored: Progress sleeping")
            return False

        if not self.conn:
            self.log.debug("Commit ignored: No connection")
            return False

        if self.commiting:
            self.log.debug("Commit ignored: Already committing")
            return False

        try:
            s = time.time()
            self.commiting = True
            self.conn.commit()
            self.log.debug("Committed in %.3fs (reason: %s)" % (time.time() - s, reason))
            return True
        except Exception as err:
            if "SQL statements in progress" in str(err):
                self.log.warning("Commit delayed: %s (reason: %s)" % (Debug.formatException(err), reason))
            else:
                self.log.error("Commit error: %s (reason: %s)" % (Debug.formatException(err), reason))
            return False
        finally:
            self.commiting = False

    def insertOrUpdate(self, *args, **kwargs):
        if not self.conn:
            self.connect()
        return self.cur.insertOrUpdate(*args, **kwargs)

    def executeDelayed(self, *args, **kwargs):
        if not self.delayed_queue_thread:
            self.delayed_queue_thread = gevent.spawn_later(1, self.processDelayed)
        self.delayed_queue.append(("execute", (args, kwargs)))

    def insertOrUpdateDelayed(self, *args, **kwargs):
        if not self.delayed_queue:
            gevent.spawn_later(1, self.processDelayed)
        self.delayed_queue.append(("insertOrUpdate", (args, kwargs)))

    def processDelayed(self):
        if not self.delayed_queue:
            self.log.debug("processDelayed aborted")
            return
        if not self.conn:
            self.connect()

        s = time.time()
        cur = self.getCursor()
        for command, params in self.delayed_queue:
            if command == "insertOrUpdate":
                cur.insertOrUpdate(*params[0], **params[1])
            else:
                cur.execute(*params[0], **params[1])

        if len(self.delayed_queue) > 10:
            self.log.debug("Processed %s delayed queue in %.3fs" % (len(self.delayed_queue), time.time() - s))
        self.delayed_queue = []
        self.delayed_queue_thread = None
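
    # Delayed writes batch many small statements into one pass roughly a
    # second later, trading latency for fewer transactions. A hedged usage
    # sketch:
    #
    #     db.executeDelayed("UPDATE keyvalue SET value = ? WHERE key = ?", (1, "x"))
    #     # ...more queued writes...
    #     # everything runs together when processDelayed() fires (or on close())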

    def close(self, reason="Unknown"):
        if not self.conn:
            return False
        self.connect_lock.acquire()
        s = time.time()
        if self.delayed_queue:
            self.processDelayed()
        if self in opened_dbs:
            opened_dbs.remove(self)
        self.need_commit = False
        self.commit("Closing: %s" % reason)
        self.log.debug("Close called by %s" % Debug.formatStack())
        for i in range(5):
            if len(self.cursors) == 0:
                break
            self.log.debug("Pending cursors: %s" % len(self.cursors))
            time.sleep(0.1 * i)
        if len(self.cursors):
            self.log.debug("Killing cursors: %s" % len(self.cursors))
            self.conn.interrupt()

        if self.cur:
            self.cur.close()
        if self.conn:
            ThreadPool.main_loop.call(self.conn.close)
        self.conn = None
        self.cur = None
        self.log.debug("%s closed (reason: %s) in %.3fs, opened: %s" % (self.db_path, reason, time.time() - s, len(opened_dbs)))
        self.connect_lock.release()
        return True

    # Gets a cursor object to database
    # Return: Cursor class
    def getCursor(self):
        if not self.conn:
            self.connect()

        cur = DbCursor(self)
        return cur

    def getSharedCursor(self):
        if not self.conn:
            self.connect()
        return self.cur
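
    # getCursor() hands out a fresh DbCursor, tracked through the
    # self.cursors WeakSet so close() can wait for (or interrupt) pending
    # work; getSharedCursor() reuses the connection-wide self.cur, which is
    # cheaper but not safe for concurrent multi-statement use from several
    # greenlets.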

    # Get the table version
    # Return: Table version, or False if the keyvalue table does not exist yet
    def getTableVersion(self, table_name):
        if not self.db_keyvalues:  # Get db keyvalues
            try:
                res = self.execute("SELECT * FROM keyvalue WHERE json_id=0")  # json_id = 0 is internal keyvalues
            except sqlite3.OperationalError as err:  # Table does not exist
                self.log.debug("Query table version error: %s" % err)
                return False

            for row in res:
                self.db_keyvalues[row["key"]] = row["value"]

        return self.db_keyvalues.get("table.%s.version" % table_name, 0)
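
    # Versions are stored as internal keyvalue rows, e.g.
    # key="table.comment.version", value=1, json_id=0. DbCursor.needTable
    # (presumably) compares this against the version passed from checkTables()
    # and recreates the table when they differ.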

    # Check Db tables
    # Return: <list> Changed table names
    def checkTables(self):
        s = time.time()
        changed_tables = []

        cur = self.getSharedCursor()

        # Check internal tables
        # Check keyvalue table
        changed = cur.needTable("keyvalue", [
            ["keyvalue_id", "INTEGER PRIMARY KEY AUTOINCREMENT"],
            ["key", "TEXT"],
            ["value", "INTEGER"],
            ["json_id", "INTEGER"],
        ], [
            "CREATE UNIQUE INDEX key_id ON keyvalue(json_id, key)"
        ], version=self.schema["version"])
        if changed:
            changed_tables.append("keyvalue")

        # Create json table if no custom one defined
        if "json" not in self.schema.get("tables", {}):
            if self.schema["version"] == 1:
                changed = cur.needTable("json", [
                    ["json_id", "INTEGER PRIMARY KEY AUTOINCREMENT"],
                    ["path", "VARCHAR(255)"]
                ], [
                    "CREATE UNIQUE INDEX path ON json(path)"
                ], version=self.schema["version"])
            elif self.schema["version"] == 2:
                changed = cur.needTable("json", [
                    ["json_id", "INTEGER PRIMARY KEY AUTOINCREMENT"],
                    ["directory", "VARCHAR(255)"],
                    ["file_name", "VARCHAR(255)"]
                ], [
                    "CREATE UNIQUE INDEX path ON json(directory, file_name)"
                ], version=self.schema["version"])
            elif self.schema["version"] == 3:
                changed = cur.needTable("json", [
                    ["json_id", "INTEGER PRIMARY KEY AUTOINCREMENT"],
                    ["site", "VARCHAR(255)"],
                    ["directory", "VARCHAR(255)"],
                    ["file_name", "VARCHAR(255)"]
                ], [
                    "CREATE UNIQUE INDEX path ON json(directory, site, file_name)"
                ], version=self.schema["version"])
            if changed:
                changed_tables.append("json")

        # Check schema tables
        for table_name, table_settings in self.schema.get("tables", {}).items():
            try:
                indexes = table_settings.get("indexes", [])
                version = table_settings.get("schema_changed", 0)
                changed = cur.needTable(
                    table_name, table_settings["cols"],
                    indexes, version=version
                )
                if changed:
                    changed_tables.append(table_name)
            except Exception as err:
                self.log.error("Error creating table %s: %s" % (table_name, Debug.formatException(err)))
                raise DbTableError(err, table_name)

        self.log.debug("Db check done in %.3fs, changed tables: %s" % (time.time() - s, changed_tables))
        if changed_tables:
            self.db_keyvalues = {}  # Refresh table version cache

        return changed_tables

    # Update json file to db
    # Return: True if matched
    def updateJson(self, file_path, file=None, cur=None):
        if not file_path.startswith(self.db_dir):
            return False  # Not from the db dir: Skipping
        relative_path = file_path[len(self.db_dir):]  # File path relative to db file

        # Check if filename matches any of mappings in schema
        matched_maps = []
        for match, map_settings in self.schema["maps"].items():
            try:
                if SafeRe.match(match, relative_path):
                    matched_maps.append(map_settings)
            except SafeRe.UnsafePatternError as err:
                self.log.error(err)

        # No match found for the file
        if not matched_maps:
            return False

        # Load the json file
        try:
            if file is None:  # Open file if no file object was passed
                file = open(file_path, "rb")

            if file is False:  # File deleted
                data = {}
            else:
                if file_path.endswith("json.gz"):
                    file = helper.limitedGzipFile(fileobj=file)

                if sys.version_info.major == 3 and sys.version_info.minor < 6:
                    data = json.loads(file.read().decode("utf8"))
                else:
                    data = json.load(file)
        except Exception as err:
            self.log.debug("Json file %s load error: %s" % (file_path, err))
            data = {}

        # No cursor specified
        if not cur:
            cur = self.getSharedCursor()
            cur.logging = False

        # Row for current json file if required
        if not data or [dbmap for dbmap in matched_maps if "to_keyvalue" in dbmap or "to_table" in dbmap]:
            json_row = cur.getJsonRow(relative_path)

        # Check matched mappings in schema
        for dbmap in matched_maps:
            # Insert non-relational key values
            if dbmap.get("to_keyvalue"):
                # Get current values
                res = cur.execute("SELECT * FROM keyvalue WHERE json_id = ?", (json_row["json_id"],))
                current_keyvalue = {}
                current_keyvalue_id = {}
                for row in res:
                    current_keyvalue[row["key"]] = row["value"]
                    current_keyvalue_id[row["key"]] = row["keyvalue_id"]

                for key in dbmap["to_keyvalue"]:
                    if key not in current_keyvalue:  # Keyvalue does not exist yet in the db
                        cur.execute(
                            "INSERT INTO keyvalue ?",
                            {"key": key, "value": data.get(key), "json_id": json_row["json_id"]}
                        )
                    elif data.get(key) != current_keyvalue[key]:  # Keyvalue has a different value
                        cur.execute(
                            "UPDATE keyvalue SET value = ? WHERE keyvalue_id = ?",
                            (data.get(key), current_keyvalue_id[key])
                        )

            # Insert data to json table for easier joins
            if dbmap.get("to_json_table"):
                directory, file_name = re.match("^(.*?)/*([^/]*)$", relative_path).groups()
                data_json_row = dict(cur.getJsonRow(directory + "/" + dbmap.get("file_name", file_name)))
                changed = False
                for key in dbmap["to_json_table"]:
                    if data.get(key) != data_json_row.get(key):
                        changed = True
                if changed:
                    # Add the custom col values
                    data_json_row.update({key: val for key, val in data.items() if key in dbmap["to_json_table"]})
                    cur.execute("INSERT OR REPLACE INTO json ?", data_json_row)

            # Insert data to tables
            for table_settings in dbmap.get("to_table", []):
                if isinstance(table_settings, dict):  # Custom settings
                    table_name = table_settings["table"]  # Table name to insert data into
                    node = table_settings.get("node", table_name)  # Node keyname in data json file
                    key_col = table_settings.get("key_col")  # Map dict key as this col
                    val_col = table_settings.get("val_col")  # Map dict value as this col
                    import_cols = table_settings.get("import_cols")
                    replaces = table_settings.get("replaces")
                else:  # Simple settings
                    table_name = table_settings
                    node = table_settings
                    key_col = None
                    val_col = None
                    import_cols = None
                    replaces = None

                # Fill import cols from table cols
                if not import_cols:
                    import_cols = set([item[0] for item in self.schema["tables"][table_name]["cols"]])

                cur.execute("DELETE FROM %s WHERE json_id = ?" % table_name, (json_row["json_id"],))

                if node not in data:
                    continue

                if key_col:  # Map as dict
                    for key, val in data[node].items():
                        if val_col:  # Single value
                            cur.execute(
                                "INSERT OR REPLACE INTO %s ?" % table_name,
                                {key_col: key, val_col: val, "json_id": json_row["json_id"]}
                            )
                        else:  # Multi value
                            if type(val) is dict:  # Single row
                                row = val
                                if import_cols:
                                    row = {key: row[key] for key in row if key in import_cols}  # Filter row by import_cols
                                row[key_col] = key
                                # Replace in value if necessary
                                if replaces:
                                    for replace_key, replace in replaces.items():
                                        if replace_key in row:
                                            for replace_from, replace_to in replace.items():
                                                row[replace_key] = row[replace_key].replace(replace_from, replace_to)

                                row["json_id"] = json_row["json_id"]
                                cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)
                            elif type(val) is list:  # Multi row
                                for row in val:
                                    row[key_col] = key
                                    row["json_id"] = json_row["json_id"]
                                    cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)
                else:  # Map as list
                    for row in data[node]:
                        row["json_id"] = json_row["json_id"]
                        if import_cols:
                            row = {key: row[key] for key in row if key in import_cols}  # Filter row by import_cols
                        cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)

        # Cleanup json row
        if not data:
            self.log.debug("Cleanup json row for %s" % file_path)
            cur.execute("DELETE FROM json WHERE json_id = %s" % json_row["json_id"])

        return True
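
    # Hedged end-to-end example of the mapping logic above (names are
    # illustrative): with schema["maps"] = {".+/data.json": {"to_table":
    # ["comment"], "to_keyvalue": ["next_comment_id"]}}, updating
    # "1ABC/data.json" containing {"next_comment_id": 4, "comment":
    # [{"body": "hi"}]} deletes the file's old `comment` rows, re-inserts one
    # row tagged with the file's json_id, and upserts next_comment_id into
    # `keyvalue`.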


if __name__ == "__main__":
    s = time.time()
    console_log = logging.StreamHandler()
    logging.getLogger('').setLevel(logging.DEBUG)
    logging.getLogger('').addHandler(console_log)
    console_log.setLevel(logging.DEBUG)
    dbjson = Db(json.load(open("zerotalk.schema.json")), "data/users/zerotalk.db")
    dbjson.collect_stats = True
    dbjson.checkTables()
    cur = dbjson.getCursor()
    cur.logging = False
    dbjson.updateJson("data/users/content.json", cur=cur)
    for user_dir in os.listdir("data/users"):
        if os.path.isdir("data/users/%s" % user_dir):
            dbjson.updateJson("data/users/%s/data.json" % user_dir, cur=cur)
            # print ".",
    cur.logging = True
    print("Done in %.3fs" % (time.time() - s))
    for query, stats in sorted(dbjson.query_stats.items()):
        print("-", query, stats)