ZeroNet/src/Content/ContentDbDict.py

import time
import os

from . import ContentDb
from Debug import Debug
from Config import config


class ContentDbDict(dict):
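    """Dict of a site's content.json files, backed by ContentDb.

    Keys are preloaded from the content database; the parsed JSON values are
    loaded lazily from site storage on first access, and only the most
    recently accessed entries are kept in memory (see checkLimit).
    """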
    def __init__(self, site, *args, **kwargs):
        s = time.time()
        self.site = site
        self.cached_keys = []
        self.log = self.site.log
        self.db = ContentDb.getContentDb()
        self.db_id = self.db.needSite(site)
        self.num_loaded = 0
        super(ContentDbDict, self).__init__(self.db.loadDbDict(site))  # Load keys from database
        self.log.debug("ContentDb init: %.3fs, found files: %s, sites: %s" % (time.time() - s, len(self), len(self.db.site_ids)))

    def loadItem(self, key):
        try:
            self.num_loaded += 1
            if self.num_loaded % 100 == 0:
                if config.verbose:
                    self.log.debug("Loaded json: %s (latest: %s) called by: %s" % (self.num_loaded, key, Debug.formatStack()))
                else:
                    self.log.debug("Loaded json: %s (latest: %s)" % (self.num_loaded, key))
            content = self.site.storage.loadJson(key)
            dict.__setitem__(self, key, content)
        except IOError:
            if dict.get(self, key):
                self.__delitem__(key)  # File no longer exists
            raise KeyError(key)

        self.addCachedKey(key)
        self.checkLimit()

        return content

    def getItemSize(self, key):
        return self.site.storage.getSize(key)

    # Only keep the 10 most recently accessed json files in memory
    def checkLimit(self):
        if len(self.cached_keys) > 10:
            key_deleted = self.cached_keys.pop(0)
            dict.__setitem__(self, key_deleted, False)

    def addCachedKey(self, key):
        if key not in self.cached_keys and key != "content.json" and len(key) > 40:  # Keys shorter than 40 chars are always kept
            self.cached_keys.append(key)
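
    # A key maps to the parsed content.json dict when loaded, to False when it
    # was loaded before but evicted by checkLimit, and is absent for unknown keys.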
    def __getitem__(self, key):
        val = dict.get(self, key)
        if val:  # Already loaded
            return val
        elif val is None:  # Unknown key
            raise KeyError(key)
        elif val is False:  # Loaded before, but purged from the cache
            return self.loadItem(key)

    def __setitem__(self, key, val):
        self.addCachedKey(key)
        self.checkLimit()
        size = self.getItemSize(key)
        self.db.setContent(self.site, key, val, size)
        dict.__setitem__(self, key, val)

    def __delitem__(self, key):
        self.db.deleteContent(self.site, key)
        dict.__delitem__(self, key)
        try:
            self.cached_keys.remove(key)
        except ValueError:
            pass

    def iteritems(self):
        for key in dict.keys(self):
            try:
                val = self[key]
            except Exception as err:
                self.log.warning("Error loading %s: %s" % (key, err))
                continue
            yield key, val

    def items(self):
        back = []
        for key in dict.keys(self):
            try:
                val = self[key]
            except Exception as err:
                self.log.warning("Error loading %s: %s" % (key, err))
                continue
            back.append((key, val))
        return back

    def values(self):
        back = []
        for key, val in dict.items(self):
            if not val:
                try:
                    val = self.loadItem(key)
                except Exception:
                    continue
            back.append(val)
        return back

    def get(self, key, default=None):
        try:
            return self.__getitem__(key)
        except KeyError:
            return default
        except Exception as err:
            self.site.bad_files[key] = self.site.bad_files.get(key, 1)
            dict.__delitem__(self, key)
            self.log.warning("Error loading %s: %s" % (key, err))
            return default

    def execute(self, query, params={}):
        params["site_id"] = self.db_id
        return self.db.execute(query, params)
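
# Standalone benchmark: loads user content.json files from a local data-live
# site directory and reports load timing and memory usage (requires psutil).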
if __name__ == "__main__":
import psutil
process = psutil.Process(os.getpid())
s_mem = process.memory_info()[0] / float(2 ** 20)
root = "data-live/1MaiL5gfBM1cyb4a8e3iiL8L5gXmoAJu27"
contents = ContentDbDict("1MaiL5gfBM1cyb4a8e3iiL8L5gXmoAJu27", root)
print("Init len", len(contents))
s = time.time()
for dir_name in os.listdir(root + "/data/users/")[0:8000]:
contents["data/users/%s/content.json" % dir_name]
print("Load: %.3fs" % (time.time() - s))
s = time.time()
found = 0
for key, val in contents.items():
found += 1
assert key
assert val
print("Found:", found)
print("Iteritem: %.3fs" % (time.time() - s))
s = time.time()
found = 0
for key in list(contents.keys()):
found += 1
assert key in contents
print("In: %.3fs" % (time.time() - s))
print("Len:", len(list(contents.values())), len(list(contents.keys())))
print("Mem: +", process.memory_info()[0] / float(2 ** 20) - s_mem)