# ZeroNet/src/Content/ContentManager.py

import json
import time
import re
import os
import copy
import gevent
from Debug import Debug
from Crypt import CryptHash
from Config import config


class ContentManager(object):
def __init__(self, site):
self.site = site
self.log = self.site.log
self.contents = {} # Known content.json (without files and includes)
self.loadContent(add_bad_files=False)
self.site.settings["size"] = self.getTotalSize()
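
    # A content.json parsed here looks roughly like this (illustrative values):
    # {
    #     "address": "1Site...",
    #     "modified": 1425891539.81,
    #     "files": {"index.html": {"sha512": "c29d73d...", "size": 41}},
    #     "includes": {"data/content.json": {"signers": ["13ReyhCs..."], "max_size": 10000}},
    #     "signs": {"1Site...": "HChU28lG..."}
    # }
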
    # Load content.json to self.contents
    # Return: Changed files ["index.html", "data/messages.json"] or False on error
def loadContent(self, content_inner_path="content.json", add_bad_files=True, load_includes=True):
        content_inner_path = content_inner_path.strip("/")  # Remove / from beginning
old_content = self.contents.get(content_inner_path)
content_path = self.site.storage.getPath(content_inner_path)
content_path_dir = self.toDir(self.site.storage.getPath(content_inner_path))
content_dir = self.toDir(content_inner_path)
if os.path.isfile(content_path):
try:
new_content = json.load(open(content_path))
except Exception, err:
self.log.error("%s load error: %s" % (content_path, Debug.formatException(err)))
return False
else:
self.log.error("Content.json not exist: %s" % content_path)
return False # Content.json not exist
try:
# Get the files where the sha512 changed
changed = []
for relative_path, info in new_content.get("files", {}).items():
if "sha512" in info:
hash_type = "sha512"
                else:  # Backward compatibility
hash_type = "sha1"
new_hash = info[hash_type]
if old_content and old_content["files"].get(relative_path): # We have the file in the old content
old_hash = old_content["files"][relative_path].get(hash_type)
else: # The file is not in the old content
old_hash = None
if old_hash != new_hash:
changed.append(content_dir + relative_path)
# Load includes
if load_includes and "includes" in new_content:
for relative_path, info in new_content["includes"].items():
include_inner_path = content_dir + relative_path
if self.site.storage.isFile(include_inner_path): # Content.json exists, load it
success = self.loadContent(include_inner_path, add_bad_files=add_bad_files)
if success:
changed += success # Add changed files
                    else:  # content.json does not exist, add to changed files
self.log.debug("Missing include: %s" % include_inner_path)
changed += [include_inner_path]
            # Load blind user includes (content.json from every subdirectory)
if load_includes and "user_contents" in new_content:
for relative_dir in os.listdir(content_path_dir):
include_inner_path = content_dir + relative_dir + "/content.json"
if not self.site.storage.isFile(include_inner_path):
                    continue  # content.json does not exist
success = self.loadContent(include_inner_path, add_bad_files=add_bad_files, load_includes=False)
if success:
changed += success # Add changed files
# Update the content
self.contents[content_inner_path] = new_content
except Exception, err:
self.log.error("Content.json parse error: %s" % Debug.formatException(err))
return False # Content.json parse error
# Add changed files to bad files
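        # (files listed in bad_files are candidates for re-download later)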
if add_bad_files:
for inner_path in changed:
self.site.bad_files[inner_path] = True
if new_content["modified"] > self.site.settings.get("modified", 0):
            # Don't store modification times more than 10 minutes in the future
self.site.settings["modified"] = min(time.time() + 60 * 10, new_content["modified"])
return changed
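
    # Example (illustrative):
    #   changed = content_manager.loadContent("content.json")
    #   # -> ["index.html", "data/messages.json"] or False on error
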
    # Get total size of site
    # Return: 32819 (size of files in bytes)
def getTotalSize(self, ignore=None):
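        # ignore: inner path of a content.json to exclude from the total
        # (validContent uses it to compute the site size without the old content.json)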
total_size = 0
for inner_path, content in self.contents.iteritems():
if inner_path == ignore:
continue
total_size += self.site.storage.getSize(inner_path) # Size of content.json
for file, info in content.get("files", {}).iteritems():
total_size += info["size"]
return total_size

    # Find the file info in self.contents
    # Return: {"sha512": "c29d73d...21f518", "size": 41, "content_inner_path": "content.json"}
def getFileInfo(self, inner_path):
dirs = inner_path.split("/") # Parent dirs of content.json
inner_path_parts = [dirs.pop()] # Filename relative to content.json
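        # Walk up the parent directories until a content.json lists the file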
while True:
content_inner_path = "%s/content.json" % "/".join(dirs)
content = self.contents.get(content_inner_path.strip("/"))
if content and "files" in content: # Check if content.json exists
back = content["files"].get("/".join(inner_path_parts))
if back:
back["content_inner_path"] = content_inner_path
return back
if content and "user_contents" in content: # User dir
back = content["user_contents"]
# Content.json is in the users dir
back["content_inner_path"] = re.sub("(.*)/.*?$", "\\1/content.json", inner_path)
return back
            # No match in this dir, let's try the parent dir
if dirs:
inner_path_parts.insert(0, dirs.pop())
else: # No more parent dirs
break
# Not found
return False

    # Get rules for the file
    # Return: The rules for the file or False if not allowed
def getRules(self, inner_path, content=None):
        if not inner_path.endswith("content.json"):  # Find the file's content.json first
file_info = self.getFileInfo(inner_path)
if not file_info:
return False # File not found
inner_path = file_info["content_inner_path"]
dirs = inner_path.split("/") # Parent dirs of content.json
inner_path_parts = [dirs.pop()] # Filename relative to content.json
        inner_path_parts.insert(0, dirs.pop())  # Don't check in its own dir
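        # A content.json's rules come from the closest parent content.json:
        # either its "includes" entry for this path or its "user_contents" section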
while True:
content_inner_path = "%s/content.json" % "/".join(dirs)
parent_content = self.contents.get(content_inner_path.strip("/"))
if parent_content and "includes" in parent_content:
return parent_content["includes"].get("/".join(inner_path_parts))
elif parent_content and "user_contents" in parent_content:
return self.getUserContentRules(parent_content, inner_path, content)
            else:  # No match in this dir, let's try the parent dir
if dirs:
inner_path_parts.insert(0, dirs.pop())
else: # No more parent dirs
break
return False

    # Get rules for a user file
    # Return: The rules of the file or False if not allowed
def getUserContentRules(self, parent_content, inner_path, content):
user_contents = parent_content["user_contents"]
        user_address = re.match(".*/([A-Za-z0-9]*?)/.*?$", inner_path).group(1)  # User address from the directory name
try:
if not content:
content = self.site.storage.loadJson(inner_path) # Read the file if no content specified
        except Exception:  # content.json does not exist yet
return {"signers": [user_address], "user_address": user_address} # Return information that we know for sure
"""if not "cert_user_name" in content: # New file, unknown user
content["cert_auth_type"] = "unknown"
content["cert_user_name"] = "unknown@unknown"
"""
user_urn = "%s/%s" % (content["cert_auth_type"], content["cert_user_id"]) # web/nofish@zeroid.bit
rules = copy.copy(user_contents["permissions"].get(content["cert_user_id"], {})) # Default rules by username
if rules is False:
return False # User banned
if "signers" in rules:
rules["signers"] = rules["signers"][:] # Make copy of the signers
for permission_pattern, permission_rules in user_contents["permission_rules"].items(): # Regexp rules
if not re.match(permission_pattern, user_urn):
continue # Rule is not valid for user
            # Update the rule if it's better than the currently recorded one
for key, val in permission_rules.iteritems():
if key not in rules:
if type(val) is list:
rules[key] = val[:] # Make copy
else:
rules[key] = val
elif type(val) is int: # Int, update if larger
if val > rules[key]:
rules[key] = val
elif hasattr(val, "startswith"): # String, update if longer
if len(val) > len(rules[key]):
rules[key] = val
elif type(val) is list: # List, append
rules[key] += val
rules["cert_signers"] = user_contents["cert_signers"] # Add valid cert signers
if "signers" not in rules:
rules["signers"] = []
rules["signers"].append(user_address) # Add user as valid signer
rules["user_address"] = user_address
return rules
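
    # Example "user_contents" section (illustrative values):
    # "user_contents": {
    #     "cert_signers": {"zeroid.bit": ["1CertSigner..."]},
    #     "permissions": {"banned_user@zeroid.bit": false},
    #     "permission_rules": {".*": {"files_allowed": "data.json", "max_size": 10000}}
    # }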

    # Create and sign a content.json
    # Return: The new content if filewrite = False
def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None):
content = self.contents.get(inner_path)
        if not content:  # Content does not exist yet, load default values
self.log.info("File %s not exist yet, loading default values..." % inner_path)
content = {"files": {}, "signs": {}} # Default content.json
if inner_path == "content.json": # It's the root content.json, add some more fields
content["title"] = "%s - ZeroNet_" % self.site.address
content["description"] = ""
content["signs_required"] = 1
content["ignore"] = ""
if extend:
content.update(extend) # Add custom fields
directory = self.toDir(self.site.storage.getPath(inner_path))
self.log.info("Opening site data directory: %s..." % directory)
hashed_files = {}
changed_files = [inner_path]
for root, dirs, files in os.walk(directory):
for file_name in files:
file_path = self.site.storage.getPath("%s/%s" % (root.strip("/"), file_name))
file_inner_path = re.sub(re.escape(directory), "", file_path)
if file_name == "content.json":
ignored = True
elif content.get("ignore") and re.match(content["ignore"], file_inner_path):
ignored = True
elif file_name.startswith("."):
ignored = True
else:
ignored = False
                if ignored:  # Ignore content.json, files matching the defined regexp and files starting with .
self.log.info("- [SKIPPED] %s" % file_inner_path)
else:
sha512sum = CryptHash.sha512sum(file_path) # Calculate sha512 sum of file
self.log.info("- %s (SHA512: %s)" % (file_inner_path, sha512sum))
hashed_files[file_inner_path] = {"sha512": sha512sum, "size": os.path.getsize(file_path)}
if (
file_inner_path in content["files"].keys()
and hashed_files[file_inner_path]["sha512"] != content["files"][file_inner_path].get("sha512")
):
changed_files.append(file_path)
self.log.debug("Changed files: %s" % changed_files)
if update_changed_files:
for file_path in changed_files:
self.site.storage.onUpdated(file_path)
# Generate new content.json
self.log.info("Adding timestamp and sha512sums to new content.json...")
new_content = content.copy() # Create a copy of current content.json
new_content["files"] = hashed_files # Add files sha512 hash
new_content["modified"] = time.time() # Add timestamp
if inner_path == "content.json":
new_content["address"] = self.site.address
new_content["zeronet_version"] = config.version
new_content["signs_required"] = content.get("signs_required", 1)
from Crypt import CryptBitcoin
self.log.info("Verifying private key...")
privatekey_address = CryptBitcoin.privatekeyToAddress(privatekey)
valid_signers = self.getValidSigners(inner_path, new_content)
if privatekey_address not in valid_signers:
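            # logging's error() returns None, so the caller gets a falsy result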
return self.log.error(
"Private key invalid! Valid signers: %s, Private key address: %s" %
(valid_signers, privatekey_address)
)
self.log.info("Correct %s in valid signers: %s" % (privatekey_address, valid_signers))
if inner_path == "content.json" and privatekey_address == self.site.address:
# If signing using the root key, then sign the valid signers
new_content["signers_sign"] = CryptBitcoin.sign(
"%s:%s" % (new_content["signs_required"], ",".join(valid_signers)), privatekey
)
if not new_content["signers_sign"]:
self.log.info("Old style address, signers_sign is none")
self.log.info("Signing %s..." % inner_path)
if "signs" in new_content:
del(new_content["signs"]) # Delete old signs
if "sign" in new_content:
del(new_content["sign"]) # Delete old sign (backward compatibility)
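        # Serialize with sorted keys so the signed string is deterministic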
sign_content = json.dumps(new_content, sort_keys=True)
sign = CryptBitcoin.sign(sign_content, privatekey)
# new_content["signs"] = content.get("signs", {}) # TODO: Multisig
if sign: # If signing is successful (not an old address)
new_content["signs"] = {}
new_content["signs"][privatekey_address] = sign
if inner_path == "content.json": # To root content.json add old format sign for backward compatibility
oldsign_content = json.dumps(new_content, sort_keys=True)
new_content["sign"] = CryptBitcoin.signOld(oldsign_content, privatekey)
if not self.validContent(inner_path, new_content):
self.log.error("Sign failed: Invalid content")
return False
if filewrite:
self.log.info("Saving to %s..." % inner_path)
self.site.storage.writeJson(inner_path, new_content)
self.log.info("File %s signed!" % inner_path)
if filewrite: # Written to file
return True
else: # Return the new content
return new_content

    # The valid signers of a content.json file
    # Return: ["1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6", "13ReyhCsjhpuCVahn1DHdf6eMqqEVev162"]
def getValidSigners(self, inner_path, content=None):
valid_signers = []
if inner_path == "content.json": # Root content.json
if "content.json" in self.contents and "signers" in self.contents["content.json"]:
valid_signers += self.contents["content.json"]["signers"].keys()
else:
rules = self.getRules(inner_path, content)
if rules and "signers" in rules:
valid_signers += rules["signers"]
if self.site.address not in valid_signers:
valid_signers.append(self.site.address) # Site address always valid
return valid_signers

    # Return: The required number of valid signs for the content.json
def getSignsRequired(self, inner_path, content=None):
        return 1  # TODO: Multisig

    def verifyCert(self, inner_path, content):
from Crypt import CryptBitcoin
rules = self.getRules(inner_path, content)
if not rules.get("cert_signers"):
return True # Does not need cert
name, domain = content["cert_user_id"].split("@")
cert_address = rules["cert_signers"].get(domain)
if not cert_address: # Cert signer not allowed
self.log.error("Invalid cert signer: %s" % domain)
return False
return CryptBitcoin.verify(
"%s#%s/%s" % (rules["user_address"], content["cert_auth_type"], name), cert_address, content["cert_sign"]
)
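
    # The cert is the cert signer's signature over "user_address#cert_auth_type/name"
    # (e.g. "1UserAddr...#web/nofish", illustrative)
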
# Checks if the content.json content is valid
# Return: True or False
def validContent(self, inner_path, content):
content_size = len(json.dumps(content)) + sum([file["size"] for file in content["files"].values()]) # Size of new content
site_size = self.getTotalSize(ignore=inner_path) + content_size # Site size without old content
if site_size > self.site.settings.get("size", 0):
self.site.settings["size"] = site_size # Save to settings if larger
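        # getSizeLimit() is in MB, convert to bytes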
site_size_limit = self.site.getSizeLimit() * 1024 * 1024
# Check total site size limit
if site_size > site_size_limit:
self.log.error("%s: Site too large %s > %s, aborting task..." % (inner_path, site_size, site_size_limit))
task = self.site.worker_manager.findTask(inner_path)
            if task:  # Don't try to download it from other peers
self.site.worker_manager.failTask(task)
return False
if inner_path == "content.json":
            return True  # Root content.json has no include rules to check
# Load include details
rules = self.getRules(inner_path, content)
if not rules:
self.log.error("%s: No rules" % inner_path)
return False
# Check include size limit
if rules.get("max_size"): # Include size limit
if content_size > rules["max_size"]:
self.log.error("%s: Include too large %s > %s" % (inner_path, content_size, rules["max_size"]))
return False
# Check if content includes allowed
if rules.get("includes_allowed") is False and content.get("includes"):
self.log.error("%s: Includes not allowed" % inner_path)
return False # Includes not allowed
# Filename limit
if rules.get("files_allowed"):
for file_inner_path in content["files"].keys():
if not re.match("^%s$" % rules["files_allowed"], file_inner_path):
self.log.error("%s: File not allowed" % file_inner_path)
return False
return True # All good

    # Verify file validity
    # Return: None = Same as before, False = Invalid, True = Valid
def verifyFile(self, inner_path, file, ignore_same=True):
if inner_path.endswith("content.json"): # content.json: Check using sign
from Crypt import CryptBitcoin
try:
new_content = json.load(file)
if inner_path in self.contents:
old_content = self.contents.get(inner_path)
                    # Check if it's newer than ours
if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json
return None
elif old_content["modified"] > new_content["modified"]: # We have newer
self.log.debug(
"We have newer %s (Our: %s, Sent: %s)" %
(inner_path, old_content["modified"], new_content["modified"])
)
gevent.spawn(self.site.publish, inner_path=inner_path) # Try to fix the broken peers
return False
if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+)
self.log.error("%s modify is in the future!" % inner_path)
return False
# Check sign
sign = new_content.get("sign")
signs = new_content.get("signs", {})
if "sign" in new_content:
del(new_content["sign"]) # The file signed without the sign
if "signs" in new_content:
del(new_content["signs"]) # The file signed without the signs
                sign_content = json.dumps(new_content, sort_keys=True)  # Dump the json to a string to remove whitespace
if not self.validContent(inner_path, new_content):
return False # Content not valid (files too large, invalid files)
if signs: # New style signing
valid_signers = self.getValidSigners(inner_path, new_content)
signs_required = self.getSignsRequired(inner_path, new_content)
if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json
if not CryptBitcoin.verify(
"%s:%s" % (signs_required, ",".join(valid_signers)), self.site.address, new_content["signers_sign"]
):
self.log.error("%s invalid signers_sign!" % inner_path)
return False
if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid
self.log.error("%s invalid cert!" % inner_path)
return False
valid_signs = 0
for address in valid_signers:
if address in signs:
valid_signs += CryptBitcoin.verify(sign_content, address, signs[address])
if valid_signs >= signs_required:
                            break  # Break if we have enough signs
return valid_signs >= signs_required
else: # Old style signing
return CryptBitcoin.verify(sign_content, self.site.address, sign)
except Exception, err:
self.log.error("Verify sign error: %s" % Debug.formatException(err))
return False
else: # Check using sha512 hash
file_info = self.getFileInfo(inner_path)
if file_info:
if "sha512" in file_info:
hash_valid = CryptHash.sha512sum(file) == file_info["sha512"]
elif "sha1" in file_info: # Backward compatibility
hash_valid = CryptHash.sha1sum(file) == file_info["sha1"]
else:
hash_valid = False
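                # After hashing, file.tell() equals the number of bytes read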
if file_info["size"] != file.tell():
self.log.error(
"%s file size does not match %s <> %s, Hash: %s" %
(inner_path, file.tell(), file_info["size"], hash_valid)
)
return False
return hash_valid
else: # File not in content.json
self.log.error("File not in content.json: %s" % inner_path)
return False

    # Get dir from file
    # Return: data/site/content.json -> data/site/
def toDir(self, inner_path):
file_dir = re.sub("[^/]*?$", "", inner_path).strip("/")
if file_dir:
file_dir += "/" # Add / at end if its not the root
return file_dir


def testSign():
global config
from Site import Site
site = Site("12Hw8rTgzrNo4DSh2AkqwPRqDyTticwJyH")
content_manager = ContentManager(site)
content_manager.sign(
"data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/content.json", "5JCGE6UUruhfmAfcZ2GYjvrswkaiq7uLo6Gmtf2ep2Jh2jtNzWR"
)


def testVerify():
from Site import Site
site = Site("12Hw8rTgzrNo4DSh2AkqwPRqDyTticwJyH")
content_manager = ContentManager(site)
print "Loaded contents:", content_manager.contents.keys()
file = open(site.storage.getPath("data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/content.json"))
print "content.json valid:", content_manager.verifyFile(
"data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/content.json", file, ignore_same=False
)
file = open(site.storage.getPath("data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/messages.json"))
print "messages.json valid:", content_manager.verifyFile(
"data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/messages.json", file, ignore_same=False
)


def testInfo():
from Site import Site
site = Site("12Hw8rTgzrNo4DSh2AkqwPRqDyTticwJyH")
content_manager = ContentManager(site)
print content_manager.contents.keys()
print content_manager.getFileInfo("index.html")
    print content_manager.getRules("data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/content.json")
print content_manager.getValidSigners("data/users/1KRxE1s3oDyNDawuYWpzbLUwNm8oDbeEp6/content.json")
print content_manager.getValidSigners("data/users/content.json")
print content_manager.getValidSigners("content.json")


if __name__ == "__main__":
import sys
import logging
os.chdir("../..")
sys.path.insert(0, os.path.abspath("."))
sys.path.insert(0, os.path.abspath("src"))
logging.basicConfig(level=logging.DEBUG)
# testSign()
testVerify()
# testInfo()