331 lines
12 KiB
Python
Executable File
331 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""build.py
|
||
|
||
Build system for my Gemini capsule, with HTML support
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
from fnmatch import fnmatch
|
||
import re
|
||
from functools import partial
|
||
import apis # from itself
|
||
import traceback
|
||
from item import Item # from itself
|
||
from files import save_this, link_this, scan_dir, delete_this # from itself
|
||
from config import config # from itself
|
||
from bs4 import BeautifulSoup # from beautifulsoup4
|
||
from gemtext import GemtextParser # from itself
|
||
|
||
# regex pre-compilation
|
||
REGEX = {
    # inline code span: everything between "{{" and "}}", non-greedy;
    # DOTALL lets the captured code run across several lines
    'evaluation': re.compile(r'{{(.*?)}}', re.DOTALL),
    # http(s) URL split into (scheme, hostname, optional path remainder)
    'url': re.compile(r'(https?://)([-\w]+[-\w.]*)(/\S*)?'),
    # any string that starts with "<word characters>:", i.e. carries a scheme
    'generic_url': re.compile(r'^[\w]+:')
}
|
||
|
||
def template_for(path: str) -> 'str | None':
    """Determine which template applies to a file.

    Every pattern in the ``templates`` config section is tested against
    ``path`` with ``fnmatch``; when several patterns match, the one listed
    last wins.

    Returns the configured template value of the last matching pattern, or
    None when the section is missing or no pattern matches.
    """
    if 'templates' not in config:
        return None

    templates = config['templates']
    template = None  # overwritten on every hit, so the last match is kept
    for pattern in templates:
        if fnmatch(path, pattern):
            template = templates[pattern]

    return template
|
||
|
||
def namespace_from(*extensions) -> dict:
    """Generate the global namespace for evaluation from the given extensions.

    Each extension is either a class or anything ``dict.update`` accepts
    (a mapping or an iterable of key/value pairs). Extensions are applied in
    order, so later ones override earlier ones.

    For a class, its attributes are copied, skipping every name that starts
    with an underscore except ``__builtins__``.

    ``__builtins__`` is empty by default, but can be overridden by an
    extension.
    """
    namespace = {'__builtins__': {}}

    for extension in extensions:
        # isinstance() also recognises classes built by a custom metaclass,
        # which a `type(x) == type` comparison would miss
        if isinstance(extension, type):
            namespace.update(
                (name, value)
                for name, value in vars(extension).items()
                if not name.startswith('_') or name == '__builtins__'
            )
        else:  # mappings and iterables of pairs
            namespace.update(extension)

    return namespace
|
||
|
||
def evaluate_this(
    content: str,
    global_ns: dict,
    local_ns: 'dict | None' = None,
) -> str:
    """Find inline Python code in {{double braces}} and evaluate it in place.

    global_ns is a dictionary of globals the code can use. It's recommended
    to use namespace_from() to generate one.

    local_ns is the locals dictionary shared by all code spans of this call.
    When omitted, a fresh dictionary is created for each call, so names
    assigned while evaluating one document do not leak into the next.

    Returns content with every code span replaced by its result.
    """
    if local_ns is None:
        local_ns = {}

    return REGEX['evaluation'].sub(
        partial(evaluate, global_ns=global_ns, local_ns=local_ns),
        content,
    )
|
||
|
||
def evaluate(match: re.Match, global_ns: dict, local_ns: dict) -> str:
    """Callable for re.sub: run one code span and return its result as text.

    All lines but the last are executed as statements; the last line is
    evaluated as an expression and its value, converted with str(), is
    returned.

    group 1: python code

    If the code raises, the traceback is printed to stderr and a placeholder
    error string is returned instead of propagating the exception.
    """
    # NOTE(review): exec/eval run arbitrary code taken from the processed
    # document, so this is only safe for trusted content.
    lines = match.group(1).split('\n')
    # this trick lets the functions in apis.py access our environment
    apis.environment = global_ns
    # we can't use 'return' in expressions
    # instead the last line will be evaluated and returned, the rest executed
    to_exec = '\n'.join(lines[:-1])
    to_eval = lines[-1]
    try:
        if to_exec:
            exec(to_exec, global_ns, local_ns)
        return str(eval(to_eval, global_ns, local_ns))
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and SystemExit
        # still stop the build
        print(traceback.format_exc(), file=sys.stderr)
        return '\n<a real error occured here>\n'
|
||
|
||
def redirect(match: re.Match, domains: dict) -> str:
    """Callable for re.sub: swap the hostname of a URL for its replacement.

    Used to replace urls to sites like youtube.com or twitter.com with their
    respective privacy-respecting proxy services defined in the config
    section redirections.

    group 1: http:// or https://
    group 2: hostname
    group 3: remainder URL (optional)

    domains maps an original hostname to its replacement. URLs whose
    hostname is not a key of domains are returned unchanged.
    """
    host = match.group(2)
    if host not in domains:
        return match.group(0)

    # group 3 is None for a URL without a path, e.g. "https://youtube.com"
    return match.group(1) + domains[host] + (match.group(3) or '')
|
||
|
||
def gemtext2html(parser: GemtextParser, rotate_extension=True) -> BeautifulSoup:
    """Convert parsed gemtext to an HTML tree.

    Each element of parser.elements is turned into one node according to its
    type:

    * plain: one <p> per blank-line-separated paragraph
    * link: <img> when the target ends in .jpg/.png/.gif, otherwise <a>
    * preformatted: <pre>, with the alt text as its title attribute
    * list: <ul> with one <li> per item
    * head1 / head2 / head3: <h1> / <h2> / <h3>
    * quote: <blockquote> with lines separated by <br>

    When rotate_extension is true, link targets ending in .gmi that do not
    start with a URL scheme are pointed at the matching .html file.

    Elements of any other type are reported on stderr and skipped.

    Returns a BeautifulSoup object holding the generated nodes in order.
    """
    headings = {'head1': 'h1', 'head2': 'h2', 'head3': 'h3'}
    soup = BeautifulSoup(features='html.parser')

    for item in parser.elements:
        if item.type == 'plain':
            # a bare soup object serves as a wrapper-less group of paragraphs
            el = BeautifulSoup(features='html.parser')
            for content in item.content.split('\n\n'):
                p = soup.new_tag('p')
                p.append(content)
                el.append(p)

        elif item.type == 'link':
            # put an image if this is a link to image file
            if os.path.splitext(item.href)[1] in ('.jpg', '.png', '.gif'):
                el = soup.new_tag('img', src=item.href)
                if item.label:
                    el.attrs['alt'] = item.label
            else:
                href = item.href
                # html links typically point to .html files
                if (
                    rotate_extension
                    and href.endswith('.gmi')
                    and not REGEX['generic_url'].match(href)
                ):
                    href = os.path.splitext(href)[0] + '.html'
                el = soup.new_tag('a', href=href)
                el.append(item.label or href)

        elif item.type == 'preformatted':
            el = soup.new_tag('pre')
            el.append(item.content)
            if item.alt:
                el.attrs['title'] = item.alt

        elif item.type == 'list':
            el = soup.new_tag('ul')
            for list_item in item.items:
                li = soup.new_tag('li')
                li.append(list_item)
                el.append(li)

        elif item.type in headings:
            el = soup.new_tag(headings[item.type])
            el.append(item.content)

        elif item.type == 'quote':
            el = soup.new_tag('blockquote')
            for index, line in enumerate(item.content.split('\n')):
                if index:  # <br> goes between lines, not before the first
                    el.append(soup.new_tag('br'))
                el.append(line)

        else:
            # without this branch the node built for the previous element
            # would be appended again (or `el` would be undefined)
            print(
                f' ! unsupported gemtext element type: {item.type!r}',
                file=sys.stderr,
            )
            continue

        soup.append(el)

    return soup
|
||
|
||
def convert_href(href: str, path: str) -> str:
    """Redirect URLs and convert site-absolute paths to relative ones.

    The 'path' argument is a path to the current document the URL appears in.

    * An href starting with a single "/" is treated as a path from the site
      root and rewritten relative to the directory of 'path'.
    * Otherwise, when the config has a 'redirections' section, http(s) URLs
      whose hostname is listed there get that hostname replaced.
    * Anything else is returned unchanged.
    """
    # "//host/..." is a protocol-relative URL, not a path on this site
    if href.startswith('/') and not href.startswith('//'):
        # a bare "/" leaves an empty remainder, which relpath() rejects;
        # '.' stands for the site root itself
        return os.path.relpath(href[1:] or '.', start=os.path.dirname(path))

    if 'redirections' in config:
        return REGEX['url'].sub(
            partial(redirect, domains=dict(config['redirections'])),
            href,
        )

    return href
|
||
|
||
if __name__ == '__main__':
    # every relative path below is resolved from sys.path[0]
    os.chdir(sys.path[0])

    # link the build script and the repository into the output trees
    for link_source, link_target in (
        ('build.py', '../build.py'),
        ('build.py', '../html/build.py'),
        ('../.git', '../html/gemini.git'),
    ):
        link_this(link_source, link_target)

    # load config
    config.read('config.ini')

    # scan source files
    content_files = scan_dir('content')
    static_files = scan_dir('static')

    # static files are linked unchanged into both outputs
    for path in static_files:
        print('F', path)
        if path in content_files:
            raise Exception('There is a conflicting static and content file')
        for output_root in ('../', '../html/'):
            link_this('static/'+path, output_root+path)

    def wanted_in(entry, mode):
        """True when entry declares no mode, or declares exactly this one."""
        return 'mode' not in entry or entry.mode == mode

    def render(source, path, item, mode, section, **extra):
        """Evaluate source in the namespace built for this file and mode."""
        namespace = namespace_from(
            {'path': path},
            apis.Content,
            config['variables'],
            config[section],
            item.frontmatter_data,
            {'mode': mode, **extra},
        )
        return evaluate_this(source, namespace)

    def templated(lookup_path, content, path, item, mode, section):
        """Wrap content in the template matching lookup_path, if one applies."""
        template = template_for(lookup_path)
        if not template:
            return content
        tpl_item = Item(template, prefix='templates/')
        if not wanted_in(tpl_item, mode):
            return content
        return render(
            tpl_item.content, path, item, mode, section, content=content
        )

    # now the actual parsing of content files:
    for path in content_files:
        print('F', path)

        is_gemtext = path.endswith('.gmi')
        html_path = os.path.splitext(path)[0]+'.html' if is_gemtext else path
        item = Item(path)

        # ---- gemini output ----
        if wanted_in(item, 'gemini'):
            # evaluation, then the optional template
            content = render(
                item.content, path, item, 'gemini', 'variables.gemini'
            )
            content = templated(
                path, content, path, item, 'gemini', 'variables.gemini'
            )

            # redirections, path conversion
            if is_gemtext:
                parser = GemtextParser(content)
                for el in parser.elements:
                    if el.type == 'link':
                        el.href = convert_href(el.href, item.path)
                content = str(parser)

            save_this('../'+path, content)
        else:
            print(' ! skipped for gemini')
            # delete from gemini output
            delete_this('../'+path)

        # ---- www output ----
        if wanted_in(item, 'www'):
            content = render(item.content, path, item, 'www', 'variables.www')
            content = templated(
                path, content, path, item, 'www', 'variables.www'
            )

            # convert to html
            if is_gemtext:
                content = gemtext2html(GemtextParser(content)).prettify()

            if html_path.endswith('.html'):
                # TODO: html template
                # incomplete, yet should function with some primitive templates
                content = templated(
                    html_path, content, path, item, 'www', 'variables.www'
                )

                # redirections, path conversion
                soup = BeautifulSoup(content, features='html.parser')
                for attr in ('href', 'src', 'action'):
                    for node in soup.css.select('['+attr+']'):
                        node[attr] = convert_href(node[attr], path)
                content = soup.prettify()

            save_this('../html/'+html_path, content)
        else:
            print(' ! skipped for www')
            # delete from www output
            delete_this('../html/'+html_path)
|