This repository has been archived on 2024-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
build.py/_src/build.py

292 lines
10 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""build.py
Build system for my Gemini capsule, with HTML output support
"""
import os
import sys
from fnmatch import fnmatch
import re
from functools import partial
import apis # from itself
import traceback
from item import Item # from itself
from files import save_this, link_this, scan_dir # from itself
from config import config # from itself
def template_for(path: str) -> 'str | None':
    """Determine which template applies to a file.

    Every glob pattern listed in the ``templates`` section of the config is
    tested against ``path`` with ``fnmatch``. When several patterns match,
    the one listed last wins.

    Returns the value configured for the last matching pattern, or None when
    the config has no ``templates`` section or no pattern matches.
    """
    if 'templates' not in config:
        return None
    templates = config['templates']
    template = None
    for pattern in templates.keys():
        if fnmatch(path, pattern):
            # no early exit on purpose: a later match overrides this one
            template = templates[pattern]
    return template
def namespace_from(*extensions) -> dict:
    """Generate a global namespace for evaluation from the given extensions.

    Extensions are merged in order, so later ones override earlier ones:

    * a class contributes its attributes whose names do not start with an
      underscore (``__builtins__`` is the one exception and is copied too);
    * anything else is handed to ``dict.update()``, so it may be a mapping
      or an iterable of key/value pairs.

    ``__builtins__`` starts out as an empty dict, but can be overridden by
    an extension.
    """
    namespace = {'__builtins__': {}}
    for extension in extensions:
        # isinstance() also recognises classes built by a custom metaclass,
        # which an exact ``type(x) == type`` comparison would miss
        if isinstance(extension, type):
            for name, value in vars(extension).items():
                if name.startswith('_') and name != '__builtins__':
                    continue
                namespace[name] = value
        else:
            namespace.update(extension)
    return namespace
def evaluate_this(content: str, global_ns: dict, local_ns: dict = None) -> str:
    """Find inline Python code in {braces} and evaluate it in place.

    A brace group is only treated as code when its content begins and ends
    with a non-whitespace character and contains no ``}``; each such group is
    replaced by the string returned from evaluate().

    global_ns is the dictionary of globals the code can use. It's recommended
    to use namespace_from() to generate one.

    local_ns is the dictionary of locals shared by every brace group within
    this one call. When omitted, a fresh dict is created per call, so names
    assigned in one document do not leak into the next. Pass the same dict
    explicitly to several calls if that sharing is wanted.
    """
    if local_ns is None:
        local_ns = {}
    return re.sub(r'\{([^\s}]+([^}]*[^\s}])?)\}',
                  partial(evaluate, global_ns=global_ns, local_ns=local_ns),
                  content)
def evaluate(match: re.Match, global_ns: dict, local_ns: dict) -> str:
    """Callable for re.sub: run the code captured in group 1 of the match.

    The last line of the captured text is evaluated as an expression and its
    value, converted with str(), is returned. Any lines before it are
    executed as statements first.

    If the code raises an exception, the traceback is printed to stderr and
    a placeholder string is returned instead, so the build keeps going.

    NOTE(review): this runs exec()/eval() on document text. Emptying
    __builtins__ in the namespace is not a sandbox; only use it on content
    you trust.
    """
    lines = match.group(1).split('\n')
    # this trick lets the functions in apis.py access our environment
    apis.environment = global_ns
    # we can't use 'return' in expressions
    # instead the last line will be evaluated and returned, the rest executed
    to_exec = '\n'.join(lines[:-1])
    to_eval = lines[-1]
    try:
        if to_exec:
            exec(to_exec, global_ns, local_ns)
        result = eval(to_eval, global_ns, local_ns)
        return str(result)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and sys.exit()
        # still stop the build
        print(traceback.format_exc(), file=sys.stderr)
        return '\n<a real error occured here>\n'
def redirect(match: re.Match, domains: dict) -> str:
    """Callable for re.sub: swap the domain of a URL for its configured proxy.

    The match must provide three groups: the scheme prefix (group 1), the
    domain (group 2) and an optional path (group 3). ``domains`` maps an
    original domain to its replacement, e.g. the privacy-respecting proxies
    from the ``redirections`` config section.

    Returns the rewritten URL when the domain is listed in ``domains``,
    otherwise the matched text unchanged.
    """
    this_domain = match.group(2)
    if this_domain not in domains:
        return match.group(0)
    # group 3 is None for a bare "https://domain" with no path
    tail = match.group(3) or ''
    return match.group(1) + domains[this_domain] + tail
def quote_html(text: str) -> str:
    """Escape the characters that are unsafe in HTML text and in
    double-quoted attribute values: ``&``, ``<``, ``>`` and ``"``.
    """
    # '&' has to go first, otherwise the entities produced by the later
    # replacements would be escaped a second time
    return (text.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))
def gemtext2html(gemtext: str) -> str:
    """Convert a gemtext document to an HTML fragment.

    The input is processed line by line:

    * a line starting with three backticks toggles a preformatted block
      (``<pre>``); text after the opening fence is emitted as a ``language``
      attribute, and lines inside the block are escaped but otherwise kept
      as they are;
    * blank lines outside a preformatted block are dropped;
    * consecutive ``* item`` lines are grouped into a single ``<ul>``;
    * ``#``, ``##`` and ``###`` lines become ``<h1>``..``<h3>``;
    * ``=> url [label]`` lines become ``<a>``, or ``<img>`` (label used as
      alt text) when the url ends in .jpg, .png or .gif; a ``.gmi`` url
      without a scheme is rewritten to ``.html``;
    * ``>`` lines become ``<blockquote>``; anything else becomes ``<p>``.

    All text is escaped with quote_html(). Returns the generated HTML lines
    joined with newlines.
    """
    html = []
    preformat = in_list = False
    for line in gemtext.split('\n'):
        if match := re.match(r'^```(.*)', line):
            # a fence ends any list in progress, so <pre> is never nested
            # inside an unclosed <ul>
            if in_list:
                in_list = False
                html.append('</ul>')
            preformat = not preformat
            if not preformat:
                html.append('</pre>')
            elif match.group(1):
                html.append(f'<pre language="{quote_html(match.group(1))}">')
            else:
                html.append('<pre>')
            continue
        if preformat:
            html.append(quote_html(line))
            continue
        if not line:
            continue  # skip empty lines
        if match := re.match(r'^\*\s+(.*)', line):
            if not in_list:
                in_list = True
                html.append('<ul>')
            html.append(f'<li>{quote_html(match.group(1))}</li>')
            continue
        # any other kind of line ends the list in progress
        if in_list:
            in_list = False
            html.append('</ul>')
        if match := re.match(r'^#\s+(.*)', line):
            html.append(f'<h1>{quote_html(match.group(1))}</h1>')
        elif match := re.match(r'^##\s+(.*)', line):
            html.append(f'<h2>{quote_html(match.group(1))}</h2>')
        elif match := re.match(r'^###\s+(.*)', line):
            html.append(f'<h3>{quote_html(match.group(1))}</h3>')
        elif match := re.match(r'^=>\s+(\S+)(\s+.*)?', line):
            url = match.group(1)
            # group 2 is None when the link has no label; groups() always
            # has one entry per group, so its length says nothing about
            # whether the label is present
            text = (match.group(2) or '').strip() or None
            # rotate file extension (local links only, i.e. no scheme)
            if url.endswith('.gmi') and not re.match(r'^[-\w]+:', url):
                url = os.path.splitext(url)[0] + '.html'
            # if the link points to an image, display it instead
            if os.path.splitext(url)[1] in ('.jpg', '.png', '.gif'):
                if text:
                    html.append(f'<img src="{quote_html(url)}" alt="{quote_html(text)}">')
                else:
                    html.append(f'<img src="{quote_html(url)}">')
            else:
                html.append(f'<a href="{quote_html(url)}">{quote_html(text or url)}</a>')
        elif match := re.match(r'^>\s*(.*)', line):
            html.append(f'<blockquote>{quote_html(match.group(1))}</blockquote>')
        else:
            html.append(f'<p>{quote_html(line)}</p>')
    # close whatever is still open at the end of the document
    if preformat:
        html.append('</pre>')
    elif in_list:
        html.append('</ul>')
    return '\n'.join(html)
def abs2rel(match: re.Match, path: str) -> str:
    """Callable for re.sub: rewrite a root-absolute link target so that it is
    relative to the document at ``path``.

    Group 1 of the match is the link prefix, which is kept as is. Group 2 is
    the target without its leading slash. The rewritten target is also
    printed as a progress message.
    """
    prefix, target = match.group(1, 2)
    document_dir = os.path.dirname(path)
    url = os.path.relpath(target, start=document_dir)
    print(' M', url)
    return prefix + url
if __name__ == '__main__':
    # Script entry point: builds the gemini output (written to '../') and the
    # www output (written to '../html/') from the 'content' and 'static'
    # directories next to this script.

    # sys.path[0] is the directory of the running script, so every relative
    # path below is resolved against it.
    os.chdir(sys.path[0])
    # NOTE(review): link_this(src, dst) presumably creates a link at dst
    # pointing to src; confirm in files.py
    link_this('build.py', '../build.py')
    link_this('build.py', '../html/build.py')
    link_this('../.git', '../html/gemini.git')
    # load config
    config.read('config.ini')
    # scan source files
    content_files = scan_dir('content')
    static_files = scan_dir('static')
    # add static files to the project (both outputs get the same file)
    for path in static_files:
        print('F', path)
        # a static file and a content file would end up at the same output
        # path, so refuse to continue
        if path in content_files:
            raise Exception('There is a conflicting static and content file')
        link_this('static/'+path, '../'+path)
        link_this('static/'+path, '../html/'+path)
    # now the actual parsing of content files:
    # each file is processed up to twice, once per output mode; an item that
    # declares a 'mode' is only built for that mode
    for path in content_files:
        print('F', path)
        item = Item(path)
        # parsing in gemini mode
        if not 'mode' in item or item.mode == 'gemini':
            # preparing namespace (later arguments override earlier ones)
            namespace = namespace_from(
                {'path': path},
                apis.Content,
                config['variables'],
                config['variables.gemini'],
                item.frontmatter_data,
                {'mode': 'gemini'}
            )
            # evaluation of the inline {code} in the item itself
            content = evaluate_this(item.content, namespace)
            # determining and applying template (optional)
            if template := template_for(path):
                tpl_item = Item(template, prefix='templates/')
                # a template restricted to another mode is ignored here
                if not 'mode' in tpl_item or tpl_item.mode == 'gemini':
                    # same namespace as above, plus the evaluated item as
                    # 'content' for the template to embed
                    namespace = namespace_from(
                        {'path': path},
                        apis.Content,
                        config['variables'],
                        config['variables.gemini'],
                        item.frontmatter_data,
                        {'mode': 'gemini', 'content': content}
                    )
                    content = evaluate_this(tpl_item.content, namespace)
            # do redirections (http/https only!)
            if 'redirections' in config:
                content = re.sub(r'(https?://)([-\w]+[-\w.]*)(/\S*)?', partial(
                    redirect,
                    domains=dict(config['redirections'])
                ), content)
            # convert absolute links to relative (gemtext only!)
            if path.endswith('.gmi'):
                content = re.sub(r'(=>[ \t]+)/(\S+)', partial(
                    abs2rel, path=path
                ), content)
            # save results
            save_this('../'+path, content)
        else:
            print(' ! skipped for gemini')
            # TODO: delete from gemini output
        # parsing in www mode (mirrors the gemini branch, with the
        # 'variables.www' config section and the '../html/' output tree)
        if not 'mode' in item or item.mode == 'www':
            namespace = namespace_from(
                {'path': path},
                apis.Content,
                config['variables'],
                config['variables.www'],
                item.frontmatter_data,
                {'mode': 'www'}
            )
            content = evaluate_this(item.content, namespace)
            if template := template_for(path):
                tpl_item = Item(template, prefix='templates/')
                if not 'mode' in tpl_item or tpl_item.mode == 'www':
                    namespace = namespace_from(
                        {'path': path},
                        apis.Content,
                        config['variables'],
                        config['variables.www'],
                        item.frontmatter_data,
                        {'mode': 'www', 'content': content}
                    )
                    content = evaluate_this(tpl_item.content, namespace)
            # convert to html; non-gemtext files keep their name and content
            html_path = path
            if path.endswith('.gmi'):
                content = gemtext2html(content)
                html_path = os.path.splitext(path)[0]+'.html'
            if html_path.endswith('.html'):
                # TODO: html template
                # placeholder: the lookup result is not used yet
                if template := template_for(html_path):
                    ...
            # TODO: redirections, path conversion
            save_this('../html/'+html_path, content)
        else:
            print(' ! skipped for www')
            # TODO: delete from www output