This repository has been archived on 2024-05-17. You can view files and clone it, but cannot push or open issues or pull requests.
build.py/_src/build.py

331 lines
12 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""build.py
Building system for my gemini capsule with html support
"""
import os
import sys
from fnmatch import fnmatch
import re
from functools import partial
import apis # from itself
import traceback
from item import Item # from itself
from files import save_this, link_this, scan_dir, delete_this # from itself
from config import config # from itself
from bs4 import BeautifulSoup # from beautifulsoup4
from gemtext import GemtextParser # from itself
# regex pre-compilation
REGEX = {
    # inline Python code wrapped in double braces; DOTALL lets it span lines.
    # group 1: the code between the braces
    'evaluation': re.compile(r'\{\{(.*?)\}\}', re.DOTALL),
    # http(s) URL split for redirection.
    # group 1: 'http://' or 'https://', group 2: hostname,
    # group 3: remainder starting with '/' (None when the URL has no path)
    'url': re.compile(r'(https?://)([-\w]+[-\w.]*)(/\S*)?'),
    # any reference that starts with a URI scheme such as 'gemini:' or
    # 'git+ssh:' (scheme syntax: a letter followed by letters, digits,
    # '+', '-' or '.')
    'generic_url': re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*:')
}
def template_for(path: str) -> "str | None":
    """Determine which template applies to a file.

    Every pattern in the ``templates`` config section is tested against
    ``path`` with ``fnmatch``; when several patterns match, the one listed
    last wins.

    Args:
        path: Path of the file, matched against the configured patterns.

    Returns:
        The value configured for the last matching pattern, or ``None`` when
        there is no ``templates`` section or no pattern matches.
    """
    if 'templates' not in config:
        return None
    template = None
    # do not stop at the first hit: later entries override earlier ones
    for pattern, candidate in config['templates'].items():
        if fnmatch(path, pattern):
            template = candidate
    return template
def namespace_from(*extensions) -> dict:
    """Generate the global namespace used for evaluating inline code.

    The namespace starts with an empty ``__builtins__`` and is then extended
    by each extension in order, so later extensions override earlier ones.

    Args:
        *extensions: Each one is either a class or anything accepted by
            ``dict.update`` (a mapping or an iterable of key/value pairs).
            For a class, its own attributes are copied, skipping names that
            start with an underscore; ``__builtins__`` is the one exception,
            so a class can supply it on purpose.

    Returns:
        A new dict suitable as the globals argument of ``exec``/``eval``.
    """
    namespace = {'__builtins__': {}}
    for extension in extensions:
        # isinstance also recognises classes created by a custom metaclass
        if isinstance(extension, type):
            for name, value in vars(extension).items():
                if name.startswith('_') and name != '__builtins__':
                    continue
                namespace[name] = value
        else:  # mappings and other iterables
            namespace.update(extension)
    return namespace
def evaluate_this(content: str, global_ns: dict, local_ns: dict = None) -> str:
    """Find inline Python code in {{double braces}} and evaluate it in place.

    Args:
        content: Text that may contain ``{{ ... }}`` code blocks.
        global_ns: Globals available to the code. It's recommended to use
            namespace_from() to generate one.
        local_ns: Locals shared by all code blocks of this call. When
            omitted, a fresh dict is created for every call, so names
            assigned by one document do not leak into the next one. Pass the
            same dict explicitly to share names between calls.

    Returns:
        ``content`` with every code block replaced by its evaluation result.
    """
    if local_ns is None:
        local_ns = {}
    return REGEX['evaluation'].sub(
        partial(evaluate, global_ns=global_ns, local_ns=local_ns),
        content,
    )
def evaluate(match: re.Match, global_ns: dict, local_ns: dict) -> str:
    """Callable for re.sub: run one inline code block and return its value.

    All lines but the last are executed as statements; the last line is
    evaluated as an expression and its value, converted with ``str``,
    replaces the block.

    NOTE: the code is run with exec()/eval(); an empty ``__builtins__`` is
    not a sandbox, so only build content you trust.

    Args:
        match: Match whose group 1 holds the Python code.
        global_ns: Globals passed to exec()/eval().
        local_ns: Locals passed to exec()/eval().

    Returns:
        The stringified result, or a placeholder text if the code raised;
        in that case the traceback is printed to stderr.
    """
    lines = match.group(1).split('\n')
    # this trick lets the functions in apis.py access our environment
    apis.environment = global_ns
    # we can't use 'return' in expressions
    # instead the last line will be evaluated and returned, the rest executed
    to_exec = '\n'.join(lines[:-1])
    to_eval = lines[-1]
    try:
        if to_exec:
            exec(to_exec, global_ns, local_ns)
        return str(eval(to_eval, global_ns, local_ns))
    except Exception:
        # report and keep building; KeyboardInterrupt/SystemExit still
        # propagate so the build can be aborted
        print(traceback.format_exc(), file=sys.stderr)
        return '\n<a real error occured here>\n'
def redirect(match: re.Match, domains: dict) -> str:
    """Callable for re.sub: swap the hostname of a URL for its replacement.

    Replaces urls to sites like youtube.com or twitter.com with their
    respective privacy-respecting proxy services defined in the config
    section redirections.

    Args:
        match: Match with
            group 1: http:// or https://
            group 2: hostname
            group 3: remainder URL (None when the URL has no path)
        domains: Mapping of hostname to replacement hostname.

    Returns:
        The URL with its hostname replaced, or the matched text unchanged
        when the hostname is not in ``domains``.
    """
    hostname = match.group(2)
    if hostname not in domains:
        return match.group(0)
    # the remainder group is optional, so it is None for a bare host
    remainder = match.group(3) or ''
    return match.group(1) + domains[hostname] + remainder
def gemtext2html(parser: GemtextParser, rotate_extension=True) -> BeautifulSoup:
    """Convert parsed gemtext to html.

    Args:
        parser: Parsed document; its ``elements`` are converted in order.
        rotate_extension: When true, links ending in ``.gmi`` that do not
            start with a URL scheme are pointed at the ``.html`` file of the
            same name.

    Returns:
        A BeautifulSoup object holding one html node per gemtext element.
        Elements of an unrecognised type are skipped.
    """
    headings = {'head1': 'h1', 'head2': 'h2', 'head3': 'h3'}
    soup = BeautifulSoup(features='html.parser')
    for item in parser.elements:
        if item.type == 'plain':
            # blank lines separate paragraphs
            el = BeautifulSoup(features='html.parser')
            for content in item.content.split('\n\n'):
                p = soup.new_tag('p')
                p.append(content)
                el.append(p)
        elif item.type == 'link':
            # put an image if this is a link to image file
            if os.path.splitext(item.href)[1] in ('.jpg', '.png', '.gif'):
                el = soup.new_tag('img', src=item.href)
                if item.label:
                    el.attrs['alt'] = item.label
            else:
                href = item.href
                # html links typically points to .html files
                if (rotate_extension and href.endswith('.gmi')
                        and not REGEX['generic_url'].match(href)):
                    href = os.path.splitext(href)[0] + '.html'
                el = soup.new_tag('a', href=href)
                el.append(item.label or href)
        elif item.type == 'preformatted':
            el = soup.new_tag('pre')
            el.append(item.content)
            if item.alt:
                el.attrs['title'] = item.alt
        elif item.type == 'list':
            el = soup.new_tag('ul')
            for list_item in item.items:
                li = soup.new_tag('li')
                li.append(list_item)
                el.append(li)
        elif item.type in headings:
            el = soup.new_tag(headings[item.type])
            el.append(item.content)
        elif item.type == 'quote':
            el = soup.new_tag('blockquote')
            # put a <br> before every line, then drop the leading one so the
            # lines end up separated by <br>
            for line in item.content.split('\n'):
                br = soup.new_tag('br')
                el.extend((br, line))
            el.find('br').decompose()
        else:
            # nothing was built for this element, so there is nothing to add
            continue
        soup.append(el)
    return soup
def convert_href(href: str, path: str) -> str:
    """Redirect URLs and convert root-based paths to relative ones.

    Args:
        href: The link target to convert.
        path: Path to the current document the URL appears in.

    Returns:
        * for an href starting with a single '/': the same target expressed
          relative to the directory of ``path`` ('/' alone is the root
          directory itself);
        * otherwise, when the config has a ``redirections`` section: the href
          with http(s) hostnames replaced according to that section;
        * otherwise the href unchanged.
    """
    # '//host/...' is a scheme-relative URL, not a path inside the site
    if href.startswith('/') and not href.startswith('//'):
        # '/' alone leaves an empty string, which relpath() rejects
        target = href[1:] or '.'
        return os.path.relpath(target, start=os.path.dirname(path))
    if 'redirections' in config:
        return REGEX['url'].sub(
            partial(redirect, domains=dict(config['redirections'])),
            href,
        )
    return href
if __name__ == '__main__':
    # Script entry point: builds every file under content/ into the gemini
    # output (paths under '../') and the www output (paths under '../html/').
    # All relative paths below are resolved from sys.path[0].
    os.chdir(sys.path[0])
    # link the build script and the .git directory into the output trees
    # (link_this comes from files.py; its exact link type is not shown here)
    link_this('build.py', '../build.py')
    link_this('build.py', '../html/build.py')
    link_this('../.git', '../html/gemini.git')
    # load config
    config.read('config.ini')
    # scan source files
    content_files = scan_dir('content')
    static_files = scan_dir('static')
    # add static files to the project
    for path in static_files:
        print('F', path)
        # the same relative path must not exist in both static/ and content/
        if path in content_files:
            raise Exception('There is a conflicting static and content file')
        link_this('static/'+path, '../'+path)
        link_this('static/'+path, '../html/'+path)
    # now the actual parsing of content files:
    for path in content_files:
        print('F', path)
        # in the www output a .gmi file is saved with the .html extension
        if path.endswith('.gmi'):
            html_path = os.path.splitext(path)[0]+'.html'
        else:
            html_path = path
        item = Item(path)
        # parsing in gemini mode
        # (an item without 'mode' is built for both gemini and www)
        if not 'mode' in item or item.mode == 'gemini':
            # preparing namespace
            # later arguments override earlier ones on name clashes
            namespace = namespace_from(
                {'path': path},
                apis.Content,
                config['variables'],
                config['variables.gemini'],
                item.frontmatter_data,
                {'mode': 'gemini'}
            )
            # evaluation
            content = evaluate_this(item.content, namespace)
            # determining and applying template (optional)
            if template := template_for(path):
                tpl_item = Item(template, prefix='templates/')
                if not 'mode' in tpl_item or tpl_item.mode == 'gemini':
                    # same namespace as above, plus the evaluated page as
                    # 'content' for the template to embed
                    namespace = namespace_from(
                        {'path': path},
                        apis.Content,
                        config['variables'],
                        config['variables.gemini'],
                        item.frontmatter_data,
                        {'mode': 'gemini', 'content': content}
                    )
                    content = evaluate_this(tpl_item.content, namespace)
            # redirections, path conversion
            if path.endswith('.gmi'):
                parser = GemtextParser(content)
                for el in parser.elements:
                    if el.type == 'link':
                        el.href = convert_href(el.href, item.path)
                # serialize the parsed document, with converted links, back to text
                content = str(parser)
            # save results
            save_this('../'+path, content)
        else:
            print(' ! skipped for gemini')
            # delete from gemini output
            delete_this('../'+path)
        # parsing in www mode
        if not 'mode' in item or item.mode == 'www':
            # same steps as in gemini mode, with the 'variables.www' section
            namespace = namespace_from(
                {'path': path},
                apis.Content,
                config['variables'],
                config['variables.www'],
                item.frontmatter_data,
                {'mode': 'www'}
            )
            content = evaluate_this(item.content, namespace)
            # template matched by the source path (applied before html conversion)
            if template := template_for(path):
                tpl_item = Item(template, prefix='templates/')
                if not 'mode' in tpl_item or tpl_item.mode == 'www':
                    namespace = namespace_from(
                        {'path': path},
                        apis.Content,
                        config['variables'],
                        config['variables.www'],
                        item.frontmatter_data,
                        {'mode': 'www', 'content': content}
                    )
                    content = evaluate_this(tpl_item.content, namespace)
            # convert to html
            if path.endswith('.gmi'):
                parser = GemtextParser(content)
                soup = gemtext2html(parser)
                content = soup.prettify()
            if html_path.endswith('.html'):
                # TODO: html template
                # incomplete, yet should function with some primitive templates
                # template matched by the output (.html) path
                if template := template_for(html_path):
                    tpl_item = Item(template, prefix='templates/')
                    if not 'mode' in tpl_item or tpl_item.mode == 'www':
                        namespace = namespace_from(
                            {'path': path},
                            apis.Content,
                            config['variables'],
                            config['variables.www'],
                            item.frontmatter_data,
                            {'mode': 'www', 'content': content}
                        )
                        content = evaluate_this(tpl_item.content, namespace)
                # redirections, path conversion
                soup = BeautifulSoup(content, features='html.parser')
                # rewrite every attribute that can hold a link
                for attr in 'href', 'src', 'action':
                    for node in soup.css.select('['+attr+']'):
                        node[attr] = convert_href(node[attr], path)
                content = soup.prettify()
            save_this('../html/'+html_path, content)
        else:
            print(' ! skipped for www')
            # delete from www output
            delete_this('../html/'+html_path)