Compare commits
3 Commits
066a8167f9
...
172f017090
Author | SHA1 | Date |
---|---|---|
faildev_mode | 172f017090 | |
faildev_mode | f3a48ec9e6 | |
faildev_mode | 3188379c9e |
124
_src/build.py
124
_src/build.py
|
@ -14,8 +14,8 @@ import traceback
|
|||
from item import Item # from itself
|
||||
from files import save_this, link_this, scan_dir, delete_this # from itself
|
||||
from config import config # from itself
|
||||
import html
|
||||
from bs4 import BeautifulSoup # from beautifulsoup4
|
||||
from gemtext import GemtextParser # from itself
|
||||
|
||||
def template_for(path: str) -> str:
|
||||
"""Determine used template for the file
|
||||
|
@ -111,70 +111,65 @@ def abs2rel(match: re.Match, path: str) -> str:
|
|||
print(' M', url)
|
||||
return match.group(1) + url
|
||||
|
||||
def gemtext2html(gemtext: str) -> str:
|
||||
def gemtext2html(parser: GemtextParser, rotate_extension=True) -> BeautifulSoup:
|
||||
"""Converts gemtext to html format"""
|
||||
|
||||
gemtext = gemtext.split('\n')
|
||||
html_data = []
|
||||
soup = BeautifulSoup()
|
||||
|
||||
preformat = in_line = False
|
||||
for line in gemtext:
|
||||
if match := re.match(r'^```(.*)', line):
|
||||
if preformat := not preformat:
|
||||
if match.group(1):
|
||||
html_data.append(f'<pre language="{html.escape(match.group(1))}">')
|
||||
else:
|
||||
html_data.append(f'<pre>')
|
||||
for item in parser.elements:
|
||||
if item.type == 'plain':
|
||||
paragraphs = item.content.split('\n\n')
|
||||
el = BeautifulSoup()
|
||||
for content in paragraphs:
|
||||
p = soup.new_tag('p')
|
||||
p.append(content)
|
||||
el.append(p)
|
||||
|
||||
if item.type == 'link':
|
||||
# put image if it's link to image file
|
||||
if os.path.splitext(item.href)[1] in ('.jpg', '.png', '.gif'):
|
||||
el = soup.new_tag('img', src=item.href)
|
||||
if item.label:
|
||||
el.attrs['alt'] = item.label
|
||||
else:
|
||||
html_data.append('</pre>')
|
||||
continue
|
||||
href = item.href
|
||||
if rotate_extension and href.endswith('.gmi') and not re.match(r'^[\w]+:', href):
|
||||
href = os.path.splitext(href)[0] + '.html'
|
||||
el = soup.new_tag('a', href=href)
|
||||
if item.label:
|
||||
el.append(item.label)
|
||||
|
||||
if preformat:
|
||||
html_data.append(html.escape(line))
|
||||
continue
|
||||
elif item.type == 'preformatted':
|
||||
el = soup.new_tag('pre')
|
||||
el.append(item.content)
|
||||
if el.alt:
|
||||
el.attrs['title'] = el.alt
|
||||
|
||||
if not line: continue # skip empty lines
|
||||
elif item.type == 'list':
|
||||
el = soup.new_tag('ul')
|
||||
for list_item in item.items:
|
||||
li = soup.new_tag('li')
|
||||
li.append(list_item)
|
||||
el.append(li)
|
||||
|
||||
if match := re.match(r'^\*\s+(.*)', line):
|
||||
if not in_line:
|
||||
in_line = True
|
||||
html_data.append('<ul>')
|
||||
html_data.append(f'<li>{html.escape(match.group(1))}</li>')
|
||||
else:
|
||||
if in_line:
|
||||
in_line = False
|
||||
html_data.append('</ul>')
|
||||
if match := re.match(r'^#\s+(.*)', line):
|
||||
html_data.append(f'<h1>{html.escape(match.group(1))}</h1>')
|
||||
elif match := re.match(r'^##\s+(.*)', line):
|
||||
html_data.append(f'<h2>{html.escape(match.group(1))}</h2>')
|
||||
elif match := re.match(r'^###\s+(.*)', line):
|
||||
html_data.append(f'<h3>{html.escape(match.group(1))}</h3>')
|
||||
elif match := re.match(r'^=>\s+(\S+)(\s+.*)?', line):
|
||||
url = match.group(1)
|
||||
text = match.group(2).strip() if match.group(2) else None
|
||||
# rotate file extension
|
||||
if url.endswith('.gmi') and not re.match(r'^[-\w]+:', url):
|
||||
url = os.path.splitext(url)[0] + '.html'
|
||||
# if links points to image, display it instead
|
||||
if os.path.splitext(url)[1] in ('.jpg', '.png', '.gif'):
|
||||
if text:
|
||||
html_data.append(f'<img src="{html.escape(url)}" alt="{html.escape(text)}">')
|
||||
else:
|
||||
html_data.append(f'<img src="{html.escape(url)}">')
|
||||
else:
|
||||
html_data.append(f'<a href="{html.escape(url)}">{html.escape(text or url)}</a>')
|
||||
elif match := re.match(r'^>\s*(.*)', line):
|
||||
html_data.append(f'<blockquote>{html.escape(match.group(1))}</blockquote>')
|
||||
else:
|
||||
html_data.append(f'<p>{html.escape(line)}</p>')
|
||||
# close tags
|
||||
if preformat:
|
||||
html_data.append('</code>')
|
||||
elif in_line:
|
||||
html_data.append('</ul>')
|
||||
|
||||
return '\n'.join(html_data)
|
||||
elif item.type == 'head1':
|
||||
el = soup.new_tag('h1')
|
||||
el.append(item.content)
|
||||
|
||||
elif item.type == 'head2':
|
||||
el = soup.new_tag('h2')
|
||||
el.append(item.content)
|
||||
|
||||
elif item.type == 'head3':
|
||||
el = soup.new_tag('h3')
|
||||
el.append(item.content)
|
||||
|
||||
elif item.type == 'quote':
|
||||
el = soup.new_tag('blockquote')
|
||||
# FIXME: \n to <br>
|
||||
|
||||
soup.append(el)
|
||||
return soup
|
||||
|
||||
def convert_href(href: str, path: str) -> str:
|
||||
"""Redirects URLs and converts paths to relative.
|
||||
|
@ -256,6 +251,7 @@ if __name__ == '__main__':
|
|||
)
|
||||
content = evaluate_this(tpl_item.content, namespace)
|
||||
|
||||
# """
|
||||
# do redirections (http/https only!)
|
||||
if 'redirections' in config:
|
||||
content = re.sub(r'(https?://)([-\w]+[-\w.]*)(/\S*)?', partial(
|
||||
|
@ -268,6 +264,13 @@ if __name__ == '__main__':
|
|||
content = re.sub(r'(=>[ \t]+)/(\S+)', partial(
|
||||
abs2rel, path=path
|
||||
), content)
|
||||
# """
|
||||
|
||||
# redirections, path conversion
|
||||
if path.endswith('.gmi'):
|
||||
for line in content.strip().split('\n'):
|
||||
if line.startswith('=> '):
|
||||
...
|
||||
|
||||
# save results
|
||||
save_this('../'+path, content)
|
||||
|
@ -305,7 +308,9 @@ if __name__ == '__main__':
|
|||
|
||||
# convert to html
|
||||
if path.endswith('.gmi'):
|
||||
content = gemtext2html(content)
|
||||
parser = GemtextParser(content)
|
||||
soup = gemtext2html(parser)
|
||||
content = soup.prettify()
|
||||
|
||||
if html_path.endswith('.html'):
|
||||
# TODO: html template
|
||||
|
@ -313,7 +318,6 @@ if __name__ == '__main__':
|
|||
...
|
||||
|
||||
# redirections, path conversion
|
||||
|
||||
soup = BeautifulSoup(content, features='html.parser')
|
||||
for attr in 'href', 'src', 'action':
|
||||
for node in soup.css.select('['+attr+']'):
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
import re
|
||||
|
||||
# Regexes recognising every non-plain gemtext line type, keyed by type name.
# They are meant to be applied with ``pattern.match(line)``, i.e. anchored at
# the start of the line; a line matching none of them is plain text.
# Capture groups: link -> (2) href, (4) optional label; every other
# pattern -> (1) the text following the marker.
line_patterns = {
    name: re.compile(source)
    for name, source in (
        ('link', r'=>(\s+)(\S+)(\s+(.*))?'),
        ('preformatted', r'```(.*)'),
        ('list', r'\*\s+(.*)'),
        ('head1', r'#\s+(.*)'),
        ('head2', r'##\s+(.*)'),
        ('head3', r'###\s+(.*)'),
        ('quote', r'>\s*(.*)'),
    )
}
|
||||
|
||||
class GemtextParser:
    """Provides abstract representation of gemtext file, that can be used in
    conversion to html or modification in documents without regexes.

    After construction ``elements`` holds the parsed ``GemtextElement``
    objects in document order.
    """

    def __init__(self, content: str):
        """Split *content* into lines and group them into elements.

        Consecutive plain lines and consecutive quote lines are merged into
        one element, consecutive list lines into one list element. Every
        line following an opening preformatted fence is added to that block
        verbatim until a closing fence is found.
        """
        self.elements = []

        for line in content.split('\n'):
            last_item = self.elements[-1] if self.elements else None

            # inside an open preformatted block nothing is interpreted
            if (last_item is not None
                    and last_item.type == 'preformatted'
                    and not last_item.closed):
                # any line *starting* with the fence toggles the block off;
                # text after the closing fence is ignored, so comparing with
                # == '```' would miss e.g. '``` ' and swallow the rest of
                # the document into the block
                if line.startswith('```'):
                    last_item.closed = True
                else:
                    last_item.add_line(line)
                continue

            new = GemtextElement(line)

            # merge with the previous element where the type allows it
            if last_item is not None and last_item.type == new.type:
                if new.type in ('plain', 'quote'):
                    last_item.add_line(new.content)
                    continue
                if new.type == 'list':
                    last_item.items += new.items
                    continue

            # headings, links, new preformatted blocks and type changes
            # always start a new element
            self.elements.append(new)

    def __str__(self) -> str:
        """Reassemble the document by joining all elements with newlines."""
        return '\n'.join(str(x) for x in self.elements)
|
||||
|
||||
class GemtextElement:
    """Represents single Gemtext element. It is created from single line and
    extended later.

    Which attributes exist depends on ``type``; see the comments below.
    """
    # one of the ``line_patterns`` keys, or 'plain' when no pattern matched
    type = 'plain'
    # text of the element; stays None for 'link' and 'list' elements, which
    # use ``href``/``label`` and ``items`` instead
    content = None
    # True once add_line() stored a line in a preformatted block; lets an
    # empty block be told apart from a block holding one empty line
    _has_lines = False

    def __init__(self, line: str):
        """Classify *line* and extract its data.

        Sets ``href`` and ``label`` for links, ``items`` for list lines,
        ``alt``, ``content`` and ``closed`` for an opening preformatted
        fence, and ``content`` for headings, quotes and plain lines.
        """
        for kind, pattern in line_patterns.items():
            match = pattern.match(line)
            if not match:
                continue

            self.type = kind
            if kind == 'link':
                self.href = match.group(2)
                self.label = match.group(4)
            elif kind == 'list':
                self.items = [match.group(1)]
            elif kind == 'preformatted':
                self.alt = match.group(1) or None
                self.content = ''
                # to distinguish two consecutive preformatted blocks:
                self.closed = False
            else:
                self.content = match.group(1)
            break  # there is no point of testing further
        else:
            # no pattern matched: plain text keeps the whole line
            self.content = line

    def add_line(self, line: str) -> None:
        """Append *line* to ``content``; no-op for elements without content."""
        if self.content is None:
            return

        if self.type == 'preformatted':
            # keep preformatted text exact: the first stored line replaces
            # the empty initial content, even when that line is empty itself
            is_first = not self._has_lines and self.content == ''
            self._has_lines = True
            self.content = line if is_first else self.content + '\n' + line
        elif self.content == '':
            self.content = line or '\n'  # add newline on empty line
        else:
            self.content += '\n' + line

    def __str__(self) -> str:
        """Gemtext reassembler"""
        if self.type == 'plain':
            return self.content
        if self.type == 'link':
            # a link without label must not render the text "None"
            if self.label:
                return f'=> {self.href} {self.label}'
            return f'=> {self.href}'
        if self.type == 'preformatted':
            parts = ['```' + (self.alt or '')]
            # an empty block has no content line between the fences
            if self._has_lines or self.content:
                parts.append(self.content)
            parts.append('```')
            return '\n'.join(parts)
        if self.type == 'list':
            return '\n'.join('* ' + x for x in self.items)
        if self.type == 'head1':
            return '# ' + self.content
        if self.type == 'head2':
            return '## ' + self.content
        if self.type == 'head3':
            return '### ' + self.content
        if self.type == 'quote':
            return '\n'.join('> ' + x for x in self.content.split('\n'))
        # unknown type: __str__ has to return a string, never None
        return self.content or ''
|
13
_src/item.py
13
_src/item.py
|
@ -8,18 +8,19 @@ class Item(AttrDict):
|
|||
"""This class represents single content file
|
||||
It extracts all frontmatter fields using python-frontmatter module"""
|
||||
|
||||
title = None
|
||||
tags = []
|
||||
description = None
|
||||
source = None
|
||||
author = None
|
||||
|
||||
def __init__(self, path: str, prefix: str = 'content/'):
|
||||
# initialize parent
|
||||
super().__init__()
|
||||
|
||||
self.path, self.prefix = path, prefix
|
||||
|
||||
# initialize common fields
|
||||
self.title = None
|
||||
self.tags = []
|
||||
self.description = None
|
||||
self.source = None
|
||||
self.author = None
|
||||
|
||||
frontmatter_data, content = frontmatter.parse(read_this(prefix + path))
|
||||
self.content = content
|
||||
self.frontmatter_data = frontmatter_data
|
||||
|
|
Reference in New Issue