Renewed gemtext2html function
This commit is contained in:
parent
f3a48ec9e6
commit
172f017090
124
_src/build.py
124
_src/build.py
|
@ -14,8 +14,8 @@ import traceback
|
|||
from item import Item # from itself
|
||||
from files import save_this, link_this, scan_dir, delete_this # from itself
|
||||
from config import config # from itself
|
||||
import html
|
||||
from bs4 import BeautifulSoup # from beautifulsoup4
|
||||
from gemtext import GemtextParser # from itself
|
||||
|
||||
def template_for(path: str) -> str:
|
||||
"""Determine used template for the file
|
||||
|
@ -111,70 +111,65 @@ def abs2rel(match: re.Match, path: str) -> str:
|
|||
print(' M', url)
|
||||
return match.group(1) + url
|
||||
|
||||
def gemtext2html(parser: GemtextParser, rotate_extension=True) -> BeautifulSoup:
    """Convert parsed gemtext into an HTML tree.

    Args:
        parser: object whose ``elements`` attribute yields the parsed gemtext
            items. Every item has a ``type`` ('plain', 'link',
            'preformatted', 'list', 'head1', 'head2', 'head3' or 'quote')
            plus the fields read below (``content``, ``href`` / ``label``,
            ``items``).
        rotate_extension: when true, links without a URI scheme that end in
            ``.gmi`` are rewritten to point at the matching ``.html`` file.

    Returns:
        A BeautifulSoup document with the converted nodes appended in input
        order. Items of an unknown type are skipped.
    """
    # Same parser the rest of this file asks for when it builds a soup.
    soup = BeautifulSoup(features='html.parser')

    image_extensions = ('.jpg', '.png', '.gif')
    heading_tags = {'head1': 'h1', 'head2': 'h2', 'head3': 'h3'}
    # RFC 3986 scheme: a letter followed by letters, digits, '+', '-' or '.'.
    has_scheme = re.compile(r'^[A-Za-z][A-Za-z0-9+.-]*:')

    for item in parser.elements:
        kind = item.type

        if kind == 'plain':
            # One <p> per blank-line separated paragraph, appended straight
            # to the document instead of going through a nested soup.
            for content in item.content.split('\n\n'):
                if not content.strip():
                    continue  # do not emit empty paragraphs
                p = soup.new_tag('p')
                p.append(content)
                soup.append(p)
            continue

        if kind == 'link':
            # put image if it's link to image file
            if os.path.splitext(item.href)[1] in image_extensions:
                el = soup.new_tag('img', src=item.href)
                if item.label:
                    el.attrs['alt'] = item.label
            else:
                href = item.href
                if (rotate_extension and href.endswith('.gmi')
                        and not has_scheme.match(href)):
                    href = os.path.splitext(href)[0] + '.html'
                el = soup.new_tag('a', href=href)
                # An anchor without text is invisible: fall back to the URL.
                el.append(item.label or href)

        elif kind == 'preformatted':
            el = soup.new_tag('pre')
            el.append(item.content)
            # Read the alt text from the parsed item. ``el.alt`` would look
            # for a child <alt> tag of the new <pre> and always be None.
            # NOTE(review): assumes preformatted items expose ``alt`` --
            # confirm against GemtextParser.
            alt = getattr(item, 'alt', None)
            if alt:
                el.attrs['title'] = alt

        elif kind == 'list':
            el = soup.new_tag('ul')
            for list_item in item.items:
                li = soup.new_tag('li')
                li.append(list_item)
                el.append(li)

        elif kind in heading_tags:
            el = soup.new_tag(heading_tags[kind])
            el.append(item.content)

        elif kind == 'quote':
            el = soup.new_tag('blockquote')
            # NOTE(review): assumes quote items keep their text in
            # ``content`` like headings do -- confirm against GemtextParser.
            quote_lines = (getattr(item, 'content', None) or '').split('\n')
            for index, quote_line in enumerate(quote_lines):
                if index:
                    # line breaks inside a quote become <br>
                    el.append(soup.new_tag('br'))
                if quote_line:
                    el.append(quote_line)

        else:
            # Unknown item type: nothing to render. Skipping avoids reusing
            # (or never having bound) the element of a previous iteration.
            continue

        soup.append(el)

    return soup
|
||||
|
||||
def convert_href(href: str, path: str) -> str:
|
||||
"""Redirects URLs and converts paths to relative.
|
||||
|
@ -256,6 +251,7 @@ if __name__ == '__main__':
|
|||
)
|
||||
content = evaluate_this(tpl_item.content, namespace)
|
||||
|
||||
# """
|
||||
# do redirections (http/https only!)
|
||||
if 'redirections' in config:
|
||||
content = re.sub(r'(https?://)([-\w]+[-\w.]*)(/\S*)?', partial(
|
||||
|
@ -268,6 +264,13 @@ if __name__ == '__main__':
|
|||
content = re.sub(r'(=>[ \t]+)/(\S+)', partial(
|
||||
abs2rel, path=path
|
||||
), content)
|
||||
# """
|
||||
|
||||
# redirections, path conversion
|
||||
if path.endswith('.gmi'):
|
||||
for line in content.strip().split('\n'):
|
||||
if line.startswith('=> '):
|
||||
...
|
||||
|
||||
# save results
|
||||
save_this('../'+path, content)
|
||||
|
@ -305,7 +308,9 @@ if __name__ == '__main__':
|
|||
|
||||
# convert to html
|
||||
if path.endswith('.gmi'):
|
||||
content = gemtext2html(content)
|
||||
parser = GemtextParser(content)
|
||||
soup = gemtext2html(parser)
|
||||
content = soup.prettify()
|
||||
|
||||
if html_path.endswith('.html'):
|
||||
# TODO: html template
|
||||
|
@ -313,7 +318,6 @@ if __name__ == '__main__':
|
|||
...
|
||||
|
||||
# redirections, path conversion
|
||||
|
||||
soup = BeautifulSoup(content, features='html.parser')
|
||||
for attr in 'href', 'src', 'action':
|
||||
for node in soup.css.select('['+attr+']'):
|
||||
|
|
|
@ -7,7 +7,7 @@ line_patterns = {
|
|||
'head1': re.compile(r'#\s+(.*)'),
|
||||
'head2': re.compile(r'##\s+(.*)'),
|
||||
'head3': re.compile(r'###\s+(.*)'),
|
||||
'quote': re.compile(r'>\s+(.*)')
|
||||
'quote': re.compile(r'>\s*(.*)')
|
||||
}
|
||||
|
||||
class GemtextParser:
|
||||
|
|
Reference in New Issue