Added gemtext parser
This commit is contained in:
parent
066a8167f9
commit
3188379c9e
|
@ -0,0 +1,82 @@
|
|||
import re
|
||||
|
||||
# Patterns recognising each gemtext line type, applied with ``pattern.match``
# (i.e. anchored at the start of the line).
#
# Capture-group contract relied on by the consumers of this table:
#   * 'link'  -> group(2) is the URL, group(4) the optional label
#   * others  -> group(1) is the text following the marker
#
# Per the gemtext specification the whitespace after '=>', '#', '##', '###'
# and '>' is optional; only list items require the space ('* ').  The
# negative lookaheads keep the heading levels mutually exclusive, so the
# result does not depend on the iteration order of this dict.
line_patterns = {
    'link': re.compile(r'=>(\s*)(\S+)(\s+(.*))?'),
    'preformatted': re.compile(r'```(.*)'),
    'list': re.compile(r'\*\s+(.*)'),
    'head1': re.compile(r'#(?!#)\s*(.*)'),
    'head2': re.compile(r'##(?!#)\s*(.*)'),
    'head3': re.compile(r'###\s*(.*)'),
    'quote': re.compile(r'>\s*(.*)'),
}
|
||||
|
||||
class GemtextParser:
    """Provides abstract representation of gemtext file, that can be used in
    conversion to html.

    After construction, ``elements`` holds the parsed ``GemtextElement``
    objects in document order.  Consecutive plain lines, consecutive quote
    lines and consecutive list items are merged into a single element;
    every other line type produces one element per line.
    """

    # Declared for readers/type checkers only; the list itself is created
    # per instance in __init__.
    elements: "list[GemtextElement]"

    def __init__(self, content: str):
        """Parse *content* (the full gemtext document) into ``self.elements``."""
        # Must be a per-instance list: a class-level list would be shared by
        # every parser, so a second parse would corrupt the first one's
        # results.
        self.elements = []

        for line in content.split('\n'):
            last_item = self.elements[-1] if self.elements else None
            last_type = last_item.type if last_item is not None else None

            # Inside an open preformatted block every line is taken verbatim
            # until the closing fence.
            if last_type == 'preformatted' and not last_item.closed:
                # Any line starting with ``` toggles preformatted mode off;
                # text after the fence on a closing line is ignored.
                if line.startswith('```'):
                    last_item.closed = True
                else:
                    last_item.add_line(line)
                continue

            new = GemtextElement(line)
            if new.type == last_type and new.type in ('plain', 'quote'):
                # Continuation of a paragraph / quote block.
                last_item.add_line(new.content)
            elif new.type == last_type and new.type == 'list':
                # Continuation of a list.
                last_item.items += new.items
            else:
                self.elements.append(new)
|
||||
|
||||
class GemtextElement:
    """Represents single Gemtext element. It is created from single line and
    extended later.

    Attributes set depending on the detected ``type``:

    * ``'plain'``                     -- ``content`` is the whole line.
    * ``'head1'``/``'head2'``/``'head3'``/``'quote'``
                                      -- ``content`` is the text after the marker.
    * ``'link'``                      -- ``href`` and ``label`` (``label`` is
                                         ``None`` when the line has no label).
    * ``'list'``                      -- ``items``, a one-element list holding
                                         the item text.
    * ``'preformatted'``              -- ``alt`` (text after the opening fence
                                         or ``None``), ``content`` (initially
                                         ``''``) and ``closed`` (initially
                                         ``False``).
    """

    # Defaults used when no pattern matches / no content applies.
    type = 'plain'
    content = None

    def __init__(self, line: str):
        """Classify *line* against ``line_patterns`` and extract its parts."""
        for kind, pattern in line_patterns.items():
            if match := pattern.match(line):
                self.type = kind

                if kind == 'link':
                    self.href = match.group(2)
                    self.label = match.group(4)
                elif kind == 'list':
                    self.items = [match.group(1)]
                elif kind == 'preformatted':
                    self.alt = match.group(1) or None
                    self.content = ''
                    # to distinguish two consecutive preformatted blocks:
                    self.closed = False
                else:
                    self.content = match.group(1)

                break  # first matching pattern wins

        if self.type == 'plain':
            self.content = line

    def add_line(self, line: str):
        """Append *line* to ``content``, separated by a newline.

        Does nothing for elements whose ``content`` is ``None`` (links and
        lists never set it).
        """
        if self.content is None:
            return

        if self.content == '':
            # Empty content is treated as "no lines yet"; an empty first line
            # is recorded as a bare newline so it is not lost.
            # NOTE(review): the next non-empty line is then joined with
            # another '\n', giving two newlines -- confirm this is intended.
            self.content = line or '\n'
        else:
            self.content += '\n' + line
|
Reference in New Issue