112 lines
3.8 KiB
Python
112 lines
3.8 KiB
Python
import re
|
|
|
|
# Compiled patterns for every gemtext line type other than plain text.
# Each pattern is matched against the start of a single line (no newline).
# Capture groups used by GemtextElement:
#   link          group 2 = URL, group 4 = optional label
#   preformatted  group 1 = optional alt text following the fence
#   all others    group 1 = text following the line marker
line_patterns = {
    # Whitespace between "=>" and the URL is optional in the gemtext
    # specification, hence \s* (group numbering is kept unchanged).
    'link': re.compile(r'=>(\s*)(\S+)(\s+(.*))?'),
    'preformatted': re.compile(r'```(.*)'),
    'list': re.compile(r'\*\s+(.*)'),
    # The mandatory whitespace after the marker keeps the three heading
    # levels mutually exclusive ("## x" cannot match 'head1').
    'head1': re.compile(r'#\s+(.*)'),
    'head2': re.compile(r'##\s+(.*)'),
    'head3': re.compile(r'###\s+(.*)'),
    'quote': re.compile(r'>\s*(.*)'),
}
|
|
|
|
class GemtextParser:
    """Provides an abstract representation of a gemtext file that can be used
    for conversion to HTML or for modifying documents without regexes.

    After construction ``elements`` holds the parsed GemtextElement objects
    in document order; ``str(parser)`` reassembles gemtext from them.
    """

    def __init__(self, content: str) -> None:
        """Parse *content* (split on ``'\\n'``) into ``self.elements``.

        Consecutive plain lines and consecutive quote lines are merged into
        one element, consecutive list items into one list element.  While a
        preformatted block is open, lines are appended to it verbatim.
        """
        self.elements = []
        # Most recently appended element; None until the first line is seen.
        last_item = None

        for line in content.split('\n'):
            inside_preformatted = (
                last_item is not None
                and last_item.type == 'preformatted'
                and not last_item.closed
            )
            if inside_preformatted:
                # Any line that *starts* with the fence toggles preformatted
                # mode off; text after the fence on a closing line is
                # ignored.  An exact comparison would leave the block open
                # on e.g. a trailing space or '\r' and swallow the rest of
                # the document.
                if line.startswith('```'):
                    last_item.closed = True
                else:
                    last_item.add_line(line)
                continue

            new = GemtextElement(line)
            if last_item is not None and last_item.type == new.type:
                if new.type in ('plain', 'quote'):
                    last_item.add_line(new.content)
                    continue
                if new.type == 'list':
                    last_item.items += new.items
                    continue
            # Every other case (type change, or a type that is never
            # merged, such as headings and links) starts a new element.
            self.elements.append(new)
            last_item = new

    def __str__(self) -> str:
        """Return the document reassembled from its elements."""
        return '\n'.join(str(x) for x in self.elements)
|
|
|
|
class GemtextElement:
    """Represents a single gemtext element. It is created from a single line
    and extended later.

    Attributes (which ones exist depends on ``type``):
        type:    'plain', 'link', 'preformatted', 'list', 'head1', 'head2',
                 'head3' or 'quote'.
        content: text of plain/heading/quote/preformatted elements, lines
                 joined with '\\n'; None for links and lists.
        href, label: link target and optional label (links only).
        items:   list of item texts (lists only).
        alt:     alt text after the opening fence or None (preformatted only).
        closed:  whether the closing fence was seen (preformatted only).
    """
    type = 'plain'
    content = None
    # True only for a freshly opened preformatted block that has not received
    # a line yet.  Needed because content == '' alone cannot distinguish
    # "no lines" from "one empty line".
    _unfilled = False
    # Line markers used when reassembling headings.
    _heading_prefixes = {'head1': '# ', 'head2': '## ', 'head3': '### '}

    def __init__(self, line: str) -> None:
        """Classify *line* using ``line_patterns`` and store its parts."""
        for kind, pattern in line_patterns.items():
            if match := pattern.match(line):
                self.type = kind

                if kind == 'link':
                    self.href = match.group(2)
                    self.label = match.group(4)
                elif kind == 'list':
                    self.items = [match.group(1)]
                elif kind == 'preformatted':
                    self.alt = match.group(1) or None
                    self.content = ''
                    self._unfilled = True
                    # to distinguish two consecutive preformatted blocks:
                    self.closed = False
                else:
                    self.content = match.group(1)

                break  # there is no point in testing further
        else:
            # No pattern matched: the line is plain text, kept verbatim.
            self.content = line

    def add_line(self, line: str) -> None:
        """Append *line* to ``content``; no-op for elements without content.

        Every line is preserved, including empty ones, so that reassembling
        the element yields the same number of lines that were added.
        """
        if self.content is None:
            return
        if self._unfilled and self.content == '':
            # First line of a preformatted block: nothing to join with yet.
            self.content = line
        else:
            self.content += '\n' + line
        self._unfilled = False

    def __str__(self) -> str:
        """Gemtext reassembler.

        Raises:
            TypeError: if ``type`` is not one of the known element types.
        """
        kind = self.type
        if kind == 'plain':
            return self.content
        if kind == 'link':
            if self.label:
                return f'=> {self.href} {self.label}'
            return f'=> {self.href}'
        if kind == 'preformatted':
            opening = '```' + (self.alt or '')
            if self._unfilled and self.content == '':
                # Block without any line: emit just the two fences.
                return opening + '\n```'
            return '\n'.join((opening, self.content, '```'))
        if kind == 'list':
            return '\n'.join('* ' + x for x in self.items)
        if kind in self._heading_prefixes:
            return self._heading_prefixes[kind] + self.content
        if kind == 'quote':
            return '\n'.join('> ' + x for x in self.content.split('\n'))
        raise TypeError(f'cannot reassemble element of type {kind!r}')
|