xch-blockchain/tools/chialispp.py

357 lines
12 KiB
Python

from __future__ import annotations
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
# A simple class for separating a line into code and comment
class Line:
def __init__(self, code: List[bytes], comment: Optional[List[bytes]]):
self.code = code
self.comment = comment
# Remove all whitespace from the beginning of a byte array
def trim_ascii_start(line: List[bytes]) -> List[bytes]:
first_non_ws: int = 0
got_one: bool = False
for i, ch in enumerate(line):
if not (ch.decode("ascii").isspace()):
got_one = True
first_non_ws = i
break
if not got_one:
return []
else:
return line[first_non_ws:]
# Remove all whitespace from the end of a byte array
def trim_ascii_end(line: List[bytes]) -> List[bytes]:
last_non_ws: int = 0
got_one: bool = False
for i, ch in enumerate(line):
if (not ch.decode("ascii").isspace()) and ch[0] <= 127:
got_one = True
last_non_ws = i
if not got_one:
return []
else:
return line[0 : last_non_ws + 1]
class Formatter:
def __init__(self) -> None:
self.start_paren_level: int = 0
self.paren_level: int = 0
self.out_col: int = 0 # The colum we are at while outputting a line
self.cur_line: int = 0
self.line: List[bytes] = []
self.comment: Optional[List[bytes]] = None
self.lines: List[List[bytes]] = []
self.work_lines: List[Line] = []
self.getting_form_name: int = 0
self.got_form_on_line: int = 0
self.form_name: List[bytes] = []
self.reset_form_indent: bool = False
# self.def_started = False
self.result_line: List[bytes] = []
# self.definition_starts = []
# self.extra_def_lines = []
self.indent_stack: List[int] = []
self.result: List[List[bytes]] = []
self.config: Dict[str, Any] = {
"gnu_comment_conventions": False,
}
# Add a character of source, breaking the source into lines as we go
def run_char(self, ch: bytes) -> None:
if ch == b"\n":
self.finish_line()
else:
self.line.append(ch)
# Process a single character and add it to the final result
def output_char(self, ch: bytes) -> None:
if ch == b"\n":
self.work_lines.append(Line(self.result_line, self.comment))
self.result_line = []
self.comment = None
self.out_col = 0
else:
self.result_line.append(ch)
self.out_col += 1
# Process a line and add it to the work_lines array
def output_line(self) -> None:
line_indent = self.get_cur_indent()
max_paren_level = self.paren_level
self.start_paren_level = self.paren_level
starting_indent_len = len(self.indent_stack)
if not self.line:
self.output_char(b"\n")
return
# Get a line from the unprocessed lines
line = trim_ascii_end(self.line)
line = trim_ascii_start(line)
self.line.clear()
# Some variables to be aware of whether or not we're in a string literal
in_string = None
string_bs = False # bs == backslash
# Some variables to be aware of whether or not we're in a comment
semis = 0 # number of semi colons starting a comment
semi_off = 0 # the column where the comment starts
comment = [] # The comment byte array
# Main loop to format the line
for i, ch in enumerate(line):
# Track the form name
if self.getting_form_name > 0:
self.reset_form_indent = False
if self.getting_form_name == 1 and not (ch == b" "):
self.getting_form_name = 2
self.form_name.append(ch)
elif self.getting_form_name == 2 and ch in (b" ", b"(", b")"):
self.getting_form_name = 0
self.got_form_on_line = self.cur_line
else:
self.form_name.append(ch)
# if self.start_paren_level == 1 and not self.def_started:
# self.def_started = True
# self.definition_starts.append(len(self.work_lines))
# Special indentation rules for `if`
should_reset_indent = (
self.getting_form_name == 0
and self.form_name == [b"i", b"f"]
and not (ch == b" ")
and not self.reset_form_indent
)
# Be sure to not format string literals as code
if string_bs:
string_bs = False
continue
if in_string is not None:
if ch == b"\\":
string_bs = True
if ch == in_string:
in_string = None
continue
if semis == 0:
# We've entered a string, stop processing
if ch == b"'" or ch == b'"':
in_string = ch
continue
elif ch == b"(":
self.paren_level += 1
if self.paren_level > max_paren_level:
max_paren_level = self.paren_level
if should_reset_indent:
self.reset_indent(line_indent + i)
self.reset_form_indent = True
self.indent_paren()
self.form_name.clear()
self.got_form_on_line = 0
self.getting_form_name = 1
continue
elif ch == b")":
indentation_diff: int = (self.indent_stack[-1] if len(self.indent_stack) > 0 else 0) - (
self.indent_stack[-2] if len(self.indent_stack) > 1 else 0
)
self.retire_indent()
if self.paren_level <= self.start_paren_level:
line_indent -= indentation_diff
self.paren_level -= 1
continue
elif should_reset_indent:
self.reset_indent(line_indent + i)
self.reset_form_indent = True
if ch == b";":
if semis == 0:
semi_off = i
semis += 1
elif semis > 0:
comment = line[i:]
line = trim_ascii_end(line[:semi_off])
break
if semis + semi_off == len(line):
line = trim_ascii_end(line[:semi_off])
line = trim_ascii_end(line)
if semis == 1 and not line and self.config["gnu_comment_conventions"]:
semis = 0
self.comment = comment
comment = []
else:
self.comment = None
if semis > 0:
if semis < 3 or not self.config["gnu_comment_conventions"]:
self.indent(line_indent)
if line and not self.config["gnu_comment_conventions"]:
for co in line:
self.output_char(co)
self.output_char(b" ")
self.output_char(b" ")
for _i in range(semis):
self.output_char(b";")
for co in comment:
self.output_char(co)
if line and self.config["gnu_comment_conventions"]:
# Code after comment in this scenario
self.output_char(b"\n")
self.indent(line_indent)
for co in line:
self.output_char(co)
elif line != []:
self.indent(line_indent)
for co in line:
self.output_char(co)
self.output_char(b"\n")
# We never want the next line to be more indented than us + 1 unit
if len(self.indent_stack) > starting_indent_len + 1:
for i in range(starting_indent_len + 1, len(self.indent_stack)):
self.indent_stack[i] = self.indent_stack[starting_indent_len + 1]
# if max_paren_level > 1 and self.paren_level == 1:
# self.def_started = False
# self.extra_def_lines.append(len(self.work_lines))
# Add our current line to our lines array and reset the line
def finish_line(self) -> None:
self.lines.append(self.line.copy())
self.line.clear()
self.comment = None
def finish(self) -> None:
if self.line:
self.finish_line()
for i in range(len(self.lines)):
self.line = self.lines[i]
self.cur_line = i
self.output_line()
next_handle_line = 0
for i in range(len(self.work_lines)):
if i < next_handle_line:
continue
# Find the max comment spacing needed and output the group.
# Skip if already handled.
comment = self.work_lines[i].comment
if comment is not None:
comment_offset = len(self.work_lines[i].code)
comments = [comment]
for j in range(i + 1, len(self.lines)):
comment = self.work_lines[j].comment
if comment is not None:
comments.append(comment)
comment_offset = max(comment_offset, len(self.work_lines[j].code))
else:
next_handle_line = j
break
for j, comment in enumerate(comments):
line = self.work_lines[i + j].code.copy()
while len(line) < comment_offset:
line.append(b" ")
line.append(b" ")
line.append(b";")
line += comment[:]
self.result.append(line)
else:
self.result.append(self.work_lines[i].code.copy())
# el_idx = 0
# inserted = 0
#
# for ds in self.definition_starts[0:len(self.definition_starts)]:
# while el_idx < len(self.extra_def_lines) and self.extra_def_lines[el_idx] < ds:
# el_idx += 1
#
# if el_idx >= len(self.extra_def_lines):
# break
#
# el = self.extra_def_lines[el_idx]
# if el <= ds + 1:
# insert_at = el + inserted
# self.result.insert(insert_at, [])
# inserted += 1
# We maintain a stack of indentation levels
# The following functions maintain that stack
def indent(self, cur_indent: int) -> None:
while self.out_col < cur_indent:
self.output_char(b" ")
def get_cur_indent(self) -> int:
if self.indent_stack:
return self.indent_stack[-1]
else:
return 0
def reset_indent(self, i: int) -> None:
if self.indent_stack:
self.indent_stack[-1] = i
def indent_paren(self) -> None:
current_indent = self.indent_stack[-1] if self.indent_stack else 0
self.indent_stack.append(current_indent + 2)
def retire_indent(self) -> None:
if self.indent_stack:
self.indent_stack.pop()
def concat_byte_array(bs: List[bytes]) -> bytes:
return b"".join(bs)
def main() -> None:
for arg in sys.argv[1:]:
path = Path(arg)
if path.is_dir():
all_paths = [*path.rglob("*.clsp"), *path.rglob("*.clib")]
else:
all_paths = [path]
for filename in all_paths:
with open(filename, "rb") as f:
filedata = f.read()
formatter = Formatter()
for ch in filedata:
formatter.run_char(bytes([ch]))
formatter.finish()
with open(filename, "wb") as f:
for i, line in enumerate(formatter.result):
f.write(concat_byte_array(line))
f.write(b"\n")
if __name__ == "__main__":
main()