practice/java/BasicInterpreter/src/InterpreterAlpha/Lexer.java

217 lines
6.1 KiB
Java

package InterpreterAlpha;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
enum TokenType {
INTEGER,
REAL,
INTEGER_CONST,
REAL_CONST,
PLUS, // +
MINUS, // -
MUL, // *
INTEGER_DIV, // DIV
FLOAT_DIV, // /
LPAREN, // (
RPAREN, // )
ID,
ASSIGN, // :=
BEGIN,
END,
SEMI, // ;
DOT, // .
PROGRAM,
VAR,
COLON, // :
COMMA, // ,
LOOP,
DO,
EOF,
// EOF token is used to indicate that
// there is no more input left for lexical analysis
}
class Token {
TokenType type;
String value;
Token(TokenType type, String value) {
this.type = type;
this.value = value;
}
}
class Lexer {
String text;
int pos;
char current_char;
private static final Map<String, Token> RESERVED_KEYWORDS = initMap();
// map of constant keywords
private static Map<String, Token> initMap() {
Map<String, Token> map = new HashMap<>();
map.put("PROGRAM", new Token(TokenType.PROGRAM, "PROGRAM"));
map.put("VAR", new Token(TokenType.VAR, "VAR"));
map.put("DIV", new Token(TokenType.INTEGER_DIV, "DIV"));
map.put("INTEGER", new Token(TokenType.INTEGER, "INTEGER"));
map.put("REAL", new Token(TokenType.REAL, "REAL"));
map.put("BEGIN", new Token(TokenType.BEGIN, "BEGIN"));
map.put("END", new Token(TokenType.END, "END"));
map.put("LOOP", new Token(TokenType.LOOP, "LOOP"));
map.put("DO", new Token(TokenType.DO, "DO"));
return Collections.unmodifiableMap(map);
}
Lexer(String text) {
this.text = text;
this.pos = 0;
this.current_char = this.text.charAt(this.pos);
}
void error() throws Exception {
throw new Exception("Invalid character!");
}
// advance the position and set the current_char variable
void advance() {
this.pos++;
if (this.pos > this.text.length() - 1) {
this.current_char = '\0'; // End of input
} else {
this.current_char = this.text.charAt(this.pos);
}
}
// look at the next character
char peek() {
int peek_pos = this.pos + 1;
if (peek_pos > this.text.length() - 1) {
return '\0';
} else {
return this.text.charAt(peek_pos);
}
}
// Return a multi digit integer or real number token (as a String)
Token number() {
StringBuilder result = new StringBuilder();
while (this.current_char != '\0' && Character.isDigit(this.current_char)) {
result.append(this.current_char);
this.advance();
}
if (this.current_char == '.') {
result.append(this.current_char);
this.advance();
while (this.current_char != '\0' && Character.isDigit(this.current_char)) {
result.append(this.current_char);
this.advance();
}
return new Token(TokenType.REAL_CONST, result.toString());
} else
return new Token(TokenType.INTEGER_CONST, result.toString());
}
Token _id() {
StringBuilder result = new StringBuilder();
while (this.current_char != '\0' && Character.isLetterOrDigit(this.current_char)) {
result.append(this.current_char);
this.advance();
}
return RESERVED_KEYWORDS.getOrDefault(result.toString(), new Token(TokenType.ID, result.toString()));
}
// Lexical analyzer
// that breaks a sentence apart into tokens (1 at a time)
Token get_next_token() throws Exception {
while (this.current_char != '\0') {
// skip whitespaces, End of Line, Tab
if (Character.isSpaceChar(this.current_char) || this.current_char == '\n' || this.current_char == '\t') {
this.advance();
continue;
}
// skip comments
if (this.current_char == '{') {
this.advance(); // starting bracket
while (this.current_char != '}') {
this.advance();
}
this.advance(); // closing bracket
continue;
}
if (Character.isLetter(this.current_char)) {
return this._id();
}
if (Character.isDigit(this.current_char)) {
return this.number();
}
if (this.current_char == ':' && this.peek() == '=') {
this.advance();
this.advance();
return new Token(TokenType.ASSIGN, ":=");
}
if (this.current_char == ';') {
this.advance();
return new Token(TokenType.SEMI, ";");
}
if (this.current_char == ':') {
this.advance();
return new Token(TokenType.COLON, ":");
}
if (this.current_char == ',') {
this.advance();
return new Token(TokenType.COMMA, ",");
}
if (this.current_char == '+') {
this.advance();
return new Token(TokenType.PLUS, "+");
}
if (this.current_char == '-') {
this.advance();
return new Token(TokenType.MINUS, "-");
}
if (this.current_char == '*') {
this.advance();
return new Token(TokenType.MUL, "*");
}
if (this.current_char == '/') {
this.advance();
return new Token(TokenType.FLOAT_DIV, "/");
}
if (this.current_char == '(') {
this.advance();
return new Token(TokenType.LPAREN, "(");
}
if (this.current_char == ')') {
this.advance();
return new Token(TokenType.RPAREN, ")");
}
if (this.current_char == '.') {
this.advance();
return new Token(TokenType.DOT, ".");
}
this.error();
}
return new Token(TokenType.EOF, "");
}
}