Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 63 additions & 79 deletions lexer.l
Original file line number Diff line number Diff line change
@@ -1,132 +1,116 @@
import java.util.*;

%%
%class lexer
%class LexerAnalysis
%standalone
%unicode
%type JavaType
%type int
%line
%column
%byaccj


/* main character classes */
LineTerminator = \r|\n|;|\r\n
InputCharacter = [^\r\n]
LineTerminator = \r|\n|\r\n
Letter = [a-zA-Z]
LetterOrDigit = [a-zA-Z0-9]

WhiteSpace = {LineTerminator} | [ \t\f]

/* comments */
Comment = {TraditionalComment} | {EndOfLineComment}
Comment = {TraditionalComment}|{EndOfLineComment}

TraditionalComment = "/*" [^*] ~"*/" | "/*" "*"+ "/"
EndOfLineComment = "//" {InputCharacter}* {LineTerminator}?
/* Block comment: the opening slash-star marker, then the shortest text that
   ends with the first closing star-slash marker (JFlex "upto" operator ~).
   Only the two markers are quoted: a quoted JFlex pattern is literal text,
   so operators and macro references must stay outside the quotes. */
TraditionalComment = "/*" ~"*/"
/* Line comment: two slashes, the rest of the line, and an optional
   terminator so a comment on the last line of input still matches. */
EndOfLineComment = "//" [^\r\n]* {LineTerminator}?

/* identifiers
Identifier = [:jletter:][:jletterdigit:]* */
/* identifiers */
Identifier = {Letter}{LetterOrDigit}*

/* integer literals */
IntegerLiteral = 0 | [1-9][0-9]*
IntegerLiteral = 0 | [1-9][0-9]*

/* Real number literals */
RealLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}? [fF]
RealLiteral = ({FLit1}|{FLit2}|{FLit3}){Exponent}?[fF]

FLit1 = [0-9]+ \. [0-9]*
FLit2 = \. [0-9]+
FLit3 = [0-9]+
Exponent = [eE] [+-]? [0-9]+


/* string literals */
StringCharacter = [^\r\n\"\\]
SingleCharacter = [^\r\n\'\\]
FLit1 = [0-9]+\.[0-9]*
FLit2 = \.[0-9]+
FLit3 = [0-9]+
Exponent = [eE][+-]?[0-9]+

UNKNOWN_TOKEN = .

%state STRING, CHARLITERAL
%state STRING

%%
<YYINITIAL> {

/* keywords */
"is" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"var" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"if" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"then" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"then" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"else" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"end" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"while" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"for" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"loop" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"return" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"print" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"funct" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.KEYWORD; }
"is" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_IS; }
"var" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_VAR; }
"if" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_IF; }
"then" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_THEN; }
"else" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_ELSE; }
"end" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_END; }
"while" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_WHILE; }
"for" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_FOR; }
"in" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_IN; }
"loop" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_LOOP; }
"return" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_RETURN; }
"print" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_PRINT; }
"func" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_FUNC; }


/* boolean literals */
"true" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.BOOLEAN_LITERAL; }
"false" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.BOOLEAN_LITERAL; }
"true" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_TRUE; }
"false" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.KW_FALSE; }


/* separators */
"(" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.LPAREN; }
")" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.RPAREN; }
"{" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.LBRACE; }
"}" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.RBRACE; }
"[" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.LBRACK; }
"]" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.RBRACK; }
";" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.SEMICOLON; }
"," { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.COMMA; }
"." { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.DOT; }
"(" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.LPAREN; }
")" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.RPAREN; }
"{" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.LBRACE; }
"}" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.RBRACE; }
"[" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.LBRACK; }
"]" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.RBRACK; }
";" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.SEMICOLON; }
"," { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.COMMA; }
"." { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.DOT; }

/* operators */
"=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.EQ; }
":=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.ASSIGN; }
">" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.GT; }
"<" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.LT; }
"<=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.LTEQ; }
">=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.GTEQ; }
"+" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.PLUS; }
"-" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.MINUS; }
"*" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.MULT; }
"/" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.DIV; }
"=>" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.EXPR; }
"or" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.TOKEN_OR; }
"and" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.TOKEN_AND; }
"xor" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.TOKEN_XOR; }
"=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.EQ; }
":=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.ASSIGN; }
">" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.GT; }
"<" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.LT; }
"<=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.LTEQ; }
">=" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.GTEQ; }
"+" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.PLUS; }
"-" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.MINUS; }
"*" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.MULT; }
"/" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.DIV; }
"or" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.OR; }
"and" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.AND; }
"xor" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.XOR; }
"not" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.NOT; }

/* numeric literals and identifiers */

{IntegerLiteral} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.INTEGER_LITERAL; }
{RealLiteral} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.REAL_LITERAL; }
{IntegerLiteral} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.INTEGER_LITERAL; }
{RealLiteral} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.REAL_LITERAL; }
{Identifier} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.IDENTIFIER; }

/* comments */
{Comment} { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.COMMENT; }
{Comment} { /* skip comments: no token is returned (a bare ';' is not a JFlex action) */ }

/* whitespace */
{WhiteSpace} { return ParserLexer.WHITESPACE; }

<STRING> {
\" { yybegin(YYINITIAL); return ParserLexer.STRING }

{StringCharacter}+ { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return ParserLexer.STRING; }

/* escape sequences */
"\\n" { return ParserLexer.STRING; }
"\\\"" { return ParserLexer.STRING; }
"\\'" { return ParserLexer.STRING; }
"\\\\" { return ParserLexer.STRING; }
{WhiteSpace} { /* skip whitespace: no token is returned (a bare ';' is not a JFlex action) */ }

/* error cases */
\\. { throw new RuntimeException("Illegal escape sequence \""+yytext()+"\""); }
{LineTerminator} { throw new RuntimeException("Unterminated string at end of line"); }
/* String literal: an opening quote, then any run of ordinary characters
   (anything except backslash, quote, CR, LF) or backslash-escaped characters,
   then a closing quote. The pattern is written unquoted because a quoted
   JFlex pattern is matched as literal text rather than as a regex. */
\"([^\\\"\r\n]|\\[^\r\n])*\" { System.out.println("[token at line " + yyline + ":" + yycolumn + " = \"" + yytext() + "\"]"); return Lexer.STRING; }
}

/* UNKNOWN TOKENS */
{UNKNOWN_TOKEN} {return ParserLexer.UNKNOWN_TOKEN;}
{UNKNOWN_TOKEN} {return Lexer.UNKNOWN_TOKEN;}

/* error fallback */
[^] { throw new RuntimeException("Illegal character \""+yytext()+"\" at line "+yyline+", column "+yycolumn); }
<<EOF>> { return ParserLexer.EOF; }
<<EOF>> { return Lexer.EOF; }

Loading