利用Java实现简单的词法分析器实例代码
程序员文章站
2024-03-08 22:32:52
首先看下我们要分析的代码段如下:
输出结果如下:
输出结果(a).png
输出结果(b).png
输出结果(c).png
括号里是一个二元式:(单词类别编码,单词位置编号)
首先看下我们要分析的代码段如下:
输出结果如下:
输出结果(a).png
输出结果(b).png
输出结果(c).png
括号里是一个二元式:(单词类别编码,单词位置编号)
代码如下:
package yue.lexicalanalyzer;

import java.io.*;

/**
 * Program entry point.
 *
 * <p>Builds a {@code lexer}, prints the token sequence it recognises and then
 * prints the symbol table collected while scanning.
 */
public class main {
    /**
     * Runs the lexical analysis.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if reading the input or writing a result file fails
     */
    public static void main(String[] args) throws IOException {
        lexer analyzer = new lexer();
        analyzer.printtoken();
        analyzer.printsymbolstable();
    }
}
package yue.lexicalanalyzer;

import java.io.*;
import java.util.*;

/**
 * Lexical analyzer: reads the input file one character at a time, splits it
 * into tokens and prints the token sequence and the symbol table.
 *
 * <p>NOTE: keyword, operator, delimiter, line-end and end-marker tokens are
 * shared static instances whose {@code line} field is overwritten every time
 * they are recognised, so only the most recent occurrence's line is retained
 * on them.
 */
public class lexer {
    /** Line number of the input currently being scanned (starts at 1). */
    public static int line = 1;
    /** Most recently read character; a space means nothing is pending. */
    char character = ' ';
    /** Reserved words, keyed by their spelling. */
    Hashtable<String, keyword> keywords = new Hashtable<String, keyword>();
    /** Every token recognised so far, in input order. */
    private ArrayList<token> tokens = new ArrayList<token>();
    /** Symbol table: one entry per distinct identifier. */
    private ArrayList<symbol> symtable = new ArrayList<symbol>();
    /** Reader over the input file. */
    BufferedReader reader = null;
    /** Set once the end of the input has been reached. */
    private boolean isend = false;
    /** Number of characters pulled from the reader; lets scan() detect consumption. */
    private long readcount = 0;
    /** True once the error file has been created and its header written. */
    private boolean errorfilestarted = false;

    /**
     * Tells whether the end of the input has been reached.
     *
     * @return {@code true} once the reader has returned end-of-file
     */
    public boolean getreaderstate() {
        return this.isend;
    }

    /**
     * Scans the whole input and writes every token to the console and to the
     * token listing file.
     *
     * @throws IOException if reading the input or writing the listing fails
     */
    public void printtoken() throws IOException {
        FileWriter writer = new FileWriter("e:\\lex.txt");
        try {
            System.out.println("词法分析结果如下:");
            System.out.print("杜悦-2015220201031\r\n\n");
            writer.write("杜悦-2015220201031\r\n\r\n");
            while (!getreaderstate()) {
                String str = describe(scan());
                writer.write(str);
                System.out.print(str);
            }
            writer.flush();
        } finally {
            // Always release the file handle, even when scanning fails.
            writer.close();
        }
    }

    /**
     * Writes the symbol table (number, line, name) to the console and to the
     * symbol table file.
     *
     * @throws IOException if writing the file fails
     */
    public void printsymbolstable() throws IOException {
        FileWriter writer = new FileWriter("e:\\symtab1.txt");
        try {
            System.out.print("\r\n\r\n符号表\r\n");
            System.out.print("编号\t行号\t名称\r\n");
            writer.write("符号表\r\n");
            writer.write("编号 " + "\t行号 " + "\t名称 \r\n");
            for (symbol entry : symtable) {
                String desc = entry.pos + "\t" + entry.line + "\t" + entry.tostring();
                System.out.print(desc + "\r\n");
                writer.write(desc + "\r\n");
            }
            writer.flush();
        } finally {
            writer.close();
        }
    }

    /**
     * Records a lexical error in the error file.
     *
     * <p>The first call creates the file and writes its header; later calls
     * append, so every error of a run is kept instead of being truncated away.
     *
     * @param tok the token built from the offending character
     * @throws IOException if writing the file fails
     */
    public void printerror(token tok) throws IOException {
        FileWriter writer = new FileWriter("e:\\error.txt", errorfilestarted);
        try {
            System.out.print("\r\n\r\n错误词法如下:\r\n");
            if (!errorfilestarted) {
                writer.write("错误词法如下:\r\n");
                errorfilestarted = true;
            }
            writer.write(describe(tok));
        } finally {
            // close() also flushes; without it nothing reached the file.
            writer.close();
        }
    }

    /** Formats one token as a listing line: line, (tag,pos), category and text. */
    private static String describe(token tok) {
        return "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
                + tok.name + ": " + tok.tostring() + "\r\n";
    }

    /**
     * Registers a reserved word.
     *
     * @param w the keyword to add to the reserved-word table
     */
    void reserve(keyword w) {
        keywords.put(w.lexme, w);
    }

    /**
     * Opens the input file and registers the reserved words.
     *
     * @throws IllegalStateException if the input file cannot be opened
     */
    public lexer() {
        try {
            reader = new BufferedReader(new FileReader("e:\\输入.txt"));
        } catch (IOException e) {
            // Continuing with a null reader would only fail later with a
            // NullPointerException; fail here and keep the real cause.
            throw new IllegalStateException("cannot open lexer input file e:\\输入.txt", e);
        }
        this.reserve(keyword.begin);
        this.reserve(keyword.end);
        this.reserve(keyword.integer);
        this.reserve(keyword.function);
        this.reserve(keyword.read);
        this.reserve(keyword.write);
        this.reserve(keyword.aif);
        this.reserve(keyword.athen);
        this.reserve(keyword.aelse);
    }

    /**
     * Reads the next character into {@code character}. At end of input the
     * reader is closed and further calls do nothing.
     *
     * @throws IOException if the underlying read fails
     */
    public void readch() throws IOException {
        if (isend) {
            return; // reader already closed
        }
        readcount++;
        character = (char) reader.read();
        // read() returns -1 at end of file, which becomes 0xffff after the cast.
        if ((int) character == 0xffff) {
            this.isend = true;
            reader.close();
        }
    }

    /**
     * Reads the next character and checks it against an expected one.
     *
     * @param ch the expected character
     * @return {@code true} if it matched (the pending character is then reset
     *         to a space); {@code false} otherwise, leaving the character read
     *         as the pending one
     * @throws IOException if the underlying read fails
     */
    public boolean readch(char ch) throws IOException {
        readch();
        if (this.character != ch) {
            return false;
        }
        this.character = ' ';
        return true;
    }

    /**
     * Recognises an unsigned decimal constant starting at the pending character.
     *
     * @return {@code true} if a constant token was appended to the token list
     * @throws IOException if reading fails
     */
    public boolean isdigit() throws IOException {
        if (!Character.isDigit(character)) {
            return false;
        }
        int value = 0;
        while (Character.isDigit(character)) {
            value = 10 * value + Character.digit(character, 10);
            readch();
        }
        num n = new num(value);
        n.line = line;
        tokens.add(n);
        return true;
    }

    /**
     * Recognises a reserved word or an identifier starting at the pending
     * character. A new identifier is appended to the symbol table; a repeated
     * one reuses the number of its first occurrence.
     *
     * @return {@code true} if a token was appended to the token list
     * @throws IOException if reading fails
     */
    public boolean isletter() throws IOException {
        if (!Character.isLetter(character)) {
            return false;
        }
        // Collect the whole run of letters and digits first.
        StringBuilder sb = new StringBuilder();
        while (Character.isLetterOrDigit(character)) {
            sb.append(character);
            readch();
        }
        String s = sb.toString();
        keyword w = keywords.get(s);
        if (w != null) {
            // Reserved word.
            w.line = line;
            tokens.add(w);
            return true;
        }
        // Identifier: look for an earlier occurrence in the symbol table.
        symbol sy = new symbol(s);
        sy.line = line;
        symbol existing = null;
        for (symbol known : symtable) {
            if (s.equals(known.tostring())) {
                existing = known;
                break;
            }
        }
        if (existing == null) {
            sy.pos = symtable.size() + 1;
            symtable.add(sy);
        } else {
            sy.pos = existing.pos;
        }
        tokens.add(sy);
        return true;
    }

    /**
     * Recognises operators, delimiters, the CR LF line end and the '#' end
     * marker starting at the pending character.
     *
     * <p>When the first character of a two-character symbol is not followed by
     * the required second one, {@code false} is returned and the character
     * after it is left pending.
     *
     * @return {@code true} if a token was appended to the token list
     * @throws IOException if reading fails
     */
    public boolean issign() throws IOException {
        switch (character) {
            case '#':
                return accept(allend.allend);
            case '\r':
                if (acceptpair('\n', lineend.lineend)) {
                    line++;
                    return true;
                }
                // A lone CR is not a symbol; do not fall through into '('.
                return false;
            case '(':
                return accept(delimiter.lpar);
            case ')':
                return accept(delimiter.rpar);
            case ';':
                return accept(delimiter.sem);
            case '+':
                return accept(calcword.add);
            case '-':
                return accept(calcword.sub);
            case '*':
                return accept(calcword.mul);
            case '/':
                return accept(calcword.div);
            case ':':
                return acceptpair('=', calcword.assign);
            case '>':
                return acceptpair('=', calcword.ge);
            case '<':
                return acceptpair('=', calcword.le);
            case '!':
                return acceptpair('=', calcword.ne);
            default:
                return false;
        }
    }

    /** Consumes the pending character and records {@code t} as a one-character token. */
    private boolean accept(token t) throws IOException {
        readch();
        t.line = line;
        tokens.add(t);
        return true;
    }

    /** Records {@code t} only if the next character is {@code second}; then reads past it. */
    private boolean acceptpair(char second, token t) throws IOException {
        if (!readch(second)) {
            return false;
        }
        return accept(t);
    }

    /**
     * Returns the next token of the input.
     *
     * <p>Blanks are skipped first. A character that starts no valid token is
     * reported through {@link #printerror(token)} and returned as a plain
     * {@code token} whose tag is that character; it is always consumed, so
     * repeated calls make progress.
     *
     * @return the recognised token, or an error token for an invalid character
     * @throws IOException if reading the input or writing the error file fails
     */
    public token scan() throws IOException {
        while (character == ' ') {
            readch();
        }
        char first = character;
        long before = readcount;
        boolean matched = isdigit() || issign();
        // Only try an identifier if issign() did not already consume input;
        // otherwise the failed symbol prefix would be silently dropped.
        if (!matched && readcount == before) {
            matched = isletter();
        }
        if (matched) {
            return tokens.get(tokens.size() - 1);
        }
        token tok = new token(first);
        tok.line = line;
        printerror(tok);
        if (readcount == before) {
            // Nothing was consumed: skip the offending character, otherwise
            // the caller would receive the same error forever.
            readch();
        }
        return tok;
    }
}
package yue.lexicalanalyzer; /* * token父类 */ public class token { public final int tag; public int line = 1; public string name = ""; public int pos = 0; public token(int t) { this.tag = t; } public string tostring() { return "" + (char) tag; } }
package yue.lexicalanalyzer; /* * 单词类别赋值 */ public class tag { public final static int begin = 1, //保留字 end = 2, //保留字 integer = 3, //保留字 function = 4, //保留字 read = 5, //保留字 write = 6, //保留字 if = 7, //保留字 then = 8, //保留字 else = 9, //保留字 symbol = 11, //标识符 constant = 12, //常数 add = 13, //运算符 "+" sub = 14, //运算符 "-" mul = 15, //运算符 "*" div = 16, //运算符 "/" le = 18, //运算符 "<=" ge = 19, //运算符 ">=" ne = 20, //运算符 "!=" assign = 23, //运算符 ":=" lpar = 24, //界符 "(" rpar = 25, //界符 ")" sem = 26, //界符 ";" line_end = 27, //行尾符 all_end = 28; //结尾符 "#" }
package yue.lexicalanalyzer; /** * 保留字 */ public class keyword extends token { public string lexme = ""; public keyword(string s, int t) { super(t); this.lexme = s; this.name = "保留字"; } public string tostring() { return this.lexme; } public static final keyword begin = new keyword("begin", tag.begin), end = new keyword("end", tag.end), integer = new keyword("integer", tag.integer), function = new keyword("function", tag.function), read = new keyword("read", tag.read), write = new keyword("write", tag.write), aif = new keyword("if", tag.if), athen = new keyword("then", tag.then), aelse = new keyword("else", tag.else); }
package yue.lexicalanalyzer; /* * 标识符 */ public class symbol extends token { public string lexme = ""; public symbol(string s) { super(tag.symbol); this.lexme = s; this.name = "标识符"; } public string tostring() { return this.lexme; } }
package yue.lexicalanalyzer; /** * 运算符 */ public class calcword extends token { public string lexme = ""; public calcword(string s, int t) { super(t); this.lexme = s; this.name = "运算符"; } public string tostring() { return this.lexme; } public static final calcword add = new calcword("+", tag.add), sub = new calcword("-", tag.sub), mul = new calcword("*", tag.mul), div = new calcword("/", tag.div), le = new calcword("<=", tag.le), ge = new calcword(">=", tag.ge), ne = new calcword("!=", tag.ne), assign = new calcword(":=", tag.assign); }
package yue.lexicalanalyzer; /** * 界符 */ public class delimiter extends token { public string lexme = ""; public delimiter(string s, int t) { super(t); this.lexme = s; this.name = "界符"; } public string tostring() { return this.lexme; } public static final delimiter lpar = new delimiter("(", tag.lpar), rpar = new delimiter(")", tag.rpar), sem = new delimiter(";", tag.sem); }
package yue.lexicalanalyzer; /* * 常数 */ public class num extends token { public final int value; public num(int v) { super(tag.constant); this.value = v; this.name = "常数"; } public string tostring() { return "" + value; } }
package yue.lexicalanalyzer; /** * 行尾符 */ public class lineend extends token { public string lexme = ""; public lineend(string s) { super(tag.line_end); this.lexme = s; this.name = "行尾符"; } public string tostring() { return this.lexme; } public static final lineend lineend = new lineend("\r\n"); }
package yue.lexicalanalyzer; /** * 结尾符 */ public class allend extends token { public string lexme = ""; public allend(string s) { super(tag.all_end); this.lexme = s; this.name = "结尾符"; } public string tostring() { return this.lexme; } public static final allend allend = new allend("#"); }
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作能带来一定的帮助,如果有疑问,大家可以留言交流。