欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页  >  IT编程

利用Java实现简单的词法分析器实例代码

程序员文章站 2024-03-08 22:32:52
首先看下我们要分析的代码段如下: 输出结果如下: 输出结果(a).png 输出结果(b).png 输出结果(c).png 括号里是一个二元...

首先看下我们要分析的代码段如下:

利用Java实现简单的词法分析器实例代码

输出结果如下:

利用Java实现简单的词法分析器实例代码
输出结果(a).png

利用Java实现简单的词法分析器实例代码

输出结果(b).png

利用Java实现简单的词法分析器实例代码

输出结果(c).png

括号里是一个二元式:(单词类别编码,单词位置编号)

代码如下:

package yue.lexicalanalyzer;

import java.io.*;

/*
 * 主程序
 */
/**
 * Program entry point: runs the lexical analyzer over its input file and
 * prints the token stream followed by the symbol table.
 */
public class main {
  /**
   * Creates a {@code lexer}, dumps every token it recognizes and then dumps
   * the symbol table that was built while scanning.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if reading the input or writing an output file fails
   */
  public static void main(String[] args) throws IOException {
    lexer analyzer = new lexer();
    // The symbol table is filled as a side effect of scanning, so the
    // tokens must be printed first.
    analyzer.printtoken();
    analyzer.printsymbolstable();
  }
}
package yue.lexicalanalyzer;

import java.io.*;
import java.util.*;

/*
 * 词法分析并输出
 */
/**
 * Hand-written lexical analyzer.
 *
 * <p>Reads the input file one character at a time, groups the characters into
 * tokens (numbers, reserved words, identifiers, operators, delimiters, line
 * ends and the {@code #} end marker) and records identifiers in a symbol
 * table. Results are echoed to standard output and written to text files.
 */
public class lexer {
  /** Current line number of the input (starts at 1). */
  public static int line = 1;
  /** Most recently read character; starts as a blank so the first scan reads from the file. */
  char character = ' ';

  /** Reserved words, keyed by their spelling. */
  Hashtable<String, keyword> keywords = new Hashtable<String, keyword>();
  /** Every token recognized so far, in input order. */
  private ArrayList<token> tokens = new ArrayList<token>();
  /** Symbol table: distinct identifiers in order of first appearance. */
  private ArrayList<symbol> symtable = new ArrayList<symbol>();

  /** Source of input characters; stays null if the input file could not be opened. */
  BufferedReader reader = null;
  /** Set once the end of the input has been reached. */
  private boolean isend = false;
  /**
   * Set by {@link #issign()} when the first character of a two-character
   * lexeme was consumed but the second did not match. In that state
   * {@code character} already holds the unprocessed lookahead.
   */
  private boolean lookaheadPending = false;

  /**
   * Reports whether the end of the input has been reached.
   *
   * @return {@code true} once the reader is exhausted (or could not be opened)
   */
  public boolean getreaderstate() {
    return this.isend;
  }

  /**
   * Scans the whole input and writes one line per token to standard output
   * and to {@code e:\lex.txt}.
   *
   * @throws IOException if reading the input or writing the file fails
   */
  public void printtoken() throws IOException {
    // try-with-resources closes (and therefore flushes) the file even when scan() throws.
    try (FileWriter writer = new FileWriter("e:\\lex.txt")) {
      System.out.println("词法分析结果如下:");
      System.out.print("杜悦-2015220201031\r\n\n");
      writer.write("杜悦-2015220201031\r\n\r\n");
      while (!getreaderstate()) {
        String str = describe(scan());
        writer.write(str);
        System.out.print(str);
      }
    }
  }

  /**
   * Writes the symbol table (number, line, name) to standard output and to
   * {@code e:\symtab1.txt}.
   *
   * @throws IOException if writing the file fails
   */
  public void printsymbolstable() throws IOException {
    try (FileWriter writer = new FileWriter("e:\\symtab1.txt")) {
      System.out.print("\r\n\r\n符号表\r\n");
      System.out.print("编号\t行号\t名称\r\n");
      writer.write("符号表\r\n");
      writer.write("编号 " + "\t行号 " + "\t名称 \r\n");
      for (symbol entry : symtable) {
        String desc = entry.pos + "\t" + entry.line + "\t" + entry.tostring();
        System.out.print(desc + "\r\n");
        writer.write(desc + "\r\n");
      }
    }
  }

  /**
   * Reports a lexical error for the given token on standard output and in
   * {@code e:\error.txt}.
   *
   * <p>NOTE(review): the file is opened in truncate mode on every call, so it
   * only ever holds the most recent error.
   *
   * @param tok token wrapping the offending character
   * @throws IOException if writing the file fails
   */
  public void printerror(token tok) throws IOException {
    // The writer used to be left open and unflushed, so the text never reached the file.
    try (FileWriter writer = new FileWriter("e:\\error.txt")) {
      System.out.print("\r\n\r\n错误词法如下:\r\n");
      writer.write("错误词法如下:\r\n");
      writer.write(describe(tok));
    }
  }

  /** Formats one token as the report line shared by the token and error listings. */
  private static String describe(token tok) {
    return "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
        + tok.name + ": " + tok.tostring() + "\r\n";
  }

  /**
   * Registers a reserved word so that {@link #isletter()} can tell it apart
   * from an identifier.
   *
   * @param w reserved word to register
   */
  void reserve(keyword w) {
    keywords.put(w.lexme, w);
  }

  /**
   * Opens the input file {@code e:\输入.txt} and registers the reserved words.
   * If the file cannot be opened the exception is printed and the lexer
   * behaves as if the input were empty.
   */
  public lexer() {
    try {
      reader = new BufferedReader(new FileReader("e:\\输入.txt"));
    } catch (IOException e) {
      System.out.print(e);
      // Without a reader there is nothing to scan; treating the input as
      // exhausted avoids a NullPointerException on the first read.
      isend = true;
    }

    this.reserve(keyword.begin);
    this.reserve(keyword.end);
    this.reserve(keyword.integer);
    this.reserve(keyword.function);
    this.reserve(keyword.read);
    this.reserve(keyword.write);
    this.reserve(keyword.aif);
    this.reserve(keyword.athen);
    this.reserve(keyword.aelse);
  }

  /**
   * Reads the next character into {@code character}. At end of input the
   * end flag is set and {@code character} becomes {@code (char) -1}.
   *
   * @throws IOException if the underlying reader fails
   */
  public void readch() throws IOException {
    // Test the int result for -1 before narrowing, so a real U+FFFF in the
    // input is not mistaken for end of file.
    int next = (reader == null) ? -1 : reader.read();
    if (next == -1) {
      this.isend = true;
    }
    character = (char) next;
  }

  /**
   * Reads the next character and checks it against {@code ch}.
   *
   * @param ch expected character
   * @return {@code true} if it matched (the character is then replaced by a
   *     blank); {@code false} otherwise, leaving the mismatching character
   *     in {@code character}
   * @throws IOException if the underlying reader fails
   */
  public boolean readch(char ch) throws IOException {
    readch();
    if (this.character != ch) {
      return false;
    }

    this.character = ' ';
    return true;
  }

  /**
   * Recognizes an unsigned decimal integer starting at the current character.
   *
   * <p>NOTE(review): the value is accumulated in an {@code int} and wraps
   * silently on overflow.
   *
   * @return {@code true} if a number token was appended to the token list
   * @throws IOException if the underlying reader fails
   */
  public boolean isdigit() throws IOException {
    if (!Character.isDigit(character)) {
      return false;
    }
    int value = 0;
    while (Character.isDigit(character)) {
      value = 10 * value + Character.digit(character, 10);
      readch();
    }

    num n = new num(value);
    n.line = line;
    tokens.add(n);
    return true;
  }

  /**
   * Recognizes a reserved word or an identifier starting at the current
   * character. New identifiers are appended to the symbol table; repeated
   * ones reuse the position of their first occurrence.
   *
   * @return {@code true} if a token was appended to the token list
   * @throws IOException if the underlying reader fails
   */
  public boolean isletter() throws IOException {
    if (!Character.isLetter(character)) {
      return false;
    }

    // Collect the whole run of letters and digits first.
    StringBuffer sb = new StringBuffer();
    while (Character.isLetterOrDigit(character)) {
      sb.append(character);
      readch();
    }
    String s = sb.toString();

    keyword w = keywords.get(s);
    if (w != null) {
      // Reserved word: the shared keyword instance is reused.
      w.line = line;
      tokens.add(w);
      return true;
    }

    // Identifier: look for an earlier occurrence in the symbol table.
    symbol sy = new symbol(s);
    sy.line = line;
    symbol known = null;
    for (symbol candidate : symtable) {
      if (s.equals(candidate.tostring())) {
        known = candidate;
        break;
      }
    }
    if (known == null) {
      sy.pos = symtable.size() + 1;
      symtable.add(sy);
    } else {
      sy.pos = known.pos;
    }
    tokens.add(sy);
    return true;
  }

  /**
   * Recognizes an operator, delimiter, line end ({@code \r\n}) or the
   * {@code #} end marker starting at the current character.
   *
   * @return {@code true} if a token was appended to the token list;
   *     {@code false} if the current character starts no known sign, or if
   *     the second character of a two-character sign did not match
   * @throws IOException if the underlying reader fails
   */
  public boolean issign() throws IOException {
    switch (character) {
      case '#':
        return acceptToken(allend.allend);
      case '\r':
        if (readch('\n')) {
          acceptToken(lineend.lineend);
          line++;
          return true;
        }
        // A lone '\r' is an error; it must not fall through into the '(' case.
        lookaheadPending = true;
        return false;
      case '(':
        return acceptToken(delimiter.lpar);
      case ')':
        return acceptToken(delimiter.rpar);
      case ';':
        return acceptToken(delimiter.sem);
      case '+':
        return acceptToken(calcword.add);
      case '-':
        return acceptToken(calcword.sub);
      case '*':
        return acceptToken(calcword.mul);
      case '/':
        return acceptToken(calcword.div);
      case ':':
        return acceptPair('=', calcword.assign);
      case '>':
        return acceptPair('=', calcword.ge);
      case '<':
        return acceptPair('=', calcword.le);
      case '!':
        return acceptPair('=', calcword.ne);
      default:
        return false;
    }
  }

  /** Advances past the current character and appends {@code tok}, stamped with the current line. */
  private boolean acceptToken(token tok) throws IOException {
    readch();
    tok.line = line;
    tokens.add(tok);
    return true;
  }

  /**
   * Completes a two-character sign: appends {@code tok} if the next character
   * is {@code second}, otherwise flags that the lookahead is still pending.
   */
  private boolean acceptPair(char second, token tok) throws IOException {
    if (readch(second)) {
      return acceptToken(tok);
    }
    lookaheadPending = true;
    return false;
  }

  /**
   * Skips blanks and returns the next token. A character that starts no
   * valid token is reported through {@link #printerror(token)} and returned
   * wrapped in a plain {@code token}.
   *
   * <p>NOTE(review): tabs and a bare {@code '\n'} are not skipped and are
   * therefore reported as errors; the end-of-input marker is reported the
   * same way when the file ends in blanks.
   *
   * @return the recognized token, or an error token for an invalid character
   * @throws IOException if reading the input or writing the error file fails
   */
  public token scan() throws IOException {
    while (character == ' ') {
      readch();
    }

    char first = character;
    lookaheadPending = false;
    token tok;
    // Once issign() has consumed the first character of a broken sign, the
    // lookahead must not be handed to isletter() as part of this token.
    if (isdigit() || issign() || (!lookaheadPending && isletter())) {
      tok = tokens.get(tokens.size() - 1);
    } else {
      tok = new token(first);
      tok.line = line;
      printerror(tok);
      // Consume the offending character so the next call makes progress,
      // unless issign() already advanced past it.
      if (!lookaheadPending) {
        readch();
      }
    }
    return tok;
  }
}
package yue.lexicalanalyzer;

/*
 * token父类
 */
/**
 * Base class of every token produced by the lexer. A plain {@code token}
 * wraps a single character whose code is stored in {@code tag}.
 */
public class token {
  /** Category code of the token; for a plain token, the character code. */
  public final int tag;
  /** Line on which the token was found. */
  public int line = 1;
  /** Display name of the token category; empty for a plain token. */
  public String name = "";
  /** Position number attached to the token; 0 unless assigned elsewhere. */
  public int pos = 0;

  /**
   * @param t category code (or character code) of the token
   */
  public token(int t) {
    this.tag = t;
  }

  /**
   * Returns the token text; for a plain token, the character whose code is {@code tag}.
   *
   * @return textual form of the token
   */
  public String tostring() {
    return "" + (char) tag;
  }

}
package yue.lexicalanalyzer;

/*
 * 单词类别赋值
 */
/**
 * Numeric category codes, one per kind of token.
 *
 * NOTE(review): {@code if} and {@code else} are reserved words in Java and
 * cannot be declared as identifiers, so this class does not compile as
 * written. The listing looks as if it was lower-cased (presumably
 * {@code IF}/{@code THEN}/{@code ELSE} originally) - confirm against the
 * original source; any rename must be mirrored where these constants are used.
 */
public class tag {
  public final static int
      begin = 1,     // reserved word
      end = 2,      // reserved word
      integer = 3,    // reserved word
      function = 4,    // reserved word
      read = 5,      // reserved word
      write = 6,     // reserved word
      if = 7,       // reserved word
      then = 8,      // reserved word
      else = 9,      // reserved word
      symbol = 11,    // identifier
      constant = 12,   // constant
      add = 13,      // operator "+"
      sub = 14,      // operator "-"
      mul = 15,      // operator "*"
      div = 16,      // operator "/"
      le = 18,      // operator "<="
      ge = 19,      // operator ">="
      ne = 20,      // operator "!="
      assign = 23,    // operator ":="
      lpar = 24,     // delimiter "("
      rpar = 25,     // delimiter ")"
      sem = 26,      // delimiter ";"
      line_end = 27,   // end-of-line marker
      all_end = 28;    // end marker "#"
}
package yue.lexicalanalyzer;

/**
 * 保留字
 */
/**
 * Reserved-word token. The class also holds one shared instance per
 * reserved word of the analyzed language.
 */
public class keyword extends token {
  /** Spelling of the reserved word. */
  public String lexme = "";

  /**
   * @param s spelling of the reserved word
   * @param t category code of the reserved word
   */
  public keyword(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "保留字";
  }

  /**
   * Returns the spelling of the reserved word.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

  // NOTE(review): tag.if and tag.else use Java reserved words as member
  // names and will not compile until those constants are renamed in the tag
  // class; the references are kept unchanged here to stay in step with it.
  public static final keyword
      begin = new keyword("begin", tag.begin),
      end = new keyword("end", tag.end),
      integer = new keyword("integer", tag.integer),
      function = new keyword("function", tag.function),
      read = new keyword("read", tag.read),
      write = new keyword("write", tag.write),
      aif = new keyword("if", tag.if),
      athen = new keyword("then", tag.then),
      aelse = new keyword("else", tag.else);
}
package yue.lexicalanalyzer;

/*
 * 标识符
 */
/**
 * Identifier token.
 */
public class symbol extends token {
  /** Spelling of the identifier. */
  public String lexme = "";

  /**
   * @param s spelling of the identifier
   */
  public symbol(String s) {
    super(tag.symbol);
    this.lexme = s;
    this.name = "标识符";
  }

  /**
   * Returns the spelling of the identifier.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

}
package yue.lexicalanalyzer;

/**
 * 运算符
 */
/**
 * Operator token. The class also holds one shared instance per operator.
 */
public class calcword extends token {
  /** Spelling of the operator. */
  public String lexme = "";

  /**
   * @param s spelling of the operator
   * @param t category code of the operator
   */
  public calcword(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "运算符";
  }

  /**
   * Returns the spelling of the operator.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

  public static final calcword
      add = new calcword("+", tag.add),
      sub = new calcword("-", tag.sub),
      mul = new calcword("*", tag.mul),
      div = new calcword("/", tag.div),
      le = new calcword("<=", tag.le),
      ge = new calcword(">=", tag.ge),
      ne = new calcword("!=", tag.ne),
      assign = new calcword(":=", tag.assign);
}
package yue.lexicalanalyzer;

/**
 * 界符
 */
/**
 * Delimiter token. The class also holds one shared instance per delimiter.
 */
public class delimiter extends token {
  /** Spelling of the delimiter. */
  public String lexme = "";

  /**
   * @param s spelling of the delimiter
   * @param t category code of the delimiter
   */
  public delimiter(String s, int t) {
    super(t);
    this.lexme = s;
    this.name = "界符";
  }

  /**
   * Returns the spelling of the delimiter.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

  public static final delimiter
      lpar = new delimiter("(", tag.lpar),
      rpar = new delimiter(")", tag.rpar),
      sem = new delimiter(";", tag.sem);
}
package yue.lexicalanalyzer;

/*
 * 常数
 */
/**
 * Integer-constant token.
 */
public class num extends token {
  /** Numeric value of the constant. */
  public final int value;

  /**
   * @param v numeric value of the constant
   */
  public num(int v) {
    super(tag.constant);
    this.value = v;
    this.name = "常数";
  }

  /**
   * Returns the value rendered in decimal.
   *
   * @return decimal text of the constant
   */
  public String tostring() {
    return "" + value;
  }
}
package yue.lexicalanalyzer;

/**
 * 行尾符
 */
/**
 * End-of-line token. A single shared instance is exposed as {@code lineend}.
 */
public class lineend extends token {
  /** Text of the line terminator. */
  public String lexme = "";

  /**
   * @param s text of the line terminator
   */
  public lineend(String s) {
    super(tag.line_end);
    this.lexme = s;
    this.name = "行尾符";
  }

  /**
   * Returns the text of the line terminator.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

  public static final lineend lineend = new lineend("\r\n");
}
package yue.lexicalanalyzer;

/**
 * 结尾符
 */
/**
 * End-of-program token ({@code #}). A single shared instance is exposed as
 * {@code allend}.
 */
public class allend extends token {
  /** Text of the end marker. */
  public String lexme = "";

  /**
   * @param s text of the end marker
   */
  public allend(String s) {
    super(tag.all_end);
    this.lexme = s;
    this.name = "结尾符";
  }

  /**
   * Returns the text of the end marker.
   *
   * @return the lexeme
   */
  public String tostring() {
    return this.lexme;
  }

  public static final allend allend = new allend("#");
}

总结

以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者工作能带来一定的帮助,如果有疑问大家可以留言交流。