欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

c编译器的实现,利用flex实现字符串的识别,利用bison实现ast语法树的构建

程序员文章站 2024-01-02 14:43:40
无意中在github上发现一个很有意思的项目。它利用flex实现了字符串的识别,利用bison实现了ast语法树的构建,最后直接利用ast进行计算和识别。 比如,它的lex文件是...

无意中在github上发现一个很有意思的项目。它利用flex实现了字符串的识别,利用bison实现了ast语法树的构建,最后直接利用ast进行计算和识别。

比如,它的lex文件是这么写的,

/***********************************/
/* File: cmm.l    version 2.0      */
/* Flex version for CMM            */
/* CMM Interpreter Construction    */
/***********************************/
%option noyywrap
%{
#include "globals.h"
#include "util.h"
#include "scan.h"
#include "cmm.tab.h"
%}

digit		[0-9]
int_num		{digit}+
real_num	{digit}+"."{digit}*
letter		[a-zA-Z]
identifier	{letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+
whitespace	[ \t]*

%%

"if"		{ return IF;}
"else"		{ return ELSE;}
"while"		{ return WHILE;}
"int"		{ yylval.dataType = Int; return INT;} 
"real"		{ yylval.dataType = Real; return REAL;}
"bool"		{ yylval.dataType = Bool; return BOOL;}
"read"		{ return READ;}
"write"		{ return WRITE;}
"main"		{ return MAIN;}

{int_num}		{ yylval.intval = atoi(yytext); return INT_VALUE;}
{real_num}		{ yylval.realval = atof(yytext); return REAL_VALUE;}
{identifier}	{ yylval.idName = copyString(yytext); return ID;}
{whitespace}	{/*Do nothing.*/}

"+"		{ return PLUS;}
"-"		{ return SUB;}
"*"		{ return MUL;}
"/"		{ return DIV;}
"<"		{ yylval.binaryOperator = 1; return REL_OP;}
">"		{ yylval.binaryOperator = 2; return REL_OP;}
"<="	{ yylval.binaryOperator = 3; return REL_OP;}
">="	{ yylval.binaryOperator = 4; return REL_OP;}
"=="	{ yylval.binaryOperator = 5; return REL_OP;}
"<>"	{ yylval.binaryOperator = 6; return REL_OP;}
"&&"	{ yylval.binaryOperator = 7; return REL_OP;}
"||"	{ yylval.binaryOperator = 8; return REL_OP;}

"="		{ return ASSIGN;}
"("		{ return LPAREN;}
")"		{ return RPAREN;}
";"		{ return SEMI;}
"{"		{ return LBRACE;}
"}"		{ return RBRACE;}
"["		{ return LBRACKET;}
"]"		{ return RBRACKET;}
","		{ return COMMA;}

"//"	{ char c = input();
		  while(c != '\n')
		  {	if (c == EOF) break;
		  	c = input();
		  }
		  lineno++;
		}
"/*"	{	char c;
			int flag = 1;
			do
			{	c = input();
				entry1:
				if (c == EOF) break;
				if (c == '\n') lineno++;
				if (c == '*')
				{	c = input();
					if (c == '/')
						flag = 0;
					else
						goto entry1;
				}
			} while (flag);
		}
\n		{lineno++;}
.		{yyerror("Mystery character %s\n", yytext);return ERROR;}

%%
/* 用于语法分析时初始化词法分析接口 */
void iniLexer(void)
{
	static int firstTime = TRUE;
	lineno = 0;
	if (firstTime)
	{	firstTime = FALSE;
		lineno++;
		yyin = source;
		yyout = listing;
	}
}
/* 词法分析器专用 
TokenType getToken(void)
{	static int firstTime = TRUE;
	TokenType currentToken;
	if (firstTime)
	{	firstTime = FALSE;
		lineno++;
		yyin = source;
		yyout = listing;
	}
	currentToken = yylex();
	strncpy(tokenString,yytext,MAXTOKENLEN);
	if (TraceScan) {
		fprintf(listing,"\t%d: ",lineno);
		printToken(currentToken);
	}
	return currentToken;
}*/

它的yacc文件是这么写的,

/***********************************/
/* File: cmm.y                     */
/* Bison grammar file about CMM    */
/* CMM Interpreter Construction    */
/***********************************/

/*预计有1个移进/归约冲突*/
%expect 1
%{

#include "globals.h"
#include "util.h"
#include "scan.h"
#include "parse.h"

static TreeNode * savedTree; /* stores syntax tree for later return */

%}

%union{
	struct treeNode * node;
	int intval;
	double realval;
	char * idName;
	int binaryOperator;
	int dataType;
}
%token  INT_VALUE
%token  REAL_VALUE
%token  ID
%token  INT REAL BOOL

/* 优先级声明 */
%right ASSIGN
%left PLUS SUB
%left MUL DIV
%nonassoc  REL_OP
%nonassoc UMINUS
/* 声明文法中用到的tokens */
%token IF ELSE WHILE READ WRITE MAIN
%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA
%token ASSIGN
%token NEWLINE ERROR

%type  stmt_list stmt
%type  if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt
%type  exp factor bin_exp
%type  type_spec

%start program

%% /* CMM文法 */

program		: stmt_list
				{ savedTree = $1;}
			;
stmt_list	:	{ $$ = NULL; }
			| stmt_list stmt 
				{	TreeNode * t = $1;
					if (t != NULL)
					{	while (t->sibling != NULL){ t = t->sibling;}
						t->sibling = $2;
						$$ = $1;
					}
					else $$ = $2;
				}
			;
stmt		: if_stmt { $$ = $1; }
			| decl_stmt SEMI { $$ = $1; }
			| compound_stmt { $$ = $1; }
			| while_stmt { $$ = $1; }
			| assign_stmt SEMI { $$ = $1; }
			| read_stmt SEMI { $$ = $1; }
			| write_stmt SEMI { $$ = $1; }
			| error  { $$ = NULL; }
			;
compound_stmt
			: LBRACE stmt_list RBRACE
				{	$$ = newStmtNode(CompoundK);
					$$->child[0] = $2;
					$$->lineno = lineno;
				}
			;
decl_stmt	: type_spec ID 
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					/* 数组长度为0代表非数组 */
					$$->arrayLength = 0;
					$$->type = $1;
					$$->lineno = lineno;
				}
			| type_spec ID LBRACKET INT_VALUE RBRACKET
				{	$$ = newStmtNode(DeclK);
					$$->attr.name = $2;
					$$->arrayLength = $4;
					$$->type = $1;
					if($$->type == Int) {
						int int_array_temp[$4];
						$$->array.intArray = int_array_temp;
					}
					else if($$->type == Real) {
						double real_array_temp[$4];
						$$->array.realArray = real_array_temp;
					}
					$$->lineno = lineno;
				}
			;
type_spec	: INT 	{ $$ = $1; }
			| REAL 	{ $$ = $1; }
			| BOOL	{ $$ = $1; }
			;
if_stmt     : IF LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = NULL;
					$$->lineno = lineno;
				}
			| IF LPAREN bin_exp RPAREN stmt ELSE stmt
				{	$$ = newStmtNode(IfK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->child[2] = $7;
					$$->lineno = lineno;
				}
			;
while_stmt	: WHILE LPAREN bin_exp RPAREN stmt
				{	$$ = newStmtNode(WhileK);
					$$->child[0] = $3;
					$$->child[1] = $5;
					$$->lineno = lineno;
				}
			;
assign_stmt : ID ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->child[1] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET ASSIGN exp
				{	$$ = newStmtNode(AssignK);
					$$->attr.name = $1;
					/* child[1]不为NULL 表示引用的变量是一个数组元素 */
					/* child[0] 是数组元素的下标 */
					$$->child[0] = $3;
					$$->child[1] = $6;
					$$->lineno = lineno;
				}
			;
read_stmt   : READ LPAREN ID RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| READ LPAREN ID LBRACKET exp RBRACKET RPAREN
				{	$$ = newStmtNode(ReadK);
					$$->attr.name = $3;
					$$->child[0] = $5;
					$$->lineno = lineno;
				}
			;
write_stmt  : WRITE LPAREN exp RPAREN
				{	$$ = newStmtNode(WriteK);
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			;
exp			: factor
				{	$$ = $1; }
			| bin_exp
				{	$$ = $1; }
			;
factor		: INT_VALUE
				{	$$ = newExpNode(IntValueK);
					$$->value.int_val = $1;
					$$->type = Int;
					$$->lineno = lineno;
				}
			| REAL_VALUE
				{	$$ = newExpNode(RealValueK);
					$$->value.real_val = $1;
					$$->type = Real;
					$$->lineno = lineno;
				}
			| LPAREN exp RPAREN
				{	$$ = $2; }
			| ID
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					/* child[0]为NULL代表该变量不是数组,用作解释时 */
					$$->child[0] = NULL;
					$$->lineno = lineno;
				}
			| ID LBRACKET exp RBRACKET
				{	$$ = newExpNode(IdK);
					$$->attr.name = $1;
					$$->child[0] = $3;
					$$->lineno = lineno;
				}
			| error { $$ = NULL; }
			;
bin_exp     : /* 关系运算符 */
			  exp REL_OP exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = $2;
					$$->lineno = lineno;
				}
			/* 算数运算符 */
			| exp PLUS exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = PLUS;
					$$->lineno = lineno;
				}
			| exp SUB exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = SUB;
					$$->lineno = lineno;
				}
			| exp MUL exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = MUL;
					$$->lineno = lineno;
				}
			| exp DIV exp
				{	$$ = newExpNode(OpK);
					$$->child[0] = $1;
					$$->child[1] = $3;
					$$->attr.op = DIV;
					$$->lineno = lineno;
				}
			| SUB exp %prec UMINUS
				{	$$ = newExpNode(OpK);
					$$->child[0] = $2;
					$$->child[1] = NULL;
					$$->attr.op = UMINUS;
					$$->lineno = lineno;
				}
			;

%%

int yyerror(char * message)
{	fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);
	fprintf(listing,"Current token: %s",tokenString);
	printToken(yychar);
	Error = TRUE;
	return 0;
}
/* 与主函数交互的语法分析函数 */
TreeNode * parse(void)
{	iniLexer();
	yyparse();
	return savedTree;
}

编译也非常简单,

#The makefile for interpreter based on Flex and Bison
CC = gcc
FLAG = -w
interpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
	$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o
main.o: main.c parse.h analyze.h
	$(CC) $(FLAG) -c main.c
cmm.tab.o:cmm.tab.c parse.h
	$(CC) $(FLAG) -c cmm.tab.c
cmm.tab.c:cmm.y
	bison -d cmm.y
cmm.lex.o:cmm.lex.c cmm.tab.h
	$(CC) $(FLAG) -c cmm.lex.c
cmm.lex.c:cmm.l
	flex -o cmm.lex.c cmm.l
symtab.o:symtab.c symtab.h globals.h
	$(CC) $(FLAG) -c symtab.c
analyze.o:analyze.c globals.h symtab.h analyze.h
	$(CC) $(FLAG) -c analyze.c
util.o:util.c
	$(CC) $(FLAG) -c util.c

clean:
	-rm *.o

最后测试的时候,就是直接利用./interpreter [file_name]就可以了。

有兴趣的同学可以把这个项目下载下来,多看看,了解一下如果利用flex&bison快速实现一个脚本语言。

上一篇:

下一篇: