欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识。
您现在的位置是: 首页

编译原理--词法分析

程序员文章站 2022-07-13 17:37:00
...

程序流程图

编译原理--词法分析

代码实现

// BianYiYuanLi.cpp: 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <vector>  
#include <string>  
#include <fstream>  
#include <iostream> 
#include <windows.h>

// Input file holding the program text to analyse.
#define SOURCE_FILE_NAME "hello.txt"
// Intermediate file receiving the comment-stripped source.
#define OUT_FILE_NAME "out.txt"
// Output file receiving one "<number>\t<lexeme>" line per token.
#define TOKEN_FILE_NAME "token.txt"

// Number of entries in the global `word` table (32 keywords + 38 operators and
// delimiters). It was 68, which left the last two entries (":" and "!")
// outside every table search; keep it in sync when entries are added.
#define WORD_NUMBER 70


// Token numbers for lexemes that are not table entries. They start above the
// table's index range so they cannot collide with a table index.
#define ID 101
#define CONST_INT 102
#define CONST_DOUBLE 103
#define CONST_CHAR 104
#define CONST_BOOL 105

using namespace std;

// Global table of every fixed lexeme: keywords first, then operators and
// delimiters. The scanner stores an entry's array index as its token number,
// so the order of the entries must not change; extend the table as needed.
static string word[] = {
	// Indices 0-31: keywords.
	"auto", "break", "case", "char", "const", "continue", "default", "do",
	"double", "else", "enum", "extern", "float", "for", "goto", "if",
	"int", "long", "register", "return", "short", "signed", "sizeof", "static",
	"struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while",

	// Indices 32-37: arithmetic operators.
	"+", "-", "++", "--", "*", "/",

	// Indices 38-44: relational operators and assignment.
	"<", "<=", ">", ">=", "=", "==", "!=",

	// Indices 45-52: delimiters and quote characters.
	";", "(", ")", "^", ",", "\"", "\'", "#",

	// Indices 53-60: bitwise, logical and shift operators.
	"&", "&&", "|", "||", "%", "~", "<<", ">>",

	// Indices 61-69: brackets, braces and remaining punctuation.
	"[", "]", "{", "}", "\\", ".", "\?", ":", "!"
};

// Reads the text file `fileName` line by line and appends its content to
// `sourceStr`, terminating every line (including the last) with '\n'.
// Returns 0 on success; prints a message and returns -1 when the file cannot
// be opened, leaving `sourceStr` untouched.
int readFile(IN LPCSTR fileName, IN string& sourceStr) {
	ifstream input(fileName);

	if (input.is_open())
	{
		// getline drops the line terminator, so it is re-added explicitly.
		for (string line; getline(input, line);)
		{
			sourceStr += line;
			sourceStr.push_back('\n');
		}
		return 0;
	}

	// Open failed: report it and signal the error to the caller.
	cout << "未成功打开文件" << endl;
	input.close();
	return -1;
}

// Writes `sourceStr` to the file `fileName`, replacing any previous content.
//
// The file used to be opened in append mode with a leading blank line as a
// separator. Because main() reads OUT_FILE_NAME back after it is written,
// every run then re-scanned the output of all earlier runs. The file is now
// truncated so it only ever holds the result of the current run, and the
// separator line is no longer needed.
//
// Returns 0 on success; prints a message and returns -1 when the file cannot
// be opened, or returns -1 when the write fails. `sourceStr` is not modified.
int saveFile(IN LPCSTR fileName,IN string& sourceStr) {
	ofstream outFile(fileName, ofstream::out | ofstream::trunc);
	if (!outFile.is_open())
	{
		cout << "未成功打开文件" << endl;
		return -1;
	}

	outFile << sourceStr;
	// close() flushes the buffer; a failed flush sets failbit on the stream.
	outFile.close();
	return outFile.fail() ? -1 : 0;
}

// Strips comments and layout characters from `sourceStr` and saves the result
// to OUT_FILE_NAME through saveFile(). `sourceStr` itself is not modified.
//
// - A "//" comment is removed up to, but not including, the end of its line.
// - A "/* ... */" comment is removed entirely. If the closing "*/" is missing,
//   an error is printed and the process exits with a non-zero status.
// - '\n', '\t', '\v' and '\r' are removed.
//
// Every removed comment or layout character is replaced by a single space
// (never doubled, never at the very start), so the tokens on either side are
// not glued together: "int\nmain" becomes "int main", not "intmain".
//
// All look-ahead is bounds-checked, so input that ends in '/' or in a comment
// no longer reads past the end of the string.
//
// NOTE(review): comment markers inside string or character literals (for
// example "http://x") are still treated as comments.
//
// Returns the result of saveFile().
int filterResource(IN string& sourceStr) {
	const size_t length = sourceStr.size();
	string filtered;
	filtered.reserve(length);

	size_t pos = 0;
	while (pos < length) {
		const char current = sourceStr[pos];
		const char next = (pos + 1 < length) ? sourceStr[pos + 1] : '\0';
		bool removed = false;

		if (current == '/' && next == '/')
		{
			// Single-line comment: jump to the newline (or to the end of input).
			// The newline itself is handled by the next iteration.
			const size_t lineEnd = sourceStr.find('\n', pos);
			pos = (lineEnd == string::npos) ? length : lineEnd;
			removed = true;
		}
		else if (current == '/' && next == '*')
		{
			// Block comment: the search starts after "/*", so "/*/" is not
			// taken as a complete comment.
			const size_t closing = sourceStr.find("*/", pos + 2);
			if (closing == string::npos)
			{
				cout << "注释出错,没有找到 */,程序结束!!!\n" << endl;
				exit(1);
			}
			pos = closing + 2;//skip past "*/"
			removed = true;
		}
		else if (current == '\n' || current == '\t' || current == '\v' || current == '\r')
		{
			++pos;
			removed = true;
		}
		else
		{
			filtered += current;
			++pos;
		}

		// Leave one separating space where something was removed.
		if (removed && !filtered.empty() && filtered.back() != ' ')
		{
			filtered += ' ';
		}
	}

	return saveFile(OUT_FILE_NAME, filtered);
}

// Returns true when `letter` may appear in an identifier as a letter:
// 'a'-'z', 'A'-'Z', or '_' (C allows the underscore anywhere in an
// identifier, including the first position).
bool isLetter(char letter)
{
	const bool isLower = !(letter < 'a' || letter > 'z');
	const bool isUpper = !(letter < 'A' || letter > 'Z');
	return isLower || isUpper || letter == '_';
}

// Returns true when `digit` is one of the decimal digit characters '0'-'9'.
bool isDigit(char digit)
{
	return !(digit < '0' || digit > '9');
}

// Empty placeholder: the body does nothing and `it` is neither read nor
// advanced.
// NOTE(review): presumably meant to hold the keyword/identifier analysis that
// scanner() performs inline - confirm before implementing or removing it.
void keywordOrLabelProc(string::iterator& it) {

}

// One recognized token.
struct TokenItem {
	// Token number: the index of the lexeme in the global `word` table, or one
	// of ID / CONST_INT / CONST_DOUBLE for identifiers and numeric constants.
	int number;
	// The lexeme text exactly as it appeared in the source.
	string word;
};

// Pointer alias for TokenItem.
using PTokenItem = TokenItem*;

void scanner(string& sourceStr) {
	vector<TokenItem> token;

	string temp;
	bool isKeyWord = false;

	//遍历所有字符
	for (string::iterator it = sourceStr.begin(); it != sourceStr.end();)
	{
		if (isLetter(*it))
		{
			//关键字和标记符分析程序
			while (isLetter(*it) || isDigit(*it)) {
				temp += *it;
				it++;
			}
			for (int i = 0; i < WORD_NUMBER; i++)
			{
				if (word[i] == temp) {

					isKeyWord = true;

					TokenItem tokenItem;

					tokenItem.number = i;
					tokenItem.word = temp;

					token.push_back(tokenItem);
					temp.clear();
				}
			}
			//如果不是关键字,则是标记符
			if (!isKeyWord)
			{
				TokenItem tokenItem;

				tokenItem.number = ID;
				tokenItem.word = temp;

				token.push_back(tokenItem);
				temp.clear();
			}

			temp.clear();
			isKeyWord = false;
		}
		else if (isDigit(*it))
		{
			int constType = CONST_INT;

			//常数分析程序
			while (isDigit(*it))
			{
				temp += *it;
				it++;
			}

			if (((*it == '.') && (isDigit(*(it + 1)))))
			{
				constType = CONST_DOUBLE;

				temp += *it;
				it++;
				while (isDigit(*it))
				{
					temp += *it;
					it++;
				}
			}

			TokenItem tokenItem;

			tokenItem.number = constType;
			tokenItem.word = temp;

			token.push_back(tokenItem);
			temp.clear();
		}else if (*it == '+' || *it == '-' || *it == '*' || *it == '/' || *it == ';' || *it == '(' || *it == ')' || *it == '^'
			|| *it == ',' || *it == '\"' || *it == '\'' || *it == '~' || *it == '#' || *it == '%' || *it == '['
			|| *it == ']' || *it == '{' || *it == '}' || *it == '\\' || *it == '.' || *it == '\?' || *it == ':')
		{
			//其他单词分析程序

			string str;
			if (*(it + 1) != '+'&&*(it + 1) != '-'&&*(it + 1) != '='&&*(it + 1) != '&'&&*(it + 1) != '|'&&*(it + 1) != '<'&&*(it + 1) != '>')
			{
				//先判断一个字符的算术运算符和关系云算法
				str += *it;
				for (int i = 0; i < WORD_NUMBER; i++) {
					if (word[i] == str)
					{
						TokenItem tokenItem;

						tokenItem.number = i;
						tokenItem.word = str;

						token.push_back(tokenItem);
					}
				}
			}
			else {
				str += *it;
				str += *(it + 1);
				for (int i = 0; i < WORD_NUMBER; i++) {
					if (word[i] == str)
					{
						TokenItem tokenItem;

						tokenItem.number = i;
						tokenItem.word = str;

						token.push_back(tokenItem);
					}
				}
				it++;
			}
			it++;
			str.clear();
		}
		else {
			it++;
		}
	}

	//输出单词的内部表示
	string saveStr;
	for (TokenItem t : token)
	{
		saveStr += to_string(t.number);
		saveStr += '\t';
		saveStr += t.word;
		saveStr += '\n';
		cout << t.number << " " << t.word << endl;
	}
	//保存token
	saveFile(TOKEN_FILE_NAME, saveStr);
	saveStr.clear();
}

// Entry point. Runs the pipeline:
//   SOURCE_FILE_NAME -> filterResource() -> OUT_FILE_NAME -> scanner()
//
// Returns 0 on success and 1 as soon as a stage reports failure. Previously
// the results of readFile() were ignored, so a missing input file still
// produced output files and exit code 0.
int main()
{
	// Holds the program text being processed.
	string sourceStr;

	// Load the source file.
	if (readFile(SOURCE_FILE_NAME, sourceStr) != 0)
	{
		return 1;
	}
	// Append the end-of-input marker.
	sourceStr += '$';

	// Strip comments; the result is written to OUT_FILE_NAME.
	if (filterResource(sourceStr) != 0)
	{
		return 1;
	}

	// Reload the filtered text and tokenize it.
	sourceStr.clear();
	if (readFile(OUT_FILE_NAME, sourceStr) != 0)
	{
		return 1;
	}
	scanner(sourceStr);

	return 0;
}

源文件hello.txt

编译原理--词法分析

输出去注释文件out.txt

编译原理--词法分析

输出token文件,token.txt

编译原理--词法分析

项目源文件:https://download.csdn.net/download/a1013642808/10483013