// README
// 该源码仅供参考,严禁直接抄袭!
// 源码如下
//
// main.cpp
// LexicalAnalyzer
//
// Created by 胡昱 on 2020/1/1.
// Copyright © 2020年 胡昱. All rights reserved.
//
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
/*****************全局变量*****************/
/***************** Reserved-word table *****************/
// Words the lexer classifies as keywords. searchReserveWorld() reports the
// 1-based position of a word in this table as its token code, so the order
// of the entries determines the codes and must not be changed casually.
const static string reserveWords[] = {
    "main", "if", "else", "while", "do", "for", "int", "double", "float",
    "char", "long", "short", "enum", "static", "bool", "void", "switch",
    "case", "break", "continue", "signed", "unsigned", "return", "default",
    "const", "union", "struct", "auto", "include", "define", "class", "virtual",
    "friend", "public", "private", "protected", "this", "false", "true", "try",
    "catch", "throw", "goto", "using", "template", "new", "namespace", "operator",
    "register", "inline"};
// Number of entries in reserveWords. The operator and delimiter token codes
// are offset by this value, and the fixed codes 83..87 used by scanner() and
// main() assume it is 50.
const static int numOfreserveWords = 50;
static_assert(static_cast<int>(sizeof(reserveWords) / sizeof(reserveWords[0])) == numOfreserveWords,
              "numOfreserveWords must match the number of entries in reserveWords");
/***************** Operator table *****************/
// Operators known to the lexer. The token code of an operator is
// numOfreserveWords plus its 1-based position in this table.
static const string operators[] = {
    "+",  "-",  "++", "--", "*",
    "/",  "%",  "<",  "<=", ">",
    ">=", "=",  "==", "!=", "<<",
    ">>", "&&", "||", "!",  "^"
};
// Entry count of the operator table, derived from the array itself.
static const int numOfOperator = static_cast<int>(sizeof(operators) / sizeof(operators[0]));
/***************** Delimiter table *****************/
// Single-character delimiters. The token code of a delimiter is
// numOfreserveWords + numOfOperator plus its 1-based position in this table.
static const string delimiters[] = {
    ",", ";", ".",
    "{", "}", "[", "]", "(", ")",
    "#", "\\", "\?"
};
// Entry count of the delimiter table, derived from the array itself.
static const int numOfDelimiter = static_cast<int>(sizeof(delimiters) / sizeof(delimiters[0]));
/***************** Identifier table *****************/
// Identifier tokens (code 83) recorded by main().
vector<string> identitierTable;
/***************** Character-constant table *****************/
// Character-constant tokens (code 84) recorded by main(); each entry is the
// token text as collected by scanner(), i.e. including the single quotes.
vector<string> charTable;
/***************** String-constant table *****************/
// String-constant tokens (code 85) recorded by main(); each entry is the
// token text as collected by scanner(), i.e. including the double quotes.
vector<string> stringTable;
/***************** Integer-constant table *****************/
// Values of integer-constant tokens (code 86), converted by main() with stoi.
vector<int> intTable;
/***************** Floating-point-constant table *****************/
// Values of floating-point tokens (code 87), converted by main() with stof.
vector<float> floatTable;
/*****************函数*****************/
/***************** Report an error and abort the program *****************/
// Prints errorMesage together with the source line number on standard
// output and terminates the process. This function does not return.
//   errorMesage - text describing the problem
//   line        - 1-based line number of the source file being analysed
void errorProcess(string errorMesage, int line)
{
    cout<<"程序在第"<<line<<"行出现错误:"<<errorMesage<<endl;
    // An error must not be reported to the caller (shell, build script) as a
    // successful run, so exit with a failure status instead of 0.
    exit(EXIT_FAILURE);
}
// Overload for errors that cannot be attributed to a source line: prints
// errorMesage on standard output and terminates the process. This function
// does not return.
//   errorMesage - text describing the problem
void errorProcess(string errorMesage)
{
    cout<<"程序出现错误:"<<errorMesage<<endl;
    // An error must not be reported to the caller (shell, build script) as a
    // successful run, so exit with a failure status instead of 0.
    exit(EXIT_FAILURE);
}
/***************** Preprocessing: strip comments and layout characters *****************/
// Rewrites *sourceFile in place so that it only contains text the scanner
// has to look at:
//   - "// ..." comments are removed up to (not including) the end of line;
//   - "/* ... */" comments are removed; an unterminated one is reported
//     through errorProcess() with the line on which the comment starts;
//   - '\n', '\t', '\v' and '\r' are dropped. Each run of them, and each block
//     comment, is replaced by a single blank (unless the output already ends
//     in a blank or is still empty), so the tokens on either side of a line
//     break or comment are not glued together;
//   - a '\0' character is appended as the end marker the scanner tests for.
// Limitation: comment markers and layout characters that occur inside string
// or character literals are not treated specially.
//   sourceFile - text to filter; replaced by the filtered text
void filterResource(string * sourceFile)
{
    const string& source = *sourceFile;
    const string::size_type length = source.length();

    string filtered;
    filtered.reserve(length + 1);

    // Emits one blank as a token separator unless one is already in place.
    auto appendSeparator = [&filtered]()
    {
        if(!filtered.empty() && filtered.back() != ' ')
        {
            filtered += ' ';
        }
    };

    int line = 1;   // current 1-based line, only used for error reporting
    string::size_type i = 0;
    while(i < length)
    {
        const char current = source[i];
        const char next = (i + 1 < length) ? source[i + 1] : '\0';

        if(current == '/' && next == '/')
        {
            // Single-line comment: skip to the end of the line or of the
            // text. The newline itself is left for the layout branch below,
            // which counts the line exactly once.
            while(i < length && source[i] != '\n')
            {
                ++i;
            }
            continue;
        }

        if(current == '/' && next == '*')
        {
            // Block comment: the terminator is searched for after the
            // opening "/*", so "/*/" does not close itself.
            const string::size_type closing = source.find("*/", i + 2);
            if(closing == string::npos)
            {
                errorProcess("未找到相应的\"*/\"", line);
                return;   // not reached: errorProcess() terminates the program
            }
            for(string::size_type k = i + 2; k < closing; ++k)
            {
                if(source[k] == '\n')
                {
                    ++line;
                }
            }
            appendSeparator();
            // Resume right after "*/"; the next character is examined from
            // scratch, so a directly following comment is recognised too.
            i = closing + 2;
            continue;
        }

        if(current == '\n' || current == '\t' || current == '\v' || current == '\r')
        {
            if(current == '\n')
            {
                ++line;
            }
            appendSeparator();
            ++i;
            continue;
        }

        filtered += current;
        ++i;
    }

    filtered += '\0';
    sourceFile->swap(filtered);
}
/***************** Keyword lookup *****************/
// Looks s up in the reserved-word table.
// Returns the 1-based table position (the keyword's token code) on a match,
// or -1 when s is not a keyword, in which case the caller treats it as an
// identifier.
int searchReserveWorld(string s)
{
    int position = 0;
    while(position < numOfreserveWords)
    {
        if(reserveWords[position] == s)
        {
            return position + 1;
        }
        ++position;
    }
    return -1;
}
/***************** Delimiter lookup *****************/
// Looks the character c up in the delimiter table.
// Returns its token code (numOfreserveWords + numOfOperator + 1-based table
// position) on a match, or -1 when c is not a delimiter.
int searchDelimiters(char c)
{
    const string needle(1, c);
    const int codeBase = numOfreserveWords + numOfOperator;
    for(int idx = 0; idx != numOfDelimiter; ++idx)
    {
        if(delimiters[idx] == needle)
        {
            return codeBase + idx + 1;
        }
    }
    return -1;
}
/***************** Operator lookup (single character) *****************/
// Looks the character c up among the one-character entries of the operator
// table. Returns its token code (numOfreserveWords + 1-based table position)
// on a match, or -1 when c is not a one-character operator.
int searchOperator(char c)
{
    int position = 0;
    while(position < numOfOperator)
    {
        const string& candidate = operators[position];
        // Only an entry consisting of exactly this one character matches.
        if(candidate.length() == 1 && candidate[0] == c)
        {
            return numOfreserveWords + position + 1;
        }
        ++position;
    }
    return -1;
}
/***************** Operator lookup (string) *****************/
// Looks the text c up in the operator table. Returns its token code
// (numOfreserveWords + 1-based table position) on a match, or -1 when c is
// not an operator.
int searchOperator(string c)
{
    const string * const first = operators;
    const string * const last = operators + numOfOperator;
    for(const string * entry = first; entry != last; ++entry)
    {
        if(*entry == c)
        {
            return numOfreserveWords + static_cast<int>(entry - first) + 1;
        }
    }
    return -1;
}
/***************** Letter test *****************/
// Returns true when letter may appear in an identifier as a letter:
// 'a'..'z', 'A'..'Z' or the underscore, which is accepted both at the start
// of an identifier and inside it.
bool isLetter(char letter)
{
    if(letter == '_')
    {
        return true;
    }
    const bool isLower = !(letter < 'a' || letter > 'z');
    const bool isUpper = !(letter < 'A' || letter > 'Z');
    return isLower || isUpper;
}
/***************** Digit test *****************/
// Returns true when digit is one of the characters '0'..'9'.
bool isDigit(char digit)
{
    return !(digit < '0' || digit > '9');
}
/*****************分析程序*****************/
void scanner(string sourceFile, int& syn, string& token, int& pProject)
{
while(sourceFile[pProject] == ' ')
{
//过滤掉多余的空格
++pProject;
}
//退出条件
if((pProject >= sourceFile.length()) || (sourceFile[pProject] == '\0'))
{
syn = 0;
return;
}
//每次收集字符之前先将token清空
token.clear();
if(isLetter(sourceFile[pProject]))
{
//开头为字母
token += sourceFile[pProject]; //收集
++pProject; //后移
while((isLetter(sourceFile[pProject])||isDigit(sourceFile[pProject]))&&(pProject < sourceFile.length()))
{
//字母后面跟着字母或者数字
token += sourceFile[pProject]; //收集
++pProject; //后移
}
syn = searchReserveWorld(token); //查找是否为关键字
if(syn == -1)
{
//如果不是关键字则为标识符,令syn为标识符种别码,即83
syn = 83;
return;
}
}
else if(isDigit(sourceFile[pProject]))
{
//首字符为数字
while((isDigit(sourceFile[pProject])||(sourceFile[pProject] =='.'))&&(pProject < sourceFile.length()))
{
//数字后面跟着数字或者小数点
token += sourceFile[pProject]; //收集
++pProject; //后移
//错误判断
if(count(token.begin(), token.end(), '.') > 1)
{
errorProcess("错误的小数 ---- " + token);
}
if((pProject < sourceFile.length())&&(isLetter(sourceFile[pProject])))
{
errorProcess("数字后面不能跟随字母 ---- " + token + sourceFile[pProject]);
}
}
if(token.find('.') == string::npos)
{
//整数
syn = 86;
return;
}
else
{
//浮点数
syn = 86;
return;
}
}
else if(searchDelimiters(sourceFile[pProject]) != -1)
{
//如果是界符的话直接收集
token += sourceFile[pProject];
syn = searchDelimiters(sourceFile[pProject]);
++pProject;
return;
}
else if(searchOperator(sourceFile[pProject]) != -1)
{
//如果首字符是运算符
token += sourceFile[pProject]; //收集
++pProject; //后移
//超前搜索,判断是双目运算符还是错单目运算符
if(searchOperator(sourceFile[pProject]) != -1)
{
//当前字符仍然为操作运算符
token += sourceFile[pProject]; //收集
++pProject; //后移
if(searchOperator(token) != -1)
{
//若token依旧能在操作运算符表里面找到说明是双目运算符
syn = searchOperator(token);
}
else
{
//错误的运算符
--pProject; //回退
errorProcess("错误的运算符 ----" + token); //报错
}
}
else
{
//单目运算符
syn = searchOperator(token[0]);
}
}
else if(sourceFile[pProject] == '\'')
{
//若首字符为单引号,判断是否为字符
if(((pProject + 2) >= sourceFile.length())||(sourceFile[pProject +2] != '\''))
{
//若只剩下一个字符或者往后第二个字符不是单引号,则出现字符错误
token += sourceFile[pProject];
++pProject;
token += sourceFile[pProject]; //多读一个字符以定位错误信息
errorProcess("错误的字符 ---- " + token);
}
else
{
//收集三个字符
token += sourceFile[pProject];
++pProject;
token += sourceFile[pProject];
++pProject;
token += sourceFile[pProject];
++pProject;
syn = 84;
}
}
else if(sourceFile[pProject] == '\"')
{
//若首字符为双引号,判断是否为字符串
token += sourceFile[pProject]; //收集
++pProject; //后移
for(int i = 0; i < (sourceFile.length() - pProject); ++i)
{
token += sourceFile[pProject + i]; //收集
//搜索剩余字符,找到下一个双引号
if(sourceFile[pProject + i] == '\"')
{
//找到双引号,是正确的字符串
syn = 85;
pProject += (i + 1); //后移
return;
}
}
//未找到第二个双引号,为错误的字符串
--pProject; //回退
errorProcess("错误的字符串 ---- " + token);
}
else
{
//未能识别的字符
string temp = "";
temp += sourceFile[pProject]; //收集
errorProcess("存在未能识别的字符 ---- " + temp);
}
}
int main(int argc, const char * argv[])
{
string sourceFile; //保存源代码的字符串
//打开源文件
string filePath;
cout<<"Please input the file path: ";
cin>>filePath;
ifstream fin(filePath);
while(!fin.is_open())
{
cout<<"Error opening file!"<<endl;
filePath.clear();
cout<<"Please input the file pahth again: ";
cin>>filePath;
}
//读取源文件
char temp;
while(!fin.eof())
{
fin.get(temp);
sourceFile += temp;
}
cout<<"Source file: \n"<<sourceFile<<endl;
//编译预处理,取出无用的字符和注释
filterResource(&sourceFile);
cout<<"Filtered source file: \n"<<sourceFile<<endl;
//创建输出文件
ofstream fout("/Users/huyu/Desktop/output.txt");
if(!fout.is_open())
{
cout<<"Creating the output file fails!"<<endl;
}
//开始扫描
cout<<"Token: "<<endl;
int syn = -1;
int pProject = 0;
string token = "";
while(syn != 0)
{
scanner(sourceFile, syn, token, pProject);
if(syn == 0)
{
break;
}
cout<<"<" + token + " , "<<syn<<">";
fout<<"<" + token + " , "<<syn<<">";
if(syn >= 1 && syn <= numOfreserveWords)
{
cout<<" ----- 关键字"<<endl;
fout<<" ----- 关键字"<<endl;
}
else if(syn > numOfreserveWords && syn <= (numOfreserveWords + numOfOperator))
{
cout<<" ----- 操作运算符"<<endl;
fout<<" ----- 操作运算符"<<endl;
}
else if(syn > numOfreserveWords + numOfOperator && syn <= (numOfreserveWords + numOfOperator + numOfDelimiter))
{
cout<<" ----- 界符"<<endl;
fout<<" ----- 界符"<<endl;
}
else if(syn == 83)
{
//标识符
if(!(count(identitierTable.begin(), identitierTable.end(), token) > 0))
{
//若该标识符不存在则添加进字符串表
identitierTable.push_back(token);
}
identitierTable.push_back(token);
cout<<" ----- 标识符"<<endl;
fout<<" ----- 标识符"<<endl;
}
else if(syn == 84)
{
//字符常量
if(!(count(charTable.begin(), charTable.end(), token) > 0))
{
//若该字符常量不存在则添加进字符串表
charTable.push_back(token);
}
cout<<" ----- 字符常量"<<endl;
fout<<" ----- 字符常量"<<endl;
}
else if(syn == 85)
{
//字符串常量
if(!(count(stringTable.begin(), stringTable.end(), token) > 0))
{
//若该字符串常量不存在则添加进字符串表
stringTable.push_back(token);
}
cout<<" ----- 字符串常量"<<endl;
fout<<" ----- 字符串常量"<<endl;
}
else if(syn == 86)
{
//整数常量
if(!(count(intTable.begin(), intTable.end(), stoi(token)) > 0))
{
//若该整数常量不存在则添加进整数表
intTable.push_back(stoi(token));
}
cout<<" ----- 整数常量"<<endl;
fout<<" ----- 整数常量"<<endl;
}
else if(syn == 87)
{
//浮点数常量
if(!(count(floatTable.begin(), floatTable.end(), stof(token)) > 0))
{
//若该浮点数常量不存在则添加进浮点数表
floatTable.push_back(stof(token));
}
cout<<" ----- 浮点数常量"<<endl;
fout<<" ----- 浮点数常量"<<endl;
}
}
//程序结束
cout<<"\n\nComplete!"<<endl;
fout.close();
fin.close();
return 0;
}
// 来源:CSDN
// 作者:Mister_Yu
// 链接:https://blog.csdn.net/Mister_Yu/article/details/104612682