【SEU&SE】编译原理 - 词法分析器

一曲冷凌霜 提交于 2020-03-05 08:04:17

README

该源码仅供参考,严禁直接抄袭!

源码如下

//
//  main.cpp
//  LexicalAnalyzer
//
//  Created by 胡昱 on 2020/1/1.
//  Copyright © 2020年 胡昱. All rights reserved.
//

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

/***************** Global variables *****************/
/***************** Reserved-word table *****************/
// The category code of a reserved word is its index in this table plus one
// (that is what searchReserveWorld returns), so the order of the entries
// determines the codes that end up in the output.
const static string reserveWords[] = {
    "main", "if", "else", "while", "do", "for", "int", "double", "float",
    "char", "long", "short", "enum", "static", "bool", "void", "switch",
    "case", "break", "continue", "signed", "unsigned", "return", "default",
    "const", "union", "struct", "auto", "include", "define", "class", "virtual",
    "friend", "public", "private", "protected", "this", "false", "true", "try",
    "catch", "throw", "goto", "using", "template", "new", "namespace", "operator",
    "register", "inline"};
// Number of reserved words (50). Derived from the array so that the count
// cannot drift from the table contents.
const static int numOfreserveWords =
    static_cast<int>(sizeof(reserveWords) / sizeof(reserveWords[0]));

/***************** Operator table *****************/
// An operator's category code is numOfreserveWords + index + 1, as computed
// by the searchOperator overloads.
const static string operators[] = {
    // arithmetic
    "+", "-", "++", "--", "*", "/", "%",
    // relational and assignment
    "<", "<=", ">", ">=", "=", "==", "!=",
    // shifts and logical
    "<<", ">>", "&&", "||", "!", "^"
};
// Number of entries in the operator table (20).
const static int numOfOperator =
    static_cast<int>(sizeof(operators) / sizeof(operators[0]));

/***************** Delimiter table *****************/
// Every delimiter is a single character; searchDelimiters maps entry i to
// the category code numOfreserveWords + numOfOperator + i + 1.
const static string delimiters[] = {
    ",", ";", ".",
    "{", "}", "[", "]", "(", ")",
    "#", "\\", "\?"
};
// Number of entries in the delimiter table (12).
const static int numOfDelimiter =
    static_cast<int>(sizeof(delimiters) / sizeof(delimiters[0]));

/***************** Identifier table *****************/
// Identifier tokens collected by main while scanning.
vector<string> identitierTable;

/***************** Character-constant table *****************/
// Character-constant tokens, stored as their token text.
vector<string> charTable;

/***************** String-constant table *****************/
// String-constant tokens, stored as their token text.
vector<string> stringTable;

/***************** Integer-constant table *****************/
// Integer constants, stored after conversion with stoi.
vector<int> intTable;

/***************** Floating-point-constant table *****************/
// Floating-point constants, stored after conversion with stof.
vector<float> floatTable;

/***************** Functions *****************/
/***************** Print an error message and terminate the program *****************/
/**
 * Reports an error together with the source line it was detected on, then
 * terminates the process.
 *
 * @param errorMesage  Description of the problem, appended to the message.
 * @param line         Line number to report, as counted by the caller.
 *
 * Never returns. The process exits with EXIT_FAILURE (the original exited
 * with 0, which made a failed analysis indistinguishable from a successful
 * one for shells and build scripts).
 */
void errorProcess(string errorMesage, int line)
{
    cout<<"程序在第"<<line<<"行出现错误:"<<errorMesage<<endl;
    exit(EXIT_FAILURE);
}
/**
 * Reports an error that has no associated line number, then terminates the
 * process.
 *
 * @param errorMesage  Description of the problem, appended to the message.
 *
 * Never returns. The process exits with EXIT_FAILURE so that callers of the
 * program can detect the failure (the original exited with status 0).
 */
void errorProcess(string errorMesage)
{
    cout<<"程序出现错误:"<<errorMesage<<endl;
    exit(EXIT_FAILURE);
}

/***************** Preprocessing: strip comments and layout characters *****************/
/**
 * Rewrites *sourceFile in place so that it contains no comments and no
 * newline, tab, vertical-tab or carriage-return characters, and ends with an
 * explicit '\0' character (the scanner uses that character as its end
 * marker).
 *
 * - A "//" comment is dropped up to, but not including, the end of its line.
 * - A block comment is replaced by a single space.
 * - '\n', '\t', '\v' and '\r' are each replaced by a single space rather
 *   than deleted, so tokens that were separated only by a line break or a
 *   tab (e.g. "else\nreturn") do not fuse into one token.
 *
 * If a block comment is never closed, errorProcess is called with the line
 * on which the comment started, which terminates the program.
 *
 * NOTE(review): comment markers are recognised everywhere, including inside
 * string and character literals, and layout characters inside literals are
 * replaced by spaces as well. The original behaved the same way for comment
 * markers; confirm whether literals need to be protected.
 *
 * @param sourceFile  Text to filter; overwritten with the filtered text.
 */
void filterResource(string * sourceFile)
{
    const string& source = *sourceFile;
    const size_t length = source.length();

    string filtered;
    filtered.reserve(length + 1);
    int line = 1;   // current line, used only for error reporting

    size_t i = 0;
    while(i < length)
    {
        const char current = source[i];
        const char next = (i + 1 < length) ? source[i + 1] : '\0';

        if((current == '/')&&(next == '/'))
        {
            // Single-line comment: skip to the end of the line or of the
            // input. The '\n' itself is left for the general path below so
            // that the line is counted exactly once.
            while((i < length)&&(source[i] != '\n'))
            {
                ++i;
            }
            continue;
        }

        if((current == '/')&&(next == '*'))
        {
            // Block comment: skip everything up to and including "*/".
            const int startLine = line;
            bool closed = false;
            i += 2;     // step over "/*"
            while(i < length)
            {
                if((source[i] == '*')&&(i + 1 < length)&&(source[i + 1] == '/'))
                {
                    i += 2;     // step over "*/"
                    closed = true;
                    break;
                }
                if(source[i] == '\n')
                {
                    ++line;
                }
                ++i;
            }
            if(!closed)
            {
                errorProcess("未找到相应的\"*/\"", startLine);
            }
            // A comment separates tokens, so leave one space in its place.
            // Re-enter the loop so that the character after "*/" is examined
            // like any other (it may start another comment).
            filtered += ' ';
            continue;
        }

        if(current == '\n')
        {
            ++line;
        }

        if((current == '\n')||(current == '\t')||(current == '\v')||(current == '\r'))
        {
            // Layout characters become a plain space.
            filtered += ' ';
        }
        else
        {
            filtered += current;
        }
        ++i;
    }

    // Explicit end marker expected by the scanner.
    filtered += '\0';
    *sourceFile = filtered;
}

/***************** Reserved-word lookup *****************/
/**
 * Looks s up in the reserved-word table.
 *
 * @param s  Candidate word.
 * @return   The word's category code (table index + 1) when s is a reserved
 *           word; -1 otherwise, in which case the caller treats s as an
 *           identifier.
 */
int searchReserveWorld(string s)
{
    int index = 0;
    while((index < numOfreserveWords)&&(reserveWords[index] != s))
    {
        ++index;
    }

    // index stopped either on the matching entry or one past the table.
    return (index < numOfreserveWords) ? (index + 1) : -1;
}

/***************** Delimiter lookup *****************/
/**
 * Looks the character c up in the delimiter table.
 *
 * @param c  Candidate character.
 * @return   The delimiter's category code, which follows the keyword and
 *           operator code ranges; -1 when c is not a delimiter.
 */
int searchDelimiters(char c)
{
    const string wanted(1, c);
    // Code assigned to the first entry of the delimiter table.
    const int firstCode = numOfreserveWords + numOfOperator + 1;

    for(int index = 0; index != numOfDelimiter; ++index)
    {
        if(delimiters[index] == wanted)
        {
            return firstCode + index;
        }
    }

    return -1;
}

/***************** Operator lookup (single character) *****************/
/**
 * Looks the one-character operator c up in the operator table.
 *
 * @param c  Candidate character.
 * @return   The operator's category code (numOfreserveWords + index + 1);
 *           -1 when c alone is not an operator.
 */
int searchOperator(char c)
{
    const string single(1, c);
    const string * const tableEnd = operators + numOfOperator;

    for(const string * entry = operators; entry != tableEnd; ++entry)
    {
        if(*entry == single)
        {
            return numOfreserveWords + static_cast<int>(entry - operators) + 1;
        }
    }

    return -1;
}

/***************** Operator lookup (string) *****************/
/**
 * Looks the operator text c up in the operator table.
 *
 * @param c  Candidate operator text, e.g. "+" or "<=".
 * @return   The operator's category code (numOfreserveWords + index + 1);
 *           -1 when c is not in the table.
 */
int searchOperator(string c)
{
    int code = -1;

    // Stop at the first match; code stays -1 when nothing matches.
    for(int index = 0; (index < numOfOperator)&&(code == -1); ++index)
    {
        if(c == operators[index])
        {
            code = numOfreserveWords + index + 1;
        }
    }

    return code;
}

/***************** Letter test *****************/
/**
 * Tells whether letter may appear in an identifier as a "letter".
 *
 * @param letter  Character to classify.
 * @return        true for 'a'..'z', 'A'..'Z' and the underscore (which may
 *                appear anywhere in an identifier, including at the start);
 *                false for everything else.
 */
bool isLetter(char letter)
{
    const bool lowerCase = ('a' <= letter)&&(letter <= 'z');
    const bool upperCase = ('A' <= letter)&&(letter <= 'Z');
    return lowerCase || upperCase || (letter == '_');
}

/***************** Digit test *****************/
/**
 * Tells whether digit is a decimal digit.
 *
 * @param digit  Character to classify.
 * @return       true for '0'..'9', false otherwise.
 */
bool isDigit(char digit)
{
    return ('0' <= digit)&&(digit <= '9');
}

/***************** Scanner: extract the next token *****************/
/**
 * Reads one token from sourceFile starting at position pProject.
 *
 * Leading spaces are skipped. On return pProject is the position just past
 * the token, token holds the token text and syn holds its category code:
 *
 *   0        end of input (token is left untouched)
 *   1..50    reserved word        (searchReserveWorld)
 *   51..70   operator             (searchOperator)
 *   71..82   delimiter            (searchDelimiters)
 *   83       identifier
 *   84       character constant   (exactly 'x', no escape sequences)
 *   85       string constant      (up to the next '"', no escape sequences)
 *   86       integer constant
 *   87       floating-point constant
 *
 * Malformed input (a number with two dots or followed by a letter, two
 * adjacent operator characters that do not form a known operator, a bad
 * character or string constant, an unknown character) is reported through
 * errorProcess, which terminates the program.
 *
 * Changes relative to the original implementation:
 *  - sourceFile is taken by const reference; it used to be copied in full
 *    for every token.
 *  - floating-point constants now get code 87 (they used to get 86, the
 *    integer code).
 *  - two-character operators are tried first, so "&&" and "||" are
 *    recognised even though '&' and '|' are not operators on their own.
 *  - reads never go past the end of the text.
 *
 * @param sourceFile  Filtered source text.
 * @param syn         Receives the category code.
 * @param token       Receives the token text.
 * @param pProject    Current scan position; advanced past the token.
 */
void scanner(const string& sourceFile, int& syn, string& token, int& pProject)
{
    const int length = static_cast<int>(sourceFile.length());

    // Bounds-checked read: anything outside the text reads as '\0'.
    auto charAt = [&sourceFile, length](int position) -> char
    {
        return ((position >= 0)&&(position < length)) ? sourceFile[position] : '\0';
    };

    // Skip separating spaces.
    while(charAt(pProject) == ' ')
    {
        ++pProject;
    }

    // End of input: either past the text or on the explicit '\0' marker.
    if((pProject >= length)||(charAt(pProject) == '\0'))
    {
        syn = 0;
        return;
    }

    // Start every token from an empty buffer.
    token.clear();
    const char first = sourceFile[pProject];

    if(isLetter(first))
    {
        // Identifier or reserved word: a letter followed by letters/digits.
        while((pProject < length)&&
              (isLetter(sourceFile[pProject])||isDigit(sourceFile[pProject])))
        {
            token += sourceFile[pProject];
            ++pProject;
        }
        syn = searchReserveWorld(token);
        if(syn == -1)
        {
            // Not a reserved word, so it is an identifier.
            syn = 83;
        }
        return;
    }

    if(isDigit(first))
    {
        // Numeric constant: digits with at most one decimal point.
        int dots = 0;
        while((pProject < length)&&
              (isDigit(sourceFile[pProject])||(sourceFile[pProject] == '.')))
        {
            const char current = sourceFile[pProject];
            token += current;
            ++pProject;

            if((current == '.')&&(++dots > 1))
            {
                errorProcess("错误的小数 ---- " + token);
                return;
            }
            if(isLetter(charAt(pProject)))
            {
                errorProcess("数字后面不能跟随字母 ---- " + token + sourceFile[pProject]);
                return;
            }
        }

        // 86 = integer constant, 87 = floating-point constant.
        syn = (dots == 0) ? 86 : 87;
        return;
    }

    const int delimiterCode = searchDelimiters(first);
    if(delimiterCode != -1)
    {
        // Delimiters are always a single character.
        token += first;
        syn = delimiterCode;
        ++pProject;
        return;
    }

    // Two-character operators take precedence over one-character ones.
    if(pProject + 1 < length)
    {
        string pair(1, first);
        pair += sourceFile[pProject + 1];
        const int pairCode = searchOperator(pair);
        if(pairCode != -1)
        {
            token = pair;
            syn = pairCode;
            pProject += 2;
            return;
        }
    }

    const int singleCode = searchOperator(first);
    if(singleCode != -1)
    {
        token += first;
        ++pProject;

        // The pair was not a known operator (checked above); if the next
        // character is nevertheless an operator character, the original
        // analyzer rejects the combination, and so does this one.
        if(searchOperator(charAt(pProject)) != -1)
        {
            token += sourceFile[pProject];
            errorProcess("错误的运算符 ----" + token);
            return;
        }

        syn = singleCode;
        return;
    }

    if(first == '\'')
    {
        // Character constant: must be exactly quote, character, quote.
        if(((pProject + 2) >= length)||(sourceFile[pProject + 2] != '\''))
        {
            token += first;
            ++pProject;
            token += charAt(pProject);  // one extra character to locate the error
            errorProcess("错误的字符 ---- " + token);
            return;
        }

        token.append(sourceFile, pProject, 3);
        pProject += 3;
        syn = 84;
        return;
    }

    if(first == '\"')
    {
        // String constant: everything up to and including the next quote.
        token += first;
        ++pProject;

        while(pProject < length)
        {
            const char current = sourceFile[pProject];
            token += current;
            ++pProject;
            if(current == '\"')
            {
                syn = 85;
                return;
            }
        }

        // Ran out of text without finding the closing quote.
        errorProcess("错误的字符串 ---- " + token);
        return;
    }

    // Nothing matched: unknown character.
    errorProcess("存在未能识别的字符 ---- " + string(1, first));
}

int main(int argc, const char * argv[])
{
    string sourceFile;  //保存源代码的字符串
    
    //打开源文件
    string filePath;
    cout<<"Please input the file path: ";
    cin>>filePath;
    ifstream fin(filePath);
    while(!fin.is_open())
    {
        cout<<"Error opening file!"<<endl;
        filePath.clear();
        cout<<"Please input the file pahth again: ";
        cin>>filePath;
    }
    
    //读取源文件
    char temp;
    while(!fin.eof())
    {
        fin.get(temp);
        sourceFile += temp;
    }
    cout<<"Source file: \n"<<sourceFile<<endl;
    
    //编译预处理,取出无用的字符和注释
    filterResource(&sourceFile);
    cout<<"Filtered source file: \n"<<sourceFile<<endl;
    
    //创建输出文件
    ofstream fout("/Users/huyu/Desktop/output.txt");
    if(!fout.is_open())
    {
        cout<<"Creating the output file fails!"<<endl;
    }
    
    //开始扫描
    cout<<"Token: "<<endl;
    int syn = -1;
    int pProject = 0;
    string token = "";
    while(syn != 0)
    {
        scanner(sourceFile, syn, token, pProject);
        
        if(syn == 0)
        {
            break;
        }
        
        cout<<"<" + token + " , "<<syn<<">";
        fout<<"<" + token + " , "<<syn<<">";
        if(syn >= 1 && syn <= numOfreserveWords)
        {
            cout<<" ----- 关键字"<<endl;
            fout<<" ----- 关键字"<<endl;
        }
        else if(syn > numOfreserveWords && syn <= (numOfreserveWords + numOfOperator))
        {
            cout<<" ----- 操作运算符"<<endl;
            fout<<" ----- 操作运算符"<<endl;
        }
        else if(syn > numOfreserveWords + numOfOperator && syn <= (numOfreserveWords + numOfOperator + numOfDelimiter))
        {
            cout<<" ----- 界符"<<endl;
            fout<<" ----- 界符"<<endl;
        }
        else if(syn == 83)
        {
            //标识符
            if(!(count(identitierTable.begin(), identitierTable.end(), token) > 0))
            {
                //若该标识符不存在则添加进字符串表
                identitierTable.push_back(token);
            }
            identitierTable.push_back(token);
            cout<<" ----- 标识符"<<endl;
            fout<<" ----- 标识符"<<endl;
        }
        else if(syn == 84)
        {
            //字符常量
            if(!(count(charTable.begin(), charTable.end(), token) > 0))
            {
                //若该字符常量不存在则添加进字符串表
                charTable.push_back(token);
            }
            cout<<" ----- 字符常量"<<endl;
            fout<<" ----- 字符常量"<<endl;
        }
        else if(syn == 85)
        {
            //字符串常量
            if(!(count(stringTable.begin(), stringTable.end(), token) > 0))
            {
                //若该字符串常量不存在则添加进字符串表
                stringTable.push_back(token);
            }
            cout<<" ----- 字符串常量"<<endl;
            fout<<" ----- 字符串常量"<<endl;
        }
        else if(syn == 86)
        {
            //整数常量
            if(!(count(intTable.begin(), intTable.end(), stoi(token)) > 0))
            {
                //若该整数常量不存在则添加进整数表
                intTable.push_back(stoi(token));
            }
            cout<<" ----- 整数常量"<<endl;
            fout<<" ----- 整数常量"<<endl;
        }
        else if(syn == 87)
        {
            //浮点数常量
            if(!(count(floatTable.begin(), floatTable.end(), stof(token)) > 0))
            {
                //若该浮点数常量不存在则添加进浮点数表
                floatTable.push_back(stof(token));
            }
            cout<<" ----- 浮点数常量"<<endl;
            fout<<" ----- 浮点数常量"<<endl;
        }
        
    }
    
    //程序结束
    cout<<"\n\nComplete!"<<endl;
    
    fout.close();
    fin.close();
    return 0;
}

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!