// C语言词法分析器 (lexical analyzer for the C language)
#include<iostream>
#include<cstring>
#include<cstdio>
#include<fstream>
#include<map>
#include<string>
#include<cstdlib>
#include<set>
using namespace std;

// Token-recognition helpers. Each one starts reading `filewriter` at index `i`,
// returns the recognized lexeme and advances `i` past it.
string checkstring(const string &filewriter,int &i);
string checkchar(const string &filewriter,int &i);
string checkdight(const string &filewriter,int &i);
string checkoperator(const string &filewriter,int &i);
string checkkey(const string &filewriter,int &i);
bool checkdeadline(char ch);
bool checkletterchar(char ch);
bool checkfile(const string &filewriter);
void init();
void error();
string scanner(string filename);

// Reserved words, delimiters and operators. The category code of an entry is
// its index in this table (index 0 is an unused placeholder so that code 0 can
// mean "not in the table").
string reseve[]=
{
    "","auto","break","case","char","const","continue","default","do","double","else","enum","extern",
    "float","for","goto","if","int","long","register","return","short","signed","sizeof","static","struct","switch","typedef",
    "unsigned","union","void","volatile","while","main","include","{","}",
    "(",")","[","]","\"","\'",
    "#",":",";","<","<=",">",">=","!=","==","+","-","*","/","%","++","--"
    ,"&&","&","||","|","^","!","~",">>","<<","=","+=","-=","%=","*=","<<=",">>=","&=","|=","/=","^=",",","."
};

// Characters that can start a delimiter or an operator.
// NOTE(review): '?' is accepted here but has no entry in `reseve`, so it is
// reported with code 0.
char deadline[]= {'?','!','%','&','(',')','*','+',',','-','.','/',':',';','<','=','>','^','{','|','}','~',']','[','\'','\"'};

// Lexeme -> category code, filled by init().
map<string,int>p;

// Category codes used by main(). The first three are table indices in
// `reseve`; the last four follow the final table index (80).
static const int kCodeDoubleQuote=41;
static const int kCodeSingleQuote=42;
static const int kCodePreprocessor=43;
static const int kCodeIdentifier=81;
static const int kCodeNumber=82;
static const int kCodeString=83;
static const int kCodeCharacter=84;
// Table indices 35..42 are the delimiters { } ( ) [ ] " '.
static const int kFirstDelimiter=35;
static const int kLastDelimiter=42;

// Returns the category code of `word`, or 0 when it is not in the table.
// Uses find() so that lookups never insert new entries into `p`.
static int tokencode(const string &word)
{
    const auto hit=p.find(word);
    return hit==p.end()?0:hit->second;
}

// True for the ASCII decimal digits '0'..'9'.
static bool isdecimaldigit(char ch)
{
    return ch>='0'&&ch<='9';
}

// Writes one "(lexeme,code,kind)" record to both the console and `file`.
// endl is used on purpose: error() terminates through exit(), which does not
// destroy main's local ofstream, so unflushed records would be lost.
static void report(ostream &file,const string &token,int code,const char *kind)
{
    cout<<"("<<token<<","<<code<<","<<kind<<")"<<endl;
    file<<"("<<token<<","<<code<<","<<kind<<")"<<endl;
}

// Returns true when `ch` can start a delimiter or an operator.
bool checkdeadline(char ch)
{
    for(const char candidate:deadline)
    {
        if(candidate==ch)
            return true;
    }
    return false;
}

// Reads one operator or delimiter starting at filewriter[i], preferring the
// longest match (three-character operators are listed before two-character
// ones). Anything else is returned as a single character.
string checkoperator(const string &filewriter,int &i)
{
    static const char *const multichar[]=
    {
        "<<=",">>=",
        "<=",">=","!=","==","++","--","&&","||",">>","<<",
        "+=","-=","%=","*=","&=","|=","/=","^="
    };
    for(const char *op:multichar)
    {
        const size_t len=strlen(op);
        // compare() yields non-zero when fewer than `len` characters remain.
        if(filewriter.compare(i,len,op)==0)
        {
            i+=static_cast<int>(len);
            return op;
        }
    }
    const string single=filewriter.substr(i,1);
    i++;
    return single;
}

// Reads an unsigned decimal number with an optional fractional part.
// Calls error() when a '.' is not followed by a digit.
string checkdight(const string &filewriter,int &i)
{
    const int total=static_cast<int>(filewriter.length());
    string processstring="";
    while(i<total&&isdecimaldigit(filewriter[i]))
    {
        processstring+=filewriter[i];
        i++;
    }
    if(i<total&&filewriter[i]=='.')
    {
        processstring+=filewriter[i];
        i++;
        if(!(i<total&&isdecimaldigit(filewriter[i])))
        {
            error();
        }
    }
    while(i<total&&isdecimaldigit(filewriter[i]))
    {
        processstring+=filewriter[i];
        i++;
    }
    return processstring;
}

// Returns true when `ch` is an ASCII letter.
bool checkletterchar(char ch)
{
    return (ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z');
}

// Reads one character of a character or string literal. A backslash followed
// by one of n a b f r t v ' " \ is consumed as a two-character escape;
// everything else is consumed as a single character.
string checkchar(const string &filewriter,int &i)
{
    static const string escapes="nabfrtv'\"\\";
    const int total=static_cast<int>(filewriter.length());
    int width=1;
    if(i+1<total&&filewriter[i]=='\\'&&escapes.find(filewriter[i+1])!=string::npos)
    {
        width=2;
    }
    const string processstring=filewriter.substr(i,width);
    i+=width;
    return processstring;
}

// Reads the body of a string literal: everything up to, but not including,
// the next unescaped double quote (or the end of the text).
string checkstring(const string &filewriter,int &i)
{
    const int total=static_cast<int>(filewriter.length());
    string processstring="";
    while(i<total&&filewriter[i]!='\"')
    {
        processstring+=checkchar(filewriter,i);
    }
    return processstring;
}

// Reads an identifier or keyword. The caller has already checked that
// filewriter[i] is a letter or '_'; the following letters, digits and
// underscores are appended.
string checkkey(const string &filewriter,int &i)
{
    const int total=static_cast<int>(filewriter.length());
    string processstring="";
    processstring+=filewriter[i];
    i++;
    while(i<total&&(filewriter[i]=='_'||checkletterchar(filewriter[i])||isdecimaldigit(filewriter[i])))
    {
        processstring+=filewriter[i];
        i++;
    }
    return processstring;
}

// Fills `p` so that every entry of `reseve` maps to its own index. These are
// the codes main() relies on (41 for ", 42 for ', 43 for #, 35..42 for the
// delimiters).
void init()
{
    int code=0;
    for(const auto &word:reseve)
    {
        p[word]=code;
        ++code;
    }
}

// Returns true when the file name is at least three characters long and ends
// in ".c".
bool checkfile(const string &filewriter)
{
    const size_t n=filewriter.length();
    return n>=3&&filewriter.compare(n-2,2,".c")==0;
}

// Loads `filename` and returns its text with comments removed.
//  - A // comment is dropped up to the end of its line.
//  - A /* */ comment is replaced by one space; line breaks inside it are
//    dropped as well.
//  - Comment markers inside string or character literals are left alone.
// Every source line that does not end inside a block comment is terminated
// with '\n', so a preprocessor line never runs into the next line.
// Calls error() when the name is not a .c file or the file cannot be opened.
string scanner(string filename)
{
    if(!checkfile(filename))
    {
        cout<<"这不是c文件"<<endl;
        error();
    }
    ifstream inf(filename);
    if(!inf)
    {
        cout<<"无法打开文件"<<endl;
        error();
    }
    string processstring="";
    string line;
    bool inblock=false; // inside a /* */ comment, possibly from an earlier line
    while(getline(inf,line))
    {
        // Drop the carriage return left behind by CRLF line endings.
        if(!line.empty()&&line[line.length()-1]=='\r')
            line.erase(line.length()-1);
        char quote='\0'; // opening quote of the literal being copied, if any
        size_t k=0;
        while(k<line.length())
        {
            const char ch=line[k];
            const bool hasnext=k+1<line.length();
            const char next=hasnext?line[k+1]:'\0';
            if(inblock)
            {
                if(ch=='*'&&next=='/')
                {
                    inblock=false;
                    k+=2;
                }
                else
                {
                    ++k;
                }
            }
            else if(quote!='\0')
            {
                processstring+=ch;
                if(ch=='\\'&&hasnext)
                {
                    // Copy the escaped character so an escaped quote does not
                    // end the literal.
                    processstring+=next;
                    k+=2;
                }
                else
                {
                    if(ch==quote)
                        quote='\0';
                    ++k;
                }
            }
            else if(ch=='/'&&next=='/')
            {
                break; // the rest of the line is a comment
            }
            else if(ch=='/'&&next=='*')
            {
                inblock=true;
                processstring+=' '; // keep the tokens around the comment apart
                k+=2;
            }
            else
            {
                if(ch=='\"'||ch=='\'')
                    quote=ch;
                processstring+=ch;
                ++k;
            }
        }
        if(!inblock)
            processstring+='\n';
    }
    return processstring;
}

// Prints "error" and terminates the program.
// NOTE(review): the exit status is 0 even though this is the failure path.
void error()
{
    cout<<"error"<<endl;
    exit(0);
}

// Strips the comments from inii.c, echoes the stripped text, then prints one
// "(lexeme,code,kind)" record per token to the console and to Resultfile.c.
int main()
{
    init();
    const string filewriter=scanner("inii.c");
    string processstring;
    ofstream onf;
    onf.open("Resultfile.c");
    cout<<"---扫描程序之后的程序---"<<endl;
    cout<<filewriter<<endl;
    cout<<"---单词---种类编码---单词种类---"<<endl;
    onf<<"---扫描程序之后的程序---"<<endl;
    onf<<filewriter<<endl;
    onf<<"---单词---种类编码---单词种类---"<<endl;
    const int total=static_cast<int>(filewriter.length());
    int i=0;
    while(i<total)
    {
        // Skip whitespace between tokens.
        while(i<total&&(filewriter[i]=='\n'||filewriter[i]==' '||filewriter[i]=='\t'))
            i++;
        if(i>=total) // only whitespace was left
            break;
        const char ch=filewriter[i];
        if(ch=='\"') // string literal: quote, body, quote
        {
            report(onf,"\"",kCodeDoubleQuote,"界符");
            i++;
            processstring=checkstring(filewriter,i);
            report(onf,processstring,kCodeString,"字符串常量");
            if(i<total&&filewriter[i]=='\"')
            {
                report(onf,"\"",kCodeDoubleQuote,"界符");
                i++;
            }
            else
            {
                error(); // unterminated string literal
            }
        }
        else if(ch=='\'') // character literal: quote, one character, quote
        {
            report(onf,"\'",kCodeSingleQuote,"界符");
            i++;
            processstring=checkchar(filewriter,i);
            report(onf,processstring,kCodeCharacter,"字符常量");
            if(i<total&&filewriter[i]=='\'')
            {
                report(onf,"\'",kCodeSingleQuote,"界符");
                i++;
            }
            else
            {
                error(); // unterminated or over-long character literal
            }
        }
        else if(isdecimaldigit(ch)) // numeric constant
        {
            processstring=checkdight(filewriter,i);
            report(onf,processstring,kCodeNumber,"数字常量");
        }
        else if(checkdeadline(ch)) // operator or delimiter
        {
            processstring=checkoperator(filewriter,i);
            const int code=tokencode(processstring);
            if(code>=kFirstDelimiter&&code<=kLastDelimiter)
                report(onf,processstring,code,"界符");
            else
                report(onf,processstring,code,"运算符");
        }
        else if(ch=='_'||checkletterchar(ch)) // identifier or keyword
        {
            processstring=checkkey(filewriter,i);
            const int code=tokencode(processstring);
            if(code!=0)
                report(onf,processstring,code,"关键字");
            else
                report(onf,processstring,kCodeIdentifier,"标识符");
        }
        else if(ch=='#') // preprocessor line: taken whole, up to the newline
        {
            processstring="";
            while(i<total&&filewriter[i]!='\n')
            {
                processstring+=filewriter[i];
                i++;
            }
            report(onf,processstring,kCodePreprocessor,"宏定义");
            i++; // step over the newline
        }
        else
        {
            error(); // character that cannot start any token
        }
    }
    return 0;
}