词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:文件读取
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
单词符号 |
种别码 |
单词符号 |
种别码 |
单词符号 |
种别码 |
char |
12 |
: |
17 |
{ |
42 |
int |
13 |
/\ |
37 |
} |
43 |
if |
14 |
< |
30 |
[ |
44 |
else |
15 |
<= |
28 |
] |
43 |
return |
16 |
<> |
29 |
" |
47 |
END |
3 |
> |
32 |
, |
48 |
l(l|d)* |
25 |
>= |
31 |
' |
49 |
dd* |
26 |
= |
27 |
& |
50 |
+ |
33 |
; |
39 |
&& |
51 |
- |
34 |
( |
40 |
\\ |
52 |
* |
35 |
) |
41 |
||
/ |
36 |
: |
38 |
源代码如下:
#include<stdio.h> #include<conio.h> #include<math.h> #include<string.h> #include<stdlib.h> int i, row = 0, line = 0; char test[1000]; //test文件中的字符 int number[100]; //常数表 char mark[100][5]; //标识符表 //词法分析 int wordanalysis() { //标识符和保留字 if ((test[i] >= 'A'&&test[i] <= 'Z')||(test[i]>='a'&&test[i]<='z')) { char word[10]; //保留字表 char pro[100][100] = { "PROGRAM", "BEGIN", "END", "VAR", "INTEGER", "WHILE", "IF", "THEN", "ELSE", "DO", "PROCEDURE" ,"char", "int","if","else","var" ,"return","break", "do","while","for","double","float","short"}; int n = 0; word[n++] = test[i++]; while ((test[i] >= 'A'&&test[i] <= 'Z') || (test[i] >= '0' && test[i] <= '9')||(test[i]>='a'&&test[i]<='z')) { word[n++] = test[i++]; } word[n] = '\0'; i--; //判断该标识符是否为保留字 for (n = 0; n < 100; n++) { if (strcmp(word, pro[n]) == 0) { printf(">> %s\t(%d,-) 保留字\n", pro[n], n + 1); return 3; } } //判断该标识符是否在标识符表中 int m = 0; if (line != 0) { int q = 0; while (q<line) { if (strcmp(word, mark[q++]) == 0) { printf(">> %s\t(25,%d) 标识符\n", word, q); return 3; } } } //将该标识符保存到标识符表中 strcpy(mark[line], word); printf(">> %s\t(25, %d) 标识符\n", word, line + 1); line++; return 3; } //数字 else if (test[i] >= '0' && test[i] <= '9') { char x[100]; int n = 0; x[n++] = test[i++]; while (test[i] >= '0' && test[i] <= '9') { x[n++] = test[i++]; } x[n] = '\0'; i--; int num = atoi(x); //将字符串转换成int型 //判断该常数是否存在于常数表中 if (row != 0) { for(int y=0;y<row;y++) { if(number[y]==num) { printf(">> %d\t(26,%d) 数字\n", num, y + 1); return 3; } } } //将该常数保存到标识符表中 number[row]=num; int line = row; printf(">> %d\t(26,%d) 数字\n", num, line + 1); row++; return 3; } //各种符号 else switch (test[i]) { case ' ': case '\n': return -1; case '#': return 0; case '=':printf(">> =\t(27,-)\n"); return 3; case '<': i++; if (test[i] == '=') { printf(">> <= \t(28,-)\n"); return 3; } else if (test[i] == '>') { printf(">> <>\t(29,-)\n"); return 3; } else { i--; printf(">> <\t(30,-)\n"); return 3; } case '>': i++; if (test[i] == '=') { printf(">> >=\t(31,-)\n"); return 3; } else { i--; printf(">> >\t(32,-)\n"); return 3; } case '+': printf(">> +\t(33,-)\n"); return 3; case '-': printf(">> -\t(34,-)\n"); return 3; case '*': printf(">> *\t(35,-)\n"); return 3; case '/': i++; if(test[i]!='/'){ i--; printf(">> /\t(36,-)\n"); return 3; } else{ while(1){ if(test[i++]=='\n') return -1; } printf(">> //\t(37,-)\n");return 3; } case ':': printf(">> :\t(38,-)\n"); return 3; case ';': printf(">> ;\t(39,-)\n"); return 3; case '(': printf(">> (\t(40,-)\n"); return 3; case ')': printf(">> )\t(41,-)\n"); return 3; case '{': printf(">> {\t(42,-)\n"); return 3; case '}': printf(">> }\t(43,-)\n"); return 3; case '[': printf(">> [\t(44,-)\n"); return 3; case ']': printf(">> ]\t(45,-)\n"); return 3; case '|': printf(">> |\t(46,-)\n"); return 3; case '"': printf(">> \"\t(47,-)\n"); return 3; case ',': printf(">> ,\t(48,-)\n"); return 3; case '\'': printf(">> '\t(49,-)\n"); return 3;//单引号 case '&': i++; if(test[i]!='&'){ i--; printf(">> &\t(50,-)\n"); return 3; } else{ printf(">> &&\t(51,-)\n");return 3; } case '\\': printf(">> \\\t(52,-)\n"); return 3; } } int main() { int c = 0; int m; i = 0; FILE *fp; fp=fopen("test.txt","r"); if (fp == NULL) { printf("can't open file!\n"); exit(0); } while (!feof(fp)) { test[c++] = fgetc(fp); } test[c] = '#'; do { m = wordanalysis(); switch (m) { case -1:i++; break; case 0: i++; break; case 3: i++; break; } } while (m != 0); return 0; }
test.txt文件内容:
int i, row = 0, line = 0; char test[1000]; //test文件中的字符 int number[100]; //常数表 char mark[100][5]; //标识符表 //词法分析 int wordanalysis() { //标识符和保留字 if ((test[i] >= 'A'&&test[i] <= 'Z')||(test[i]>='a'&&test[i]<='z')) { char word[10]; //保留字表