第五次作业,词法分析程序设计与实践
词法分析程序(Lexical Analyzer)要求:
- 从左至右扫描构成源程序的字符流
- 识别出有词法意义的单词(Lexemes)
- 返回单词记录(单词类别,单词本身)
- 滤掉空格
- 跳过注释
- 发现词法错误
程序结构:
输入:字符流(什么输入方式,什么数据结构保存)
处理:
–遍历(什么遍历方式)
–词法规则
输出:单词流(什么输出形式)
–二元组
单词类别:
1.标识符(10)
2.无符号数(11)
3.保留字(一词一码)
4.运算符(一词一码)
5.界符(一词一码)
单词符号 |
种别码 |
单词符号 |
种别码 |
begin |
1 |
: |
17 |
if |
2 |
:= |
18 |
then |
3 |
< |
20 |
while |
4 |
<= |
21 |
do |
5 |
<> |
22 |
end |
6 |
> |
23 |
l(l|d)* |
10 |
>= |
24 |
dd* |
11 |
= |
25 |
+ |
13 |
; |
26 |
- |
14 |
( |
27 |
* |
15 |
) |
28 |
/ |
16 |
# |
0 |
代码如下:
#include <cstdio>
#include <cstring>
#include <iostream>
using namespace std;
char ch;
char arr1[100], arr2[15],arr3[15];//arr1输入,arr2输出
int syn, row, sum ; //syn为单词种别码,sum为整形常数。
int p=0,m=0,n=0;
char* words[6] = {"begin", "if", "then", "while", "do", "end"};
void sca();
int main()
{
p = 0;
row = 1;
cout << "请输入字符,以#号结束" << endl;
do
{
cin.get(ch);
arr1[p++] = ch;
} while (ch != '#');
p = 0;
do
{
sca();
switch (syn)
{
case 11:
cout << '(' << syn << "," << sum << ")" << endl;
break;
case -1:
cout << "该符号“"<<arr3<<"”无种别码" << "!" << endl;
break;
case -2:
++row;
break;
default:
cout << "(" << syn << "," << arr2 << ")" << endl;
break;
}
} while (syn != 0);
}
void sca()
{
for (n = 0; n < 8; n++)
{
arr2[n] = NULL;
}
ch = arr1[p++];
while(ch == ' ')//空指针+1
{
ch = arr1[p];
p++;
}
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
{
m = 0;
while((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
{
arr2[m++] = ch;
ch = arr1[p++];
}
arr2[m++] = '\0';
--p;
syn = 10;
for (n = 0; n < 6; ++n)
{
if (strcmp(arr2, words[n]) == 0)
{
syn = n + 1;
break;
}
}
}
else if ((ch >= '0' && ch <= '9'))
{
sum = 0;
while (ch >= '0' && ch <= '9')
{
sum = sum * 10 + ch - '0';
ch = arr1[p++];
}
--p;
syn = 11;
if (sum > 32767)
{
syn = -1;
}
}
else switch (ch)
{
case '<':
m = 0;
arr2[m++] = ch;
ch = arr1[p++];
if (ch == '>')
{
syn = 21;
arr2[m++] = ch;
}
else if (ch == '=')
{
syn = 22;
arr2[m++] = ch;
}
else
{
syn = 23;
--p;
}
break;
case '>':
m = 0;
arr2[m++] = ch;
ch = arr1[p++];
if (ch == '=')
{
syn = 24;
arr2[m++] = ch;
}
else
{
syn = 20;
--p;
}
break;
case ':':
m = 0;
arr2[m++] = ch;
ch = arr1[p++];
if (ch == '=')
{
syn = 18;
arr2[m++] = ch;
}
else
{
syn = 17;
--p;
}
break;
case '*':
syn = 13;
arr2[0] = ch;
break;
case '/':
syn = 14;
arr2[0] = ch;
break;
case '+':
syn = 15;
arr2[0] = ch;
break;
case '-':
syn = 16;
arr2[0] = ch;
break;
case '=':
syn = 25;
arr2[0] = ch;
break;
case ';':
syn = 26;
arr2[0] = ch;
break;
case '(':
syn = 27;
arr2[0] = ch;
break;
case ')':
syn = 28;
arr2[0] = ch;
break;
case '#':
syn = 0;
arr2[0] = ch;
break;
case '\n':
syn = -2;
break;
default:
syn = -1;
arr3[0]=ch;
break;
}
} } while (syn != 0);
}
运行截图如下: