《编译原理》实验四:自下而上的语法分析(SLR分析法)
本实验采用SLR分析法,对PL/0语言的算术运算进行语法分析。
本程序由我个人独立完成,代码为C++98,因此可能较丑陋,且不能保证完全正确,还请见谅 ( ̄□ ̄;)
一. 设计思想
1. 文法
因实验二、三中的文法均不是LR(0)文法,所以本次实验采用了实验三中的文法进行SLR分析。
(1)EBNF
<表达式> ::= [+|-]<项>{<加法运算符> <项>}
<项> ::= <因子>{<乘法运算符> <因子>}
<因子> ::= <标识符>|<无符号整数>| ‘(’<表达式>‘)’
<加法运算符> ::= +|-
<乘法运算符> ::= *|/
(2)对EBNF中的各对象简称
E:表达式
T:项
F:因子
b:标识符
z:无符号整数
(3)将文法改写成常规的产生式形式
E -> T|E+T|E-T
T -> F|T*F|T/F
F -> (E)|b|z
(4)拓展文法
(0) S -> E
(1) E -> T
(2) E -> E+T
(3) E -> E-T
(4) T -> F
(5) T -> T*F
(6) T -> T/F
(7) F -> (E)
(8) F -> b
(9) F -> z
2. 项目集规范族
其中,I1、I2、I12、I13中存在“移进”-“归约”冲突,所以该文法不是LR(0)文法。
对于I1,Follow(S) = {#} ∩ {+, -} = Ø;
对于I2,Follow(E) = {#, +, -, )} ∩ {*, /} = Ø;
对于I12,Follow(E) = {#, +, -, )} ∩ {*, /} = Ø;
对于I13,Follow(E) = {#, +, -, )} ∩ {*, /} = Ø;
所以,该文法是SLR文法。
3. SLR分析表
Follow(T) = {#, +, -, ), *, /};
Follow(F) = {#, +, -, ), *, /};
状态 | ACTION | GOTO | ||||||||||
+ | - | * | / | ( | ) | b | z | # | E | T | F | |
0 | S4 | S5 | S6 | 1 | 2 | 3 | ||||||
1 | S7 | S8 | acc | |||||||||
2 | r1 | r1 | S9 | S10 | r1 | r1 | ||||||
3 | r4 | r4 | r4 | r4 | r4 | r4 | ||||||
4 | S4 | S5 | S6 | 11 | 2 | 3 | ||||||
5 | r8 | r8 | r8 | r8 | r8 | r8 | ||||||
6 | r9 | r9 | r9 | r9 | r9 | r9 | ||||||
7 | S4 | S5 | S6 | 12 | 3 | |||||||
8 | S4 | S5 | S6 | 13 | 3 | |||||||
9 | S4 | S5 | S6 | 14 | ||||||||
10 | S4 | S5 | S6 | 15 | ||||||||
11 | S7 | S8 | S16 | |||||||||
12 | r2 | r2 | S9 | S10 | r2 | r2 | ||||||
13 | r3 | r3 | S9 | S10 | r3 | r3 | ||||||
14 | r5 | r5 | r5 | r5 | r5 | r5 | ||||||
15 | r6 | r6 | r6 | r6 | r6 | r6 | ||||||
16 | r7 | r7 | r7 | r7 | r7 | r7 |
二. 算法流程
三. 源程序
1 #include <iostream>
2 #include <string>
3 #include <vector>
4 #include <map>
5 #include <stack>
6 using namespace std;
7
8 vector<vector<string> > input; // 输入(逗号两边)
9 const int nt_num=10; // 非终结符数目
10 const int t_num=10; // 终结符数目
11 const int st_num=20; // 状态数
12 vector<char> nonterminal,terminal; // 非终结符数组 终结符数组
13 map<char,int> ntchar2idx; // 非终结符转下标
14 map<char,int> tchar2idx; // 终结符转下标
15 // SLR分析表——ACTION表:移进为正数,归约为负数,acc为100,其它为0,以省略s、r、acc符号
16 vector<vector<int> > table_action(st_num,vector<int>(nt_num,0));
17 vector<vector<int> > table_goto(st_num,vector<int>(t_num,0)); // SLR分析表——GOTO表
18 vector<string> exgrammer; // 拓展文法
19 stack<int> state; // 状态栈
20 stack<char> symbol; // 符号栈
21
22 void getNonterminal() // 非终结符数组
23 {
24 // TODO
25 nonterminal.push_back('E');
26 nonterminal.push_back('T');
27 nonterminal.push_back('F');
28 }
29
30 void getTerminal() // 终结符数组
31 {
32 // TODO
33 terminal.push_back('+');
34 terminal.push_back('-');
35 terminal.push_back('*');
36 terminal.push_back('/');
37 terminal.push_back('(');
38 terminal.push_back(')');
39 terminal.push_back('b');
40 terminal.push_back('z');
41 terminal.push_back('#');
42 }
43
44 bool isTerminal(const char& ch) // 判断是否为终结符
45 {
46 for (int i=0;i<(int)terminal.size();++i)
47 {
48 if (ch==terminal[i])
49 {
50 return true;
51 }
52 }
53 return false;
54 }
55
56 void char2idx() // (非)终结符转下标
57 {
58 char ch;
59 for (int i=0;i<(int)nonterminal.size();++i)
60 {
61 ch=nonterminal[i];
62 ntchar2idx[ch]=i;
63 }
64 for (int i=0;i<(int)terminal.size();++i)
65 {
66 ch=terminal[i];
67 tchar2idx[ch]=i;
68 }
69 }
70
71 void createTable() // 创建SLR分析表
72 {
73 // TODO
74 // ACTION表
75 int temp_table_action[st_num][nt_num]={
76 {0,0,0,0,4,0,5,6,0},
77 {7,8,0,0,0,0,0,0,100},
78 {-1,-1,9,10,0,-1,0,0,-1},
79 {-4,-4,-4,-4,0,-4,0,0,-4},
80 {0,0,0,0,4,0,5,6,0},
81 {-8,-8,-8,-8,0,-8,0,0,-8},
82 {-9,-9,-9,-9,0,-9,0,0,-9},
83 {0,0,0,0,4,0,5,6,0},
84 {0,0,0,0,4,0,5,6,0},
85 {0,0,0,0,4,0,5,6,0},
86 {0,0,0,0,4,0,5,6,0},
87 {7,8,0,0,0,16,0,0,0},
88 {-2,-2,9,10,0,-2,0,0,-2},
89 {-3,-3,9,10,0,-3,0,0,-3},
90 {-5,-5,-5,-5,0,-5,0,0,-5},
91 {-6,-6,-6,-6,0,-6,0,0,-6},
92 {-7,-7,-7,-7,0,-7,0,0,-7}
93 };
94 for (int i=0;i<st_num;++i)
95 {
96 for (int j=0;j<nt_num;++j)
97 {
98 table_action[i][j]=temp_table_action[i][j];
99 }
100 }
101
102 // GOTO表
103 int temp_table_goto[st_num][t_num]={
104 {1,2,3},
105 {0,0,0},
106 {0,0,0},
107 {0,0,0},
108 {11,2,3},
109 {0,0,0},
110 {0,0,0},
111 {0,12,3},
112 {0,13,3},
113 {0,0,14},
114 {0,0,15},
115 {0,0,0},
116 {0,0,0},
117 {0,0,0},
118 {0,0,0},
119 {0,0,0},
120 {0,0,0}
121 };
122 for (int i=0;i<st_num;++i)
123 {
124 for (int j=0;j<t_num;++j)
125 {
126 table_goto[i][j]=temp_table_goto[i][j];
127 }
128 }
129 }
130
131 void getExpandedGrammer() // 拓展文法
132 {
133 exgrammer.push_back("S->E");
134 exgrammer.push_back("E->T");
135 exgrammer.push_back("E->E+T");
136 exgrammer.push_back("E->E-T");
137 exgrammer.push_back("T->F");
138 exgrammer.push_back("T->T*F");
139 exgrammer.push_back("T->T/F");
140 exgrammer.push_back("F->(E)");
141 exgrammer.push_back("F->b");
142 exgrammer.push_back("F->z");
143 }
144
145 bool ERROR(int pos)
146 {
147 //cout<<"wrong_pos = "<<pos<<endl;
148 return false;
149 }
150
151 bool SLRAnalyse() // SLR分析程序
152 {
153 state.push(0);
154 symbol.push('#');
155 int i=0;
156 string t0;
157 char t1;
158 while (i<(int)input.size())
159 { //cout<<"state.size="<<state.size()<<" symbol.size="<<symbol.size()<<" i="<<i<<endl;
160 t0=input[i][0];
161 if (t0=="ident")
162 {
163 t1='b';
164 }
165 else if (t0=="number")
166 {
167 t1='z';
168 }
169 else
170 {
171 t1=input[i][1][0];
172 }
173
174 int ret=table_action[state.top()][tchar2idx[t1]];
175 //cout<<"ret="<<ret<<" "<<state.top()<<" "<<t1<<endl;
176 if (ret==100) // acc
177 {
178 return true;
179 }
180 else if (ret>0) //移进
181 {
182 //shift()
183 state.push(ret);
184 symbol.push(t1);
185 ++i;
186 }
187 else if (ret<0) // 归约
188 {
189 //reduce();
190 string g=exgrammer[-ret];
191 int len=g.size()-3; // 需要归约的长度
192
193 while (len--)
194 {
195 state.pop();
196 symbol.pop();
197 }
198 state.push(table_goto[state.top()][ntchar2idx[g[0]]]);
199 symbol.push(g[0]);
200 }
201 else
202 {
203 return ERROR(0);
204 }
205 }
206 return ERROR(100); // 未到达acc
207 }
208
209 int main()
210 {
211 getNonterminal();
212 getTerminal();
213 char2idx();
214 createTable();
215 getExpandedGrammer();
216
217 string str;
218 vector<string> vec;
219 while (cin>>str)
220 {
221 int pos=str.find(',');
222 vec.clear();
223 vec.push_back(str.substr(1,pos-1));
224 vec.push_back(str.substr(pos+1,str.size()-pos-2));
225 input.push_back(vec);
226 }
227 vec.clear();
228 vec.push_back("#");vec.push_back("#");
229 input.push_back(vec);
230
231 if (SLRAnalyse())
232 {
233 cout<<"Yes,it is correct."<<endl;
234 }
235 else
236 {
237 cout<<"No,it is wrong."<<endl;
238 }
239
240 return 0;
241 }
其中,考虑到建表复杂,SLR分析表为手动填入。
四. 调试数据
1.样例输入:
1 (lparen,()
2 (ident,a)
3 (plus,+)
4 (number,15)
5 (rparen,))
6 (times,*)
7 (ident,b)
样例输出:
Yes,it is correct.
图片展示:
2.样例输入:
1 (number,0)
2 (plus,+)
3 (number,10)
4 (times,*)
5 (ident,b)
6 (minus,-)
7 (lparen,()
8 (ident,z)
9 (slash,/)
10 (number,3)
11 (rparen,))
样例输出:
Yes,it is correct.
图片展示:
3.样例输入:
1 (lparen,()
2 (lparen,()
3 (ident,a)
4 (plus,+)
5 (number,3)
6 (rparen,))
7 (times,*)
8 (lparen,()
9 (number,0)
10 (minus,-)
11 (ident,b)
12 (rparen,))
13 (minus,-)
14 (ident,c)
15 (slash,/)
16 (number,0)
17 (plus,+)
18 (lparen,()
19 (ident,a)
20 (times,*)
21 (ident,d)
22 (slash,/)
23 (ident,e)
24 (plus,+)
25 (ident,f)
26 (rparen,))
27 (rparen,))
样例输出:
Yes,it is correct.
图片展示:
4.样例输入:
1 (lparen,()
2 (ident,a)
3 (plus,+)
4 (number,15)
5 (rparen,))
6 (times,*)
样例输出:
No,it is wrong.
图片展示:
5.样例输入:
1 (ident,a)
2 (plus,+)
3 (number,15)
4 (rparen,))
5 (times,*)
6 (ident,b)
样例输出:
No,it is wrong.
图片展示:
6.样例输入:
1 (number,0)
2 (plus,+)
3 (number,10)
4 (times,*)
5 (ident,b)
6 (lparen,()
7 (ident,z)
8 (slash,/)
9 (number,3)
10 (rparen,))
样例输出:
No,it is wrong.
图片展示:
注:文法只包含简单的+、-、*、/运算等,而像实验1中的const和:=等符号均未引入。
五. 实验调试情况及体会
略