// Lexical analyzer
#include <bits/stdc++.h>
using namespace std;
// Lexeme tables: reserved words, user identifiers, and operator/delimiter symbols.
vector<string> key;
vector<string> identify;
vector<string> symbol;

// Lexeme text -> position of that lexeme in the matching table above.
unordered_map<string, int> key_map;
unordered_map<string, int> identify_map;
unordered_map<string, int> symbol_map;

// Keyword or symbol text -> token type code (table position plus the offset below).
unordered_map<string, int> letter_map;

// Type code given to the first keyword and to the first symbol, respectively.
int key_offset = 1;
int symbol_offset = 12;
// One lexical token, stored as a (type, attribute) pair.
struct node {
int type; // token type code: a keyword/symbol code, identify_type, or numbers_type
string val; // attribute: "-" for keywords and symbols, otherwise the identifier index or the number's digits as text
};
const int numbers_type = 27; // token type code emitted for numeric literals
const int identify_type = 26; // token type code emitted for user-defined identifiers
vector<node> tokens; // lexical analysis results
// NOTE(review): a global named `index` may clash with the legacy index() function that
// some C libraries declare in <strings.h> (which <bits/stdc++.h> can pull in) — confirm
// on the target toolchain.
int index = 0; // index into tokens
void init() {
string str_key = "begin end if then while do const var call procedure odd", s;
string str_symbol = "+ - * / = # < > := ( ) , . ;";
stringstream key_in;
key_in << str_key;
while (key_in >> s) {
key.push_back(s);
key_map[s] = int(key.size()) - 1;
letter_map[s] = int(key.size()) - 1 + key_offset;
}
stringstream symbolin;
symbolin << str_symbol;
while (symbolin >> s) {
symbol.push_back(s);
symbol_map[s] = int(symbol.size()) - 1;
letter_map[s] = int(symbol.size()) - 1 + symbol_offset;
}
}
// Reads all of standard input, strips comments, and splits what remains into
// whitespace-separated fragments.
//
// Parameters:
//   ans - receives the fragments in input order; previous contents are discarded.
//
// Comment rules:
//   - "//" outside a block comment discards the rest of that line.
//   - "(*" ... "*)" is a block comment that may span several lines. The
//     delimiters are recognised anywhere in the text (they do not have to be
//     surrounded by whitespace), and "//" or "(*" inside a block comment are
//     treated as ordinary comment text.
//   - A block comment acts as a separator, so "a(*c*)b" yields "a" and "b".
//   - An unterminated block comment silently swallows the rest of the input.
void read(std::vector<std::string> &ans) {
    ans.clear();

    std::string text;          // comment-free copy of the input
    std::string line;
    bool in_block = false;     // true while inside an open "(* ... *)" comment

    while (std::getline(std::cin, line)) {
        std::size_t pos = 0;
        while (pos < line.size()) {
            if (in_block) {
                const std::size_t close = line.find("*)", pos);
                if (close == std::string::npos) {
                    break;  // comment continues on the next line
                }
                in_block = false;
                pos = close + 2;
                text.push_back(' ');  // keep lexemes on either side of the comment apart
                continue;
            }

            const std::size_t slash = line.find("//", pos);
            const std::size_t open = line.find("(*", pos);
            if (slash == std::string::npos && open == std::string::npos) {
                text.append(line, pos, std::string::npos);
                break;
            }
            // npos is the largest size_t, so a missing marker always loses this comparison.
            if (open < slash) {
                text.append(line, pos, open - pos);
                in_block = true;
                pos = open + 2;
            } else {
                text.append(line, pos, slash - pos);
                break;  // line comment: drop the remainder of the line
            }
        }
        text.push_back('\n');
    }

    std::istringstream words(text);
    std::string word;
    while (words >> word) {
        ans.push_back(word);
    }
}
// Token format: (type code, attribute value)
// Converts whitespace-separated source fragments into (type, attribute) tokens.
//
// Parameters:
//   str - fragments to scan; one fragment may contain several lexemes glued
//         together, e.g. "x:=1;".
//   ans - output token list; any previous contents are discarded.
//
// Emitted tokens:
//   keywords / symbols -> {letter_map code, "-"}
//   identifiers        -> {identify_type, position in `identify` as text}
//   numbers            -> {numbers_type, the digit string}
//
// Side effects: identifiers seen for the first time are appended to `identify`
// and registered in `identify_map`. Lexical errors print a message to stdout
// and then trip assert(false).
void LexicalAnalysis(vector<string> &str, vector<node> &ans) {
    ans.clear();

    // The <cctype> classifiers have undefined behaviour for negative char
    // values (any non-ASCII byte where char is signed), so every byte is
    // widened through unsigned char before classification.
    const auto is_digit = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };
    const auto is_alpha = [](char c) { return isalpha(static_cast<unsigned char>(c)) != 0; };

    for (const string &s : str) {
        // Fast paths: the whole fragment is one already-known lexeme.
        if (key_map.count(s)) {
            ans.push_back({letter_map[s], "-"});
            continue;
        }
        if (symbol_map.count(s)) {
            if (int(s.size()) > 10) {
                cout << "symbol's size is too long!\n";
                assert(false);
            }
            ans.push_back({letter_map[s], "-"});
            continue;
        }
        if (identify_map.count(s)) {
            ans.push_back({identify_type, to_string(identify_map[s])});
            continue;
        }

        // Slow path: scan the fragment character by character.
        const int ns = static_cast<int>(s.size());
        for (int i = 0; i < ns; ++i) {
            if (is_digit(s[i])) {
                // Number: maximal run of digits.
                int j = i;
                while (j + 1 < ns && is_digit(s[j + 1])) {
                    ++j;
                }
                ans.push_back({numbers_type, s.substr(i, j - i + 1)});
                i = j;
            } else if (is_alpha(s[i])) {
                // Word: a letter followed by letters/digits; keyword or identifier.
                int j = i;
                while (j + 1 < ns && (is_alpha(s[j + 1]) || is_digit(s[j + 1]))) {
                    ++j;
                }
                const string t = s.substr(i, j - i + 1);
                if (key_map.count(t)) {
                    ans.push_back({letter_map[t], "-"});
                } else {
                    if (int(t.size()) > 10) {
                        cout << "identify's size is too long!\n";
                        assert(false);
                    }
                    if (!identify_map.count(t)) {
                        identify.push_back(t);
                        identify_map[t] = (int)identify.size() - 1;
                    }
                    ans.push_back({identify_type, to_string(identify_map[t])});
                }
                i = j;
            } else {
                // Symbol: try the single character first, then a two-character
                // operator such as ":=".
                string t(1, s[i]);
                if (!symbol_map.count(t)) {
                    if (i + 1 >= ns) {
                        cout << "illegal!\n";
                        assert(false);
                        continue;  // only reached when asserts are compiled out
                    }
                    t += s[++i];
                    if (!symbol_map.count(t)) {
                        cout << "illegal!\n";
                        assert(false);
                        continue;  // only reached when asserts are compiled out
                    }
                }
                ans.push_back({letter_map[t], "-"});
            }
        }
    }
}
int main() {
init();
vector<string> str;
read(str);
vector<node> ans;
LexicalAnalysis(str, ans);
for (auto [type, val]: ans) {
cout << "(" << type << "," << val << ")\n";
}
return 0;
}