2016.9.30 编译原理-词法分析器
词法分析程序的功能:
对用户输入的字符串从左到右进行扫描和分解,根据词法规则识别出一个一个具有独立意义的单词符号,并产生相应单词的种别码;若是发现词法错误,则返回出错信息。
符号与种别码对照表:
用文法描述词法规则:
代码如下:
#include <stdio.h>
#include <string.h>

/*
 * Lexical analyzer for a small teaching language.
 *
 * Reads one line from standard input, scans it from left to right and prints
 * one "<type code>\t<lexeme>" line per recognized token.  When a character
 * that starts no known token is met, an error message is printed and the
 * scan stops.
 *
 * Type codes assigned by judge():
 *   begin 1, if 2, then 3, while 4, do 5, end 6,
 *   identifier 10, unsigned integer 11,
 *   + 13, - 14, * 15, / 16, : 17, := 18,
 *   < 20, <= 21, <> 22, > 23, >= 24, = 25,
 *   ; 26, ( 27, ) 28, # 0,
 *   -1 for an unrecognized character (or end of input).
 */

enum {
    INPUT_SIZE = 300,   /* capacity of the input line, terminator included */
    KEYWORD_COUNT = 6
};

char str[INPUT_SIZE];    /* the line being scanned */
char token[INPUT_SIZE];  /* lexeme of the most recent token; sized like str so
                            that no lexeme taken from str can overflow it */
char ch;                 /* character most recently examined by the scanner */
int i;                   /* index of the next unread character in str */
int x;                   /* number of characters currently stored in token */
int n;                   /* keyword-table index used during keyword lookup */
int typenum;             /* type code of the most recent token */

const char *word[KEYWORD_COUNT] = { "begin", "if", "then", "while", "do", "end" };

void judge(void);

/* Returns non-zero when c is an ASCII letter. */
static int is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Returns non-zero when c is an ASCII decimal digit. */
static int is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/* Advances i past any spaces and tabs at the current position. */
static void skip_blanks(void)
{
    while (str[i] == ' ' || str[i] == '\t') {
        i++;
    }
}

/* Appends c to token, always leaving room for the terminating NUL. */
static void append_char(char c)
{
    if (x < (int)sizeof token - 1) {
        token[x] = c;
        x++;
    }
}

/*
 * Looks at the next unread character.  When it equals `expected`, it is
 * appended to token and consumed, and 1 is returned; otherwise nothing is
 * consumed and 0 is returned.
 */
static int match_next(char expected)
{
    ch = str[i];
    if (ch != expected) {
        return 0;
    }
    append_char(ch);
    i++;
    return 1;
}

/*
 * Scans the next token of str starting at index i.
 *
 * On return, token holds the lexeme (NUL-terminated), typenum holds its type
 * code and i indexes the first character after the token.  For a character
 * that starts no token, typenum is -1 and token is empty.  At end of input
 * typenum is -1 and i is left on the terminator, so the scanner never reads
 * beyond the end of the string.
 */
void judge(void)
{
    /* Reset the lexeme buffer. */
    memset(token, 0, sizeof token);
    x = 0;

    /* Ignore leading blanks. */
    skip_blanks();
    ch = str[i];
    if (ch == '\0') {
        typenum = -1;
        return;
    }
    i++;

    /* A letter starts an identifier or a keyword. */
    if (is_letter(ch)) {
        append_char(ch);
        while (is_letter(str[i]) || is_digit(str[i])) {
            ch = str[i];
            append_char(ch);
            i++;
        }
        typenum = 10;
        /* Keywords take their 1-based position in the table as type code. */
        for (n = 0; n < KEYWORD_COUNT; n++) {
            if (strcmp(token, word[n]) == 0) {
                typenum = n + 1;
                break;
            }
        }
        return;
    }

    /* A digit starts an unsigned integer. */
    if (is_digit(ch)) {
        append_char(ch);
        while (is_digit(str[i])) {
            ch = str[i];
            append_char(ch);
            i++;
        }
        typenum = 11;
        return;
    }

    /* Everything else is an operator or delimiter, possibly two characters. */
    append_char(ch);
    switch (ch) {
    case '<':
        if (match_next('=')) {
            typenum = 21;
        } else if (match_next('>')) {
            typenum = 22;
        } else {
            typenum = 20;
        }
        break;
    case '>':
        typenum = match_next('=') ? 24 : 23;
        break;
    case ':':
        typenum = match_next('=') ? 18 : 17;
        break;
    case '+':
        typenum = 13;
        break;
    case '-':
        typenum = 14;
        break;
    case '*':
        typenum = 15;
        break;
    case '/':
        typenum = 16;
        break;
    case '=':
        typenum = 25;
        break;
    case ';':
        typenum = 26;
        break;
    case '(':
        typenum = 27;
        break;
    case ')':
        typenum = 28;
        break;
    case '#':
        typenum = 0;
        break;
    default:
        /* Unknown character: report an error with an empty lexeme. */
        x = 0;
        token[0] = '\0';
        typenum = -1;
        break;
    }
}

/*
 * Prompts for one line, then prints every token found in it.  Scanning stops
 * at the end of the line or at the first lexical error.
 */
int main(void)
{
    printf("请输入字符串:");
    fflush(stdout);

    /* Bounded read; an immediate end-of-file is treated as an empty line. */
    if (fgets(str, sizeof str, stdin) == NULL) {
        str[0] = '\0';
    }
    /* fgets keeps the line ending; cut the string at the first CR or LF. */
    str[strcspn(str, "\r\n")] = '\0';

    i = 0;
    for (;;) {
        /* Stop cleanly at end of input so trailing blanks are not an error. */
        skip_blanks();
        if (str[i] == '\0') {
            break;
        }

        judge();
        if (typenum == -1) {
            printf("错误!\n");
            return 0;
        }
        printf("%d\t%s\n", typenum, token);
    }
    return 0;
}