词法分析器
单词状态转换图
算法描述
0、单词以字母开头(保留字、标识符),通过查保留字表可以确定是哪种
1、单词以数字开头时,继续扫描并判断它是正整数、小数还是科学计数法表示的数
2、遇到运算符、界符直接存到相关表
3、遇到>时需要判断其后的字符是否与>构成一个整体(如>=、>>):如果是,将整体存入界符/运算符表;如果不是,只将>存入表中。(=、<、!、&、|的处理类似)
4、上图中其它符号会作为一个新单词的开始继续扫描
5、遇到空格会直接跳过寻找下一个单词。
代码实现
import java.io.*;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * A small hand-written lexical analyzer for C source text.
 *
 * <p>Usage: call one of the {@code filter} methods to strip the leading
 * {@code #include}/{@code #define} lines and the comments, then pass the result to
 * {@link #scanner(String)}. The recognized tokens are collected in the four public maps
 * (reserved words, identifiers, numbers, operators/delimiters), in first-seen order.
 */
public class Word1 {
    /** The 32 reserved words of C. A reserved word's code is its index + 1 (1..32). */
    public static String [] retainWords = new String[]{
        "auto", "break", "case", "char", "const", "continue",
        "default", "do", "double", "else", "enum", "extern",
        "float", "for", "goto", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while"
    };

    /** Operators and delimiters. An entry's code is 33 + its index. */
    public static String [] operatorWords = new String []{
        "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
        "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
        "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
        "}", "\\", ".", ":", "!"};

    /** Header names collected by the last filter call, one per line. */
    public String headFile="";
    /** Macro definitions collected by the last filter call, one per line. */
    public String define="";
    /** Reserved words found by the scanner, mapped to their code. */
    public Map<String,Integer> remainWordMap = new LinkedHashMap<String, Integer>();
    /** Identifiers found by the scanner; every identifier is stored with code 99. */
    public Map<String,Integer> identifierMap = new LinkedHashMap<String, Integer>();
    /** Numeric literals found by the scanner; every literal is stored with code "100". */
    public Map<String,String> numMap = new LinkedHashMap<String, String>();
    /** Operators and delimiters found by the scanner, mapped to their code. */
    public Map<String,Integer> operatorMap = new LinkedHashMap<String, Integer>();
    /** The text of the token currently being assembled. */
    public String strToken = "";

    /** Returns whether {@code ch} is a letter. */
    public boolean isLetter(char ch){
        return Character.isLetter(ch);
    }

    /** Returns whether {@code ch} is a digit. */
    public boolean isDigit(char ch){
        return Character.isDigit(ch);
    }

    /** Returns whether {@code ch} is the blank (space) character. */
    public boolean isBC(char ch){
        return ch == ' ';
    }

    /** Appends {@code ch} to the current token text. */
    public void concat(char ch){
        strToken += ch;
    }

    /** Resets the current token text to the empty string. */
    public void clearStrToken(){
        strToken = "";
    }

    /**
     * Looks the current token up in the reserved-word table.
     *
     * @return the reserved word's code (index + 1), or -1 when the token is not reserved
     */
    public int reserve(){
        for(int i = 0;i < retainWords.length;i++){
            if(strToken.equals(retainWords[i])){
                return i+1;
            }
        }
        return -1;
    }

    /** Returns the header names collected by the last filter call. */
    public String getHeadFile() {
        return headFile;
    }

    /** Returns the macro definitions collected by the last filter call. */
    public String getDefine() {
        return define;
    }

    /**
     * Looks the current token up in the operator/delimiter table.
     *
     * @return the operator's code (33 + index), or -1 when the token is not in the table
     */
    public int isOperator(){
        for (int i = 0; i < operatorWords.length; i++) {
            if(strToken.equals(operatorWords[i])){
                return 33+i;
            }
        }
        return -1;
    }

    /**
     * Reads and filters the default input file {@code d:\test.txt}.
     *
     * @return the filtered source text, see {@link #filterSource(String)}
     * @throws IOException if the file cannot be read
     */
    public String filter() throws IOException {
        return filter("d:\\test.txt");
    }

    /**
     * Reads the file at {@code path} and filters its content.
     *
     * @param path path of the C source file to read
     * @return the filtered source text, see {@link #filterSource(String)}
     * @throws IOException if the file cannot be read
     */
    public String filter(String path) throws IOException {
        StringBuilder raw = new StringBuilder();
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            int ch;
            while((ch = br.read()) != -1){
                raw.append((char)ch);
            }
        } finally {
            br.close();// always release the file handle, even when a read fails
        }
        return filterSource(raw.toString());
    }

    /**
     * Filters raw source text: collects the leading include/define lines into
     * {@link #headFile} and {@link #define}, removes them and all comments from the text,
     * and flattens line breaks and tabs.
     *
     * <p>{@code headFile} and {@code define} are reset on every call, so repeated calls do
     * not accumulate duplicates. Comments, line breaks and tabs are replaced by a blank
     * rather than deleted, so that the tokens on either side of them are not merged.
     *
     * @param source the raw source text; {@code null} is treated as empty
     * @return the filtered text with leading and trailing whitespace trimmed
     */
    public String filterSource(String source) {
        String all = (source == null) ? "" : source;
        headFile = "";
        define = "";

        // Leading lines that mention "include", then lines that mention "define".
        String[] lines = all.split("\n");
        int k = 0;
        while(k < lines.length && lines[k].contains("include")){
            headFile += lines[k].replaceAll("#( |\t|'')*include( |\t|'')*","")+"\n";
            k++;
        }
        while(k < lines.length && lines[k].contains("define")){
            define += lines[k].replaceAll("#( |\t|'')*define( |\t|'')*","")+"\n";
            k++;
        }

        // Drop the k header lines together with their line terminators. The last line may
        // have no terminator, so the cut position is clamped to the text length.
        int prefix = k;
        for (int i = 0; i < k; i++) {
            prefix += lines[i].length();
        }
        all = all.substring(Math.min(prefix, all.length()));

        all = stripComments(all);
        all = all.replaceAll("[\r\n\t]+", " ");
        return all.trim();
    }

    /**
     * Removes line comments and block comments from {@code text} in a single pass.
     * A line comment is removed up to, but not including, its line break; a terminated
     * block comment is replaced by one blank; an unterminated block comment is left as is.
     * NOTE(review): comment markers inside string literals are not recognized as literal
     * text and are stripped as comments.
     */
    private static String stripComments(String text) {
        int n = text.length();
        StringBuilder out = new StringBuilder(n);
        int i = 0;
        while (i < n) {
            char c = text.charAt(i);
            boolean hasNext = i + 1 < n;
            if (c == '/' && hasNext && text.charAt(i + 1) == '/') {
                int lineEnd = text.indexOf('\n', i + 2);
                i = (lineEnd == -1) ? n : lineEnd;// a comment on the last line runs to the end
            } else if (c == '/' && hasNext && text.charAt(i + 1) == '*') {
                int close = text.indexOf("*/", i + 2);
                if (close == -1) {
                    out.append(c);
                    i++;
                } else {
                    out.append(' ');
                    i = close + 2;
                }
            } else {
                out.append(c);
                i++;
            }
        }
        return out.toString();
    }

    /**
     * Splits {@code all} into tokens and records each one in the matching map.
     * Blanks are skipped. A character that starts no known token is skipped as well, so
     * the scan always terminates.
     *
     * @param all the filtered source text; {@code null} is treated as empty
     * @throws IOException never thrown by this implementation; kept in the signature so
     *     that existing callers keep compiling
     */
    public void scanner(String all) throws IOException {
        if (all == null) {
            return;
        }
        int i = 0;
        while(i < all.length()) {
            char c = all.charAt(i);
            if(isBC(c)){
                i++;
            } else if(isLetter(c)){
                i = scanWord(all, i);
            } else if(isDigit(c)){
                i = scanNumber(all, i);
            } else {
                i = scanOperator(all, i);
            }
        }
    }

    /**
     * Scans a letter followed by letters/digits starting at {@code start} and records it
     * as a reserved word or an identifier. Returns the index just past the token.
     * NOTE(review): '_' is not a letter for isLetter, so an identifier containing an
     * underscore is split at the underscore.
     */
    private int scanWord(String all, int start) {
        int end = start + 1;
        while (end < all.length() && (isLetter(all.charAt(end)) || isDigit(all.charAt(end)))) {
            end++;
        }
        strToken = all.substring(start, end);
        int code = reserve();
        if (code != -1) {
            remainWordMap.put(strToken, code);
        } else {
            identifierMap.put(strToken, 99);
        }
        clearStrToken();
        return end;
    }

    /**
     * Scans an integer, decimal or scientific-notation literal starting at {@code start}
     * and records it in numMap. Returns the index just past the literal.
     */
    private int scanNumber(String all, int start) {
        int n = all.length();
        int i = start + 1;
        while (i < n && isDigit(all.charAt(i))) {
            i++;
        }
        if (i < n && all.charAt(i) == '.') {// fractional part
            i++;
            while (i < n && isDigit(all.charAt(i))) {
                i++;
            }
        }
        if (i < n && (all.charAt(i) == 'e' || all.charAt(i) == 'E')) {// exponent part
            int j = i + 1;
            if (j < n && (all.charAt(j) == '+' || all.charAt(j) == '-')) {
                j++;
            }
            // Only take the exponent when at least one digit follows; otherwise the 'e'
            // belongs to the next token.
            if (j < n && isDigit(all.charAt(j))) {
                while (j < n && isDigit(all.charAt(j))) {
                    j++;
                }
                i = j;
            }
        }
        strToken = all.substring(start, i);
        numMap.put(strToken, "100");
        clearStrToken();
        return i;
    }

    /**
     * Records the operator/delimiter starting at {@code start}, preferring a two-character
     * entry of operatorWords (such as ">=" or "||") over a one-character entry. A character
     * that is not in the table is skipped. Returns the index just past what was consumed.
     */
    private int scanOperator(String all, int start) {
        if (start + 1 < all.length()) {
            strToken = all.substring(start, start + 2);
            int pairCode = isOperator();
            if (pairCode != -1) {
                operatorMap.put(strToken, pairCode);
                clearStrToken();
                return start + 2;
            }
        }
        strToken = all.substring(start, start + 1);
        int code = isOperator();
        if (code != -1) {
            operatorMap.put(strToken, code);
        }
        clearStrToken();
        return start + 1;
    }

    /**
     * Filters and scans {@code d:\test.txt}, prints the results, and writes each token
     * table to its own file on drive d:.
     *
     * @param args unused
     * @throws IOException if the input cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        Word1 w = new Word1();
        String filtered = w.filter();// read and filter the input once, then reuse the result
        System.out.println(filtered);
        System.out.println("包含头文件如下:");
        System.out.println(w.getHeadFile());
        System.out.println("包含宏定义如下:");
        System.out.println(w.getDefine());
        System.out.println("过滤后字符串长度:"+filtered.length());
        w.scanner(filtered);
        dump("保留字如下:", "d:\\remainWord.txt", w.remainWordMap);
        dump("界符和运算符如下:", "d:\\operatorWord.txt", w.operatorMap);
        dump("数字如下:", "d:\\numWord.txt", w.numMap);
        dump("标识符如下:", "d:\\identifierWord.txt", w.identifierMap);
    }

    /**
     * Prints {@code title}, then writes every entry of {@code table} as "(key,value)" to
     * both the console and the file at {@code path}, one entry per line.
     */
    private static void dump(String title, String path, Map<String, ?> table) throws IOException {
        System.out.println(title);
        BufferedWriter out = new BufferedWriter(new FileWriter(path));
        try {
            for(Map.Entry<String, ?> entry : table.entrySet()){
                String line = "("+entry.getKey()+","+entry.getValue()+")";
                out.write(line);
                out.newLine();
                System.out.println(line);
            }
        } finally {
            out.close();// close the file even when a write fails
        }
    }
}
不一样的烟火
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 从二进制到误差:逐行拆解C语言浮点运算中的4008175468544之谜
· .NET制作智能桌面机器人:结合BotSharp智能体框架开发语音交互
· 软件产品开发中常见的10个问题及处理方法
· .NET 原生驾驭 AI 新基建实战系列:向量数据库的应用与畅想
· 从问题排查到源码分析:ActiveMQ消费端频繁日志刷屏的秘密
· Windows桌面应用自动更新解决方案SharpUpdater5发布
· 我的家庭实验室服务器集群硬件清单
· C# 13 中的新增功能实操
· Supergateway:MCP服务器的远程调试与集成工具
· Vue3封装支持Base64导出的电子签名组件