编译原理:正规式转变成DFA算法
//将正规式转变成NFA
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; class Edge{ public int u, v; public char key; public Edge(int u, int v, char key) { super(); this.u = u; this.v = v; this.key = key; } @Override public String toString() { return u + "->" + v + " " + key; } @Override public boolean equals(Object arg0) { Edge tmp = (Edge)arg0; return tmp.u==this.u && tmp.v==this.v && tmp.key==this.key; } @Override public int hashCode() { return u+v+key; } } class NFA{ public static final int MAX_NODE = 100; private boolean finalState[] = new boolean[MAX_NODE];//记录每一个节点是否为终态 private String formal_ceremony;//正规式字符串 private int cnt_node=1;//记录节点的个数 private Map<Integer, Integer> endNode = new TreeMap<Integer, Integer>();//每一个开始节点对应的终端节点 private ArrayList<Edge> nodeAl = new ArrayList<Edge>(); private Vector<Pair>[] g = new Vector[MAX_NODE];//NFA图 private Set<Character> st = new TreeSet<Character>();//正规式中出现的字符的集合 public NFA(String formal_ceremony) { super(); this.formal_ceremony = formal_ceremony; } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); if(g[u] == null) g[u] = new Vector<Pair>(); g[u].add(new Pair(v, ch)); if(ch!='$') st.add(ch); } public boolean kernel_way(int fa, int ld, int rd, boolean isClosure){//fa表示区间的开始点,正规式的区间[ld, rd], isClosure表示这段区间查是否存在闭包 if(ld < 0 || rd >= formal_ceremony.length()){ System.out.println("正规式不正确---发生数组越界!"); return false; } int pre_node = fa; int inBracket = 0;//判断'|'是否在括弧内 for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='(') ++inBracket; else if(formal_ceremony.charAt(i)==')') --inBracket; else if(formal_ceremony.charAt(i)=='|' && 0==inBracket){ if(!kernel_way(fa, ld, i-1, isClosure)) return false; if(!kernel_way(fa, i+1, rd, isClosure)) return false; return true; } } for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='('){//又是一个子区间 //寻找和 该 '('相匹配的')' int cntLeftBracket = 0;//统计遍历过程中'('出现的次数,遇到')'减去1 int posRightBracket = -1;//记录相匹配的')'的位置 int posLeftBracket = i; for(int j=i+1; j<=rd; ++j){ if(formal_ceremony.charAt(j)=='(') ++cntLeftBracket; else if(formal_ceremony.charAt(j)==')'){ if(cntLeftBracket == 0){ posRightBracket = j; break; } --cntLeftBracket; } } if(posRightBracket == -1){//出错 System.out.println("正规式出错----括弧不匹配!"); return false; } int nodeFather = 0;//括弧内正则式的开始节点 if(posRightBracket+1 <= rd && formal_ceremony.charAt(posRightBracket+1)=='*'){ i = posRightBracket+1;//过滤掉"()*" addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; nodeFather = cnt_node; addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; //处理()*括弧内的正规式 if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, true)) return false; } else { nodeFather = pre_node; if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, false))//对于"(101)", 看成101 return false; i = posRightBracket; } } else {//单个字符 if(formal_ceremony.charAt(i)==')') continue; if(i+1 <= rd && formal_ceremony.charAt(i+1)=='*'){ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; addEdge(pre_node, pre_node, formal_ceremony.charAt(i)); if(i+1==rd && isClosure) addEdge(pre_node, fa, '$');//表示这一条边为空并且是连接到父亲节点 else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), '$'); else{ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; ++i;//过滤* } else { if(i==rd && isClosure){//是闭包的情况 addEdge(pre_node, fa, formal_ceremony.charAt(i)); } else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), formal_ceremony.charAt(i)); else{ addEdge(pre_node, ++cnt_node, formal_ceremony.charAt(i)); if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; } } } return true; } private void checkFinalState(){//检查哪一个节点是终态 for(int i=1; i<=cnt_node; ++i){ int cc = 0; if(g[i] == null){//表明是终态 finalState[i] = true; continue; } for(int j=0; j<g[i].size(); ++j) if(g[i].elementAt(j).v != i) ++cc; if(cc == 0)//表明是终态 finalState[i] = true; } } public boolean[] getFinalState(){ return finalState; } public Vector<Pair>[] getNFAGraphics(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ // for(Edge e : nodeAl)//打印NFA // System.out.println(e); checkFinalState(); return g; } return null; } public Set<Character> getCharacterSet(){ return st; } public void outputNFA(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ checkFinalState(); for(Edge e : nodeAl) System.out.println(e); } } } /* * 将正规式转换成NFA * */ public class ToNFA { public static void main(String[] args){ String formal_ceremony = "0*(100*)*0*"; // String formal_ceremony = "1(1010*|1(010)*1)*0"; // String formal_ceremony = "1(0|1)*101"; // String formal_ceremony = "0*1*(010)0*1*"; // String formal_ceremony = "(0|1|2)*"; // String formal_ceremony = "0|1"; // String formal_ceremony = "0|1|2|3"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "(0|1)*|(2|3)*"; // String formal_ceremony = "((10)|(01)*|(0|1))"; NFA nfa = new NFA(formal_ceremony); nfa.outputNFA(); } }
//将NFA转变成确定化DFA
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.Vector; class Pair { public int v; public char ch; public Pair(int v, char ch) { super(); this.v = v; this.ch = ch; } } class MyHashSet extends HashSet<Integer>{//重写 set 集合的 hashcode()和equals()方法 private int state; public void setState(int state){ this.state = state; } public int getState(){ return state; } @Override public boolean equals(Object arg0) { MyHashSet tmp = (MyHashSet)arg0; if(tmp.size() != this.size()) return false; Iterator<Integer> it = this.iterator(); while(it.hasNext()){ if(!tmp.contains(it.next())) return false; } return true; } @Override public int hashCode() { int sum = 0; Iterator<Integer> it = this.iterator(); while(it.hasNext()) sum += (((java.lang.Integer)it.next()).intValue()); return sum; } } class DefinedNFA{ private int dfaNode = 0;//defined DFA节点的个数 private boolean[] finalState = null;//表示NFA中哪一个节点是终态 private boolean[] newFinalState = new boolean[NFA.MAX_NODE] ; private Vector<Pair>[] g = null;//NFA 图 private Set<Edge>edgeSet = new HashSet<Edge>(); //标记图中的边是否被访问过 private MyHashSet st = null; //集合,表示每一个子集状态 private Queue<MyHashSet> queue = new LinkedList<MyHashSet>();//存放要执行的子集状态 private Set<MyHashSet> sst = new HashSet<MyHashSet>(); private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Edge> nodeAl = new ArrayList<Edge>();//NFA边的集合 public DefinedNFA(Vector<Pair>[] g, Set<Character> characterSet, boolean[] finalState) { super(); this.g = g; this.characterSet = characterSet; this.finalState = finalState; } public Set<Character> getCharacterSet(){ return characterSet; } public int getDfaNode(){ return dfaNode; } public boolean[] getNewFinalState(){ return newFinalState; } public ArrayList<Edge> getNodeAl(){ return nodeAl; } private void dfs(int u, char ch){ if(g[u]==null) return ; int len = g[u].size(); for(int i=0; i<len; ++i){ Pair pair = g[u].elementAt(i); Edge edge = new Edge(u, pair.v, pair.ch); if(!edgeSet.contains(edge) && pair.ch==ch){ edgeSet.add(edge); st.add(pair.v); dfs(pair.v, '$'); } } } public void checkIsFinalState(Set<Integer> st, int state){ Iterator<Integer> it = st.iterator(); while(it.hasNext()){ int val = it.next(); if(finalState[val]) newFinalState[state] = true; } } private void initFirstSet(){ edgeSet.clear(); st = new MyHashSet(); st.add(1); st.setState(++dfaNode); dfs(1, '$'); checkIsFinalState(st, dfaNode); sst.add(st); queue.add(st); } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); } public void ToStateMatrix(){ initFirstSet(); while(!queue.isEmpty()){ MyHashSet myset = queue.poll(); for(Character ch : characterSet){ st = new MyHashSet(); for(Integer i : myset){ edgeSet.clear(); dfs(i, ch); } if(st.size()>0){ if(!sst.contains(st)){ sst.add(st); queue.add(st); st.setState(++dfaNode); checkIsFinalState(st, dfaNode); } else { Iterator<MyHashSet> it = sst.iterator(); while(it.hasNext()){ MyHashSet tmp = it.next(); if(tmp.equals(st)){ st = tmp; break; } } } addEdge(myset.getState(), st.getState(), ch); } } } } public void outputDFA(){ ToStateMatrix();//有状态转换矩阵得到defined NFA for(Edge e : nodeAl) System.out.println(e); } } public class ToDefinedDFA { public static void main(String[] args) { // String formal_ceremony = "((10)|(01)*|(0|1))"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "1(0|1)*101"; String formal_ceremony = "0*(100*)*0*"; NFA nfa = new NFA(formal_ceremony); DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState()); definedDFA.outputDFA(); } }
//将确定化DFA最小化
package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; class MinimumDFA{ private boolean[] newFinalState = null;//由确定化DFA得到 private ArrayList<Edge> nodeAl = null;//由确定化DFA得到 private int dfaNode;//确定化DFA节点的个数 private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Set<Integer>> setList = new ArrayList<Set<Integer>>(); public MinimumDFA(boolean[] newFinalState, ArrayList<Edge> nodeAl, int dfaNode, Set<Character> characterSet) { super(); this.newFinalState = newFinalState; this.nodeAl = nodeAl; this.dfaNode = dfaNode; this.characterSet = characterSet; } private void init(){//利用分割法将集合分成终态和非终态 Set<Integer> finalStateSet = new HashSet<Integer>(); Set<Integer> NofinalStateSet = new HashSet<Integer>(); for(int i=1; i<=dfaNode; ++i) if(newFinalState[i])//终态 finalStateSet.add(i); else NofinalStateSet.add(i); setList.add(finalStateSet); setList.add(NofinalStateSet); } public void toMinimumDfa(){ init(); boolean flag = true; ArrayList<Set<Integer>> tmpSetList = new ArrayList<Set<Integer>>(); while(flag){ flag = false; hjzgg: for(int k=0; k<setList.size(); ++k){ Set<Integer> st = setList.get(k); if(st.size()<=1) continue; for(Character ch : characterSet){ Map<Integer, Integer> mp = new HashMap<Integer, Integer>(); for(int i=0; i<nodeAl.size(); ++i){//st集合(也就是map的val值)在 ch这个点对应的集合 {st}a = {...} Edge edge = nodeAl.get(i); if(edge.key == ch && st.contains(edge.u)) mp.put(edge.u, edge.v); }
for(Integer i : st)
if(!mp.containsKey(i))//表明i节点对应的是一条空边
mp.put(i, -1);
//将st集合拆分成两个不想交的集合 Set<Integer> firstSet = new HashSet<Integer>(); Set<Integer> secondSet = new HashSet<Integer>(); for(int j=0; j<setList.size(); ++j){ firstSet.clear(); secondSet.clear(); Set<Integer> tmpSt = setList.get(k); for(Entry<Integer, Integer> entry : mp.entrySet()){//返回此映射中包含的映射关系的 set 视图。返回的 set 中的每个元素都是一个 Map.Entry if(tmpSt.contains(entry.getValue())) firstSet.add(entry.getKey()); else secondSet.add(entry.getKey()); } if(firstSet.size()!=0 && secondSet.size()!=0){ flag = true;//如果发现可以拆分的集合,则继续最顶层的while循环 for(Integer i : tmpSt){//将firstSet 和 secondSet中都没有的元素添加到firstSet中 if(!firstSet.contains(i) && !secondSet.contains(i)) firstSet.add(i); } setList.remove(k); setList.add(firstSet); setList.add(secondSet); break hjzgg; } } } } } // for(int k=0; k<setList.size(); ++k)//输出最终的集合划分 // System.out.println(setList.get(k)); // System.out.println("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&"); for(int k=0; k<setList.size(); ++k){ Set<Integer> st = setList.get(k); if(st.size() > 1){//看成是一个等价的状态,选择第一个元素当作代表 int first=0; for(Integer i : st){//取得第一个元素 first = i; break; } ArrayList<Edge> tmpList = new ArrayList<Edge>(); for(int i=0; i<nodeAl.size(); ++i){//遍历所有的边,找到不是first Edge edge = nodeAl.get(i); if(st.contains(edge.u) && edge.u!=first){ nodeAl.remove(i); --i; } else if(st.contains(edge.v) && edge.v!=first){ nodeAl.remove(i); --i; tmpList.add(new Edge(edge.u, first, edge.key)); } } nodeAl.addAll(tmpList); } } } public void outputMinimumDFA(){ // for(int i=0; i<nodeAl.size(); ++i)//输出未确定化的DFA // System.out.println(nodeAl.get(i)); toMinimumDfa(); for(int i=0; i<nodeAl.size(); ++i) System.out.println(nodeAl.get(i)); } } public class ToMinimumDFA { public static void main(String[] args) { // String formal_ceremony = "1(0|1)*101"; String formal_ceremony = "0*(100*)*0*"; NFA nfa = new NFA(formal_ceremony); DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState()); definedDFA.ToStateMatrix(); MinimumDFA minimumDFA = new MinimumDFA(definedDFA.getNewFinalState(), definedDFA.getNodeAl(), definedDFA.getDfaNode(), definedDFA.getCharacterSet()); minimumDFA.outputMinimumDFA(); } }
本文来自博客园,作者:hjzqyx,转载请注明原文链接:https://www.cnblogs.com/hujunzheng/p/4421132.html