Candidate-Elimination:候选消除算法_Java实现

算法描述:

Initialize G to the set of most-general hypotheses in H  
Initialize S to the set of most-specific hypotheses in H  
For each training example, d, do:  
If dis a positive example then:  
Remove from G any hypotheses that do not match d  
For each hypothesis sin S that does not match d  
Remove s from S  
Add to S all minimal generalizations, h, of s such that:  
1) h matches d  
2) some member of G is more general than h  
Remove from S any h that is more general than another hypothesis in S  
If dis a negative example then:  
Remove from S any hypotheses that match d  
For each hypothesis gin G that matches d  
Remove g from G  
Add to G all minimal specializations, h, of g such that:  
1) h does not match d  
2) some member of Sis more specific than h  
Remove from G any h that is more specific than another hypothesis in G  

算法说明:

基本的算法思想是,泛化边界初始化为全?,特化边界特化为全空集(@),

1、若遇到正实例,首先从G中删除不包含该实例的概念,然后对S删除与实例不相符的概念,同时做最小泛化,使其包含该实例

2、若遇到负实例,首先从S中删除包含了该实例的概念,然后对G删除包含了该实例的概念,同时做最小特化,注意最小特化集是与当前S一一枚举出可能的特化概念,删除那些符合该实例的概念

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Vector;

public class CandidateElimination {

    /**
     * @param args
     */
    
    private static Vector <Vector <String>> state = new Vector<Vector<String>>(); //实例集
    private static Vector <Vector <String>> S = new Vector<Vector<String>>(); //specific boundary
    private static Vector <Vector <String>> G = new Vector<Vector<String>>(); //general boundary
    private static Vector <String> item ;
    private static int maxLen ;
    private static String white = "@"; // @ stand for empty set
    private static String all = "?";
    
    public static void main(String[] args) throws IOException {
        
        input(7);
        solve();
        output();
    }
    
    private static void output() {
        int i,j;
        System.out.println("The specific boundary is :");
        for(i = 0; i < S.size(); i++){
            for(j = 1; j < maxLen-1; j++){
                System.out.print(S.get(i).get(j) + " ");
            }
            System.out.println();
        }
        System.out.println("The general boundary is :");
        for(i = 0; i < G.size(); i++){
            for(j = 1; j < maxLen-1; j++){
                System.out.print(G.get(i).get(j) + " ");
            }
            System.out.println();
        }
    }

    private static void solve() {
        int i,j,k;
        for(i = 0; i < state.size(); i++){
            //正实例的情况,首先从G中删除不包含该实例的概念,  
            //然后对S删除与实例不相符的概念,同时做最小泛化,使其包含该实例  
            if(state.get(i).get(maxLen-1).equals("Yes")){
                for(j = 0; j < G.size(); j++){
                    if(!checkInclude(G.get(j),state.get(i))){
                        G.remove(j);
                    }
                }
                for(j = 0; j < S.size(); j++){
                    doMinGeneral(j, state.get(i));
                }
            } else {
             //负实例的情况,首先从S中删除包含了该实例的概念,  
             //然后对G做最小特化,与当前S一一枚举出可能的特化概念,删除那些符合该实例的概念 
                for(j = 0; j < S.size(); j++){
                    if(checkInclude(S.get(j), state.get(i))){
                        S.remove(j);
                    }
                }
                int orginGSize = G.size();
                for(j = 0; j < orginGSize; j++){
                    doMinSpecific(j, state.get(i), S.get(0));
                }
                for(k = 0; k < orginGSize; k++){
                    G.remove(k);
                }
            }
        }
    }

           //判定概念h是否包含实例d
    private static boolean checkInclude(Vector <String> h, Vector <String> d){
        boolean res = true;
        for(int i = 1; i < maxLen - 1; i++){
            if(h.get(i).equals(all) || h.get(i).equals(d.get(i))){
                continue;
            } else if(h.get(i).equals(white) || !h.get(i).equals(d.get(i))){
                res = false;
                break;
            }
        }
        return res;
    }

          //对概念S[offset]做最小泛化,使得S[offset]包含d  
    private static void doMinGeneral(int offset, Vector<String> d) {
        for(int i = 1; i < maxLen - 1; i++){
            if(S.get(offset).get(i).equals(white)){
                S.get(offset).set(i, d.get(i));
            } else if(!S.get(offset).get(i).equals(d.get(i))){
                S.get(offset).set(i, all);
            } else{
                continue;
            }
        }
    }
    
        //对概念G[offset]做最小特化,使得G[offset]包不含d,注意最小特化来自于和S的组合 
    private static void doMinSpecific(int offset, Vector<String> d,
            Vector<String> s) {
        for(int i = 1; i < maxLen - 1; i++){
            if(!G.get(offset).get(i).equals(s.get(i)) &&
                    G.get(offset).get(i).equals(all)){
                if(!s.get(i).equals(d.get(i))){
                    Vector <String> temp = new Vector<String>(maxLen);
                    for(int j = 0; j < maxLen; j++){
                        temp.add(G.get(offset).get(j));
                    }
                    temp.set(i, s.get(i));
                    G.add(temp);
                }
            }
            if(G.get(offset).equals(s.get(i)) && G.get(offset).equals(all)){
                System.err.println("doMinSpecific: error in data");
            }
        }
    }
    

    private static void input(int maxlen) throws IOException {
        maxLen = maxlen;
        FileReader fr = new FileReader("e:\\workspace\\train.txt");
        BufferedReader bf = new BufferedReader(fr);
        String str = null;
        String[] strArr = null;
        while((str = bf.readLine()) != null){
            item = new Vector<String>();
            strArr = str.split(" ");
            for(int i = 0; i < maxLen; i++){
                item.add(i,strArr[i]);
            }
            state.add(item);
        }
        item = new Vector<String>();
        for(int i = 0 ; i < maxLen; i++){
            item.add(white);
        }
        S.add(item);
        item = new Vector<String>();
        for(int i = 0 ; i < maxLen; i++){
            item.add(all);
        }
        G.add(item);
        bf.close();
    }

}

测试数据:

0 Sunny Warm High Strong Warm Yes  
1 Sunny Warm Normal Strong Warm No  
2 Rainy Cold High Strong Warm No  
3 Sunny Warm High Strong Cool Yes 

结果输出:

The specific boundary is:  
Sunny Warm High Strong ?  
The general boundary is:  
Sunny ? High ? ?  
? Warm High ? ?

 

posted @ 2014-04-05 23:54  Bleachzk  阅读(728)  评论(0编辑  收藏  举报