MATLAB 信息增益函数 入侵检测 KDD Cup

function InforGain = gain(data)
%GAIN Information gain of every feature column with respect to the label.
%   InforGain = GAIN(data) treats the LAST column of the numeric matrix
%   data as the class label and every other column as a discrete feature.
%
%   Input:
%     data      - m-by-n numeric matrix; column n holds the class labels.
%
%   Output:
%     InforGain - (n-1)-by-2 matrix. Column 1 is the feature's column
%                 index, column 2 is H(labels) - H(labels | feature) in
%                 bits, rounded to 4 decimal places.
%
%   Notes:
%     * Works for any number of columns and any set of label values
%       (the column count and the label values are taken from the data).
%     * A class that does not occur contributes 0 to the entropy
%       (0*log2(0) is treated as 0), so the result is never NaN.
%     * Returns all-zero gains when data has no rows or fewer than two
%       columns.
    [m, n] = size(data);
    InforGain = zeros(max(n-1, 0), 2);
    if m == 0 || n < 2
        return;
    end

    labels = data(:, n);
    classes = unique(labels);

    % Entropy of the label column as a whole.
    InforEntropy = labelEntropy(labels, classes);

    for i = 1:n-1
        col = data(:, i);
        values = unique(col);

        % Conditional entropy: entropy of the labels inside each group of
        % rows sharing a feature value, weighted by the group's share of rows.
        condEntropy = 0;
        for j = 1:numel(values)
            mask = (col == values(j));
            weight = sum(mask) / m;
            condEntropy = condEntropy + weight * labelEntropy(labels(mask), classes);
        end

        % round(x*1e4)/1e4 rounds to 4 decimals using base MATLAB only.
        InforGain(i, :) = [i, round((InforEntropy - condEntropy) * 1e4) / 1e4];
    end
end

function h = labelEntropy(labels, classes)
%LABELENTROPY Shannon entropy (bits) of labels over the given class values.
%   Empty input and classes with zero frequency contribute 0.
    h = 0;
    total = numel(labels);
    if total == 0
        return;
    end
    for k = 1:numel(classes)
        p = sum(labels == classes(k)) / total;
        if p > 0
            h = h - p * log2(p);
        end
    end
end
% Driver script: reset the MATLAB session, load the data set from CSV and
% compute the information-gain table for it.

% Close every open figure window.
close all;
% Remove all variables (and other cached items) from the workspace.
clear all;
% Clear the Command Window.
clc;
% Read the numeric matrix from the CSV file in the current folder.
% NOTE(review): presumably the last column is the class label - confirm
% against the data file.
data = csvread('corrected9.csv');
% NOTE(review): hanshu is not defined in the visible source; presumably it is
% the file name under which the information-gain function is saved - confirm.
InforGain = hanshu(data);

   
posted @ 2019-07-09 20:28  princeness  阅读(347)  评论(0编辑  收藏  举报