Chameleon

# -*- coding: utf-8 -*-
"""
Created on Tue Dec 18 09:55:16 2018

@author: Mark,LI
"""
import numpy as np
from sklearn.datasets import load_iris

class Chameleon:
    W = None; # weight矩阵(方阵)
    Conn = None; # 连接矩阵(方阵)
    clusters = None;
    MI = 0; # 综合指数
    
 
    # 构造函数,初始化变量
    def __init__(self,datanum, mi):
        self.W = np.ones((datanum,datanum));
        self.Conn = np.zeros((datanum,datanum));
        self.clusters = [];
        self.MI = mi;
        self.inter_EC = None;
    
    # 构造weight矩阵。根据两点间距离的倒数计算两点的相似度,作为连接权重
    def buildWeightMatrix(self,data):
        for i in range(data.shape[0]):
            row = data[i];
            temp = data - row;
            temp = np.multiply(temp,temp);
            temp = np.sum(temp,axis=1);
            self.W[i] = 1/np.sqrt(temp);
            self.W[i][i] = 1.0;
        
    
 
    # CHAMELEON第一阶段,按照K(包括自己)最邻近建立较小的子簇
    def buildSmallCluster(self):
        for i in range(self.W.shape[0]):
            row = self.W[i];
            index = np.argsort(row);
            index = index[-K:];
            index = list(index);
            self.Conn[i,index] = 1;
            self.Conn[i][i] = 0;
        
        visited = [False for i in range(self.W.shape[0])];
        visited = np.array(visited);
        for i in range(self.Conn.shape[0]):
            if(not visited[i]):
                cluster = [];
                findConnectGraph(self.Conn.copy(),i,cluster);
                self.clusters.append(list(set(cluster)));
                visited[cluster] = True;
            
                    
    # 打印子簇
    def printClusters(self):
        for i in range(len(self.clusters)) :
            print("以下数据点属于第" + str(i) + "簇:");
            item = self.clusters[i];
            print(item);
        
    # CHAMELEON第二阶段,合并相对互联度RI和相对紧密度RC都较高的簇
    def cluster(self):
        self.interConnectivity();
        l = len(self.clusters);
        end = True;
        i = 0;
        while(i<l):
            EC_i = self.inter_EC[i];
            j = i + 1;
            while(j<l):
                EC_j = self.inter_EC[j];
                vec1 = self.clusters[i];
                vec2 = self.clusters[j];
                EC = 0.0;
                RI = 0.0;
                SEC = 0.0;
                RC = 0.0;
                for k in range(len(vec1)):
                    for m in range(len(vec2)):
                        EC += self.W[vec1[k]][vec2[m]];
                    
                RI = 2 * EC / (EC_i + EC_j);
                RC = (len(vec1) + len(vec2)) * EC / (len(vec2) * EC_i + len(vec1) * EC_j);
                # 以RI*RC作为综合指数
                if (RI * RC > self.MI) :
                    self.mergeClusters(i, j);
                    l = l - 1;
                    end = False;
                    break;
                j = j + 1;
            i = i + 1;
        # 递归合并子簇
        if (not end):
            self.cluster();
            
    def interConnectivity(self):
        l = len(self.clusters);
        self.inter_EC = [0 for i in range(l)];
        for i in range(l):
            vec = self.clusters[i];
            for j in range(len(vec)):
                for k in range(len(vec)):
                    self.inter_EC[i] += self.W[vec[j]][vec[k]];
 
    # 把簇b合并到簇a里面去
    def mergeClusters(self,a, b) :
        item = self.clusters[b];
        self.clusters.pop(b);
        #self.clusters[b] = [];
        self.clusters[a].extend(item);
        
    
def findConnectGraph(matrix,r,cluster):
        row  = matrix[r];
        cluster.append(r);
        index_r = np.where(row==1)[0];
        for j in index_r:
            temp = matrix[j];
            temp_index = np.where(temp==1)[0];
            if(len(temp_index)>1):
                matrix[r,j] = matrix[j,r] = 0;
                findConnectGraph(matrix,j,cluster);
            else:
                cluster.append(j);

                 
if __name__ == '__main__':
    K = 2; # 2最邻近,这里面包括它自己
    iris = load_iris();
    data = iris.data;
    label = iris.target;
#        #综合指数0.1
    cham = Chameleon(data.shape[0], 0.1);
    cham.buildWeightMatrix(data);
    cham.buildSmallCluster();
    print("==============第一阶段后的分类结果==============");
    cham.printClusters();
    for c in cham.clusters:
        print(label[c]);
    cham.cluster();
    print("==============第二阶段后的分类结果==============");
    cham.printClusters();
    for c in cham.clusters:
        print(label[c]);

用python实现Chameleon算法,改进了Orisun java实现方式,不知道对不对,有问题请交流学习。通过结果发现Chameleon算法的召回率还不错,准确率有待提高。

参考文献:

https://www-users.cs.umn.edu/~hanxx023/dmclass/chameleon.pdf

http://www.cnblogs.com/zhangchaoyang/articles/2182752.html

posted on 2018-12-18 17:30  FightLi  阅读(500)  评论(0编辑  收藏  举报