灰色关联分析法(GRA)实现

GRA算法步骤

step1:确定参考序列 \(x_0\) 和比较序列 \(x_i\)

step2:对原始数据变换,将第 i 个属性的第 k 个数据 \(x_i(k)\) 转换为 \(y_i(k)\) 。方法有:

  • 初值变换: \(y_i(k) = \frac{x_i(k)}{x_i(1)}\).
  • 均值变换: \(y_i(k) = \frac{x_i(k)}{\overline{x}_i}\).
  • 百分比变换(变换成小于1的数):\(y_i(k) = \frac{x_i(k)}{\max_k {x}_i(k)}\) .
  • 倍数变换(变换成大于1的数):\(y_i(k) = \frac{x_i(k)}{\min_k {x}_i(k)}\) .
  • 归一化变换:\(y_i(k) = \frac{x_i(k)}{∑_k {x}_i(k)}\) .
  • 区间变换:\(y_i(k) = \frac{x_i(k) - \min_k {x}_i(k)}{\max_k {x}_i(k) - \min_k {x}_i(k)}\) .

step3:求绝对差序列,即比较序列与参考序列的差值 $△_{0i}(k) =|x_0(k)-x_i(k)| $ .

step4:使用下面公式计算灰关联系数,\(γ(x_0(k),x_i(k))=\frac{△_{min}+ρ△_{max}}{△_{0i}(k)+ρ△_{max}}\),一般取分辨系数ρ=0.5。

step5:使用下面公式计算灰关联度,$γ(x_0,x_i)=\frac{1}{N}∑_{k=1}^N w_kγ(x_0(k),x_i(k)) $ ,wk为第k条数据权重。

step6:得到关键属性

  • 如果参考序列 \(\{x_0\}\) 为最优值数据列,那么灰关联度 \(γ(x_0,x_i)\) 越大,则第 i 个属性越好;
  • 如果参考序列 \(\{x_0\}\) 为最劣值数据列,那么灰关联度 \(γ(x_0,x_i)\) 越大,则第 i 个属性越不好;

GRA算法的Python实现

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import issparse


## 矩阵检查,必须是数据型,必须是二维的
def check_array(array, dtype="numeric"):
    if issparse(array):
        raise TypeError('PCA does not support sparse input.')  # 不接受稀疏矩阵

    if array.ndim != 2:
        raise ValueError('Expected 2D array.')  # 只接受二维矩阵

    array = np.array(array, dtype=np.float64)
    return array



class GRA():
    def __init__(self, k=0, norm_method='norm', rho=0.5):
        self.k = k
        self.norm_method = norm_method
        self.rho = rho


    def fit(self, X, k=None):
        ''' 与单个参考序列比较 '''
        if not k==None: self.k = k
        X = check_array(X)
        Y = self.__normalization(X) #归一化
        self.r = self.__calculation_relevancy(Y)
        return self.r


    def fit_all(self, X):
        ''' 所有序列依次为参考序列,互相比较 '''
        X = check_array(X)
        self.data = np.zeros([X.shape[1], X.shape[1]])
        for k in range(X.shape[1]):
            self.k = k
            Y = self.__normalization(X) #归一化
            r = self.__calculation_relevancy(Y)
            self.data[:,k] = r
        return self.data


    def __normalization(self, X):
        if self.norm_method == 'mean':
            Y = self.__mean(X)
        elif self.norm_method == 'initial':
            Y = self.__initial(X)
        elif self.norm_method == 'norm':
            Y = self.__norm(X)
        elif self.norm_method == 'section':
            Y = self.__section(X)
        elif self.norm_method == 'max':
            Y = self.__max(X)
        elif self.norm_method == 'min':
            Y = self.__min(X)
        else:
            raise ValueError("Unrecognized norm_method='{0}'".format(self.norm_method))
        print(Y)
        return Y


    def __norm(self, X):
        Xsum = np.sum(X, axis=0)
        for i in range(X.shape[1]):
            X[:, i] = X[:, i]/Xsum[i]
        return X


    def __mean(self, X):
        ''' 平均值归一化 '''
        Xmean = np.mean(X, axis=0, keepdims=True) #每一列的平均
        for i in range(X.shape[1]):
            X[:, i] = X[:, i]/Xmean[0][i]
        return X


    def __initial(self, X):
        ''' 初值归一化 '''
        X0 = X[0,:]
        for i in range(X.shape[1]):
            X[:, i] = X[:, i]/X0[i]
        return X


    def __max(self, X):
        ''' 百分比归一化 '''
        Xmax = np.max(X, axis=0)
        for i in range(X.shape[1]):
            X[:, i] = X[:, i]/Xmax[i]
        return X


    def __min(self, X):
        ''' 倍数归一化 '''
        Xmin = np.min(X, axis=0)
        for i in range(X.shape[1]):
            X[:, i] = X[:, i]/(Xmin[i]+0.000001) #避免除数为零
        return X


    def __section(self, X):
        Xmax = np.max(X, axis=0)
        Xmin = np.min(X, axis=0)
        for i in range(X.shape[1]):
            X[:, i] = (X[:, i]-Xmin[i])/(Xmax[i]-Xmin[i])
        return X


    def __calculation_relevancy(self, X):
        ''' 计算关联度 '''
        # 计算参考序列与比较序列差值
        Delta = np.zeros((X.shape))
        for i in range(X.shape[1]):
            Delta[:, i] = np.fabs(X[:, i]-X[:, self.k])

        # 计算关联系数
        t = np.delete(Delta, self.k, axis=1)
        mmax=t.max().max()
        mmin=t.min().min()
        ksi=((mmin+self.rho*mmax)/(Delta+self.rho*mmax))

        # 计算关联度
        r = ksi.sum(axis=0) / ksi.shape[0]
        return r


    def sort_comparison(self):
        idxs = np.argsort(-self.r)
        data = []
        for idx in idxs:
            if idx == self.k: continue
            data.append(['第{}个特征'.format(idx), self.r[idx]])
        df = pd.DataFrame(
            data=np.array(data),
            columns=['特征','相关度'],
            index=[f"{i+1}" for i in range(len(data))],
        )
        print('\n与第{}个特征相关度的从大到小排序:'.format(self.k))
        print(df)
        return df


    def ShowGRAHeatMap(self):
        ''' 灰色关联结果矩阵可视化 '''
        df = pd.DataFrame(
            data=self.data,
            columns=[f"{i}" for i in range(self.data.shape[1])],
            index=[f"{i}" for i in range(self.data.shape[0])],
        )
        colormap = plt.cm.RdBu
        plt.figure()
        plt.title('Pearson Correlation of Features')
        sns.heatmap(df.astype(float), linewidths=0.1, vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)
        plt.show()

接口调用

test 1

import numpy as np

X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
[0.038, 0.031, 0.042, 0.036, 0.043],
[0.507, 0.451, 0.448, 0.411, 0.122],
[0.048, 0.034, 0.030, 0.030, 0.031],
[183.25, 207.28, 240.98, 290.80, 370.00],
[24.03, 44.98, 62.79, 83.44, 127.22],
[85508, 74313, 85966, 100554, 109804],
[175.87, 175.72, 183.69, 277.11, 521.26],
[10, 13, 13, 1, 1],])


X=X.T # 每一行为一条记录,每一列为一个特征数据
print(X.shape)
print(X)

gra = GRA(k=0, norm_method='min')
r = gra.fit(X)
print(r)
gra.sort_comparison()

tese 2

from GRA import GRA
import numpy as np

X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
[0.038, 0.031, 0.042, 0.036, 0.043],
[0.507, 0.451, 0.448, 0.411, 0.122],
[0.048, 0.034, 0.030, 0.030, 0.031],
[183.25, 207.28, 240.98, 290.80, 370.00],
[24.03, 44.98, 62.79, 83.44, 127.22],
[85508, 74313, 85966, 100554, 109804],
[175.87, 175.72, 183.69, 277.11, 521.26],
[10, 13, 13, 1, 1],])

gra = GRA(norm_method='initial')
data = gra.fit_all(X)
print(data)
gra.ShowGRAHeatMap()

参考:

posted on 2020-06-17 15:36  yejifeng  阅读(1658)  评论(0编辑  收藏  举报

导航