灰色关联分析法(GRA)实现
GRA算法步骤
step1:确定参考序列 \(x_0\) 和比较序列 \(x_i\);
step2:对原始数据变换,将第 i 个属性的第 k 个数据 \(x_i(k)\) 转换为 \(y_i(k)\) 。方法有:
- 初值变换: \(y_i(k) = \frac{x_i(k)}{x_i(1)}\).
- 均值变换: \(y_i(k) = \frac{x_i(k)}{\overline{x}_i}\).
- 百分比变换(变换成小于1的数):\(y_i(k) = \frac{x_i(k)}{\max_k {x}_i(k)}\) .
- 倍数变换(变换成大于1的数):\(y_i(k) = \frac{x_i(k)}{\min_k {x}_i(k)}\) .
- 归一化变换:\(y_i(k) = \frac{x_i(k)}{∑_k {x}_i(k)}\) .
- 区间变换:\(y_i(k) = \frac{x_i(k) - \min_k {x}_i(k)}{\max_k {x}_i(k) - \min_k {x}_i(k)}\) .
step3:求绝对差序列,即比较序列与参考序列的差值 $△_{0i}(k) =|x_0(k)-x_i(k)| $ .
step4:使用下面公式计算灰关联系数,\(γ(x_0(k),x_i(k))=\frac{△_{min}+ρ△_{max}}{△_{0i}(k)+ρ△_{max}}\),一般取分辨系数ρ=0.5。
step5:使用下面公式计算灰关联度,$γ(x_0,x_i)=\frac{1}{N}∑_{k=1}^N w_kγ(x_0(k),x_i(k)) $ ,wk为第k条数据权重。
step6:得到关键属性
- 如果参考序列 \(\{x_0\}\) 为最优值数据列,那么灰关联度 \(γ(x_0,x_i)\) 越大,则第 i 个属性越好;
- 如果参考序列 \(\{x_0\}\) 为最劣值数据列,那么灰关联度 \(γ(x_0,x_i)\) 越大,则第 i 个属性越不好;
GRA算法的Python实现
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import issparse
## 矩阵检查,必须是数据型,必须是二维的
def check_array(array, dtype="numeric"):
if issparse(array):
raise TypeError('PCA does not support sparse input.') # 不接受稀疏矩阵
if array.ndim != 2:
raise ValueError('Expected 2D array.') # 只接受二维矩阵
array = np.array(array, dtype=np.float64)
return array
class GRA():
def __init__(self, k=0, norm_method='norm', rho=0.5):
self.k = k
self.norm_method = norm_method
self.rho = rho
def fit(self, X, k=None):
''' 与单个参考序列比较 '''
if not k==None: self.k = k
X = check_array(X)
Y = self.__normalization(X) #归一化
self.r = self.__calculation_relevancy(Y)
return self.r
def fit_all(self, X):
''' 所有序列依次为参考序列,互相比较 '''
X = check_array(X)
self.data = np.zeros([X.shape[1], X.shape[1]])
for k in range(X.shape[1]):
self.k = k
Y = self.__normalization(X) #归一化
r = self.__calculation_relevancy(Y)
self.data[:,k] = r
return self.data
def __normalization(self, X):
if self.norm_method == 'mean':
Y = self.__mean(X)
elif self.norm_method == 'initial':
Y = self.__initial(X)
elif self.norm_method == 'norm':
Y = self.__norm(X)
elif self.norm_method == 'section':
Y = self.__section(X)
elif self.norm_method == 'max':
Y = self.__max(X)
elif self.norm_method == 'min':
Y = self.__min(X)
else:
raise ValueError("Unrecognized norm_method='{0}'".format(self.norm_method))
print(Y)
return Y
def __norm(self, X):
Xsum = np.sum(X, axis=0)
for i in range(X.shape[1]):
X[:, i] = X[:, i]/Xsum[i]
return X
def __mean(self, X):
''' 平均值归一化 '''
Xmean = np.mean(X, axis=0, keepdims=True) #每一列的平均
for i in range(X.shape[1]):
X[:, i] = X[:, i]/Xmean[0][i]
return X
def __initial(self, X):
''' 初值归一化 '''
X0 = X[0,:]
for i in range(X.shape[1]):
X[:, i] = X[:, i]/X0[i]
return X
def __max(self, X):
''' 百分比归一化 '''
Xmax = np.max(X, axis=0)
for i in range(X.shape[1]):
X[:, i] = X[:, i]/Xmax[i]
return X
def __min(self, X):
''' 倍数归一化 '''
Xmin = np.min(X, axis=0)
for i in range(X.shape[1]):
X[:, i] = X[:, i]/(Xmin[i]+0.000001) #避免除数为零
return X
def __section(self, X):
Xmax = np.max(X, axis=0)
Xmin = np.min(X, axis=0)
for i in range(X.shape[1]):
X[:, i] = (X[:, i]-Xmin[i])/(Xmax[i]-Xmin[i])
return X
def __calculation_relevancy(self, X):
''' 计算关联度 '''
# 计算参考序列与比较序列差值
Delta = np.zeros((X.shape))
for i in range(X.shape[1]):
Delta[:, i] = np.fabs(X[:, i]-X[:, self.k])
# 计算关联系数
t = np.delete(Delta, self.k, axis=1)
mmax=t.max().max()
mmin=t.min().min()
ksi=((mmin+self.rho*mmax)/(Delta+self.rho*mmax))
# 计算关联度
r = ksi.sum(axis=0) / ksi.shape[0]
return r
def sort_comparison(self):
idxs = np.argsort(-self.r)
data = []
for idx in idxs:
if idx == self.k: continue
data.append(['第{}个特征'.format(idx), self.r[idx]])
df = pd.DataFrame(
data=np.array(data),
columns=['特征','相关度'],
index=[f"{i+1}" for i in range(len(data))],
)
print('\n与第{}个特征相关度的从大到小排序:'.format(self.k))
print(df)
return df
def ShowGRAHeatMap(self):
''' 灰色关联结果矩阵可视化 '''
df = pd.DataFrame(
data=self.data,
columns=[f"{i}" for i in range(self.data.shape[1])],
index=[f"{i}" for i in range(self.data.shape[0])],
)
colormap = plt.cm.RdBu
plt.figure()
plt.title('Pearson Correlation of Features')
sns.heatmap(df.astype(float), linewidths=0.1, vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)
plt.show()
接口调用
test 1
import numpy as np
X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
[0.038, 0.031, 0.042, 0.036, 0.043],
[0.507, 0.451, 0.448, 0.411, 0.122],
[0.048, 0.034, 0.030, 0.030, 0.031],
[183.25, 207.28, 240.98, 290.80, 370.00],
[24.03, 44.98, 62.79, 83.44, 127.22],
[85508, 74313, 85966, 100554, 109804],
[175.87, 175.72, 183.69, 277.11, 521.26],
[10, 13, 13, 1, 1],])
X=X.T # 每一行为一条记录,每一列为一个特征数据
print(X.shape)
print(X)
gra = GRA(k=0, norm_method='min')
r = gra.fit(X)
print(r)
gra.sort_comparison()
tese 2
from GRA import GRA
import numpy as np
X = np.array([[0.732, 0.646, 0.636, 0.598, 0.627],
[0.038, 0.031, 0.042, 0.036, 0.043],
[0.507, 0.451, 0.448, 0.411, 0.122],
[0.048, 0.034, 0.030, 0.030, 0.031],
[183.25, 207.28, 240.98, 290.80, 370.00],
[24.03, 44.98, 62.79, 83.44, 127.22],
[85508, 74313, 85966, 100554, 109804],
[175.87, 175.72, 183.69, 277.11, 521.26],
[10, 13, 13, 1, 1],])
gra = GRA(norm_method='initial')
data = gra.fit_all(X)
print(data)
gra.ShowGRAHeatMap()