多分类与多标签分类评价指标
单标签评价指标
try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: F1() and AUC() stay usable without matplotlib,
    # and ROC() reports the missing dependency when it is called.
    plt = None
import numpy as np


def F1(P, R):
    """Return the F1 score, i.e. the harmonic mean of precision and recall.

    Args:
        P: Precision.
        R: Recall.

    Returns:
        ``2 * P * R / (P + R)``. For scalar inputs whose sum is zero the
        result is ``0.0`` instead of a division-by-zero error. Array inputs
        are evaluated element-wise without that guard.
    """
    total = P + R
    if np.ndim(total) == 0 and total == 0:
        return 0.0
    return 2 * P * R / total


def _roc_points(pos, neg, thresholds):
    """Return ``(FPR, TPR)`` lists holding one ROC point per threshold."""
    pos = np.asarray(pos)
    neg = np.asarray(neg)
    if pos.size == 0 or neg.size == 0:
        raise ValueError("ROC needs at least one positive and one negative score")
    # A score at or above the threshold is predicted positive.
    fpr = [np.count_nonzero(neg >= t) / neg.size for t in thresholds]
    tpr = [np.count_nonzero(pos >= t) / pos.size for t in thresholds]
    return fpr, tpr


def ROC(pos, neg, thresholds=None):
    """Plot the ROC curve for the given positive and negative scores.

    Args:
        pos: Predicted scores of the samples whose true class is positive.
        neg: Predicted scores of the samples whose true class is negative.
        thresholds: Decision thresholds to sweep. Defaults to
            ``np.arange(0.05, 1, 0.05)``.

    Raises:
        ImportError: If matplotlib is not installed.
        ValueError: If ``pos`` or ``neg`` is empty.
    """
    if plt is None:
        raise ImportError("matplotlib is required to plot the ROC curve")
    if thresholds is None:
        thresholds = np.arange(0.05, 1, 0.05)
    fpr, tpr = _roc_points(pos, neg, thresholds)
    plt.xlabel("FPR", fontsize=12)
    plt.ylabel("TPR", fontsize=12)
    plt.plot(fpr, tpr)
    plt.show()


def AUC(pos, neg):
    """Return the area under the ROC curve via pairwise ranking.

    The value is the fraction of (positive, negative) pairs in which the
    positive sample has the higher score; tied pairs count as one half.

    Args:
        pos: Predicted scores of the truly positive samples.
        neg: Predicted scores of the truly negative samples.

    Returns:
        The AUC as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``pos`` or ``neg`` is empty.
    """
    pos = np.asarray(pos)
    neg = np.sort(np.asarray(neg))
    if pos.size == 0 or neg.size == 0:
        raise ValueError("AUC needs at least one positive and one negative score")
    # For every positive score, count the negatives strictly below it and the
    # negatives equal to it with two binary searches over the sorted negatives.
    strictly_below = np.searchsorted(neg, pos, side="left")
    below_or_equal = np.searchsorted(neg, pos, side="right")
    tied = below_or_equal - strictly_below
    wins = float(strictly_below.sum() + 0.5 * tied.sum())
    return wins / (pos.size * neg.size)
多标签评价指标
# Multi-label evaluation metrics
import numpy as np


def _paired_rows(label, scores):
    """Pair each sample's label row with its score row as NumPy arrays."""
    if len(label) == 0:
        raise ValueError("at least one sample is required")
    if len(label) != len(scores):
        raise ValueError("label and scores must contain the same number of samples")
    return [(np.asarray(truth), np.asarray(row)) for truth, row in zip(label, scores)]


# Hamming loss
# Error rate: sum(sum(y != yhat)) / (N * D)
# scikit-learn offers the same metric as sklearn.metrics.hamming_loss.
def HammingLoss(label, predict):
    """Return the fraction of label slots on which ``predict`` is wrong.

    Args:
        label: Ground-truth label matrix of shape (N, D).
        predict: Predicted label matrix of the same shape.

    Returns:
        Number of mismatching entries divided by ``N * D``.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    label = np.asarray(label)
    predict = np.asarray(predict)
    if label.size == 0:
        raise ValueError("at least one sample is required")
    if label.shape != predict.shape:
        raise ValueError("label and predict must have the same shape")
    # Comparing with != (rather than XOR) also works for float or bool input.
    return float(np.mean(label != predict))


# Coverage
# Mean, over samples, of the worst (largest) rank held by a true label when
# the labels are ordered by descending score.
def Coverage(label, logit):
    """Return how deep the ranking must be read to cover every true label.

    Args:
        label: Per-sample label rows; an entry equal to 1 marks a true label.
        logit: Per-sample score rows; a higher score means a better rank.

    Returns:
        The mean over samples of the 1-based rank of the lowest-ranked true
        label. A sample without any true label contributes 0.

    Raises:
        ValueError: If there are no samples or the sample counts differ.
    """
    rows = _paired_rows(label, logit)
    depth_sum = 0
    for truth, scores in rows:
        relevant = np.flatnonzero(truth == 1)
        if relevant.size == 0:
            continue
        # position[j] is the 1-based rank of label j in descending score order.
        order = np.argsort(-scores)
        position = np.empty(order.size, dtype=int)
        position[order] = np.arange(1, order.size + 1)
        depth_sum += int(position[relevant].max())
    return depth_sum / len(rows)


# One-error
# Share of samples whose top-scored label is not a true label.
def One_error(label, logit):
    """Return the fraction of samples whose highest-scored label is wrong.

    Args:
        label: Per-sample label rows; an entry equal to 1 marks a true label.
        logit: Per-sample score rows.

    Returns:
        Number of samples whose arg-max label is not a true label, divided by
        the number of samples. A sample without any true label prints a
        notice and counts as an error.

    Raises:
        ValueError: If there are no samples or the sample counts differ.
    """
    rows = _paired_rows(label, logit)
    misses = 0
    for truth, scores in rows:
        if truth.max() == 0:
            print("该条数据哪一类都不是")
        if truth[np.argmax(scores)] != 1:
            misses += 1
    return misses / len(rows)


# Average precision
# 1. Per sample: for every true label, the ratio of its rank among the true
#    labels to its rank among all labels, averaged over the true labels.
# 2. Average that value over the data set.
def Average_Precision(label, logit):
    """Return the label-ranking average precision.

    Args:
        label: Per-sample label rows; an entry equal to 1 marks a true label.
        logit: Per-sample score rows; a higher score means a better rank.

    Returns:
        The mean per-sample precision. Tied scores share the worst rank of
        their group. A sample with no true label (or only true labels) prints
        a notice and scores 1.0, since its ranking carries no information;
        scikit-learn's label_ranking_average_precision_score uses the same
        convention.

    Raises:
        ValueError: If there are no samples or the sample counts differ.
    """
    rows = _paired_rows(label, logit)
    total = 0.0
    for truth, scores in rows:
        if truth.max() == 0 or truth.min() == 1:
            print("该条数据哪一类都不是或者全都是")
        relevant_scores = scores[truth == 1]
        if relevant_scores.size == 0:
            # Nothing to rank: avoid dividing by zero true labels.
            total += 1.0
            continue
        # Rank of a true label = how many scores are at least as large as its own.
        rank_among_true = (relevant_scores[None, :] >= relevant_scores[:, None]).sum(axis=1)
        rank_among_all = (scores[None, :] >= relevant_scores[:, None]).sum(axis=1)
        total += float(np.mean(rank_among_true / rank_among_all))
    return total / len(rows)


# Ranking loss
# 1. Per sample: share of (true, false) label pairs in which the false label
#    scores at least as high as the true one.
# 2. Average that value over the data set.
def RankingLoss(label, logit):
    """Return the mean fraction of wrongly ordered (true, false) label pairs.

    Args:
        label: Per-sample label rows; 1 marks a true label, 0 a false one.
        logit: Per-sample score rows; a higher score means a better rank.

    Returns:
        The mean per-sample ranking loss. A pair counts as wrong when the true
        label's score is less than or equal to the false label's score. A
        sample with no true label or no false label prints a notice and
        contributes 0, because it has no pair to order.

    Raises:
        ValueError: If there are no samples or the sample counts differ.
    """
    rows = _paired_rows(label, logit)
    total = 0.0
    for truth, scores in rows:
        if truth.max() == 0 or truth.min() == 1:
            print("该条数据哪一类都不是或者全都是")
        true_scores = scores[truth == 1]
        false_scores = scores[truth == 0]
        pair_count = true_scores.size * false_scores.size
        if pair_count == 0:
            # No (true, false) pair exists: avoid dividing by zero.
            continue
        misordered = np.count_nonzero(true_scores[:, None] <= false_scores[None, :])
        total += misordered / pair_count
    return total / len(rows)


# Jaccard similarity (intersection over union) is not implemented here.
# The original note pointed at sklearn.metrics.jaccard_similarity_score;
# NOTE(review): newer scikit-learn releases appear to expose this as
# jaccard_score instead - verify against the installed version.

logit = np.array([[0.3, 0.4, 0.5, 0.1, 0.15]])
label = np.array([[1, 0, 1, 0, 0]])
pred = np.array([[0, 1, 1, 0, 0]])
print(HammingLoss(label, pred))