Machine learning evaluation metrics code reference: specificity, accuracy, recall, etc.

import xlrd
import numpy as np
from sklearn.model_selection import train_test_split   # train/test split
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score    # cross-validation
from sklearn import metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay  # confusion matrix

# Specificity function: specificity = TN / (TN + FP), computed from a manual count
# of the binary confusion-matrix entries (labels assumed to be 0 and 1)
def specificity_loss_func(ground_truth, predictions):
    tp, tn, fn, fp = 0.0, 0.0, 0.0, 0.0
    for l, m in enumerate(ground_truth):
        if m == predictions[l] and m == 1:
            tp += 1
        if m == predictions[l] and m == 0:
            tn += 1
        if m != predictions[l] and m == 1:
            fn += 1
        if m != predictions[l] and m == 0:
            fp += 1
    return tn / (tn + fp)
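
# Cross-check (a minimal sketch, not part of the original workflow): for binary labels
# the same value can be read off sklearn's confusion matrix, since for a 2x2 matrix
# confusion_matrix(y_true, y_pred).ravel() returns (tn, fp, fn, tp). The helper name
# below is ours; it assumes both classes actually appear in the inputs.
def specificity_from_confusion_matrix(ground_truth, predictions):
    tn, fp, fn, tp = confusion_matrix(ground_truth, predictions).ravel()
    return tn / (tn + fp)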

# Read the spreadsheet
input_list = []
workbook = xlrd.open_workbook("./data.xls")
input_sheet = workbook.sheet_by_index(0)
nrows = input_sheet.nrows

for i in range(nrows):
    input_list.append(np.array(input_sheet.row_values(i)))
input_data = np.array(input_list)

# Drop the header row
input_data = input_data[1:, :]
# Drop the first column
input_data = input_data[:, 1:]

input_data = np.array(input_data, dtype=float)
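
# Equivalent loading with pandas (a minimal sketch, assuming the same data.xls layout:
# one header row and a first column to drop; the function name is ours and it is not
# called anywhere below):
def load_data_with_pandas(path="./data.xls"):
    import pandas as pd
    frame = pd.read_excel(path, sheet_name=0)         # the header row is consumed by read_excel
    return frame.iloc[:, 1:].to_numpy(dtype=float)    # drop the first column, cast to float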




# Optional containers for collecting per-model accuracies (only referenced in
# commented-out lines below)
svc_list = []
tree_list = []
adaboost_list = []
train_x, test_x, train_y, test_y = train_test_split(input_data[:, :-1], input_data[:, -1], test_size=0.2, random_state=10)
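
# If the class labels are imbalanced, a stratified split keeps the class proportions
# in both sets (an optional sketch of the same call with stratify; shown commented out
# so the results below are unchanged):
# train_x, test_x, train_y, test_y = train_test_split(
#     input_data[:, :-1], input_data[:, -1], test_size=0.2, random_state=10,
#     stratify=input_data[:, -1])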





# 1. Decision tree model

# 1-1. Evaluation metrics

from sklearn import tree

model = tree.DecisionTreeClassifier(criterion='entropy')
model.fit(train_x, train_y)
result = model.predict(test_x)
prob = model.predict_proba(test_x)                      # class probabilities, used for the ROC curve
acc = np.mean(result == test_y)
scores = cross_val_score(model, input_data[:, :-1], input_data[:, -1], cv=10)
precision = metrics.precision_score(test_y, result, average='macro')   # y_true first, then y_pred
recall = metrics.recall_score(test_y, result, average='macro')
f1 = metrics.f1_score(test_y, result, average='macro')
spe = specificity_loss_func(test_y, result)
#tree_list.append(acc)
print("Decision tree model")
print("Accuracy:", acc)
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", spe)
print("F1:", f1)
print("Cross-validation accuracy:", scores.mean())


# 1-2. Confusion matrix

confusion_mat = confusion_matrix(np.array(test_y), np.array(result))

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,            
    cmap=plt.cm.Blues,
    ax=None,                        
    xticks_rotation="horizontal",   
    values_format=".2f"
)
plt.title("Tree confusion matrix")
plt.show()


# 1-3. ROC curve

prob_pos = prob[:, 1]                                   # probability of the positive class (label 1)
roc = metrics.roc_auc_score(test_y, prob_pos)           # AUC from true labels and predicted scores
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob_pos)
plt.plot(fpr, tpr, label="ROC curve (area={0})".format(round(roc, 2)))
plt.plot([0, 1], [0, 1], linestyle='dashed')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Tree-ROC")
plt.legend(loc='lower right')
plt.show()
print("\n\n")




# 2. Logistic regression model

# 2-1. Evaluation metrics

from sklearn.linear_model import LogisticRegression

LogisticRegressionModel = LogisticRegression()
LogisticRegressionModel.fit(train_x, train_y)                              # train the model
LogisticRegressionModel_result = LogisticRegressionModel.predict(test_x)   # predicted labels
prob = LogisticRegressionModel.predict_proba(test_x)                       # predict_proba returns the probability of each class
acc = np.mean(LogisticRegressionModel_result == test_y)                    # test-set accuracy
scores = cross_val_score(LogisticRegressionModel, input_data[:, :-1], input_data[:, -1], cv=10)  # 10-fold cross-validation accuracy
precision = metrics.precision_score(test_y, LogisticRegressionModel_result, average='macro')     # precision (y_true first, then y_pred)
recall = metrics.recall_score(test_y, LogisticRegressionModel_result, average='macro')           # recall
f1 = metrics.f1_score(test_y, LogisticRegressionModel_result, average='macro')                   # F1
spe = specificity_loss_func(test_y, LogisticRegressionModel_result)                              # specificity
print("逻辑回归模型")
print("准确率:",acc)
print("精确率:",precision)
print("召回率:",recall)
print("特异度:",spe)
print("F1:",f1)
print("交叉验证准确率:",scores.mean())


# 2-2. Confusion matrix

confusion_mat = confusion_matrix(np.array(test_y), np.array(LogisticRegressionModel_result))

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,          # show the count in each cell
    cmap=plt.cm.Blues,            # blue colour map
    ax=None,                      # default: create a new axes
    xticks_rotation="horizontal", # keep the x tick labels horizontal
    values_format=".2f",          # number format, two decimals
)
plt.title("LR confusion matrix")  # figure title
plt.show()


# 2-3. ROC curve
prob_pos = prob[:, 1]                                           # probability of the positive class (label 1)
roc = metrics.roc_auc_score(test_y, prob_pos)                   # AUC from true labels and predicted scores
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob_pos)      # ROC curve from true labels and scores
# print('FPR:', fpr)
# print('TPR:', tpr)
# print('thresholds:', thresholds)
plt.plot(fpr, tpr, label="ROC curve (area={0})".format(round(roc, 2)))  # ROC curve, AUC rounded to two decimals
plt.plot([0, 1], [0, 1], linestyle='dashed')                    # dashed diagonal reference line
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("LR-ROC")
plt.legend(loc='lower right')                                   # legend in the lower-right corner
plt.show()
print("\n\n")




# 3. Random forest model

# 3-1. Evaluation metrics

from sklearn.ensemble import RandomForestClassifier

SLmodel = RandomForestClassifier()
SLmodel.fit(train_x, train_y)
SLmodel_result = SLmodel.predict(test_x)
prob = SLmodel.predict_proba(test_x)
acc = np.mean(SLmodel_result == test_y)
scores = cross_val_score(SLmodel, input_data[:, :-1], input_data[:, -1], cv=10)
precision = metrics.precision_score(test_y, SLmodel_result, average='macro')
recall = metrics.recall_score(test_y, SLmodel_result, average='macro')
f1 = metrics.f1_score(test_y, SLmodel_result, average='macro')
spe = specificity_loss_func(test_y, SLmodel_result)
#tree_list.append(acc)
print("Random forest model")
print("Accuracy:", acc)
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", spe)
print("F1:", f1)
print("Cross-validation accuracy:", scores.mean())


# 3-2. Confusion matrix

confusion_mat = confusion_matrix(np.array(test_y), np.array(SLmodel_result))

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,
    cmap=plt.cm.Blues,
    ax=None,
    xticks_rotation="horizontal",
    values_format=".2f"
)
plt.title("RF confusion matrix")
plt.show()


# 3-3. ROC curve

prob_pos = prob[:, 1]                                   # probability of the positive class
roc = metrics.roc_auc_score(test_y, prob_pos)
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob_pos)
plt.plot(fpr, tpr, label="ROC curve (area={0})".format(round(roc, 2)))
plt.plot([0, 1], [0, 1], linestyle='dashed')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("RF-ROC")
plt.legend(loc='lower right')
plt.show()
print("\n\n")




# 4. SVM model

# 4-1. Evaluation metrics

from sklearn.svm import SVC

SVCModel = SVC(probability=True)
SVCModel.fit(train_x, train_y)
SVCModel_result = SVCModel.predict(test_x)
prob = SVCModel.predict_proba(test_x)
# print(prob)
acc = np.mean(SVCModel_result == test_y)
scores = cross_val_score(SVCModel, input_data[:, :-1], input_data[:, -1], cv=10)
precision = metrics.precision_score(test_y, SVCModel_result, average='macro')
recall = metrics.recall_score(test_y, SVCModel_result, average='macro')
f1 = metrics.f1_score(test_y, SVCModel_result, average='macro')
spe = specificity_loss_func(test_y, SVCModel_result)

#svc_list.append(acc)
print("SVM model")
print("Accuracy:", acc)
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", spe)
print("F1:", f1)
print("Cross-validation accuracy:", scores.mean())


# 4-2. Confusion matrix

confusion_mat = confusion_matrix(np.array(test_y), np.array(SVCModel_result))

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,
    cmap=plt.cm.Blues,
    ax=None,
    xticks_rotation="horizontal",
    values_format=".2f",
)
plt.title("SVM-confusion matrix")
plt.show()


# 4-3. ROC curve

prob_pos = prob[:, 1]                                   # probability of the positive class
roc = metrics.roc_auc_score(test_y, prob_pos)
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob_pos)
plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2)))
plt.plot([0,1],[0,1],linestyle='dashed')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("SVM-ROC")
plt.legend(loc='lower right')
plt.show()
print("\n\n")




# 5. AdaBoost model

# 5-1. Evaluation metrics

from sklearn.ensemble import AdaBoostClassifier

model = AdaBoostClassifier(n_estimators=100)
model.fit(train_x, train_y)
result = model.predict(test_x)
prob = model.predict_proba(test_x)
# print(prob)
acc = np.mean(result == test_y)
scores = cross_val_score(model, input_data[:, :-1], input_data[:, -1], cv=10)
precision = metrics.precision_score(test_y, result, average='macro')
recall = metrics.recall_score(test_y, result, average='macro')
f1 = metrics.f1_score(test_y, result, average='macro')
spe = specificity_loss_func(test_y, result)
#adaboost_list.append(acc)
print("AdaBoost model")
print("Accuracy:", acc)
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", spe)
print("F1:", f1)
print("Cross-validation accuracy:", scores.mean())
print("Feature importances:", model.feature_importances_)



# 5-2. Confusion matrix

confusion_mat = confusion_matrix(np.array(test_y), np.array(result))

disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,            
    cmap=plt.cm.Blues,
    ax=None,                        
    xticks_rotation="horizontal",   
    values_format=".2f"
)
plt.title("Adboost confusion matrix")
plt.show()

# 5-3. ROC curve

prob_pos = prob[:, 1]                                   # probability of the positive class
roc = metrics.roc_auc_score(test_y, prob_pos)
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob_pos)
plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2)))
plt.plot([0,1],[0,1],linestyle='dashed')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Adboost-ROC")
plt.legend(loc='lower right')
plt.show()

# print(test_x)
print("十次交叉验证准确率:",scores)

 
