分类预测输出precision,recall,accuracy,auc和tp,tn,fp,fn矩阵

本次实验是一个二分类问题,下面的代码输出 precision、recall、accuracy 和 AUC。

 

# -*- coding: utf-8 -*-
#from sklearn.neighbors import 
import numpy as np
from pandas import read_csv
import pandas as pd
import sys  
import importlib
from sklearn.neighbors import KNeighborsClassifier     
from sklearn.ensemble import GradientBoostingClassifier    
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics    
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression 
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier  #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat

def metrics_result(actual, predict):
    """Print accuracy, precision, recall, F1 and ROC AUC for one set of predictions.

    Precision, recall and F1 are computed with ``average='weighted'``.
    Every score is printed with three decimals; nothing is returned.

    Args:
        actual: ground-truth labels.
        predict: predicted labels for the same samples, in the same order.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # Score against the `actual` argument rather than a module-level variable,
    # so the function works for whatever label array the caller passes in.
    # NOTE(review): AUC is computed from hard class predictions here, not from
    # scores/probabilities -- confirm that this is what is intended.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))

 

输出混淆矩阵(下面的片段接在 clf.predict 之后,依赖变量 test_y 和 predict):

 

 

    # confusion_matrix puts true labels on the rows and predicted labels on the
    # columns, i.e. [[TN, FP], [FN, TP]] for a binary 0/1 problem.
    matr = np.asmatrix(confusion_matrix(test_y, predict))
    tn, fp = matr[0, 0], matr[0, 1]
    fn, tp = matr[1, 0], matr[1, 1]

    # Re-arrange into [[TP, FP], [FN, TN]] before printing.
    conf = np.matrix([[0, 0], [0, 0]])
    conf[0, 0], conf[0, 1] = tp, fp
    conf[1, 0], conf[1, 1] = fn, tn
    print(conf)

 

全代码:

 

# -*- coding: utf-8 -*-
#from sklearn.neighbors import 
import numpy as np
from pandas import read_csv
import pandas as pd
import sys  
import importlib
from sklearn.neighbors import KNeighborsClassifier     
from sklearn.ensemble import GradientBoostingClassifier    
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import hamming_loss
from sklearn import metrics    
importlib.reload(sys)
from sklearn.linear_model import LogisticRegression 
from imblearn.combine import SMOTEENN
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier  #92%
from sklearn import tree
from xgboost.sklearn import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import neighbors
from sklearn.naive_bayes import BernoulliNB
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from numpy import mat

def metrics_result(actual, predict):
    """Print accuracy, precision, recall, F1 and ROC AUC for one set of predictions.

    Precision, recall and F1 are computed with ``average='weighted'``.
    Every score is printed with three decimals; nothing is returned.

    Args:
        actual: ground-truth labels.
        predict: predicted labels for the same samples, in the same order.
    """
    print('准确度:{0:.3f}'.format(metrics.accuracy_score(actual, predict)))
    print('精密度:{0:.3f}'.format(metrics.precision_score(actual, predict, average='weighted')))
    print('召回:{0:0.3f}'.format(metrics.recall_score(actual, predict, average='weighted')))
    print('f1-score:{0:.3f}'.format(metrics.f1_score(actual, predict, average='weighted')))
    # Score against the `actual` argument rather than a module-level variable,
    # so the function works for whatever label array the caller passes in.
    # NOTE(review): AUC is computed from hard class predictions here, not from
    # scores/probabilities -- confirm that this is what is intended.
    print('auc:{0:.3f}'.format(metrics.roc_auc_score(actual, predict)))




# Binary (0/1) classification experiment.

# Candidate input CSV files; point `root` at the one to evaluate.
root1 = "D:/ProgramData/station3/10.csv"
root2 = "D:/ProgramData/station3/more+average2.csv"
root3 = "D:/ProgramData/station3/new_10.csv"
root4 = "D:/ProgramData/station3/more+remove.csv"
root5 = "D:/ProgramData/station3/new_10 2.csv"
root6 = "D:/ProgramData/station3/new10.csv"
root7 = "D:/ProgramData/station3/no_-999.csv"

root = root4

# Load the selected file and keep only the underlying array.
data1 = read_csv(root).values
print(root)

# Number of train/evaluate repetitions.
time = 1

# One entry per repetition for each metric.
accuracy, aucc, pre, recall = [], [], [], []
for i in range(time):
    # Hold out 20% of the rows for testing. The run index is reused as the
    # split seed, so each repetition is reproducible but partitions differently.
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
    # newer releases expose train_test_split from sklearn.model_selection.
    train, test = cross_validation.train_test_split(data1, test_size=0.2, random_state=i)

    # Features are every column but the last; the last column is the label.
    test_x = test[:, :-1]
    test_y = test[:, -1]
    train_x = train[:, :-1]
    train_y = train[:, -1]

    # Resample the training split only; the test split is left untouched.
    # To train without resampling use instead:
    #     X_Train, Y_Train = train_x, train_y
    # NOTE(review): newer imbalanced-learn releases name this method fit_resample.
    X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)

    # Other classifiers that were tried (trailing numbers are the author's notes):
    #     clf = RandomForestClassifier()      # 82
    #     clf = LogisticRegression()          # 82
    #         (penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True,
    #          intercept_scaling=1, class_weight=None, random_state=None,
    #          solver='liblinear', max_iter=100, multi_class='ovr', verbose=0,
    #          warm_start=False, n_jobs=1)
    #     clf = svm.SVC()
    #     from sklearn.ensemble import RandomForestClassifier  # 92%
    #     clf = DecisionTreeClassifier()
    #     clf = GradientBoostingClassifier()
    #     clf = neighbors.KNeighborsClassifier()
    #     clf = BernoulliNB()
    clf = XGBClassifier()
    print(clf)
    clf.fit(X_Train, Y_Train)
    predict = clf.predict(test_x)

    # confusion_matrix puts true labels on the rows and predicted labels on the
    # columns ([[TN, FP], [FN, TP]]); print it re-arranged as [[TP, FP], [FN, TN]].
    matr = np.asmatrix(confusion_matrix(test_y, predict))
    conf = np.matrix([[0, 0], [0, 0]])
    conf[0, 0] = matr[1, 1]
    conf[1, 0] = matr[1, 0]
    conf[0, 1] = matr[0, 1]
    conf[1, 1] = matr[0, 0]
    print(conf)
    # a = metrics_result(test_y, predict)

    # accuracy
    aa = metrics.accuracy_score(test_y, predict)
    accuracy.append(aa)

    # auc
    # NOTE(review): computed from hard class predictions, not probabilities.
    bb = metrics.roc_auc_score(test_y, predict, average=None)
    aucc.append(bb)

    # precision: average=None returns one value per class; index 1 is kept.
    cc = metrics.precision_score(test_y, predict, average=None)
    pre.append(cc[1])

    # recall: same per-class layout as precision; index 1 is kept.
    dd = metrics.recall_score(test_y, predict, average=None)
    recall.append(dd[1])

    # Dump the predictions, one per line, followed by the end marker.
    # Raw strings keep the Windows backslashes literal without relying on
    # invalid escape sequences; `with` closes the file even if a write fails.
    # Mode 'w' means each repetition overwrites the previous dump.
    with open(r'D:\ProgramData\station3\predict.txt', 'w') as f:
        f.writelines(str(value) + '\n' for value in predict)
        f.write("写好了")

    # Dump the matching ground-truth labels in the same format.
    with open(r'D:\ProgramData\station3\y_.txt', 'w') as f:
        f.writelines(str(test_y[row]) + '\n' for row in range(len(predict)))
        f.write("写好了")
    
# =============================================================================
#     f=open('D:/ProgramData/station3/predict.txt', 'w')
#     for i in range(len(predict)):
#        f.write(str(predict[i]))
#        f.write('\n')
#     f.write("写好了")
#     f.close()
#     
#     f=open('D:/ProgramData/station3/y.txt', 'w')
#     for i in range(len(test_y)):
#        f.write(str(test_y[i]))
#        f.write('\n')
#     f.write("写好了")
#     f.close()
#     
# =============================================================================
# =============================================================================
#     print('调用函数auc:', metrics.roc_auc_score(test_y, predict, average='micro'))
#     
#     fpr, tpr, thresholds = metrics.roc_curve(test_y.ravel(),predict.ravel())
#     auc = metrics.auc(fpr, tpr)
#     print('手动计算auc:', auc)
#     #绘图
#     mpl.rcParams['font.sans-serif'] = u'SimHei'
#     mpl.rcParams['axes.unicode_minus'] = False
#     #FPR就是横坐标,TPR就是纵坐标
#     plt.plot(fpr, tpr, c = 'r', lw = 2, alpha = 0.7, label = u'AUC=%.3f' % auc)
#     plt.plot((0, 1), (0, 1), c = '#808080', lw = 1, ls = '--', alpha = 0.7)
#     plt.xlim((-0.01, 1.02))
#     plt.ylim((-0.01, 1.02))
#     plt.xticks(np.arange(0, 1.1, 0.1))
#     plt.yticks(np.arange(0, 1.1, 0.1))
#     plt.xlabel('False Positive Rate', fontsize=13)
#     plt.ylabel('True Positive Rate', fontsize=13)
#     plt.grid(b=True, ls=':')
#     plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
#     plt.title(u'大类问题一分类后的ROC和AUC', fontsize=17)
#     plt.show()
# =============================================================================
    
    
# Total each metric over the first `time` recorded repetitions.
sum_acc = sum(accuracy[run] for run in range(time))
sum_auc = sum(aucc[run] for run in range(time))
sum_pre = sum(pre[run] for run in range(time))
sum_recall = sum(recall[run] for run in range(time))

# Mean of each metric across the repetitions.
acc1 = sum_acc * 1.0 / time
auc1 = sum_auc * 1.0 / time
pre1 = sum_pre * 1.0 / time
recall1 = sum_recall * 1.0 / time

# Report the averaged scores, one per line.
for label, mean_value in (
    ("acc", acc1),
    ("auc", auc1),
    ("pre", pre1),
    ("recall", recall1),
):
    print(label, mean_value)

# =============================================================================
# 
# data1 = read_csv(root2) #数据转化为数组
# data1=data1.values
# 
#            
# accuracy=[]
# auc=[]
# pre=[]
# recall=[]
# for i in range(30):
#     train, test= cross_validation.train_test_split(data1, test_size=0.2, random_state=i)
#     test_x=test[:,:-1]  
#     test_y=test[:,-1]
#     train_x=train[:,:-1]
#     train_y=train[:,-1]
#     X_Train, Y_Train = SMOTEENN().fit_sample(train_x, train_y)
#   
#     #clf = RandomForestClassifier()  #82 
#     clf = LogisticRegression()  #82 
#     #clf=svm.SVC()
#     #clf= XGBClassifier()
#     #from sklearn.ensemble import RandomForestClassifier  #92%
#     #clf = DecisionTreeClassifier() 
#     #clf = GradientBoostingClassifier()
#     
#     #clf=neighbors.KNeighborsClassifier()  65.25%
#     #clf=BernoulliNB()
#     clf.fit(X_Train, Y_Train) 
#     predict=clf.predict(test_x)
#     
#     '''accuracy'''
#     aa=metrics.accuracy_score(test_y, predict)
#     accuracy.append(aa)
#     
#     '''auc'''
#     aa=metrics.roc_auc_score(test_y, predict)
#     auc.append(aa)
#     
#     '''precision'''
#     aa=metrics.precision_score(test_y, predict,average='weighted')
#     pre.append(aa)
#     
#     '''recall'''
#     aa=metrics.recall_score(test_y, predict,average='weighted')
#     recall.append(aa)
#     
#     
# sum_acc=0
# sum_auc=0
# sum_pre=0
# sum_recall=0
# for i in range(30):
#     sum_acc+=accuracy[i]
#     sum_auc+=auc[i]
#     sum_pre+=pre[i]
#     sum_recall+=recall[i]
#     
# acc1=sum_acc*1.0/30
# auc1=sum_auc*1.0/30
# pre1=sum_pre*1.0/30
# recall1=sum_recall*1.0/30
# print("more 的 acc:", acc1)
# print("more 的 auc:", auc1)
# print("more 的 precision:", pre1)
# print("more 的 recall:", recall1)
# 
# =============================================================================
    #X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_Train,Y_Train, test_size=0.2, random_state=i)
  

 

 

输出结果:

 

posted on 2018-09-06 11:29  蔡军帅  阅读(2034)  评论(0)  编辑  收藏  举报