银行风控模型的建立(神经网络算法和决策树模型)
神经网络算法和决策树模型
模型一:神经网络
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 29 17:34:22 2022
@author: LZL
"""
'''神经网络测试'''
import pandas as pd
from tensorflow.keras import Sequential
#from keras.models import Sequential
from tensorflow.keras import layers
from keras.layers.core import Dense, Activation
import numpy as np
# Parameter initialisation: load the bank-loan spreadsheet.
inputfile = 'data/bankloan.xls'
data = pd.read_excel(inputfile)
# Columns 0-7 are the model inputs, column 8 is the label.
# NOTE: despite the "_test" suffix, these arrays hold the whole data set and
# are used below for both fitting and evaluation.
x_test = data.iloc[:, :8].values
y_test = data.iloc[:, 8].values
from keras.metrics import BinaryAccuracy

model = Sequential()  # build the model
model.add(Dense(input_dim=8, units=800))
model.add(Activation('relu'))
# Only the first layer needs an explicit input size; later layers infer it.
model.add(Dense(units=1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[BinaryAccuracy()])
model.fit(x_test, y_test, epochs=100, batch_size=128)

predict_x = model.predict(x_test)
# The network ends in a single sigmoid unit, so predict_x has one column of
# probabilities.  argmax over that single column is always 0, which would
# label every sample as class 0; threshold the probability instead (0.5 is
# also the default threshold of BinaryAccuracy).
classes_x = (predict_x > 0.5).astype(int)
yp = classes_x.reshape(len(y_test))
def cm_plot(y, yp):
    """Draw the confusion matrix of true labels against predicted labels.

    Args:
        y: True class labels.
        yp: Predicted class labels, aligned element-wise with ``y``.

    Returns:
        The ``matplotlib.pyplot`` module with the figure drawn on it, so the
        caller can chain ``.show()``.
    """
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    cm = confusion_matrix(y, yp)
    plt.matshow(cm, cmap=plt.cm.Greens)
    plt.colorbar()
    # matshow renders cm[row, col] with the column on the horizontal axis and
    # the row on the vertical axis, so the annotation goes at xy=(col, row).
    # Using (row, col) would swap the off-diagonal counts.
    for row, counts in enumerate(cm):
        for col, count in enumerate(counts):
            plt.annotate(str(count), xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center')
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return plt
# Render the confusion matrix for the network's predictions.
confusion_figure = cm_plot(y_test, yp)
confusion_figure.show()

# Evaluate the model; the result holds the loss followed by the compiled metric.
evaluation = model.evaluate(x_test, y_test, batch_size=128)
loss, binary_accuracy = evaluation
print('神经网络模型精度:{}'.format(binary_accuracy))
1.运行结果:
2.混淆矩阵:
模型二:决策树
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 30 08:13:33 2022
@author: LZL
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC

filePath = 'data/bankloan.xls'
data = pd.read_excel(filePath)
# Columns 0-7 are the features, column 8 is the label.
x = data.iloc[:, :8]
y = data.iloc[:, 8]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)

dtc_clf = DTC(criterion='entropy')  # decision tree
dtc_clf.fit(x_train, y_train)

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

# Score on the held-out split only.  Predicting on the full data set would
# include the rows the tree was trained on and so inflate the accuracy.
dtc_yp = dtc_clf.predict(x_test)
dtc_score = accuracy_score(y_test, dtc_yp)
print('决策树模型精度:{}'.format(dtc_score))

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Draw the confusion matrix for the same held-out predictions.
dtc_cm = confusion_matrix(y_test, dtc_yp)
heatmap = sns.heatmap(dtc_cm, annot=True, fmt='d')
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')
plt.ylabel("true label")
plt.xlabel("predict label")
# Needed when run as a plain script; harmless in an interactive session.
plt.show()
import pandas as pd

# Parameter initialisation
filename = 'data/bankloan.xls'
data = pd.read_excel(filename)  # load the data

# NOTE(review): an earlier comment here described mapping "good/yes/high" to 1
# and "bad/no/low" to -1; the code does not do that, it only casts the columns
# to int.  If any feature column holds fractional values this cast truncates
# them -- confirm that is intended.
x = data.iloc[:, :8].astype(int)
y = data.iloc[:, 8].astype(int)

from sklearn.tree import DecisionTreeClassifier as DTC

dtc = DTC(criterion='entropy')  # decision tree based on information entropy
dtc.fit(x, y)  # train the model

# Export the fitted tree as a dot file.  Graphviz must be installed to convert
# the dot file to pdf, png or similar formats.
from sklearn.tree import export_graphviz

# export_graphviz expects plain lists of strings: one name per feature and one
# name per class.  A single string for class_names would be indexed character
# by character, so build the list from the classes the tree actually learned.
feature_names = list(x.columns)
class_names = [str(label) for label in dtc.classes_]

# The script previously carried a disabled dot snippet that set
# fontname="NSimSun" on edges and nodes -- presumably so non-ASCII labels
# render; re-add it to the dot file if labels show up as empty boxes.
# Graphviz reads its input as UTF-8 by default, so write the file as UTF-8.
with open("data/tree.dot", 'w', encoding='utf-8') as f:
    export_graphviz(dtc, feature_names=feature_names, out_file=f)

from IPython.display import Image
from sklearn import tree
import pydotplus

dot_data = tree.export_graphviz(
    dtc,                          # the fitted classifier
    out_file=None,                # return the dot source as a string
    feature_names=feature_names,  # names of the input features
    class_names=class_names,      # names of the target classes
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png('data/train.png')  # save the image
Image(graph.create_png())
1.运行结果:
2. 混淆矩阵:
3.决策树分类结果图:
对比结果:
神经网络模型精度为 0.8114285469055176,低于决策树模型精度 0.9514285714285714,因此建立银行风控模型时更建议使用决策树模型。