这本笔记本的目的是让你可视化各种分类器的决策边界。

本笔记本中使用的数据基于存储在mushrooms.csv中的UCI蘑菇数据集。

为了更好地确定决策边界,我们将对数据执行主成分分析(PCA)以将维度降至2维。 降维将在本课程后面的模块中介绍。

玩弄不同的模型和参数,看看它们如何影响分类器的决策边界和准确性!

 1 %matplotlib notebook
 2 
 3 import pandas as pd
 4 import numpy as np
 5 import matplotlib.pyplot as plt
 6 from sklearn.decomposition import PCA
 7 from sklearn.model_selection import train_test_split
 8 
 9 df = pd.read_csv('mushrooms.csv')
10 df2 = pd.get_dummies(df)
11 
12 df3 = df2.sample(frac=0.08)
13 
14 X = df3.iloc[:,2:]
15 y = df3.iloc[:,1]
16 
17 
18 pca = PCA(n_components=2).fit_transform(X)
19 
20 X_train, X_test, y_train, y_test = train_test_split(pca, y, random_state=0)
21 
22 
23 plt.figure(dpi=120)
24 plt.scatter(pca[y.values==0,0], pca[y.values==0,1], alpha=0.5, label='Edible', s=2)
25 plt.scatter(pca[y.values==1,0], pca[y.values==1,1], alpha=0.5, label='Poisonous', s=2)
26 plt.legend()
27 plt.title('Mushroom Data Set\nFirst Two Principal Components')
28 plt.xlabel('PC1')
29 plt.ylabel('PC2')
30 plt.gca().set_aspect('equal')

 1 def plot_mushroom_boundary(X, y, fitted_model):
 2 
 3     plt.figure(figsize=(9.8,5), dpi=100)
 4     
 5     for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):
 6         plt.subplot(1,2,i+1)
 7 
 8         mesh_step_size = 0.01  # step size in the mesh
 9         x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
10         y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
11         xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size), np.arange(y_min, y_max, mesh_step_size))
12         if i == 0:
13             Z = fitted_model.predict(np.c_[xx.ravel(), yy.ravel()])
14         else:
15             try:
16                 Z = fitted_model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:,1]
17             except:
18                 plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',
19                      verticalalignment='center', transform = plt.gca().transAxes, fontsize=12)
20                 plt.axis('off')
21                 break
22         Z = Z.reshape(xx.shape)
23         plt.scatter(X[y.values==0,0], X[y.values==0,1], alpha=0.4, label='Edible', s=5)
24         plt.scatter(X[y.values==1,0], X[y.values==1,1], alpha=0.4, label='Posionous', s=5)
25         plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15, 
26                    extent=(x_min, x_max, y_min, y_max), origin='lower')
27         plt.title(plot_type + '\n' + 
28                   str(fitted_model).split('(')[0]+ ' Test Accuracy: ' + str(np.round(fitted_model.score(X, y), 5)))
29         plt.gca().set_aspect('equal');
30         
31     plt.tight_layout()
32     plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)
1 from sklearn.linear_model import LogisticRegression
2 
3 model = LogisticRegression()
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.neighbors import KNeighborsClassifier
2 
3 model = KNeighborsClassifier(n_neighbors=20)
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.tree import DecisionTreeClassifier
2 
3 model = DecisionTreeClassifier(max_depth=3)
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.tree import DecisionTreeClassifier
2 
3 model = DecisionTreeClassifier()
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.ensemble import RandomForestClassifier
2 
3 model = RandomForestClassifier()
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.svm import SVC
2 
3 model = SVC(kernel='linear')
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.svm import SVC
2 
3 model = SVC(kernel='rbf', C=1)
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.svm import SVC
2 
3 model = SVC(kernel='rbf', C=10)
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.naive_bayes import GaussianNB
2 
3 model = GaussianNB()
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

1 from sklearn.neural_network import MLPClassifier
2 
3 model = MLPClassifier()
4 model.fit(X_train,y_train)
5 
6 plot_mushroom_boundary(X_test, y_test, model)

 

posted on 2018-03-15 09:41  郑哲  阅读(2064)  评论(0编辑  收藏  举报