博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

sklearn之交叉检验1

Posted on 2018-04-09 17:25  风吹白杨的安妮  阅读(218)  评论(0编辑  收藏  举报
#下面是只是普通的,仅一次
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X = iris.data
y = iris.target

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=4)
knn = KNeighborsClassifier(n_neighbors=5)#附近点的5个值
knn.fit(X_train,y_train)
print(knn.score(X_test,y_test))

#下面开始交叉检验
#调用形式为:sklearn.cross_validation.cross_val_score
from sklearn.cross_validation import cross_val_score
knn = KNeighborsClassifier(n_neighbors=5)
#socring是判断准确度的方法
scores = sklearn.cross_validation.cross_val_score(knn,X,y,cv=5,scoring='accuracy')
#如果是回归的话,scoring='mean_squared_error',判断误差,要在前面加负号,
#即loss = -sklearn.cross_validation.cross_val_score(knn,X,y,cv=5,scoring='mean_squared_error')
print(scores)
#有两组到了100的境界,cv指的是分成几个set
#我们可以把它们平均一下
print(scores.mean())

#现在我想看一下,n_neighbors为多少对结果有什么影响
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target
k_range = (1,31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = sklearn.cross_validation.cross_val_score(knn,X,y,cv=10,scoring='accuracy')
    k_scores.append(scores.mean())

plt.plot(k_range,k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
plt.show()

 

 感觉上面这个线怪怪的啊,我得想一想。。。。。