import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split


def create_regression_data(n):
    """Create a noisy sine-wave data set for the regression experiments.

    ``X`` holds ``n`` samples drawn uniformly from ``[0, 5)`` as an
    ``(n, 1)`` array and ``y`` is ``sin(X)`` flattened to one dimension.
    Every fifth target (indices 0, 5, 10, ...) is shifted by uniform noise
    in ``(-0.5, 0.5]``.

    The samples come from NumPy's global random state, which is not seeded
    here; only the train/test split is made reproducible
    (``random_state=0``).

    Args:
        n: Number of samples to generate.

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, y_train,
        y_test``, with a quarter of the samples held out for testing.
    """
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Add noise to every fifth sample. The noise vector is sized from the
    # slice itself: y[::5] has ceil(n / 5) elements, so int(n / 5) draws are
    # too few (and fail to broadcast) whenever n is not a multiple of 5.
    noisy = y[::5]  # basic slice -> a view, so the in-place add updates y
    noisy += 1 * (0.5 - np.random.rand(noisy.size))
    # Simple split: the test set takes 1/4 of the samples.
    return train_test_split(X, y, test_size=0.25, random_state=0)


# KNN regression: the KNeighborsRegressor model
def test_KNeighborsRegressor(*data):
    """Fit a default ``KNeighborsRegressor`` and print its scores.

    Args:
        *data: Four positional values, in order:
            ``X_train, X_test, y_train, y_test``.

    Prints the value of ``regr.score`` on the training set and on the
    test set; returns nothing.
    """
    X_train, X_test, y_train, y_test = data
    regr = neighbors.KNeighborsRegressor()
    regr.fit(X_train, y_train)
    print("Training Score:%f" % regr.score(X_train, y_train))
    print("Testing Score:%f" % regr.score(X_test, y_test))


# Build the data set used by the regression models.
X_train, X_test, y_train, y_test = create_regression_data(1000)
# Run test_KNeighborsRegressor.
test_KNeighborsRegressor(X_train, X_test, y_train, y_test)
def test_KNeighborsRegressor_k_w(*data):
    """Plot how ``n_neighbors`` and ``weights`` affect KNeighborsRegressor.

    For each weighting scheme (``'uniform'`` and ``'distance'``) a model is
    fitted for 100 integer values of K spread from 1 up to (but excluding)
    the training-set size. The training and testing scores are drawn as
    curves over K on a single axes, and the figure is shown.

    Args:
        *data: Four positional values, in order:
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    neighbor_counts = np.linspace(
        1, y_train.size, num=100, endpoint=False, dtype='int'
    )
    ax = plt.figure().add_subplot(1, 1, 1)

    # One pair of curves (testing / training score versus K) per scheme.
    for weight in ('uniform', 'distance'):
        train_curve = []
        test_curve = []
        for k in neighbor_counts:
            model = neighbors.KNeighborsRegressor(
                weights=weight, n_neighbors=k
            )
            model.fit(X_train, y_train)
            test_curve.append(model.score(X_test, y_test))
            train_curve.append(model.score(X_train, y_train))
        ax.plot(neighbor_counts, test_curve,
                label="testing score:weight=%s" % weight)
        ax.plot(neighbor_counts, train_curve,
                label="training score:weight=%s" % weight)

    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()


# Run test_KNeighborsRegressor_k_w.
test_KNeighborsRegressor_k_w(X_train, X_test, y_train, y_test)
def test_KNeighborsRegressor_k_p(*data):
    """Plot how ``n_neighbors`` and ``p`` affect KNeighborsRegressor.

    For each ``p`` in ``1, 2, 10`` a model is fitted for a range of integer
    K values spread from 1 up to (but excluding) the training-set size;
    ``np.linspace`` is left at its default number of points. The training
    and testing scores are drawn as curves over K on a single axes, and the
    figure is shown.

    Args:
        *data: Four positional values, in order:
            ``X_train, X_test, y_train, y_test``.
    """
    X_train, X_test, y_train, y_test = data
    neighbor_counts = np.linspace(
        1, y_train.size, endpoint=False, dtype='int'
    )
    ax = plt.figure().add_subplot(1, 1, 1)

    # One pair of curves (testing / training score versus K) per p value.
    for power in (1, 2, 10):
        train_curve = []
        test_curve = []
        for k in neighbor_counts:
            model = neighbors.KNeighborsRegressor(p=power, n_neighbors=k)
            model.fit(X_train, y_train)
            test_curve.append(model.score(X_test, y_test))
            train_curve.append(model.score(X_train, y_train))
        ax.plot(neighbor_counts, test_curve,
                label="testing score:p=%d" % power)
        ax.plot(neighbor_counts, train_curve,
                label="training score:p=%d" % power)

    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()


# Run test_KNeighborsRegressor_k_p.
test_KNeighborsRegressor_k_p(X_train, X_test, y_train, y_test)