KNN算法实战——海伦约会(KDTree优化)

 本文用海伦约会的例子来测试之前实现的KDTree的效果,并探讨是否对特征进行缩放(这里用的是标准化,即 StandardScaler)对模型表现的影响。
最后发现,对于KNN这类基于距离的模型,特征缩放确实能带来非常大的帮助。
from KDTree import KDTree  # KD-tree implementation from the author's earlier post
from sklearn import model_selection, preprocessing
import pandas as pd


class KNN(object):
    """K-nearest-neighbour model that delegates neighbour search to a KDTree.

    Parameters
    ----------
    K : int
        Number of neighbours passed to the KDTree prediction calls.
    p : int
        Stored on the instance only.
    """

    def __init__(self, K=1, p=2):
        self.kdtree = KDTree()
        self.K = K
        # NOTE(review): p is never forwarded to the KDTree anywhere in this
        # file; presumably the Minkowski distance order -- confirm against
        # the KDTree implementation before relying on it.
        self.p = p

    def fit(self, x_data, y_data):
        """Build the underlying KD-tree from the samples and their targets."""
        self.kdtree.build_tree(x_data, y_data)

    def predict(self, pre_x, label):
        """Predict the target for one query point.

        ``label`` selects the task: if it contains ``'class'`` the
        classification predictor is used, otherwise the regression one.
        """
        if 'class' in label:
            return self.kdtree.predict_classification(pre_x, K=self.K)
        return self.kdtree.predict_regression(pre_x, K=self.K)

    def test_check(self, test_xx, test_y):
        """Return the classification accuracy on a held-out set.

        Only classification problems are supported.

        Raises
        ------
        ValueError
            If the test set is empty, or if ``test_xx`` and ``test_y``
            differ in length.
        """
        total = len(test_y)
        if total == 0:
            # The accuracy ratio is undefined without samples; fail with a
            # clear message instead of a bare ZeroDivisionError.
            raise ValueError("test_check requires a non-empty test set")
        if len(test_xx) != total:
            raise ValueError(
                "test_xx and test_y must have the same length, got "
                "%d and %d" % (len(test_xx), total)
            )

        correct = 0
        for xi, expected in zip(test_xx, test_y):
            pre_y = self.kdtree.predict_classification(Xi=xi, K=self.K)
            if pre_y == expected:
                correct += 1
        return correct / total


# The data file is read as tab-separated with no header row; the last column
# is taken as the label and all preceding columns as features.
file_path = "datingTestSet.txt"
data = pd.read_csv(file_path, sep="\t", header=None)
XX = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values
train_xx, test_xx, train_y, test_y = model_selection.train_test_split(
    XX, Y, test_size=0.2, random_state=123, shuffle=True
)

# Baseline: fit and evaluate on the raw, unscaled features.
knn = KNN(K=5, p=2)
knn.fit(train_xx, train_y)
acc = knn.test_check(test_xx, test_y)
print("No Standard Scale Accuracy: ", acc)

# The feature dimensions differ greatly in magnitude, so apply feature scaling.
scaler = preprocessing.StandardScaler()
# The mean and standard deviation must be computed from the training set only.
scaler.fit(train_xx)
stand_train_xx = scaler.transform(train_xx)
stand_test_xx = scaler.transform(test_xx)

new_knn = KNN(K=5, p=2)
new_knn.fit(stand_train_xx, train_y)
new_acc = new_knn.test_check(stand_test_xx, test_y)
print("Standard Scale Accuracy: ", new_acc)

 

posted @ 2020-10-05 15:48  ISGuXing  阅读(642)  评论(0)  编辑  收藏  举报