import numpy as np
import urllib.request
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Source of the CSV data; the indexing below treats columns 0-7 as features
# and column 8 as the class label.
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# Download the data and parse it into a 2-D float matrix. The context manager
# closes the HTTP response as soon as parsing is done (it was previously leaked).
with urllib.request.urlopen(url) as raw_data:
    dataset = np.loadtxt(raw_data, delimiter=",")
# Feature matrix: every row, columns 0-7. The slice end is exclusive, so the
# bound must be 8; the previous 0:7 silently dropped column 7.
X = dataset[:, 0:8]
# Target vector: every row, column 8.
y = dataset[:, 8]
# Hold out a random 25% of the rows as the test set and train on the rest.
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# k-nearest neighbours
def KNN(X, y, XX, n_neighbors=10):
    """Fit a k-nearest-neighbours classifier and predict labels for ``XX``.

    Args:
        X: Training feature matrix.
        y: Training labels, one per row of ``X``.
        XX: Feature matrix of the samples to classify.
        n_neighbors: Number of neighbours passed to ``KNeighborsClassifier``.
            Defaults to 10, the value this script has always used
            (the original comment notes the library default is 5).

    Returns:
        The predictions produced by ``model.predict(XX)``.
    """
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(X, y)
    return model.predict(XX)


# Print the expected labels, then the predictions, so they can be compared
# by eye. The expected labels used to be printed from inside KNN, which made
# the function depend on the module-level y_test.
print(y_test)
a = KNN(X_train, y_train, X_test)
print(a)
# Measure the accuracy of the trained model on the test set: count the
# positions where the prediction equals the true label. The earlier version
# compared every label against a[1] only, which gave a meaningless score.
count = 0
for predicted_label, true_label in zip(a, y_test):
    if predicted_label == true_label:
        count += 1
counts = count / len(a)
print("正确率可以达到:", counts)