10. KNN 手写数字识别
# Handwritten-digit recognition with a k-nearest-neighbours classifier.
#
# Pipeline: load the 28x28 digit bitmaps, shuffle them, hold out the last
# N_TEST samples for testing, fit a KNN model, persist it with joblib, and
# finally classify a digit cropped out of an external photo.

# Imports
import numpy as np
import matplotlib.pyplot as plt
import scipy.ndimage as ndimage
from sklearn.neighbors import KNeighborsClassifier

try:
    # joblib is a standalone package; scikit-learn 0.23 removed the copy
    # that used to be vendored under sklearn.externals.
    import joblib
except ImportError:
    # Fallback for very old environments that only ship the vendored copy.
    from sklearn.externals import joblib

N_TEST = 50        # number of shuffled samples held out for testing
SHUFFLE_SEED = 3   # fixed seed so the train/test split is reproducible

# Load the data: one folder per digit 0-9, files numbered 1..500 in each.
feature = []
target = []
for i in range(10):
    for j in range(1, 501):
        img_arr = plt.imread('F:/data/%d/%d_%d.bmp' % (i, i, j))
        feature.append(img_arr)
        target.append(i)
feature = np.array(feature)
target = np.array(target)
print(feature.shape, target.shape)

# Spot-check one random sample: print its label and display the image.
index = np.random.randint(0, len(feature))
print('该索引对应的目标值', target[index])
digit = feature[index]
plt.figure(figsize=(2, 2))
plt.imshow(digit, cmap='gray')

# Shuffle features and labels with ONE shared permutation so every image
# stays paired with its label (instead of re-seeding the global RNG twice).
order = np.random.RandomState(SHUFFLE_SEED).permutation(len(feature))
feature = feature[order]
target = target[order]

# Split into training data and the held-out test data.
x_train = feature[:-N_TEST]
y_train = target[:-N_TEST]
x_test = feature[-N_TEST:]
y_test = target[-N_TEST:]
print(x_train.shape)  # (4950, 28, 28) with the full 5000-image data set

# The estimator needs 2-D input: one flattened row of pixels per sample.
# -1 lets numpy infer the pixel count (784 for 28x28 images).
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Build and fit the KNN model.
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(x_train, y_train)
# Training accuracy alone is optimistic; report the held-out accuracy too.
print('train accuracy:', knn.score(x_train, y_train))
print('test accuracy:', knn.score(x_test, y_test))

# Compare true labels with predictions on the test set.
y_ = knn.predict(x_test)
print('真实:', y_test)
print('预测:', y_)

# Persist the model and load it back.
joblib.dump(knn, './knn.m')
knn = joblib.load('./knn.m')

# Let the model recognise a digit taken from an external photo.
img_arr = plt.imread('F:/数字.jpg')
plt.imshow(img_arr)
five_img = img_arr[95:150, 85:130]  # crop the region containing the digit
plt.imshow(five_img)
print(five_img.shape)

# Collapse the colour channels so the crop is 2-D like the training images;
# skipped when the photo is already single-channel.
if five_img.ndim == 3:
    five_img = five_img.mean(axis=2)

# Rescale the crop to the same pixel size as the training samples (28x28),
# deriving the zoom factors from the actual shapes instead of hard-coding.
sample_h, sample_w = feature.shape[1:3]
crop_h, crop_w = five_img.shape
five = ndimage.zoom(five_img, zoom=(sample_h / crop_h, sample_w / crop_w))
print(five.shape)

# The printed prediction is the final result.
print(knn.predict(five.reshape(1, -1)))