机器学习基石第二讲 Learning to Answer Yes/No
一.Perceptron Hypothesis Set
二.Perceptron Learning Algorithm (PLA)
PLA算法流程:
三.Guarantee of PLA
下面是对PLA的证明:
四.Non-Separable Data
在学习完 Perceptron 算法后,我自己编写了一个识别数字 0 和 1 的程序。训练集和测试集使用的是 Yann LeCun 大神很有名的 MNIST 手写数字图像(images);经过训练之后,测试集的正确率在 70% 左右。代码如下:
import glob
import os

import numpy


class Percetron(object):
    """Binary perceptron (PLA) trained on fixed-size grayscale ``.bmp`` images.

    Every image is flattened to a row vector of length
    ``image_x_dimension * image_y_dimension + 1``; element 0 is the constant
    bias input ``1`` and the remaining elements are the pixel intensities in
    row-major order.

    Images are read from the *current working directory*.  Labels are
    positional: after sorting the file names, the first ``negtive_dimension``
    files are labelled ``-1`` and the following ``positive_dimension`` files
    are labelled ``+1``.

    Args:
        image_x_dimension: Number of pixel rows read from each image.
        image_y_dimension: Number of pixel columns read from each image.
        positive_dimension: Number of positive (``+1``) samples.
        negtive_dimension: Number of negative (``-1``) samples.
    """

    def __init__(self, image_x_dimension, image_y_dimension,
                 positive_dimension, negtive_dimension):
        self.image_x_dimension = image_x_dimension
        self.image_y_dimension = image_y_dimension
        self.positive_dimension = positive_dimension
        self.negtive_dimension = negtive_dimension
        # +1 for the bias component stored at index 0 of every sample.
        self.dimension = self.image_x_dimension * self.image_y_dimension + 1
        self.count = self.positive_dimension + self.negtive_dimension

    def image2vector(self, filename):
        """Load one grayscale image as a ``(1, dimension)`` float row vector.

        Raises:
            IOError: If OpenCV cannot read ``filename``.
            ValueError: If the image is smaller than the configured
                ``image_x_dimension`` x ``image_y_dimension`` window.
        """
        # Imported here because this is the only method that needs OpenCV;
        # the rest of the class stays usable where cv2 is not installed.
        import cv2

        file_matrix = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
        if file_matrix is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError("cannot read image: %r" % (filename,))

        # Larger images are cropped to the top-left window.
        pixels = file_matrix[:self.image_x_dimension, :self.image_y_dimension]
        expected_shape = (self.image_x_dimension, self.image_y_dimension)
        if pixels.shape != expected_shape:
            raise ValueError(
                "image %r has shape %r, expected at least %r"
                % (filename, file_matrix.shape, expected_shape)
            )

        returnVect = numpy.zeros((1, self.dimension))
        returnVect[0, 0] = 1  # bias input
        returnVect[0, 1:] = pixels.astype(float).ravel()  # row-major pixels
        return returnVect

    def image2Matrix(self):
        """Stack every ``*.bmp`` in the working directory into a sample matrix.

        Returns:
            A ``(count, dimension)`` float array, one image per row, rows
            ordered by sorted file name.

        Raises:
            ValueError: If the number of ``.bmp`` files differs from
                ``count``.  A silent all-zero row could never be classified
                correctly and would keep training from ever terminating.
        """
        # glob does not guarantee any ordering, but the labels produced by
        # yMatrix() are purely positional, so fix the order explicitly.
        filenames = sorted(glob.glob(os.path.join(os.curdir, "*.bmp")))
        if len(filenames) != self.count:
            raise ValueError(
                "expected %d .bmp files in %r, found %d"
                % (self.count, os.getcwd(), len(filenames))
            )

        X_train = numpy.zeros((self.count, self.dimension))
        for row, bmp in enumerate(filenames):
            X_train[row, :] = self.image2vector(bmp)
        return X_train

    def yMatrix(self):
        """Return the ``(count, 1)`` label column: negatives first, then positives."""
        y_train = numpy.ones((self.count, 1))
        y_train[:self.negtive_dimension] = -1
        return y_train

    def fit(self, X_train, y_train, max_epochs=None):
        """Run the perceptron learning algorithm on in-memory data.

        Args:
            X_train: ``(n, dimension)`` samples, bias component included.
            y_train: ``n`` labels in ``{-1, +1}``; shape ``(n,)`` or ``(n, 1)``.
            max_epochs: Upper bound on full passes over the data.  ``None``
                means "run until a pass makes no mistake", which never
                terminates when the data is not linearly separable.

        Returns:
            The ``(dimension, 1)`` weight vector.
        """
        X_train = numpy.asarray(X_train, dtype=float)
        labels = numpy.asarray(y_train, dtype=float).reshape(-1)

        w = numpy.zeros((self.dimension, 1))
        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            updated = False
            for sample, label in zip(X_train, labels):
                # A margin of exactly 0 counts as a mistake, so the all-zero
                # starting weights are corrected by the very first sample.
                if numpy.dot(sample, w)[0] * label <= 0:
                    w += label * sample.reshape(-1, 1)
                    updated = True
            if not updated:
                break
            epoch += 1
        return w

    def score(self, X_test, y_test, w):
        """Return the fraction of samples with a strictly positive margin."""
        X_test = numpy.asarray(X_test, dtype=float)
        labels = numpy.asarray(y_test, dtype=float).reshape(-1)
        margins = numpy.dot(X_test, w)[:, 0] * labels
        correct = int(numpy.count_nonzero(margins > 0))
        return float(correct) / X_test.shape[0]

    def opPercetron(self, max_epochs=None):
        """Train on the images in the working directory and return the weights.

        Args:
            max_epochs: Optional cap on training passes; see :meth:`fit`.
                The default (``None``) trains until convergence.
        """
        X_train = self.image2Matrix()
        y_train = self.yMatrix()
        return self.fit(X_train, y_train, max_epochs)

    def accuracy(self, w):
        """Print and return the accuracy of ``w`` on the working-directory images."""
        X_test = self.image2Matrix()
        y_test = self.yMatrix()
        ratio = self.score(X_test, y_test, w)
        # Parenthesised single-argument form works as both the Python 2
        # print statement and the Python 3 print function.
        print(ratio)
        return ratio


# 28x28 images; 800 positive and 800 negative training samples.
my_train_Percetron = Percetron(28, 28, 800, 800)
# Train on the images in the training folder.  The image loader reads every
# .bmp from the current working directory, hence the chdir before each use.
os.chdir("C:\\Users\\samsung\\Desktop\\Perceptron_train")
w = my_train_Percetron.opPercetron()
# Accuracy of the learned weights on the training images themselves.
my_train_Percetron.accuracy(w)

# Evaluate the same weights on the held-out folder
# (180 positive and 335 negative samples).
my_test_Percetron = Percetron(28, 28, 180, 335)
os.chdir("C:\\Users\\samsung\\Desktop\\Perceptron_test")
my_test_Percetron.accuracy(w)