CNN for Visual Recognition (assignment1_Q1)
2015-01-13 19:42 Jack King 阅读(1560) 评论(1) 编辑 收藏 举报
参考:http://cs231n.github.io/assignment1/
Q1: k-Nearest Neighbor classifier (30 points)
import numpy as np

try:
    # Not used by this class; kept only because the original listing imported
    # it. Guarded so that a missing matplotlib install (or a release that no
    # longer ships cbook.todate -- TODO confirm and drop) cannot break import.
    from matplotlib.cbook import todate  # noqa: F401
except ImportError:
    todate = None


class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        # Both attributes are populated by train().
        self.X_train = None
        self.y_train = None

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Input:
        X - A num_train x dimension array where each row is a training point.
        y - A vector of length num_train, where y[i] is the label for X[i, :]
        """
        # np.asarray keeps ndarrays as-is (no copy) and lets callers pass
        # plain sequences as well.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Input:
        X - A num_test x dimension array where each row is a test point.
        k - The number of nearest neighbors that vote for predicted label
        num_loops - Determines which method to use to compute distances
                    between training points and test points (0, 1 or 2).

        Output:
        y - A vector of length num_test, where y[i] is the predicted label for the
            test point X[i, :].

        Raises:
        ValueError - if num_loops is not 0, 1 or 2, or if k < 1.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            # %s rather than %d so a non-numeric argument still produces the
            # intended ValueError instead of a formatting TypeError.
            raise ValueError('Invalid value %s for num_loops' % (num_loops,))

        return self.predict_labels(dists, k=k)

    @staticmethod
    def _as_float(a):
        """ Return a as an ndarray, promoting integer/bool data to float64. """
        a = np.asarray(a)
        # Differences and squares of unsigned or narrow integer data wrap
        # around silently, so do the arithmetic in floating point instead.
        if a.dtype.kind in 'biu':
            return a.astype(np.float64)
        return a

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Input:
        X - An num_test x dimension array where each row is a test point.

        Output:
        dists - A num_test x num_train array where dists[i, j] is the distance
                between the ith test point and the jth training point.
        """
        X = self._as_float(X)
        X_train = self._as_float(self.X_train)
        num_test = X.shape[0]
        num_train = X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                diff = X[i, :] - X_train[j, :]
                dists[i, j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        X = self._as_float(X)
        X_train = self._as_float(self.X_train)
        num_test = X.shape[0]
        num_train = X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts the ith test point from every training row.
            diff = X_train - X[i, :]
            dists[i, :] = np.sqrt(np.sum(np.square(diff), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2, i.e. one
        matrix multiplication and two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        X = self._as_float(X)
        X_train = self._as_float(self.X_train)
        cross = np.dot(X, X_train.T)                             # num_test x num_train
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)    # num_test x 1
        train_sq = np.sum(np.square(X_train), axis=1)            # num_train
        sq_dists = test_sq - 2.0 * cross + train_sq
        # Rounding in the expanded form can leave tiny negative values for
        # (near-)identical points; clamp them so sqrt never yields NaN.
        np.maximum(sq_dists, 0.0, out=sq_dists)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Input:
        dists - A num_test x num_train array where dists[i, j] gives the distance
                between the ith test point and the jth training point.
        k - The number of nearest neighbors that vote for the predicted label.

        Output:
        y - A float vector of length num_test where y[i] is the predicted label
            for the ith test point. Vote ties go to the smaller label.

        Raises:
        ValueError - if k < 1.
        """
        if k < 1:
            # A non-positive k would otherwise select no neighbours (k == 0)
            # or, through negative slicing, the wrong ones.
            raise ValueError('k must be a positive integer, got %s' % (k,))

        dists = np.asarray(dists)
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # A stable sort makes equal distances resolve deterministically in
            # favour of the lower training index.
            nearest = np.argsort(dists[i, :], kind='mergesort')[:k]
            closest_y = self.y_train[nearest]
            # np.unique returns the labels sorted ascending, and argmax picks
            # the first maximum, so ties break towards the smaller label.
            # Unlike np.bincount this also accepts negative or float labels.
            labels, counts = np.unique(closest_y, return_counts=True)
            y_pred[i] = labels[np.argmax(counts)]

        return y_pred
输出:
Two loop version took 55.817642 seconds
One loop version took 49.692089 seconds
No loop version took 1.267753 seconds