A Deep Convolutional Neural Network Based on Theano

The network uses two convolutional-pooling layers, one fully connected layer, and a softmax classifier.
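
The layer shapes follow from valid convolutions and 2x2 max-pooling: a 28x28 input becomes 28-5+1 = 24x24 after the first 5x5 convolution and 12x12 after pooling; the second stage gives 12-5+1 = 8x8, then 4x4 after pooling, which is why the fully connected layer takes n_in = 40*4*4 = 640 inputs.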

Accuracy on the test set reaches 99.22%.

The code is adapted from neural-networks-and-deep-learning (Michael Nielsen's network3.py).

#coding:utf8
# Python 2 code; relies on old-style Theano APIs (conv.conv2d,
# downsample.max_pool_2d), as in the book's network3.py.
import cPickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import sigmoid
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)

def load_data_shared():
    # Load the pickled MNIST splits and wrap them in shared variables so
    # Theano can keep the whole dataset on the GPU.
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
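
# Note: 'mnist.pkl' is assumed to be the book's mnist.pkl.gz decompressed once
# beforehand; a minimal sketch:
#   import gzip, shutil
#   with gzip.open('mnist.pkl.gz', 'rb') as fin, open('mnist.pkl', 'wb') as fout:
#       shutil.copyfileobj(fin, fout)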

class Network(object):
    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]  # all w, b
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # 1-dimensional vector of labels
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]  # wire layer j-1 -> j
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
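        # Each layer exposes two symbolic outputs: `output` (the clean forward
        # pass, used for validation/test accuracy) and `output_dropout` (the
        # dropped-out pass, which feeds the training cost below).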

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)  # T.grad derives gradients from the cost; no hand-written prime functions needed
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
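        # `givens` substitutes a slice of the shared dataset for self.x/self.y,
        # so each compiled function takes only a batch index and no data is
        # copied from Python on each call (the data can stay on the GPU).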

        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:  # end of an epoch
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%},cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))


class ConvPoolLayer(object):  # combined convolution + max-pooling layer
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        self.filter_shape = filter_shape  # e.g. (20, 1, 5, 5): 20 filters, 1 input channel, 5x5 kernels
        self.image_shape = image_shape  # e.g. (10, 1, 28, 28); channel count must match filter_shape[1]
        self.poolsize = poolsize  # e.g. (2, 2)
        self.activation_fn = activation_fn  # e.g. ReLU or sigmoid
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))  # e.g. 20*5*5/(2*2) = 125
        self.w = theano.shared(  # shape (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # one bias per filter, shape (20,)
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # valid convolution: 28-5+1=24 -> (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12 -> (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(  # (10, 20, 12, 12) + (1, 20, 1, 1) broadcasts to (10, 20, 12, 12)
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers

class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))

class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # theano.tensor.nnet.softmax
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        # negative log-likelihood of the correct labels over the mini-batch
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))


def size(data):  # number of examples in a shared dataset
    return len(data[0].get_value())


def dropout_layer(layer, p_dropout):  # randomly zero units with probability p_dropout
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
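
# Dropout convention used here: during training the dropped-out path keeps each
# unit with probability 1-p_dropout; at evaluation time the clean `output` path
# instead scales the pre-activation by (1-p_dropout), so expected magnitudes match.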


if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Sigmoid ConvPoolLayer
# Epoch 29: validation accuracy 98.96%,cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%

# ReLU ConvPoolLayer
# Epoch 29: validation accuracy 99.06%,cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
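
Both FullyConnectedLayer and SoftmaxLayer already take a p_dropout argument, so a dropout variant of the same network only changes the layer construction. A minimal sketch; the hyperparameters below (p_dropout=0.5, eta=0.03, lmbda=0.1, 40 epochs) are illustrative, not tuned values from the runs above:

net = Network([
    ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                  filter_shape=(20, 1, 5, 5), poolsize=(2, 2)),
    ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                  filter_shape=(40, 20, 5, 5), poolsize=(2, 2)),
    FullyConnectedLayer(n_in=40*4*4, n_out=100, p_dropout=0.5),
    SoftmaxLayer(n_in=100, n_out=10, p_dropout=0.5)], mini_batch_size)
net.SGD(training_data, 40, mini_batch_size, 0.03,
        validation_data, test_data, lmbda=0.1)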

 
