参考黄文坚《TensorFlow实战》一书,完成AlexNet的整体实现并展望其训练和预测过程。
import tensorflow as tf batch_size = 32 num_batches = 100 # 显示网络每一层结构,展示每一个卷积层或池化层输出tensor的尺寸,接受一个tensor作为输入 def print_activations(t): print(t.op.name, ' ', t.get_shape().as_list()) # 接受images作为输入,返回最后一层pool5(第五个池化层) # 及parameters(AlexNet中所有需要训练的模型参数)''' def inference(images): parameters = [] with tf.name_scope('conv1') as scope: # 用截断的正态分布函数(标准差为0.1)初始化卷积核的参数kernel。卷积核尺寸为11*11,颜色通道为3,卷积核数量为64 kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32, stddev=1e-1), name='weights') # 使用tf.nn.conv2d对输入images完成卷积操作 conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME') biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases') bias = tf.nn.bias_add(conv, biases) conv1 = tf.nn.relu(bias, name=scope) print_activations(conv1) parameters += [kernel, biases] # LRN处理和最大池化处理 lrn1 = tf.nn.lrn(conv1, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn1') pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1') print_activations(pool1) # 打印输出结果pool1的结构 # 设计第二个卷积层 卷积核尺寸5*5 输入通道数64 卷积核数量192 with tf.name_scope('conv2') as scope: kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dthpe=tf.float32, stddev=1e-1), name='weights') # 卷积步长全部设为1,即扫描全图像素 conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME') biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32), trainable=True, name='biases') bias = tf.nn.bias_add(conv, biases) conv2 = tf.nn.relu(bias, name=scope) parameters += [kernel, biases] print_activations(conv2) lrn2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9, beta=0.75, name='lrn2') pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool2') print_activations(pool2) # 创建第三个卷积层 卷积核尺寸3*3 输入通道数192 卷积核数量384 步长全为1 with tf.name_scope('conv3') as scope: kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dthpe=tf.float32, stddev=1e-1), name='weights') # 卷积步长全部设为1,即扫描全图像素 conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME') biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32), trainable=True, name='biases') bias = tf.nn.bias_add(conv, biases) conv3 = tf.nn.relu(bias, name=scope) parameters += [kernel, biases] print_activations(conv3) # 创建第四个卷积层 卷积核尺寸3*3 输入通道数384 卷积核数量降为256 with tf.name_scope('conv4') as scope: kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dthpe=tf.float32, stddev=1e-1), name='weights') # 卷积步长全部设为1,即扫描全图像素 conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME') biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases') bias = tf.nn.bias_add(conv, biases) conv4 = tf.nn.relu(bias, name=scope) parameters += [kernel, biases] print_activations(conv4) # 最后的第五个卷积层 卷积核尺寸3*3 输入通道数256 卷积核数量为256 with tf.name_scope('conv5') as scope: kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dthpe=tf.float32, stddev=1e-1), name='weights') # 卷积步长全部设为1,即扫描全图像素 conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME') biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), trainable=True, name='biases') bias = tf.nn.bias_add(conv, biases) conv5 = tf.nn.relu(bias, name=scope) parameters += [kernel, biases] print_activations(conv5) # 在5个卷积层之后,还有一个最大池化层,这个池化层和前两个卷积层后的池化层一致 pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool5') print_activations(pool5) return pool5, parameters # 在正式使用AlexNet来训练或预测时,还需要添加3个全连接层,隐含节点数分别为4096、4096、1000
后续形成实现卷积神经网络构建、训练、测试的代码架构,会将Alexnet实现结构重新组织完整和优化。