用tensorflow迁移学习猫狗分类
笔者这几天在跟着莫烦学习TensorFlow,正好学到迁移学习(至于什么是迁移学习,看这篇),莫烦老师做的是预测猫和老虎尺寸大小的学习。作为一个有为的学生,笔者当然不能再预测猫啊狗啊的大小啦,正好之前做过猫狗大战数据集的图像分类,做好的数据都还在,二话不说,开撸。
既然是VGG16模型,当然首先上模型代码了:
def conv_layers_simple_api(net_in):
    """Stack the VGG16 convolutional feature extractor on top of ``net_in``.

    The layer names (``conv1_1`` ... ``conv5_3``, ``pool1`` ... ``pool5``) are
    generated from the block table below and are identical to the names used
    by the hand-written version of this function.

    Args:
        net_in: TensorLayer input layer. Its ``outputs`` tensor is replaced
            in place by the mean-subtracted image.

    Returns:
        The TensorLayer layer produced by the final max-pooling (``pool5``).
    """
    with tf.name_scope('preprocess'):
        # Preprocessing layer: takes the RGB image with pixel values in the
        # range 0-255 and subtracts the mean image values (calculated over
        # the entire ImageNet training set).
        mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                           shape=[1, 1, 1, 3], name='img_mean')
        net_in.outputs = net_in.outputs - mean

    # (block number, filters per convolution, number of convolutions)
    vgg16_blocks = (
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 3),
        (4, 512, 3),
        (5, 512, 3),
    )

    network = net_in
    for block, n_filter, n_convs in vgg16_blocks:
        for index in range(1, n_convs + 1):
            network = Conv2d(network, n_filter=n_filter, filter_size=(3, 3),
                             strides=(1, 1), act=tf.nn.relu, padding='SAME',
                             name='conv%d_%d' % (block, index))
        # Every block ends with a 2x2 max-pooling of stride 2.
        network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2),
                            padding='SAME', name='pool%d' % block)
    return network
笔者偷懒直接用的是TensorLayer库中的Vgg16模型,至于什么是tensorlayer请移步这里
按照莫烦老师的教程,改写最后的全连接层做二分类学习:
def fc_layers(net):
    """Attach the two-class classification head to the convolutional stack.

    Args:
        net: TensorLayer layer holding the convolutional features.

    Returns:
        The final TensorLayer layer; its outputs are 2-unit linear logits.
    """
    # Flatten the feature maps before the fully connected layers.
    flat = FlattenLayer(net, name='flatten')
    # A single 256-unit hidden layer; the 4096-unit 'fc2_relu' layer is
    # intentionally left out in this version.
    hidden = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    # Despite its name, 'fc3_relu' is linear (tf.identity): it emits the logits.
    return DenseLayer(hidden, n_units=2, act=tf.identity, name='fc3_relu')
定义输入输出、损失函数以及学习步骤:
# Input images and integer class labels.
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')

# Network: input layer -> VGG16 conv stack (simplified CNN API) -> FC head.
net_in = InputLayer(x, name='input')
net_cnn = conv_layers_simple_api(net_in)
network = fc_layers(net_cnn)
y = network.outputs

# Predicted class index, loss and accuracy.
y_op = tf.argmax(tf.nn.softmax(y), 1)
cost = tl.cost.cross_entropy(y, y_, name='cost')
predicted_class = tf.cast(tf.argmax(y, 1), tf.float32)
true_class = tf.cast(y_, tf.float32)
correct_prediction = tf.equal(predicted_class, true_class)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Optimizer: only the parameters from index 26 onwards are trained, i.e. the
# ones that come after the convolutional stack.
train_params = network.all_params[26:]
global_step = tf.Variable(0)
# An exponentially decaying learning rate was tried and is disabled:
# learning_rate = tf.train.exponential_decay(1e-2, global_step, decay_steps=1000, decay_rate=0.98, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999,
                                   epsilon=1e-08, use_locking=False)
train_op = optimizer.minimize(cost, var_list=train_params)
读取训练、验证数据,加载模型参数:
img, label = read_and_decode("F:\\001-python\\train.tfrecords")
img_v, label_v = read_and_decode("F:\\001-python\\val.tfrecords")

# shuffle_batch draws randomly shuffled batches from the input queue.
X_train, y_train = tf.train.shuffle_batch([img, label],
                                          batch_size=30, capacity=400,
                                          min_after_dequeue=300)
X_Val, y_val = tf.train.shuffle_batch([img_v, label_v],
                                      batch_size=30, capacity=400,
                                      min_after_dequeue=300)

tl.layers.initialize_global_variables(sess)
network.print_params()
network.print_layers()

# Load the pre-trained VGG16 weights for the convolutional stack only.
# The 13 conv layers own 26 arrays (one weight and one bias each), which is
# also where the trainable slice ``network.all_params[26:]`` begins. The old
# slice ``[0:25]`` stopped one array short, so the last conv bias was neither
# restored nor trained.
# NOTE(review): assumes the npz keys sort with all conv entries first, in
# weight/bias pairs (conv1_1_W, conv1_1_b, ...) -- confirm against the file.
n_pretrained_params = 26
npz = np.load('vgg16_weights.npz')
params = [array for _, array in sorted(npz.items())[:n_pretrained_params]]

# Assign the pre-trained parameters to the first layers of the network.
tl.files.assign_params(sess, params, network)
加载好之后,开始训练,200个epoch:
for epoch in range(n_epoch):
    start_time = time.time()

    # Dequeue one shuffled batch; it is this epoch's entire training data.
    epoch_images, epoch_labels = sess.run([X_train, y_train])
    for X_train_a, y_train_a in tl.iterate.minibatches(epoch_images, epoch_labels, batch_size, shuffle=True):
        sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})

    # Report only on the first epoch and on every fifth one.
    is_report_epoch = epoch + 1 == 1 or (epoch + 1) % 5 == 0
    if not is_report_epoch:
        continue

    print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))

    # Re-evaluate loss and accuracy on the same data that was just trained on.
    train_loss, train_acc, n_batch = 0, 0, 0
    for X_train_a, y_train_a in tl.iterate.minibatches(epoch_images, epoch_labels, batch_size, shuffle=True):
        err, ac = sess.run([cost, acc], feed_dict={x: X_train_a, y_: y_train_a})
        train_loss, train_acc, n_batch = train_loss + err, train_acc + ac, n_batch + 1
    print(" train loss: %f" % (train_loss / n_batch))
    print(" train acc: %f" % (train_acc / n_batch))
保存训练的参数:
# Persist every network parameter (conv stack and FC head) to model.npz.
tl.files.save_npz(network.all_params, name='model.npz', sess=sess)
下面就是开始训练啦,笔者很高兴的拿着自己的笔记本显卡呼呼的跑了一遍:
~~~~~~~~~~~~~~~~~~~~~~~~下面是漫长的等待
....... [TL] Epoch 138 of 150 took 0.999402s [TL] val loss: 0.687194 [TL] val acc: 0.562500 [TL] Epoch 140 of 150 took 3.782207s [TL] val loss: 0.619966 [TL] val acc: 0.750000 [TL] Epoch 142 of 150 took 0.983802s [TL] val loss: 0.685686 [TL] val acc: 0.562500 [TL] Epoch 144 of 150 took 0.986604s [TL] val loss: 0.661224 [TL] val acc: 0.687500 [TL] Epoch 146 of 150 took 1.022403s [TL] val loss: 0.675885 [TL] val acc: 0.687500 [TL] Epoch 148 of 150 took 0.991802s [TL] val loss: 0.682124 [TL] val acc: 0.625000 [TL] Epoch 150 of 150 took 3.487811s [TL] val loss: 0.674932 [TL] val acc: 0.687500 [TL] Total training time: 319.859640s [TL] [*] model.npz saved
额~~~~~~~~~~~~~~~~~
0.68的正确率,群里一位朋友看了之后说:跟猜差不多了(一脸黑线)。问题出哪儿呢?难道是笔者训练的次数不够多?莫烦老师可是100次就能出很好的结果啊
不管怎么样,要试试,笔者于是加载刚刚保存的model.npz参数继续跑100个epoch
~~~~~~~~~~~~~~~~~~~~~~~~又是漫长的等待
[TL] Epoch 1 of 100 took 8.477617s [TL] val loss: 0.685957 [TL] val acc: 0.562500 [TL] Epoch 2 of 100 took 0.999402s [TL] val loss: 0.661529 [TL] val acc: 0.625000 ...... [TL] Epoch 94 of 100 took 0.992208s [TL] val loss: 0.708815 [TL] val acc: 0.562500 [TL] Epoch 96 of 100 took 0.998406s [TL] val loss: 0.710636 [TL] val acc: 0.562500 [TL] Epoch 98 of 100 took 0.992807s [TL] val loss: 0.621505 [TL] val acc: 0.687500 [TL] Epoch 100 of 100 took 0.986405s [TL] val loss: 0.670647 [TL] val acc: 0.625000 [TL] Total training time: 156.734633s [TL] [*] model.npz saved
坑爹啊这是,还不如之前的结果。
笔者陷入深深的沉思中,难道是改了全连接层导致的?于是笔者又把之前去掉的全连接层加上:
def fc_layers(net):
    """Attach a classification head with two hidden layers to the conv stack.

    Args:
        net: TensorLayer layer holding the convolutional features.

    Returns:
        The final TensorLayer layer; its outputs are 2-unit linear logits.
    """
    # Flatten the feature maps before the fully connected layers.
    flat = FlattenLayer(net, name='flatten')
    # Two 256-unit ReLU hidden layers ('fc2_relu' is the re-added one).
    hidden1 = DenseLayer(flat, n_units=256, act=tf.nn.relu, name='fc1_relu')
    hidden2 = DenseLayer(hidden1, n_units=256, act=tf.nn.relu, name='fc2_relu')
    # Despite its name, 'fc3_relu' is linear (tf.identity): it emits the logits.
    return DenseLayer(hidden2, n_units=2, act=tf.identity, name='fc3_relu')
接着训练
~~~~~~~~~~~~~~~~~~~~~~~~下面又是漫长的等待
1 [TL] Epoch 1 of 100 took 8.477229s 2 [TL] val loss: 2.370650 3 [TL] val acc: 0.562500 4 ... 5 [TL] Epoch 100 of 100 took 1.016002s 6 [TL] val loss: 0.762171 7 [TL] val acc: 0.437500 8 [TL] Total training time: 156.836465s 9 [TL] [*] model.npz saved
还是一样,笔者已崩溃了,一定是哪儿不对啊啊啊....于是笔者去翻莫烦老师的代码,一点点对下来,每一层参数肯定不会有错,那就是在训练设置的参数有问题。
1 self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss) #莫烦的代码 2 train_op = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, 3 epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)#笔者的
看到train_params难道是这个train_params?笔者只优化了最后的全连接层参数而莫烦老师优化的是全部参数
已经深夜了,笔者表示即使不睡觉也要跑一遍试试,于是改成
1 # 定义 optimizer 2 train_params = network.all_params 3 ~~~~~~~~~~~~~~~~~~~~~~~~于是又是漫长的等待 4 5 [TL] Epoch 1 of 100 took 20.286640s 6 [TL] val loss: 11.938850 7 [TL] val acc: 0.312500 8 [TL] Epoch 2 of 100 took 3.091806s 9 [TL] val loss: 2.890055 10 [TL] val acc: 0.625000 11 [TL] Epoch 4 of 100 took 3.074205s 12 [TL] val loss: 24.055895 13 [TL] val acc: 0.687500 14 [TL] .... 15 [TL] val loss: 0.699907 16 [TL] val acc: 0.500000 17 [TL] Epoch 98 of 100 took 3.089206s 18 [TL] val loss: 0.683627 19 [TL] val acc: 0.562500 20 [TL] Epoch 100 of 100 took 3.091806s 21 [TL] val loss: 0.708496 22 [TL] val acc: 0.562500 23 [TL] Total training time: 375.727307s 24 [TL] [*] model.npz saved
效果变得更差了....
排除参数的问题,已经深夜1点了,明天还要上班,不得不睡啦。
继续崩溃第三天~~~
第四天~~~
第五天,今天供应商过来公司调试机器,正好是一个学图像处理的小伙子,我提到这个说:我训练了这么多代,为啥还是像猜一样的概率....?小伙儿说:莫不是过拟合了吧?我说:不可能啊,现成的数据、现成的模型和参数,不应该的啊!
不过我还是得检查一下数据处理的代码
# Side length, in pixels, of the square RGB images stored in the TFRecord
# file. The writer and the reader must agree on it (the writer used to resize
# to 240x240 while the reader reshaped to 224x224x3), and it matches the
# [None, 224, 224, 3] input placeholder of the network.
IMG_SIZE = 224


def create_record(filelist):
    """Write the images named in ``filelist`` to a TFRecord file.

    Reads the module-level ``file_dir`` (image directory prefix) and
    ``recordpath`` (output file). A file whose name starts with ``cat.`` gets
    label 0; every other file gets label 1.

    Args:
        filelist: list of image file names; it is shuffled in place.
    """
    random.shuffle(filelist)
    writer = tf.python_io.TFRecordWriter(recordpath)
    try:
        for count, file in enumerate(filelist, start=1):
            name = file.split(sep='.')
            label_val = 0 if name[0] == 'cat' else 1
            # Force three channels so the byte count always equals
            # IMG_SIZE * IMG_SIZE * 3, whatever the source image mode is.
            with Image.open(file_dir + file) as img:
                img_raw = img.convert('RGB').resize((IMG_SIZE, IMG_SIZE)).tobytes()
            # Wrap the label and the raw image bytes into one Example.
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label_val])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
            }))
            writer.write(example.SerializeToString())
            print(name[1])
            print(label_val)
            print(count)
    finally:
        # Close the record file even if an image fails to load.
        writer.close()


def read_and_decode(filename):
    """Build queue-based ops that read one (image, label) pair from a TFRecord.

    Args:
        filename: path of the TFRecord file written by ``create_record``.

    Returns:
        Tuple ``(img, label)``: a float32 tensor of shape
        [IMG_SIZE, IMG_SIZE, 3] and an int32 scalar label tensor.
    """
    # Queue of input file names feeding the record reader.
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string),
                                       })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [IMG_SIZE, IMG_SIZE, 3])
    # Rescale pixels from [0, 255] to [-0.5, 0.5].
    # NOTE(review): the VGG16 'preprocess' scope subtracts ImageNet channel
    # means from 0-255 pixel values, so this rescaling is suspect when the
    # images are fed to that network.
    img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return img, label
img = tf.cast(img, tf.float32) * (1. / 255) - 0.5 难道是这一步处理多余?注释掉之后,重新训练模型:
1 Epoch 85 of 200 took 1.234071s 2 train loss: 14.689816 3 train acc: 0.900000 4 [TL] [*] model3.npz saved 5 Epoch 90 of 200 took 1.241071s 6 train loss: 17.104382 7 train acc: 0.800000 8 [TL] [*] model3.npz saved 9 Epoch 95 of 200 took 1.236071s 10 train loss: 11.190630 11 train acc: 0.850000 12 [TL] [*] model3.npz saved 13 Epoch 100 of 200 took 1.238071s 14 train loss: 0.000000 15 train acc: 1.000000 16 [TL] [*] model3.npz saved 17 Epoch 105 of 200 took 1.236071s 18 train loss: 7.622324 19 train acc: 0.900000 20 [TL] [*] model3.npz saved 21 Epoch 110 of 200 took 1.234071s 22 train loss: 2.164670 23 train acc: 0.950000 24 [TL] [*] model3.npz saved 25 Epoch 115 of 200 took 1.237071s 26 train loss: 0.000000 27 train acc: 1.000000 28 [TL] [*] model3.npz saved
准确度1,停停停...不用跑完了,Perfect!
原来如此,必须要真实的像素值.......心好累......,笔者已经不记得哪儿抄来的这一行了。
嗯,VGG16模型的迁移学习到此结束,代码见github