TensorFlow 2.0 Learning (13) Convolutional Neural Networks (3): The CIFAR10 Dataset and a Modified ResNet18 Network + Colab
The ResNet network structure is as follows:
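The core pattern, which the code below repeats at four channel widths (64, 128, 256, 512), is the residual connection: the output of a pair of 3x3 convolutions is added element-wise to the block's input, with a strided 1x1 convolution projecting the input whenever the channel count or spatial size changes. Here is a minimal sketch of that pattern; f and shortcut are illustrative stand-ins for the Sequential blocks and 1x1 projections defined in the full model below:

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, add

# Two 3x3 conv layers form the residual branch f(x); the strided 1x1
# conv projects x so the shapes match before the element-wise add.
f = Sequential([
    Conv2D(128, kernel_size=3, strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu'),
    BatchNormalization(),
])
shortcut = Conv2D(128, kernel_size=1, strides=2, padding='same')

x = tf.random.normal([1, 32, 32, 64])  # a dummy 64-channel feature map
out = add([f(x), shortcut(x)])         # residual connection: f(x) + projected x
print(out.shape)                       # (1, 16, 16, 128)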
The code keeps the model and the data pipeline separate; the full script is as follows:
# encoding: utf-8
import tensorflow as tf
from tensorflow.keras import datasets, Model, Sequential, losses
from tensorflow.keras.layers import Conv2D, Dense, add, BatchNormalization, GlobalAveragePooling2D
import matplotlib.pyplot as plt

# load data ---------
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
y = tf.squeeze(y, axis=1)            # (50000, 1) -> (50000,)
y_test = tf.squeeze(y_test, axis=1)  # (10000, 1) -> (10000,)
# print(x.shape, y.shape, x_test.shape, y_test.shape)
# (50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)


def pre_process(x, y):
    x_reshape = tf.cast(x, dtype=tf.float32) / 255.  # scale pixels to [0, 1]
    y_reshape = tf.cast(y, dtype=tf.int32)           # cast labels to int32
    y_onehot = tf.one_hot(y_reshape, depth=10)       # one-hot labels for training
    return x_reshape, y_onehot


train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(pre_process).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(1000).map(pre_process).batch(128)

# sample = next(iter(train_db))
# print('sample:', sample[0].shape, sample[1].shape,
#       tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))
# sample: (128, 32, 32, 3) (128, 10)
# tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
# ----------------------


# Net ------------------------
class ResNet(Model):
    def __init__(self):
        super(ResNet, self).__init__()
        # stem: a single plain 3x3 convolution
        self.conv1 = Sequential([
            Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu')
        ])
        # conv2/conv3: two 64-channel residual branches
        self.conv2 = Sequential([
            Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        self.conv3 = Sequential([
            Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        # conv4/conv5: 128 channels; conv4 downsamples with stride 2
        self.conv4 = Sequential([
            Conv2D(128, kernel_size=3, strides=2, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        self.conv5 = Sequential([
            Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        # conv6/conv7: 256 channels; conv6 downsamples with stride 2
        self.conv6 = Sequential([
            Conv2D(256, kernel_size=3, strides=2, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
        ])
        self.conv7 = Sequential([
            Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        # conv8/conv9: 512 channels; conv8 downsamples with stride 2
        self.conv8 = Sequential([
            Conv2D(512, kernel_size=3, strides=2, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])
        self.conv9 = Sequential([
            Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization(),
            Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu'),
            BatchNormalization()
        ])

        self.avgPool = GlobalAveragePooling2D()
        self.fc10 = Dense(10)  # logits for the 10 CIFAR10 classes

        # strided 1x1 convolutions that project the shortcut whenever the
        # channel count and spatial size change
        self.conv_128 = Conv2D(128, kernel_size=1, strides=2, padding='same', activation='relu')
        self.conv_256 = Conv2D(256, kernel_size=1, strides=2, padding='same', activation='relu')
        self.conv_512 = Conv2D(512, kernel_size=1, strides=2, padding='same', activation='relu')

    def call(self, inputs, training=None):
        layer1 = self.conv1(inputs, training=training)
        layer2 = self.conv2(layer1, training=training)
        layer_one = add([layer1, layer2])      # identity shortcut

        layer3 = self.conv3(layer_one, training=training)
        layer_two = add([layer_one, layer3])   # identity shortcut

        layer4 = self.conv4(layer_two, training=training)
        layer4_1 = self.conv_128(layer_two)    # projected shortcut (64 -> 128 channels)
        layer_thi = add([layer4, layer4_1])

        layer5 = self.conv5(layer_thi, training=training)
        layer6 = self.conv6(layer5, training=training)
        layer6_1 = self.conv_256(layer5)       # projected shortcut (128 -> 256 channels)
        layer_fou = add([layer6, layer6_1])

        layer7 = self.conv7(layer_fou, training=training)
        layer8 = self.conv8(layer7, training=training)
        layer8_1 = self.conv_512(layer7)       # projected shortcut (256 -> 512 channels)
        layer_fiv = add([layer8, layer8_1])

        layer9 = self.conv9(layer_fiv, training=training)
        layer9_1 = self.avgPool(layer9)
        layer10 = self.fc10(layer9_1)

        return layer10
# --------------------------


def main():
    model = ResNet()
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()

    optimizer = tf.keras.optimizers.RMSprop(0.001)  # optimizer with learning rate 0.001
    criteon = losses.CategoricalCrossentropy(from_logits=True)
    Epoch = 50
    # record the training and testing accuracy of every epoch
    train_tot_acc = []
    test_tot_acc = []

    for epoch in range(Epoch):
        cor, tot = 0, 0
        for step, (x, y) in enumerate(train_db):  # (128, 32, 32, 3), (128, 10)
            with tf.GradientTape() as tape:  # record operations for the gradient
                # forward pass; training=True so BatchNormalization uses batch statistics
                out = model(x, training=True)  # (128, 10)

                # calculate loss
                loss = criteon(y, out)

            variables = model.trainable_variables
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            # training accuracy: predicted class vs. one-hot label
            train_out = tf.argmax(tf.nn.softmax(out, axis=1), axis=1)
            train_y = tf.argmax(y, axis=1)

            train_cor = tf.cast(tf.equal(train_y, train_out), dtype=tf.float32)
            cor += tf.reduce_sum(train_cor)
            tot += x.shape[0]

        print('After %d Epoch' % epoch)
        print('training acc is ', cor / tot)
        train_tot_acc.append(cor / tot)

        correct, total = 0, 0
        for x, y in test_db:
            # forward pass; training=False so BatchNormalization uses moving statistics
            pred = model(x, training=False)

            test_out = tf.argmax(tf.nn.softmax(pred, axis=1), axis=1)
            test_y = tf.argmax(y, axis=1)

            test_cor = tf.cast(tf.equal(test_y, test_out), dtype=tf.float32)
            correct += tf.reduce_sum(test_cor)
            total += x.shape[0]

        print('testing acc is : ', correct / total)
        test_tot_acc.append(correct / total)

    plt.figure()
    plt.plot(train_tot_acc, 'b', label='train')
    plt.plot(test_tot_acc, 'r', label='test')
    plt.xlabel('Epoch')
    plt.ylabel('ACC')
    plt.legend()
    # plt.savefig('exam8.3_train_test_CNN1.png')
    plt.show()


if __name__ == "__main__":
    main()
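Before committing to a full 50-epoch run, it can be worth a quick forward-pass sanity check. This snippet is not part of the original script; the expected shapes follow from the model definition above:

model = ResNet()
dummy = tf.random.normal([4, 32, 32, 3])  # a fake batch of 4 CIFAR10-sized images
logits = model(dummy, training=False)
print(logits.shape)                       # expected: (4, 10)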
The program was debugged successfully, but I did not train it or run it on the test data; the dataset is too large for my current machine, so the actual run will have to wait for a better opportunity.
Next update: an RNN applied to the IMDB dataset.
2020.5.17 update: reran the updated code on Colab.
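On Colab it is worth confirming that the runtime actually has a GPU before starting training. A quick check (the experimental alias is the one available in TF 2.0; newer 2.x versions also expose it as tf.config.list_physical_devices):

import tensorflow as tf

# Lists the GPUs visible to TensorFlow; an empty list means the Colab
# runtime has no GPU attached (Runtime -> Change runtime type -> GPU).
print(tf.config.experimental.list_physical_devices('GPU'))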
Training results:
Test accuracy settles at around 75%, with small fluctuations from epoch to epoch.
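One plausible way to damp those fluctuations, which I did not try in this experiment, is to decay the learning rate over time instead of keeping RMSprop fixed at 0.001. A sketch; the decay_steps and decay_rate values are illustrative, not tuned for this model:

import tensorflow as tf

# Exponentially decay the learning rate, halving it roughly every
# 10 epochs (50000 images / batch size 128 ~= 391 steps per epoch).
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=391 * 10,
    decay_rate=0.5)
optimizer = tf.keras.optimizers.RMSprop(schedule)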