Lecture 5 Convolutional Neural Networks (AlexNet8 on CIFAR-10)
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # normalize both splits to [0, 1]


class AlexNet8(Model):
    def __init__(self):
        super(AlexNet8, self).__init__()
        # Block 1: conv + batch norm + ReLU + max-pool
        self.c1 = Conv2D(filters=96, kernel_size=(3, 3))
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(3, 3), strides=2)

        # Block 2: conv + batch norm + ReLU + max-pool
        self.c2 = Conv2D(filters=256, kernel_size=(3, 3))
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(3, 3), strides=2)

        # Blocks 3-5: three 'same'-padded convolutions, then one max-pool
        self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu')
        self.p3 = MaxPool2D(pool_size=(3, 3), strides=2)

        # Dense head with dropout
        self.flatten = Flatten()
        self.f1 = Dense(2048, activation='relu')
        self.d1 = Dropout(0.5)
        self.f2 = Dense(2048, activation='relu')
        self.d2 = Dropout(0.5)
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p2(x)

        x = self.c3(x)
        x = self.c4(x)
        x = self.c5(x)
        x = self.p3(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.d1(x)
        x = self.f2(x)
        x = self.d2(x)
        y = self.f3(x)
        return y


model = AlexNet8()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

# Resume from the checkpoint if one exists; keep saving the best weights during training.
checkpoint_save_path = "./checkpoint/Baseline.ckpt"
if os.path.exists(checkpoint_save_path + ".index"):
    print("--------------------load the model-----------------")
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=100,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

# Dump all trainable variables (names, shapes, values) to a text file.
with open('./weights.txt', 'w') as file:
    for v in model.trainable_variables:
        file.write(str(v.name) + '\n')
        file.write(str(v.shape) + '\n')
        file.write(str(v.numpy()) + '\n')


def plot_acc_loss_curve(history):
    # Plot the training/validation accuracy and loss curves.
    acc = history.history['sparse_categorical_accuracy']
    val_acc = history.history['val_sparse_categorical_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    plt.figure(figsize=(15, 5))
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()


plot_acc_loss_curve(history)
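A quick way to see why this CIFAR-10 adaptation shrinks AlexNet's kernels to 3x3 is to trace the feature-map shape layer by layer. The sketch below is not part of the original notes; it assumes the AlexNet8 class defined above is in scope and feeds one dummy 32x32x3 image through each layer in turn:

# Sanity-check sketch: print every layer's output shape for a CIFAR-10-sized input.
probe = AlexNet8()
x = tf.random.uniform((1, 32, 32, 3))  # one fake CIFAR-10 image
for layer in probe.layers:
    x = layer(x)
    print(layer.name, 'output shape:', x.shape)
# The spatial size falls 32 -> 30 -> 14 -> 12 -> 5 -> 5 -> 5 -> 5 -> 2,
# so Flatten yields a 2*2*256 = 1024-dim vector feeding the dense head.

With only 32x32 pixels to work with, the original 11x11/stride-4 stem would collapse the feature map almost immediately, which is why this variant uses 3x3 kernels throughout.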
# The following code is from Dive into DL TF2.0. Training takes a long time; running it on Colab is recommended.
import tensorflow as tf
import numpy as np

print(tf.__version__)

# Let GPU memory grow on demand instead of grabbing it all at once.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Original AlexNet layout, sized for 224x224 single-channel input.
net = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Trace the output shape of every layer with a dummy input.
X = tf.random.uniform((1, 224, 224, 1))
for layer in net.layers:
    X = layer(X)
    print(layer.name, 'output shape\t', X.shape)


class DataLoader():
    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # Resize the 28x28 images to 224x224 so they fit AlexNet's input.
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # Resize the 28x28 images to 224x224 so they fit AlexNet's input.
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224)
        return resized_images.numpy(), self.test_labels[index]


batch_size = 128
dataLoader = DataLoader()
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
print('x_batch shape:', x_batch.shape, 'y_batch shape:', y_batch.shape)


def train_alexnet():
    epoch = 5
    num_iter = dataLoader.num_train // batch_size
    for e in range(epoch):
        for n in range(num_iter):
            x_batch, y_batch = dataLoader.get_batch_train(batch_size)
            net.fit(x_batch, y_batch)
            if n % 100 == 0:
                net.save_weights('5.6_alexnet_weights.h5')


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)

net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

# Fit one batch first to build the model, then run the full training loop.
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
net.fit(x_batch, y_batch)
train_alexnet()

# Reload the last saved weights and evaluate on a random batch of 2000 test images.
net.load_weights('5.6_alexnet_weights.h5')

x_test, y_test = dataLoader.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
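The shape printout above follows the usual 'valid' convolution arithmetic, out = floor((in - kernel) / stride) + 1. A short worked check (my own sketch, not from the book) reproduces the spatial sizes down to the 6400-dimensional flattened feature:

# Worked shape check for the 224x224 AlexNet above ('valid' conv/pool formula).
def out_size(in_size, kernel, stride):
    return (in_size - kernel) // stride + 1

s = 224
s = out_size(s, 11, 4)  # conv1 (11x11, stride 4): 54
s = out_size(s, 3, 2)   # pool1 (3x3, stride 2):   26
# conv2 uses padding='same', so the spatial size stays 26
s = out_size(s, 3, 2)   # pool2: 12
# conv3-conv5 are 'same' as well, so still 12
s = out_size(s, 3, 2)   # pool3: 5
print('flattened features:', s * s * 256)  # 5 * 5 * 256 = 6400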