深度残差网络+自适应参数化ReLU激活函数(调参记录15)
在调参记录14里,只有2个残差模块,结果遭遇欠拟合了。这次增加一个残差模块试试。
自适应参数化ReLU激活函数的基本原理如下:
Keras程序如下:
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Tue Apr 14 04:17:45 2020 Implemented using TensorFlow 1.10.0 and Keras 2.2.1 Minghang Zhao, Shisheng Zhong, Xuyun Fu, Baoping Tang, Shaojiang Dong, Michael Pecht, Deep Residual Networks with Adaptively Parametric Rectifier Linear Units for Fault Diagnosis, IEEE Transactions on Industrial Electronics, 2020, DOI: 10.1109/TIE.2020.2972458 @author: Minghang Zhao """ from __future__ import print_function import keras import numpy as np from keras.datasets import cifar10 from keras.layers import Dense, Conv2D, BatchNormalization, Activation, Minimum from keras.layers import AveragePooling2D, Input, GlobalAveragePooling2D, Concatenate, Reshape from keras.regularizers import l2 from keras import backend as K from keras.models import Model from keras import optimizers from keras.preprocessing.image import ImageDataGenerator from keras.callbacks import LearningRateScheduler K.set_learning_phase(1) # The data, split between train and test sets (x_train, y_train), (x_test, y_test) = cifar10.load_data() # Noised data x_train = x_train.astype('float32') / 255. x_test = x_test.astype('float32') / 255. x_test = x_test-np.mean(x_train) x_train = x_train-np.mean(x_train) print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, 10) y_test = keras.utils.to_categorical(y_test, 10) # Schedule the learning rate, multiply 0.1 every 1500 epoches def scheduler(epoch): if epoch % 1500 == 0 and epoch != 0: lr = K.get_value(model.optimizer.lr) K.set_value(model.optimizer.lr, lr * 0.1) print("lr changed to {}".format(lr * 0.1)) return K.get_value(model.optimizer.lr) # An adaptively parametric rectifier linear unit (APReLU) def aprelu(inputs): # get the number of channels channels = inputs.get_shape().as_list()[-1] # get a zero feature map zeros_input = keras.layers.subtract([inputs, inputs]) # get a feature map with only positive features pos_input = Activation('relu')(inputs) # get a feature map with only negative features neg_input = Minimum()([inputs,zeros_input]) # define a network to obtain the scaling coefficients scales_p = GlobalAveragePooling2D()(pos_input) scales_n = GlobalAveragePooling2D()(neg_input) scales = Concatenate()([scales_n, scales_p]) scales = Dense(channels//8, activation='linear', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(scales) scales = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(scales) scales = Activation('relu')(scales) scales = Dense(channels, activation='linear', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(scales) scales = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(scales) scales = Activation('sigmoid')(scales) scales = Reshape((1,1,channels))(scales) # apply a paramtetric relu neg_part = keras.layers.multiply([scales, neg_input]) return keras.layers.add([pos_input, neg_part]) # Residual Block def residual_block(incoming, nb_blocks, out_channels, downsample=False, downsample_strides=2): residual = incoming in_channels = incoming.get_shape().as_list()[-1] for i in range(nb_blocks): identity = residual if not downsample: downsample_strides = 1 residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual) residual = aprelu(residual) residual = Conv2D(out_channels, 3, strides=(downsample_strides, downsample_strides), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(residual) residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual) residual = aprelu(residual) residual = Conv2D(out_channels, 3, padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(residual) # Downsampling if downsample_strides > 1: identity = AveragePooling2D(pool_size=(1,1), strides=(2,2))(identity) # Zero_padding to match channels if in_channels != out_channels: zeros_identity = keras.layers.subtract([identity, identity]) identity = keras.layers.concatenate([identity, zeros_identity]) in_channels = out_channels residual = keras.layers.add([residual, identity]) return residual # define and train a model inputs = Input(shape=(32, 32, 3)) net = Conv2D(16, 3, padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(inputs) net = residual_block(net, 1, 16, downsample=False) net = residual_block(net, 1, 32, downsample=True) # net = residual_block(net, 2, 32, downsample=False) net = residual_block(net, 1, 64, downsample=True) # net = residual_block(net, 2, 64, downsample=False) net = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(net) net = aprelu(net) net = GlobalAveragePooling2D()(net) outputs = Dense(10, activation='softmax', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(net) model = Model(inputs=inputs, outputs=outputs) sgd = optimizers.SGD(lr=0.1, decay=0., momentum=0.9, nesterov=True) model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) # data augmentation datagen = ImageDataGenerator( # randomly rotate images in the range (deg 0 to 180) rotation_range=30, # Range for random zoom zoom_range = 0.2, # shear angle in counter-clockwise direction in degrees shear_range = 30, # randomly flip images horizontal_flip=True, # randomly shift images horizontally width_shift_range=0.125, # randomly shift images vertically height_shift_range=0.125) reduce_lr = LearningRateScheduler(scheduler) # fit the model on the batches generated by datagen.flow(). model.fit_generator(datagen.flow(x_train, y_train, batch_size=1000), validation_data=(x_test, y_test), epochs=5000, verbose=1, callbacks=[reduce_lr], workers=4) # get results K.set_learning_phase(0) DRSN_train_score = model.evaluate(x_train, y_train, batch_size=1000, verbose=0) print('Train loss:', DRSN_train_score[0]) print('Train accuracy:', DRSN_train_score[1]) DRSN_test_score = model.evaluate(x_test, y_test, batch_size=1000, verbose=0) print('Test loss:', DRSN_test_score[0]) print('Test accuracy:', DRSN_test_score[1])
实验结果如下:
Epoch 2575/5000 10s 197ms/step - loss: 0.3505 - acc: 0.9039 - val_loss: 0.4548 - val_acc: 0.8745 Epoch 2576/5000 10s 198ms/step - loss: 0.3571 - acc: 0.9003 - val_loss: 0.4483 - val_acc: 0.8732 Epoch 2577/5000 10s 194ms/step - loss: 0.3536 - acc: 0.9033 - val_loss: 0.4547 - val_acc: 0.8725 Epoch 2578/5000 10s 196ms/step - loss: 0.3514 - acc: 0.9033 - val_loss: 0.4429 - val_acc: 0.8766
程序还没跑完,似乎也没必要跑完了。
训练集上还没拟合得很好,测试集准确率已经低于训练集准确率大约2.5%了。这是同时存在欠拟合和过拟合呀!
Minghang Zhao, Shisheng Zhong, Xuyun Fu, Baoping Tang, Shaojiang Dong, Michael Pecht, Deep Residual Networks with Adaptively Parametric Rectifier Linear Units for Fault Diagnosis, IEEE Transactions on Industrial Electronics, 2020, DOI: 10.1109/TIE.2020.2972458