Deep Learning Algorithms from the Ground Up: Handling Model Underfitting and Overfitting

Underfitting:
from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.datasets import mnist


def plot_val_loss_and_acc(history):
    # Plot validation loss and validation accuracy from a Keras History object.
    import matplotlib.pyplot as plt
    val_loss = history.history["val_loss"]
    val_acc = history.history["val_accuracy"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--", label="Validation loss")
    plt.plot(epochs, val_acc, "b-", label="Validation accuracy")
    plt.title("validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()


# MNIST with a deliberately tiny model (a single softmax layer) to show underfitting.
(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)
The underfitting phenomenon, and the way to deal with it, as described in the book Deep Learning with Python: in short, if the validation loss stalls at a high value and the model never even manages to overfit, the model lacks capacity, and the fix is to make it bigger (more units, more layers).

OK, with that idea in mind, let's improve the model:
# Same imports, data loading, plot helper, and small-model run as above; now add capacity.
model = keras.Sequential([
    layers.Dense(128, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_large_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_large_model)
Now we can see signs of overfitting: the validation loss stops improving after a few epochs and starts to rise, even as the training metrics keep getting better.
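To make the diagnosis easier to see, it helps to plot the training loss and the validation loss on the same axes; the classic overfitting signature is a training loss that keeps falling while the validation loss turns back up. Here is a minimal helper for that (my own addition for illustration, not part of the original script):

def plot_train_vs_val_loss(history):
    # Compare training loss and validation loss from a Keras History object.
    import matplotlib.pyplot as plt
    loss = history.history["loss"]
    val_loss = history.history["val_loss"]
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, "bo-", label="Training loss")
    plt.plot(epochs, val_loss, "b--", label="Validation loss")
    plt.title("Training vs. validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

plot_train_vs_val_loss(history_large_model)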
Next, let's look at L1/L2 regularization and dropout as ways to handle overfitting:
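Before the full script, a bit of intuition: L2 regularization adds a penalty proportional to the sum of squared weights to the loss, which pushes weights toward small values; dropout randomly zeroes a fraction of a layer's activations during training (with the survivors rescaled so the expected sum is unchanged). A rough numpy sketch of both ideas, purely illustrative and not how Keras implements them internally:

import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(4, 3))     # pretend this is one layer's weight matrix
base_loss = 0.7                 # pretend this is the data loss

# L2 regularization: add l2 * sum(w^2) to the loss.
l2 = 0.002
reg_loss = base_loss + l2 * np.sum(W ** 2)
print("loss with L2 penalty:", reg_loss)

# Dropout (training time): zero each activation with probability `rate`,
# and scale the survivors by 1 / (1 - rate).
rate = 0.5
activations = rng.random((2, 6))
mask = rng.random(activations.shape) >= rate
dropped = activations * mask / (1.0 - rate)
print("activations after dropout:\n", dropped)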
# L1/L2 regularization and dropout
############################################################################
# (Imports and the plot_val_loss_and_acc helper are the same as above;
#  the earlier MNIST experiments are commented out at this point.)

(train_data, train_labels), _ = imdb.load_data(num_words=10000)


def vectorize_sequences(sequences, dimension=10000):
    # Multi-hot encode the integer word-index sequences into 0/1 vectors.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results


train_data = vectorize_sequences(train_data)

# A small model first.
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_small_original = model.fit(train_data, train_labels,
                                   epochs=20, batch_size=512,
                                   validation_split=0.4)
plot_val_loss_and_acc(history_small_original)

# We need a larger, more expressive model, but it will overfit!
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512,
                             validation_split=0.4)
plot_val_loss_and_acc(history_original)

"""
Version of the model with lower capacity:
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_smaller_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)

Version of the model with higher capacity:
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(512, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_larger_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
"""

# Adding L2 weight regularization to the model.
model = keras.Sequential([
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_l2_reg = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_l2_reg)

# Other available regularizers:
# regularizers.l1(0.001)
# regularizers.l1_l2(l1=0.001, l2=0.001)

# Adding dropout.
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_dropout = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_dropout)
The original underfitting model:
The overfitting model:
After adding L2 regularization:
After adding dropout:
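To see the effect of the two techniques side by side, one can also overlay the validation-loss curves of the three IMDB runs in a single figure. This comparison plot is my own addition; the variable names match the script above:

import matplotlib.pyplot as plt

curves = {
    "original": history_original,
    "L2 (0.002)": history_l2_reg,
    "dropout 0.5": history_dropout,
}
for name, hist in curves.items():
    val_loss = hist.history["val_loss"]
    plt.plot(range(1, len(val_loss) + 1), val_loss, label=name)
plt.title("Validation loss: original vs. L2 vs. dropout")
plt.xlabel("Epochs")
plt.ylabel("Validation loss")
plt.legend()
plt.show()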