Deep Learning Algorithm Principles and Implementation: Handling Model Underfitting and Overfitting

Underfitting:

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
 
 
def plot_val_loss_and_acc(history):
    """Plot the validation loss and accuracy recorded in a Keras History object."""
    import matplotlib.pyplot as plt
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)  # derive the x-axis from the recorded history
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
 
 
# A deliberately tiny model: a single softmax layer on flattened MNIST pixels.
(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)


The underfitting symptoms and remedy discussed in the book Deep Learning with Python: in short, if validation loss plateaus at a high value and validation accuracy stalls, the model lacks capacity, and the fix is to enlarge it (more layers, more units per layer) until it is at least able to overfit.

 

OK, with that idea in hand, let's improve the model:

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
 
 
def plot_val_loss_and_acc(history):
    """Plot the validation loss and accuracy recorded in a Keras History object."""
    import matplotlib.pyplot as plt
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)  # derive the x-axis from the recorded history
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
 
 
# Rerun the tiny single-layer baseline for comparison.
(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)
 
# Remedy for underfitting: increase capacity with two 128-unit hidden layers.
model = keras.Sequential([
    layers.Dense(128, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_large_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_large_model)


Now we can see signs of overfitting: the validation loss bottoms out within the first few epochs and then starts climbing while training continues.
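
One quick way to pin down where overfitting begins (a small sketch of mine, not in the original post) is to locate the epoch with the lowest validation loss:

import numpy as np
best_epoch = int(np.argmin(history_large_model.history["val_loss"])) + 1
print("Validation loss is lowest at epoch", best_epoch)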

 

Next, let's look at how L1/L2 regularization and dropout handle overfitting:
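
As a quick refresher before the code (my summary, not from the original post): L2 regularization adds a penalty proportional to the sum of squared weights to the loss, L1 adds the sum of absolute weight values, and dropout randomly zeroes a fraction of a layer's activations during training. A minimal NumPy sketch of the two penalties, using the same coefficients as the code below:

import numpy as np

w = np.array([0.5, -1.2, 0.3])           # example kernel weights
l2_penalty = 0.002 * np.sum(w ** 2)      # what regularizers.l2(0.002) adds to the loss
l1_penalty = 0.001 * np.sum(np.abs(w))   # what regularizers.l1(0.001) would add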

from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.datasets import mnist
 
 
def plot_val_loss_and_acc(history):
    """Plot the validation loss and accuracy recorded in a Keras History object."""
    import matplotlib.pyplot as plt
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)  # derive the x-axis from the recorded history
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
 
 
# (train_images, train_labels), _ = mnist.load_data()
# train_images = train_images.reshape((60000, 28 * 28))
# train_images = train_images.astype("float32") / 255
# model = keras.Sequential([layers.Dense(10, activation="softmax")])
# model.compile(optimizer="rmsprop",
#               loss="sparse_categorical_crossentropy",
#               metrics=["accuracy"])
# history_small_model = model.fit(
#     train_images, train_labels,
#     epochs=20,
#     batch_size=128,
#     validation_split=0.2)
# plot_val_loss_and_acc(history_small_model)
 
# model = keras.Sequential([
#     layers.Dense(128, activation="relu"),
#     layers.Dense(128, activation="relu"),
#     layers.Dense(10, activation="softmax"),
# ])
# model.compile(optimizer="rmsprop",
#               loss="sparse_categorical_crossentropy",
#               metrics=["accuracy"])
# history_large_model = model.fit(
#     train_images, train_labels,
#     epochs=20,
#     batch_size=128,
#     validation_split=0.2)
# plot_val_loss_and_acc(history_large_model)
 
# L1/L2 and dropout
############################################################################
 
(train_data, train_labels), _ = imdb.load_data(num_words=10000)
 
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode lists of word indices into fixed-size 0/1 vectors."""
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set every index that appears in the review to 1
    return results
train_data = vectorize_sequences(train_data)
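# Illustration (my note, not in the original post): vectorize_sequences([[3, 5]])[0]
# is a 10000-dimensional vector with 1.0 at indices 3 and 5 and 0.0 everywhere
# else; word order and word counts are discarded (multi-hot encoding).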
 
# A deliberately small model: too little capacity for IMDB, so it underfits.
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_small_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_small_original)
 
 
# A larger model gives us the capacity we need, but now it overfits.
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512, validation_split=0.4)
 
plot_val_loss_and_acc(history_original)
 
"""
Version of the model with lower capacity
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_smaller_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
 
Version of the model with higher capacity   
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(512, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_larger_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
"""
 
### Adding L2 weight regularization to the model
model = keras.Sequential([
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_l2_reg = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
 
plot_val_loss_and_acc(history_l2_reg)
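# Optional sanity check (my sketch, not in the original script): the l2(0.002)
# penalty added to the loss equals 0.002 * the sum of squared kernel weights,
# and it applies at training time only, e.g. for the first layer:
# l2_penalty = 0.002 * np.sum(model.layers[0].get_weights()[0] ** 2)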
 
# Other penalties available in tensorflow.keras.regularizers:
# regularizers.l1(0.001)
# regularizers.l1_l2(l1=0.001, l2=0.001)
 
# Dropout variant: the same 16-unit model with Dropout(0.5) after each hidden layer.
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_dropout = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_dropout)
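
For intuition about what Dropout(0.5) does, here is a minimal NumPy sketch (my own illustration, assuming the inverted-dropout scheme that Keras implements): during training each activation is zeroed with probability 0.5 and the survivors are scaled by 1 / (1 - 0.5), so the expected activation is unchanged; at inference time the layer passes activations through untouched.

import numpy as np

rng = np.random.default_rng(0)
activations = rng.random((2, 4))              # a batch of layer outputs
rate = 0.5                                    # fraction of units to drop
mask = rng.random(activations.shape) >= rate  # keep each unit with prob 1 - rate
dropped = activations * mask / (1.0 - rate)   # inverted dropout: rescale survivors
print(dropped)                                # at inference, outputs pass through unchanged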


Validation curves of the original underfitting model:

Validation curves of the overfitting model:

 

After adding L2 regularization:

 

After adding dropout:
