TensorFlow中设置学习率的方式

学习率

本文主要介绍的学习率设置方式有：

指数衰减: tf.train.exponential_decay()
分段常数衰减: tf.train.piecewise_constant()
自然指数衰减: tf.train.natural_exp_decay()
多项式衰减: tf.train.polynomial_decay()
倒数衰减: tf.train.inverse_time_decay()
余弦衰减: tf.train.cosine_decay()

指数衰减

1 tf.train.exponential_decay(
2     learning_rate,
3     global_step, 
4     decay_steps, 
5     decay_rate,
6     staircase=False, 
7     name=None):

计算方式：

1 decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
2 # 如果staircase=True，则 global_step / decay_steps 按整数除法取整，学习率呈阶梯状变化，每 decay_steps 次迭代更新一次；

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 global_step = tf.Variable(0, name='global_step', trainable=False) # 迭代次数
 5 
 6 y = []
 7 z = []
 8 epochs = 200
 9 
10 with tf.Session() as sess:
11     sess.run(tf.global_variables_initializer())
12     for global_step in range(epochs):
13         # 阶梯型衰减
14         learning_rate_1 = tf.train.exponential_decay(
15             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=True
16         )
17         # 标准指数衰减
18         learning_rate_2 = tf.train.exponential_decay(
19             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=False
20         )
21         lr1 = sess.run([learning_rate_1])
22         lr2 = sess.run([learning_rate_2])
23         y.append(lr1)
24         z.append(lr2)
25 
26 x = range(epochs)
27 fig = plt.figure()
28 ax = fig.add_subplot(111)
29 ax.set_ylim([0, 0.55])
30 
31 plt.plot(x, y, 'r-', linewidth=2)
32 plt.plot(x, z, 'g-', linewidth=2)
33 plt.title('exponential_decay')
34 ax.set_xlabel('step')
35 ax.set_ylabel('learning_rate')
36 plt.legend(labels=['staircase', 'continuous'], loc='upper right')
37 plt.show()

分段常数衰减

1 tf.train.piecewise_constant(
2     x, 
3     boundaries, 
4     values, 
5     name=None):

计算方式：

 1 # parameter
 2 global_step = tf.Variable(0, trainable=False)
 3 boundaries = [100, 200]
 4 values = [1.0, 0.5, 0.1]
 5 # learning_rate
 6 learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
 7 # 解释
 8 # 当global_step在[0, 100]区间时，learning_rate=1.0;
 9 # 当global_step在[101, 200]区间时，learning_rate=0.5;
10 # 当global_step大于200时，learning_rate=0.1;

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 global_step = tf.Variable(0, name='global_step', trainable=False) # 迭代次数
 5 boundaries = [10, 20, 30]
 6 learning_rates = [0.1, 0.07, 0.025, 0.0125]
 7 
 8 y = []
 9 N = 40
10 
11 with tf.Session() as sess:
12     sess.run(tf.global_variables_initializer())
13     for global_step in range(N):
14         learning_rate = tf.train.piecewise_constant(global_step, boundaries=boundaries, values=learning_rates)
15         lr = sess.run([learning_rate])
16         y.append(lr)
17 
18 x = range(N)
19 plt.plot(x, y, 'r-', linewidth=2)
20 plt.title('piecewise_constant')
21 plt.show()

自然指数衰减

类似于指数衰减，同样与当前迭代次数相关，只不过以自然常数 e 为底；

1 tf.train.natural_exp_decay(
2     learning_rate,
3     global_step,
4     decay_steps,
5     decay_rate,
6     staircase=False,
7     name=None
8 )

计算方式：

1 decayed_learning_rate = learning_rate * exp(-decay_rate * global_step / decay_steps)
2 # 如果staircase=True，则 global_step / decay_steps 向下取整，学习率呈阶梯状变化，每 decay_steps 次迭代更新一次；

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 global_step = tf.Variable(0, name='global_step', trainable=False)
 5 
 6 y = []
 7 z = []
 8 w = []
 9 m = []
10 EPOCH = 200
11 
12 with tf.Session() as sess:
13     sess.run(tf.global_variables_initializer())
14     for global_step in range(EPOCH):
15 
16         # 阶梯型衰减
17         learning_rate1 = tf.train.natural_exp_decay(
18             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=True)
19 
20         # 标准指数型衰减
21         learning_rate2 = tf.train.natural_exp_decay(
22             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=False)
23 
24         # 阶梯型指数衰减
25         learning_rate3 = tf.train.exponential_decay(
26             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=True)
27 
28         # 标准指数衰减
29         learning_rate4 = tf.train.exponential_decay(
30             learning_rate=0.5, global_step=global_step, decay_steps=10, decay_rate=0.9, staircase=False)
31 
32         lr1 = sess.run([learning_rate1])
33         lr2 = sess.run([learning_rate2])
34         lr3 = sess.run([learning_rate3])
35         lr4 = sess.run([learning_rate4])
36 
37         y.append(lr1)
38         z.append(lr2)
39         w.append(lr3)
40         m.append(lr4)
41 
42 x = range(EPOCH)
43 fig = plt.figure()
44 ax = fig.add_subplot(111)
45 ax.set_ylim([0, 0.55])
46 
47 plt.plot(x, y, 'r-', linewidth=2)
48 plt.plot(x, z, 'g-', linewidth=2)
49 plt.plot(x, w, 'r--', linewidth=2)
50 plt.plot(x, m, 'g--', linewidth=2)
51 
52 plt.title('natural_exp_decay')
53 ax.set_xlabel('step')
54 ax.set_ylabel('learning rate')
55 plt.legend(labels = ['natural_staircase', 'natural_continuous', 'staircase', 'continuous'], loc = 'upper right')
56 plt.show()

可以看到自然指数衰减对学习率的衰减程度远大于一般的指数衰减；

多项式衰减

1 tf.train.polynomial_decay(
2     learning_rate, 
3     global_step, 
4     decay_steps,
5     end_learning_rate=0.0001, 
6     power=1.0,
7     cycle=False, name=None):

计算方式：

 1 # 如果cycle=False
 2 global_step = min(global_step, decay_steps)
 3 decayed_learning_rate = (learning_rate - end_learning_rate) *
 4                           (1 - global_step / decay_steps) ^ (power) +
 5                           end_learning_rate
 6 # 如果cycle=True
 7 decay_steps = decay_steps * ceil(global_step / decay_steps)
 8 decayed_learning_rate = (learning_rate - end_learning_rate) *
 9                           (1 - global_step / decay_steps) ^ (power) +
10                           end_learning_rate

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 200
 7 
 8 global_step = tf.Variable(0, name='global_step', trainable=False)
 9 
10 with tf.Session() as sess:
11     sess.run(tf.global_variables_initializer())
12     for global_step in range(EPOCH):
13         # cycle=False
14         learning_rate1 = tf.train.polynomial_decay(
15             learning_rate=0.1, global_step=global_step, decay_steps=50,
16             end_learning_rate=0.01, power=0.5, cycle=False)
17         # cycle=True
18         learning_rate2 = tf.train.polynomial_decay(
19             learning_rate=0.1, global_step=global_step, decay_steps=50,
20             end_learning_rate=0.01, power=0.5, cycle=True)
21 
22         lr1 = sess.run([learning_rate1])
23         lr2 = sess.run([learning_rate2])
24         y.append(lr1)
25         z.append(lr2)
26 
27 x = range(EPOCH)
28 fig = plt.figure()
29 ax = fig.add_subplot(111)
30 plt.plot(x, z, 'g-', linewidth=2)
31 plt.plot(x, y, 'r--', linewidth=2)
32 plt.title('polynomial_decay')
33 ax.set_xlabel('step')
34 ax.set_ylabel('learning rate')
35 plt.legend(labels=['cycle=True', 'cycle=False'], loc='upper right')
36 plt.show()

可以看到学习率在decay_steps=50迭代次数后到达最小值；同时，当cycle=False时，学习率达到预设的最小值后，就保持最小值不再变化；当cycle=True时，学习率将会瞬间增大，再降低；

多项式衰减中设置学习率可以往复升降的目的：是为了防止在神经网络训练后期由于学习率过小，导致网络参数陷入局部最优；将学习率升高，有可能使其跳出局部最优；

倒数衰减

1 tf.train.inverse_time_decay(
2     learning_rate,
3     global_step,
4     decay_steps,
5     decay_rate,
6     staircase=False,
7     name=None):

计算方式：

1 # 如果staircase=False，即得到连续型衰减学习率；
2 decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_steps)
3 
4 # 如果staircase=True，即得到离散型衰减学习率；
5 decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_steps))

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 200
 7 global_step = tf.Variable(0, name='global_step', trainable=False)
 8 
 9 with tf.Session() as sess:
10     sess.run(tf.global_variables_initializer())
11     for global_step in range(EPOCH):
12         # 阶梯型衰减
13         learning_rate1 = tf.train.inverse_time_decay(
14             learning_rate=0.1, global_step=global_step, decay_steps=20,
15             decay_rate=0.2, staircase=True)
16 
17         # 连续型衰减
18         learning_rate2 = tf.train.inverse_time_decay(
19             learning_rate=0.1, global_step=global_step, decay_steps=20,
20             decay_rate=0.2, staircase=False)
21 
22         lr1 = sess.run([learning_rate1])
23         lr2 = sess.run([learning_rate2])
24 
25         y.append(lr1)
26         z.append(lr2)
27 
28 x = range(EPOCH)
29 fig = plt.figure()
30 ax = fig.add_subplot(111)
31 plt.plot(x, z, 'r-', linewidth=2)
32 plt.plot(x, y, 'g-', linewidth=2)
33 plt.title('inverse_time_decay')
34 ax.set_xlabel('step')
35 ax.set_ylabel('learning rate')
36 plt.legend(labels=['continuous', 'staircase'])
37 plt.show()

同样可以看到，随着迭代次数的增加，学习率在逐渐减小，同时减小的幅度也在降低；

余弦衰减

1. 标准余弦衰减

1 tf.train.cosine_decay(
2     learning_rate, 
3     global_step, 
4     decay_steps, 
5     alpha=0.0, 
6     name=None):

计算方式：

1 global_step = min(global_step, decay_steps)
2 cosine_decay = 0.5 * (1 + cos(pi * global_step / decay_steps))
3 decayed = (1 - alpha) * cosine_decay + alpha
4 decayed_learning_rate = learning_rate * decayed

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 200
 7 global_step = tf.Variable(0, name='global_step', trainable=False)
 8 
 9 with tf.Session() as sess:
10     sess.run(tf.global_variables_initializer())
11     for global_step in range(EPOCH):
12         # 余弦衰减
13         learning_rate1 = tf.train.cosine_decay(
14             learning_rate=0.1, global_step=global_step, decay_steps=50)
15         learning_rate2 = tf.train.cosine_decay(
16             learning_rate=0.1, global_step=global_step, decay_steps=100)
17 
18         lr1 = sess.run([learning_rate1])
19         lr2 = sess.run([learning_rate2])
20         y.append(lr1)
21         z.append(lr2)
22 
23 x = range(EPOCH)
24 fig = plt.figure()
25 ax = fig.add_subplot(111)
26 plt.plot(x, y, 'r-', linewidth=2)
27 plt.plot(x, z, 'b-', linewidth=2)
28 plt.title('cosine_decay')
29 ax.set_xlabel('step')
30 ax.set_ylabel('learning rate')
31 plt.legend(labels=['decay_steps=50', 'decay_steps=100'], loc='upper right')
32 plt.show()

2.重启余弦衰减

1 tf.train.cosine_decay_restarts(
2     learning_rate,
3     global_step,
4     first_decay_steps,
5     t_mul=2.0,
6     m_mul=1.0,
7     alpha=0.0,
8     name=None):

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 100
 7 global_step = tf.Variable(0, name='global_step', trainable=False)
 8 
 9 with tf.Session() as sess:
10     sess.run(tf.global_variables_initializer())
11     for global_step in range(EPOCH):
12         # 重启余弦衰减
13         learning_rate1 = tf.train.cosine_decay_restarts(learning_rate=0.1, global_step=global_step,
14                                            first_decay_steps=40)
15         learning_rate2 = tf.train.cosine_decay_restarts(learning_rate=0.1, global_step=global_step,
16                                                        first_decay_steps=60)
17 
18         lr1 = sess.run([learning_rate1])
19         lr2 = sess.run([learning_rate2])
20         y.append(lr1)
21         z.append(lr2)
22 
23 x = range(EPOCH)
24 fig = plt.figure()
25 ax = fig.add_subplot(111)
26 plt.plot(x, y, 'r-', linewidth=2)
27 plt.plot(x, z, 'b-', linewidth=2)
28 plt.title('cosine_decay')
29 ax.set_xlabel('step')
30 ax.set_ylabel('learning rate')
31 plt.legend(labels=['decay_steps=40', 'decay_steps=60'], loc='upper right')
32 plt.show()

3. 线性余弦衰减

1 tf.train.linear_cosine_decay(
2     learning_rate,
3     global_step,
4     decay_steps,
5     num_periods=0.5,
6     alpha=0.0,
7     beta=0.001,
8     name=None):

计算方式：

1 global_step = min(global_step, decay_steps)
2 linear_decay = (decay_steps - global_step) / decay_steps
3 cosine_decay = 0.5 * (1 + cos(pi * 2 * num_periods * global_step / decay_steps))
4 decayed = (alpha + linear_decay) * cosine_decay + beta
5 decayed_learning_rate = learning_rate * decayed

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 100
 7 global_step = tf.Variable(0, name='global_step', trainable=False)
 8 
 9 with tf.Session() as sess:
10     sess.run(tf.global_variables_initializer())
11     for global_step in range(EPOCH):
12         # 线性余弦衰减
13         learing_rate1 = tf.train.linear_cosine_decay(
14             learning_rate=0.1, global_step=global_step, decay_steps=40,
15             num_periods=0.2, alpha=0.5, beta=0.2)
16         learing_rate2 = tf.train.linear_cosine_decay(
17             learning_rate=0.1, global_step=global_step, decay_steps=60,
18             num_periods=0.2, alpha=0.5, beta=0.2)
19 
20         lr1 = sess.run([learing_rate1])
21         lr2 = sess.run([learing_rate2])
22         y.append(lr1)
23         z.append(lr2)
24 
25 
26 x = range(EPOCH)
27 fig = plt.figure()
28 ax = fig.add_subplot(111)
29 plt.plot(x, y, 'r-', linewidth=2)
30 plt.plot(x, z, 'b-', linewidth=2)
31 plt.title('linear_cosine_decay')
32 ax.set_xlabel('step')
33 ax.set_ylabel('learing rate')
34 plt.legend(labels=['decay_steps=40', 'decay_steps=60'], loc='upper right')
35 plt.show()

4. 噪声线性余弦衰减

 1 tf.train.noisy_linear_cosine_decay(
 2     learning_rate,
 3     global_step,
 4     decay_steps,
 5     initial_variance=1.0,
 6     variance_decay=0.55,
 7     num_periods=0.5,
 8     alpha=0.0,
 9     beta=0.001,
10     name=None):

计算方式：

1 global_step = min(global_step, decay_steps)
2 linear_decay = (decay_steps - global_step) / decay_steps
3 cosine_decay = 0.5 * (
4     1 + cos(pi * 2 * num_periods * global_step / decay_steps))
5 decayed = (alpha + linear_decay + eps_t) * cosine_decay + beta  # eps_t 为均值为0的高斯噪声，方差为 initial_variance / (1 + global_step) ** variance_decay
6 decayed_learning_rate = learning_rate * decayed

示例：

 1 import matplotlib.pyplot as plt
 2 import tensorflow as tf
 3 
 4 y = []
 5 z = []
 6 EPOCH = 100
 7 global_step = tf.Variable(0, name='global_step', trainable=False)
 8 
 9 with tf.Session() as sess:
10     sess.run(tf.global_variables_initializer())
11     for global_step in range(EPOCH):
12         # # 噪声线性余弦衰减
13         learning_rate1 = tf.train.noisy_linear_cosine_decay(
14             learning_rate=0.1, global_step=global_step, decay_steps=40,
15             initial_variance=0.01, variance_decay=0.1, num_periods=2, alpha=0.5, beta=0.2)
16         learning_rate2 = tf.train.noisy_linear_cosine_decay(
17             learning_rate=0.1, global_step=global_step, decay_steps=60,
18             initial_variance=0.01, variance_decay=0.1, num_periods=2, alpha=0.5, beta=0.2)
19 
20         lr1 = sess.run([learning_rate1])
21         lr2 = sess.run([learning_rate2])
22         y.append(lr1)
23         z.append(lr2)
24 
25 x = range(EPOCH)
26 fig = plt.figure()
27 ax = fig.add_subplot(111)
28 plt.plot(x, y, 'r-', linewidth=2)
29 plt.plot(x, z, 'b-', linewidth=2)
30 plt.title('noisy_linear_cosine_decay')
31 ax.set_xlabel('step')
32 ax.set_ylabel('learning rate')
33 plt.legend(labels=['decay_steps=40', 'decay_steps=60'], loc='upper right')
34 plt.show()

posted @ 2022-03-08 10:21 木屐呀阅读(894) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

木屐呀

TensorFlow中设置学习率的方式

指数衰减

分段常数衰减

自然指数衰减

多项式衰减

倒数衰减

余弦衰减

1. 标准余弦衰减

2.重启余弦衰减

3. 线性余弦噪声

4.噪声余弦衰减

公告