2-3自动微分机制——eat_tensorflow2_in_30_days

神经网络通常依赖反向传播求梯度来更新网络参数，求梯度过程通常是一件非常复杂而且容易出错的事情

而深度学习框架可以帮助我们自动地完成这种求梯度运算

TensorFlow一般使用梯度磁带tf.GradientTape来记录正向运算过程，然后反向播放磁带，自动得到梯度值。

这种利用tf.GradientTape求微分的方法叫作TensorFlow的自动微分机制

利用梯度磁带求导数

import tensorflow as tf
import numpy as np

# Differentiate f(x) = a*x**2 + b*x + c with respect to x, evaluated at x = 0.

# x is the Variable being differentiated; a, b and c are constant coefficients.
x = tf.Variable(0.0, name='x', dtype=tf.float32)
a = tf.constant(1.0)
b = tf.constant(-2.0)
c = tf.constant(1.0)

# The forward computation runs inside the tape context so that it is recorded.
with tf.GradientTape() as tape:
    y = a*tf.pow(x, 2) + b*x +c

# Ask the tape for dy/dx and print it.
dy_dx = tape.gradient(y, x)
print(dy_dx)

"""
tf.Tensor(-2.0, shape=(), dtype=float32)
"""

# Constant tensors can be differentiated as well, but they have to be
# registered with tape.watch first.

with tf.GradientTape() as tape:
    tape.watch([a, b, c])
    y = a*tf.pow(x, 2) + b*x + c

# One gradient is unpacked per source, in the same order as the sources list.
dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])
print(dy_dx)
print(dy_da)
print(dy_db)
print(dy_dc)

"""
tf.Tensor(-2.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
"""

# Second-order derivative: two tapes are nested, and the first derivative is
# computed by the inner tape while the outer tape is still recording.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = a*tf.pow(x, 2) + b*x + c
    dy_dx = tape1.gradient(y, x)
# Differentiating dy_dx with the outer tape gives d2y/dx2.
dy2_dx2 = tape2.gradient(dy_dx, x)

print(dy2_dx2)

"""
tf.Tensor(2.0, shape=(), dtype=float32)
"""

# 可以在autograph中使用

@tf.function
def f(x):
    """Return the derivative and the value of x**2 - 2*x + 1 at ``x``.

    Args:
        x: Point at which to evaluate; it is cast to ``tf.float32`` first.

    Returns:
        A ``(dy_dx, y)`` pair: the gradient of the polynomial with respect to
        the cast input, followed by the polynomial value itself.
    """
    # Convert the argument to tf.float32 before recording anything.
    x = tf.cast(x, tf.float32)

    coef2 = tf.constant(1.0)
    coef1 = tf.constant(-2.0)
    coef0 = tf.constant(1.0)

    with tf.GradientTape() as recorder:
        # x is a tensor here, so it is registered with the tape explicitly.
        recorder.watch(x)
        value = coef2 * tf.pow(x, 2) + coef1 * x + coef0

    slope = recorder.gradient(value, x)
    return slope, value

# Evaluate f at x = 0 and at x = 1; each call prints the (dy_dx, y) pair.
tf.print(f(tf.constant(0.0)))
tf.print(f(tf.constant(1.0)))

"""
(-2, 1)
(0, 0)
"""

利用梯度磁带和优化器求最小值

# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.apply_gradients.

# Start the search from x = 0.
x = tf.Variable(0.0, name='x', dtype=tf.float32)
a = tf.constant(1.0)
b = tf.constant(-2.0)
c = tf.constant(1.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    # Record the forward pass, then hand the (gradient, variable) pair to
    # the optimizer so that it updates x.
    with tf.GradientTape() as tape:
        y = a*tf.pow(x, 2) + b*x + c
    dy_dx = tape.gradient(y, x)
    optimizer.apply_gradients(grads_and_vars=[(dy_dx, x)])
    
# NOTE: y was computed before the last update of x in the loop above.
tf.print('y=', y, ";x=", x)

"""
y= 0 ;x= 0.999998569
"""

# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.minimize.
# optimizer.minimize is equivalent to first computing the gradient with a tape
# and then calling apply_gradients.

# Re-create x so that the search starts from 0 again.
x = tf.Variable(0.0, name='x', dtype=tf.float32)

# Note that f() takes no parameters: it reads the module-level variable x.
def f():
    """Return x**2 - 2*x + 1 evaluated at the module-level variable ``x``."""
    quadratic_term = tf.constant(1.0) * tf.pow(x, 2)
    linear_term = tf.constant(-2.0) * x
    return quadratic_term + linear_term + tf.constant(1.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
# f is passed as a zero-argument loss callable; x is the variable to update.
for _ in range(1000):
    optimizer.minimize(f, [x])

# Use the same ';x=' separator as the apply_gradients example so that the
# printed line matches the expected output recorded below.
tf.print('y=', f(), ';x=', x)

"""
y= 0 ;x= 0.999998569
"""

# Solve the minimisation inside AutoGraph (a tf.function)
# using optimizer.apply_gradients.

# The variable and the optimizer are created at module level, outside the
# tf.function that uses them.
x = tf.Variable(0.0, name='x', dtype=tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def minimizef():
    """Run 1000 SGD steps on x**2 - 2*x + 1 and return the final value.

    The module-level variable ``x`` is updated in place through the
    module-level ``optimizer``.

    Returns:
        The polynomial evaluated at ``x`` after the last update.
    """
    coef2 = tf.constant(1.0)
    coef1 = tf.constant(-2.0)
    coef0 = tf.constant(1.0)

    # AutoGraph note: iterate with tf.range(1000), not range(1000).
    for _ in tf.range(1000):
        with tf.GradientTape() as recorder:
            loss = coef2 * tf.pow(x, 2) + coef1 * x + coef0
        slope = recorder.gradient(loss, x)
        optimizer.apply_gradients([(slope, x)])

    # Re-evaluate after the loop so the result reflects the final x.
    return coef2 * tf.pow(x, 2) + coef1 * x + coef0

# Run the optimisation, print the value it returns, then print the final x.
tf.print(minimizef())
tf.print(x)

"""
0
0.999998569
"""

# Solve the minimisation inside AutoGraph (a tf.function)
# using optimizer.minimize.

# The variable and the optimizer are created at module level, outside the
# tf.functions that use them.
x = tf.Variable(0.0,name = "x",dtype = tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def f():
    """Return x**2 - 2*x + 1 evaluated at the module-level variable ``x``."""
    quadratic_term = tf.constant(1.0) * tf.pow(x, 2)
    linear_term = tf.constant(-2.0) * x
    return quadratic_term + linear_term + tf.constant(1.0)

@tf.function
def train(epoch):
    """Call ``optimizer.minimize`` on ``f`` ``epoch`` times and return ``f()``.

    Args:
        epoch: Number of minimize calls to make; used as the ``tf.range`` limit.

    Returns:
        The value of ``f()`` after the loop has finished.
    """
    for step in tf.range(epoch):
        optimizer.minimize(f, var_list=[x])
    final_value = f()
    return final_value

# Train for 1000 iterations, print the value train returns, then the final x.
tf.print(train(1000))
tf.print(x)

"""
0
0.999998569
"""