『PyTorch x TensorFlow』Part 6: Automatic Differentiation through Least Squares
TensorFlow's automatic differentiation mechanism
Related post: 『TensorFlow』Part 2: Linear Fitting & Neural Network Fitting
Three simple variants are tried below:

- build the graph directly from tf ops such as gradients and assign, and run gradient descent by hand
- use an optimizer to compute the gradients, then apply them to the variables explicitly
- use the optimizer directly, without ever obtaining the gradients explicitly

Parameter updates must go through assign, which can also bring control-dependency issues into play (a sketch of that follows below).
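As a minimal sketch of what such a control-dependency issue looks like (TF 1.x, with stand-in gradient tensors rather than the real ones from the model below): if one assign should only run after another has been applied, the ordering has to be stated explicitly, otherwise TensorFlow may execute them in any order within a single session.run.

```python
import tensorflow as tf

w = tf.Variable(1.0)
b = tf.Variable(1.0)
lr = 0.001

# stand-in "gradients", purely for illustration
grad_w = 2.0 * w
grad_b = 2.0 * b

update_w = tf.assign(w, w - lr * grad_w)
# make the b update run only after the w update has been applied
with tf.control_dependencies([update_w]):
    update_b = tf.assign(b, b - lr * grad_b)

train = update_b  # running this op runs both assigns, in order

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train)
    print(sess.run([w, b]))
```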
```python
# Author : Hellcat
# Time   : 2/20/2018
import tensorflow as tf

tf.set_random_seed(1000)

def get_fake_data(batch_size=8):
    # y = 3x + 1 plus a little Gaussian noise
    x = 20 * tf.random_uniform([batch_size, 1], dtype=tf.float32)
    y = tf.multiply(x, 3) + 1 + tf.random_normal([batch_size, 1], mean=0, stddev=0.01, dtype=tf.float32)
    return x, y

x, y = get_fake_data()

w = tf.Variable(tf.random_uniform([1, 1], dtype=tf.float32), name='w')
b = tf.Variable(tf.random_uniform([1, 1], dtype=tf.float32), name='b')

lr = 0.001

y_pred = tf.add(tf.multiply(w, x), b)
loss = tf.reduce_mean(tf.multiply(0.5, tf.pow(y_pred - y, 2)), axis=0)

# 1. raw gradients: build the update ops by hand with gradients/assign
grad_w, grad_b = tf.gradients(loss, [w, b])
train_w = tf.assign(w, tf.subtract(w, lr * grad_w))
train_b = tf.assign(b, tf.subtract(b, lr * grad_b))
train = [train_w, train_b]  # note: no ordering is guaranteed between the two assigns

# 2. optimizer, but with explicit gradient ops
# optimizer = tf.train.GradientDescentOptimizer(lr)  # optimizer & learning-rate choice
# grads_and_vars = optimizer.compute_gradients(loss, [w, b])
# train = optimizer.apply_gradients(grads_and_vars)

# 3. let the optimizer do everything
# train = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for ii in range(80000):
        sess.run([train])
        if ii % 1000 == 0:
            print(sess.run(w), sess.run(b))
```
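As a quick sanity check that tf.gradients really returns the least-squares gradients, here is a minimal sketch (TF 1.x, with a plain 0.5 * err² loss and fixed constant data instead of the random ops above); the names x_np, y_np, grad_w_manual, grad_b_manual exist only for this illustration. By hand, with err = w*x + b - y, the gradients are dL/dw = mean(err * x) and dL/db = mean(err).

```python
import numpy as np
import tensorflow as tf

x_np = np.linspace(0.0, 20.0, 8, dtype=np.float32).reshape(-1, 1)
y_np = 3.0 * x_np + 1.0

x = tf.constant(x_np)
y = tf.constant(y_np)
w = tf.Variable([[0.5]], dtype=tf.float32)
b = tf.Variable([[0.0]], dtype=tf.float32)

err = w * x + b - y
loss = tf.reduce_mean(0.5 * tf.square(err))

# what automatic differentiation gives us
grad_w, grad_b = tf.gradients(loss, [w, b])

# what the least-squares calculus gives us by hand
grad_w_manual = tf.reduce_mean(err * x)
grad_b_manual = tf.reduce_mean(err)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([grad_w, grad_w_manual]))  # the pair should agree
    print(sess.run([grad_b, grad_b_manual]))
```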
PyTorch's automatic differentiation mechanism
Since gradients accumulate across backward() calls, never forget to clear them after each update.
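A minimal sketch of that accumulation behaviour (using the same old-style Variable API as the code below); the only point is that a second backward() adds into .grad instead of overwriting it:

```python
import torch as t
from torch.autograd import Variable as V

w = V(t.ones(1), requires_grad=True)

(2 * w).sum().backward()
print(w.grad)        # w.grad is now 2

(2 * w).sum().backward()
print(w.grad)        # 4: accumulated, not overwritten

w.grad.data.zero_()  # clear before the next step
(2 * w).sum().backward()
print(w.grad)        # back to 2
```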
```python
import torch as t
from torch.autograd import Variable as V
import matplotlib.pyplot as plt
from IPython import display

# fix the random seed
t.manual_seed(1000)

def get_fake_data(batch_size=8):
    # y = 2x + 3 plus Gaussian noise
    x = t.rand(batch_size, 1) * 20
    y = x * 2 + 3 + 3 * t.randn(batch_size, 1)
    return x, y

x, y = get_fake_data()
plt.scatter(x.squeeze(), y.squeeze())

w = V(t.rand(1, 1), requires_grad=True)
b = V(t.rand(1, 1), requires_grad=True)

lr = 0.001

for ii in range(8000):
    x, y = get_fake_data()
    x, y = V(x), V(y)
    # print(x, y)

    y_pred = x.mm(w) + b.expand_as(x)
    loss = 0.5 * (y_pred - y) ** 2
    loss = loss.sum()  # reduce the per-sample loss vector to a scalar
    loss.backward()

    # gradient-descent step on the raw data, bypassing autograd
    w.data.sub_(lr * w.grad.data)
    b.data.sub_(lr * b.grad.data)

    # gradients accumulate across backward() calls, so clear them every step
    w.grad.data.zero_()
    b.grad.data.zero_()

    if ii % 1000 == 0:
        # redraw the current fit against a fresh batch of fake data
        display.clear_output(wait=True)
        x = t.arange(0, 20).float().view(-1, 1)
        y = x.mm(w.data) + b.data.expand_as(x)
        plt.plot(x.numpy(), y.numpy())

        x2, y2 = get_fake_data(batch_size=20)
        plt.scatter(x2.squeeze(), y2.squeeze())

        plt.xlim(0, 20)
        plt.ylim(0, 40)
        plt.show()

print(w.data.squeeze(), b.data.squeeze())
```
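For symmetry with the optimizer variants in the TensorFlow section, the same fit can delegate both the parameter update and the gradient clearing to torch.optim.SGD. A minimal sketch under the same setup (get_fake_data as above, plotting dropped):

```python
import torch as t
from torch.autograd import Variable as V

t.manual_seed(1000)

def get_fake_data(batch_size=8):
    x = t.rand(batch_size, 1) * 20
    y = x * 2 + 3 + 3 * t.randn(batch_size, 1)
    return x, y

w = V(t.rand(1, 1), requires_grad=True)
b = V(t.rand(1, 1), requires_grad=True)
optimizer = t.optim.SGD([w, b], lr=0.001)

for ii in range(8000):
    x, y = get_fake_data()
    x, y = V(x), V(y)

    y_pred = x.mm(w) + b.expand_as(x)
    loss = (0.5 * (y_pred - y) ** 2).sum()

    optimizer.zero_grad()   # replaces the manual grad.data.zero_() calls
    loss.backward()         # autograd fills w.grad and b.grad
    optimizer.step()        # replaces the manual w.data.sub_(lr * ...) updates

print(w.data.squeeze(), b.data.squeeze())
```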