3 - Linear Regression from Scratch
1. Note the difference between = and -= when performing the gradient update
def minbatch_sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param = param - lr * param.grad / batch_size
            param.grad.zero_()
This code is wrong; the update should read param -= lr * param.grad / batch_size. The reason: param = param - lr * param.grad / batch_size creates a new tensor and only rebinds the local loop variable param to it, so the actual parameter stored in params is never changed, and since the new tensor's .grad is None, the following param.grad.zero_() raises an AttributeError. The in-place -= modifies the original parameter tensor directly, which is what the update is meant to do.
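A minimal sketch of the difference (the numbers here are made up for illustration and are not part of the notes):

import torch

p = torch.tensor([1.0, 2.0], requires_grad=True)
p.sum().backward()                          # p.grad is now tensor([1., 1.])

with torch.no_grad():
    for param in [p]:
        param = param - 0.1 * param.grad    # rebinds 'param' to a new tensor; p is untouched
        print(param.grad)                   # None, so param.grad.zero_() would fail here
print(p)                                    # still tensor([1., 2.], requires_grad=True)

with torch.no_grad():
    for param in [p]:
        param -= 0.1 * param.grad           # in-place: updates p itself
        param.grad.zero_()
print(p)                                    # tensor([0.9000, 1.9000], requires_grad=True)

With that distinction in mind, the complete from-scratch implementation is below.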
import torch
import random
# Initialize the learnable parameters: weights and bias
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Prepare the data
# Generate features and labels from a known linear model
def synthetic_data(w, b, num_examples):
    x = torch.normal(0, 1, (num_examples, 2))
    y = torch.matmul(x, w) + b
    y += torch.normal(0, 0.01, y.shape)  # add Gaussian noise to the labels
    return x, y.reshape((-1, 1))
# Ground-truth parameters used to generate the synthetic dataset
temporary_w = torch.tensor([2, -3.4])
temporary_b = 4.2
features, labels = synthetic_data(temporary_w, temporary_b, 10000)
# Read the dataset in minibatches
def data_iter(features, labels, batch_size):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle so examples are visited in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: min(num_examples, i + batch_size)])
        yield features[batch_indices], labels[batch_indices]
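A quick sanity check (not part of the original notes): fetch one minibatch and confirm its shapes before training.

X_sample, Y_sample = next(data_iter(features, labels, 10))
print(X_sample.shape, Y_sample.shape)  # torch.Size([10, 2]) torch.Size([10, 1])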
# Define the model
def linreg(x, w, b):
    return torch.matmul(x, w) + b
# Define the loss function and the optimizer
def square_loss(y_pred, y):
    return (y_pred - y.reshape(y_pred.shape)) ** 2 / 2
def minbatch_sgd(params, lr, batch_size):
    with torch.no_grad():  # no gradient tracking while updating parameters
        for param in params:
            param -= lr * param.grad / batch_size  # in-place update (see the note above)
            param.grad.zero_()  # reset gradients for the next minibatch
# Training
lr = 0.01
epochs = 100
batch_size = 10
for epoch in range(epochs):
    for X, Y in data_iter(features, labels, batch_size):  # fetch one minibatch at a time
        loss = square_loss(linreg(X, w, b), Y)
        # loss has shape (batch_size, 1) rather than being a scalar, so its elements
        # are summed first and the gradients w.r.t. [w, b] are computed from that sum
        loss.sum().backward()  # backpropagation
        minbatch_sgd([w, b], lr, batch_size)
    with torch.no_grad():
        train_loss = square_loss(linreg(features, w, b), labels)
        print('epoch:{} loss:{}'.format(epoch, train_loss.mean()))  # train_loss.mean() averages the per-example losses
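As a final sanity check (not in the original notes), the learned parameters can be compared against the ground-truth values used to generate the data; with this learning rate and number of epochs the errors should be close to the noise level:

with torch.no_grad():
    print('error in w:', temporary_w.reshape(w.shape) - w)
    print('error in b:', temporary_b - b)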