3.2 线性回归模型的从零实现

3.2.0 头文件

import random
import torch
from d2l import torch as d2l

3.2.1 关于yield关键字

def foo():
    """Minimal generator used to demonstrate how ``yield`` works.

    Calling ``foo()`` only creates the generator object; none of the body runs
    until the generator is advanced for the first time. Every resumption
    continues right after the ``yield`` at which the generator was paused,
    prints the value that was sent in, and then yields ``4`` again.
    """
    print("starting...")
    while True:
        # The yield expression evaluates to whatever the caller passes to
        # ``send()``; resuming with a plain ``next()`` makes it ``None``.
        sent_value = yield 4
        print("res:", sent_value)
g = foo()   # foo contains a yield, so this call only builds a generator; the body does not run yet
# First next(): foo starts running, prints "starting...", enters the while
# loop, pauses at the yield and hands back 4, which is then printed here.
# Second next(): execution resumes right after the paused yield, prints
# "res: None" (nothing was sent in), loops, and yields 4 again.
print(next(g))
# Output:
# starting...
# 4
print(next(g))
# Output:
# res: None
# 4

3.2.2 制作数据集

def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: True weight vector; ``len(w)`` is the number of features per sample.
        b: True bias added to every label.
        num_examples: Number of samples to generate.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` holds the noisy labels reshaped with ``reshape((-1, 1))``.
    """
    num_features = len(w)
    # Features are drawn from a normal distribution with mean 0 and std 1.
    X = torch.normal(0, 1, (num_examples, num_features))
    clean_labels = torch.matmul(X, w) + b
    # Perturb every label with Gaussian noise (mean 0, std 0.01).
    noise = torch.normal(0, 0.01, clean_labels.shape)
    return X, (clean_labels + noise).reshape((-1, 1))

# Ground-truth weight vector the model is expected to recover
true_w = torch.tensor([2, -3.4])
# Ground-truth bias
true_b = 4.2
# Build the dataset of 1000 samples: `features` holds the inputs, `labels` the noisy targets
features, labels = synthetic_data(true_w, true_b, 1000)
# print(features)
# 输出：
# tensor([[-1.2232, -1.1287],
#         [ 0.7761,  1.3506],
#         [ 1.8760, -0.1813],
#         ...,
#         [-0.3016,  0.9798],
#         [-1.2308, -1.4371],
#         [ 0.2694, -0.8200]])
# print(labels)
# 输出：
# tensor([[ 6.7570e+00],
#         [ 1.1429e+00],
#         [ 8.2363e+00],
#         ...,
#         [ 1.7683e+00],
#         [ 2.7000e+00],
#         [ 5.0721e+00]])

3.2.3 迭代读取数据集

def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    Args:
        batch_size: Maximum number of samples per batch.
        features: Collection of samples; ``len(features)`` is the dataset size.
        labels: Labels, indexed with the same indices as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive slices of a random
        permutation of the sample indices. The last batch is shorter when the
        dataset size is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    # Shuffle so samples are visited in random order, each once per pass.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        picked = torch.tensor(order[start:stop])
        yield features[picked], labels[picked]

# Minibatch size
batch_size = 10

# Fetch and print only the first minibatch (features and matching labels), then stop
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
# 输出：
# tensor([[-5.7894e-01, -2.4479e+00],
#         [ 2.0853e-01,  3.2863e-01],
#         [ 1.0836e-01, -3.0798e-01],
#         [-6.8576e-01, -1.2096e+00],
#         [ 1.0776e+00,  1.9446e-01],
#         [-1.9330e+00,  1.4515e+00],
#         [ 1.0839e+00, -9.5073e-01],
#         [-1.3466e+00,  1.7328e+00],
#         [ 1.0802e-03,  7.4219e-01],
#         [ 6.2577e-01,  1.0039e+00]])
#  tensor([[11.3540],
#         [ 3.4885],
#         [ 5.4594],
#         [ 6.9395],
#         [ 5.6864],
#         [-4.5882],
#         [ 9.6101],
#         [-4.3811],
#         [ 1.6704],
#         [ 2.0378]])

3.2.4 随机初始化模型参数

# Randomly initialize the weights w from a normal distribution with mean 0 and
# standard deviation 0.01 (torch.normal's second argument is the std, not the
# variance); requires_grad=True makes autograd track gradients for w.
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
# print(w)
# 输出：
# tensor([[ 0.0110],
#         [-0.0161]], requires_grad=True)

# Initialize the bias b to zero (a fixed value, not a random draw);
# requires_grad=True makes autograd track gradients for b.
b = torch.zeros(1, requires_grad=True)
# print(b)
# 输出：
# tensor([0.], requires_grad=True)

3.2.5 定义网络模型

def linreg(X, w, b):
    """Linear regression model: return the prediction ``Xw + b``.

    Args:
        X: Batch of input features.
        w: Weights, combined with ``X`` by matrix multiplication.
        b: Bias added to the product.
    """
    prediction = X @ w
    return prediction + b

3.2.6 定义损失函数

def squared_loss(y_hat, y):
    """Element-wise squared loss ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predicted values.
        y: Target values; reshaped to ``y_hat.shape`` before subtracting so
            both operands have the same shape.

    Returns:
        The per-element loss, shaped like ``y_hat`` (no reduction is applied).
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

3.2.7 定义优化器

def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size`` and
    its gradient is then reset to zero so the next ``backward()`` call starts
    from a clean accumulator.

    Args:
        params: Iterable of tensors to update (e.g. the weights and the bias).
        lr: Learning rate.
        batch_size: Divisor applied to the accumulated gradient; the caller is
            expected to have called ``backward()`` on a loss summed over the
            batch, so dividing turns the sum into an average.

    Parameters whose ``grad`` is ``None`` (they did not take part in the last
    backward pass) are left untouched instead of raising a ``TypeError``.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

3.2.8 训练过程

# Learning rate
lr = 0.03
# Number of passes over the whole dataset
num_epochs = 3
# Model: the linear regression function
net = linreg
# Loss: the element-wise squared loss
loss = squared_loss
# One iteration per epoch
for epoch in range(num_epochs):
    # data_iter reshuffles on every call, so each epoch visits the minibatches
    # (features X with their labels y) in a fresh random order.
    for X, y in data_iter(batch_size, features, labels):
        # Per-sample loss of the current minibatch
        l = loss(net(X, w, b), y)
        # Sum to a scalar before backward(); sgd then divides by batch_size,
        # updates w and b in place and zeroes their gradients.
        # NOTE: data_iter's last batch is shorter when the dataset size is not
        # a multiple of batch_size, yet sgd still divides by batch_size. With
        # 1000 samples and batch_size 10 every batch is full.
        l.sum().backward()
        sgd([w, b], lr, batch_size)
    # After the epoch, evaluate on the whole dataset without tracking gradients
    with torch.no_grad():
        # Mean loss over all samples using the current w and b
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
# 输出：
# epoch 1, loss 0.033427
# epoch 2, loss 0.000117
# epoch 3, loss 0.000052

# Report how far the learned weights and bias are from the ground-truth values
# (w is reshaped to true_w's shape so the subtraction is element-wise)
print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差: {true_b - b}')
# 输出：
# w的估计误差: tensor([-0.0001, -0.0002], grad_fn=<SubBackward0>)
# b的估计误差: tensor([0.0006], grad_fn=<RsubBackward1>)

本小节的完整代码如下

import random
import torch
from d2l import torch as d2l

# ------------------------------yield关键字------------------------------------
def foo():
    """Minimal generator used to demonstrate how ``yield`` works.

    Calling ``foo()`` only creates the generator object; none of the body runs
    until the generator is advanced for the first time. Every resumption
    continues right after the ``yield`` at which the generator was paused,
    prints the value that was sent in, and then yields ``4`` again.
    """
    print("starting...")
    while True:
        # The yield expression evaluates to whatever the caller passes to
        # ``send()``; resuming with a plain ``next()`` makes it ``None``.
        sent_value = yield 4
        print("res:", sent_value)
g = foo()   # foo contains a yield, so this call only builds a generator; the body does not run yet
# First next(): foo starts running, prints "starting...", enters the while
# loop, pauses at the yield and hands back 4, which is then printed here.
# Second next(): execution resumes right after the paused yield, prints
# "res: None" (nothing was sent in), loops, and yields 4 again.
print(next(g))
# Output:
# starting...
# 4
print(next(g))
# Output:
# res: None
# 4

# ------------------------------制作数据集------------------------------------
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following y = Xw + b + noise.

    Args:
        w: True weight vector; ``len(w)`` is the number of features per sample.
        b: True bias added to every label.
        num_examples: Number of samples to generate.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` holds the noisy labels reshaped with ``reshape((-1, 1))``.
    """
    num_features = len(w)
    # Features are drawn from a normal distribution with mean 0 and std 1.
    X = torch.normal(0, 1, (num_examples, num_features))
    clean_labels = torch.matmul(X, w) + b
    # Perturb every label with Gaussian noise (mean 0, std 0.01).
    noise = torch.normal(0, 0.01, clean_labels.shape)
    return X, (clean_labels + noise).reshape((-1, 1))

# Ground-truth weight vector the model is expected to recover
true_w = torch.tensor([2, -3.4])
# Ground-truth bias
true_b = 4.2
# Build the dataset of 1000 samples: `features` holds the inputs, `labels` the noisy targets
features, labels = synthetic_data(true_w, true_b, 1000)
# print(features)
# 输出：
# tensor([[-1.2232, -1.1287],
#         [ 0.7761,  1.3506],
#         [ 1.8760, -0.1813],
#         ...,
#         [-0.3016,  0.9798],
#         [-1.2308, -1.4371],
#         [ 0.2694, -0.8200]])
# print(labels)
# 输出：
# tensor([[ 6.7570e+00],
#         [ 1.1429e+00],
#         [ 8.2363e+00],
#         ...,
#         [ 1.7683e+00],
#         [ 2.7000e+00],
#         [ 5.0721e+00]])


# ------------------------------迭代读取数据集------------------------------------
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    Args:
        batch_size: Maximum number of samples per batch.
        features: Collection of samples; ``len(features)`` is the dataset size.
        labels: Labels, indexed with the same indices as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive slices of a random
        permutation of the sample indices. The last batch is shorter when the
        dataset size is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    # Shuffle so samples are visited in random order, each once per pass.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        picked = torch.tensor(order[start:stop])
        yield features[picked], labels[picked]

# Minibatch size
batch_size = 10

# Fetch and print only the first minibatch (features and matching labels), then stop
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
# 输出：
# tensor([[-5.7894e-01, -2.4479e+00],
#         [ 2.0853e-01,  3.2863e-01],
#         [ 1.0836e-01, -3.0798e-01],
#         [-6.8576e-01, -1.2096e+00],
#         [ 1.0776e+00,  1.9446e-01],
#         [-1.9330e+00,  1.4515e+00],
#         [ 1.0839e+00, -9.5073e-01],
#         [-1.3466e+00,  1.7328e+00],
#         [ 1.0802e-03,  7.4219e-01],
#         [ 6.2577e-01,  1.0039e+00]])
#  tensor([[11.3540],
#         [ 3.4885],
#         [ 5.4594],
#         [ 6.9395],
#         [ 5.6864],
#         [-4.5882],
#         [ 9.6101],
#         [-4.3811],
#         [ 1.6704],
#         [ 2.0378]])


# ------------------------------随机初始化模型参数------------------------------------

# Randomly initialize the weights w from a normal distribution with mean 0 and
# standard deviation 0.01 (torch.normal's second argument is the std, not the
# variance); requires_grad=True makes autograd track gradients for w.
w = torch.normal(0, 0.01, size=(2,1), requires_grad=True)
# print(w)
# 输出：
# tensor([[ 0.0110],
#         [-0.0161]], requires_grad=True)

# Initialize the bias b to zero (a fixed value, not a random draw);
# requires_grad=True makes autograd track gradients for b.
b = torch.zeros(1, requires_grad=True)
# print(b)
# 输出：
# tensor([0.], requires_grad=True)

# ------------------------------定义网络模型------------------------------------
def linreg(X, w, b):
    """Linear regression model: return the prediction ``Xw + b``.

    Args:
        X: Batch of input features.
        w: Weights, combined with ``X`` by matrix multiplication.
        b: Bias added to the product.
    """
    prediction = X @ w
    return prediction + b

# ------------------------------定义损失函数------------------------------------
def squared_loss(y_hat, y):
    """Element-wise squared loss ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predicted values.
        y: Target values; reshaped to ``y_hat.shape`` before subtracting so
            both operands have the same shape.

    Returns:
        The per-element loss, shaped like ``y_hat`` (no reduction is applied).
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

# ------------------------------定义优化器------------------------------------
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size`` and
    its gradient is then reset to zero so the next ``backward()`` call starts
    from a clean accumulator.

    Args:
        params: Iterable of tensors to update (e.g. the weights and the bias).
        lr: Learning rate.
        batch_size: Divisor applied to the accumulated gradient; the caller is
            expected to have called ``backward()`` on a loss summed over the
            batch, so dividing turns the sum into an average.

    Parameters whose ``grad`` is ``None`` (they did not take part in the last
    backward pass) are left untouched instead of raising a ``TypeError``.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

# ------------------------------训练过程------------------------------------
# Learning rate
lr = 0.03
# Number of passes over the whole dataset
num_epochs = 3
# Model: the linear regression function
net = linreg
# Loss: the element-wise squared loss
loss = squared_loss
# One iteration per epoch
for epoch in range(num_epochs):
    # data_iter reshuffles on every call, so each epoch visits the minibatches
    # (features X with their labels y) in a fresh random order.
    for X, y in data_iter(batch_size, features, labels):
        # Per-sample loss of the current minibatch
        l = loss(net(X, w, b), y)
        # Sum to a scalar before backward(); sgd then divides by batch_size,
        # updates w and b in place and zeroes their gradients.
        # NOTE: data_iter's last batch is shorter when the dataset size is not
        # a multiple of batch_size, yet sgd still divides by batch_size. With
        # 1000 samples and batch_size 10 every batch is full.
        l.sum().backward()
        sgd([w, b], lr, batch_size)
    # After the epoch, evaluate on the whole dataset without tracking gradients
    with torch.no_grad():
        # Mean loss over all samples using the current w and b
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
# 输出：
# epoch 1, loss 0.033427
# epoch 2, loss 0.000117
# epoch 3, loss 0.000052

# Report how far the learned weights and bias are from the ground-truth values
# (w is reshaped to true_w's shape so the subtraction is element-wise)
print(f'w的估计误差: {true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差: {true_b - b}')
# 输出：
# w的估计误差: tensor([-0.0001, -0.0002], grad_fn=<SubBackward0>)
# b的估计误差: tensor([0.0006], grad_fn=<RsubBackward1>)

posted on 2022-11-01 21:45 yc-limitless 阅读(51) 评论(0) 编辑收藏举报