0801线性回归从零开始

点击查看代码

import matplotlib as matplotlib
import random
import torch
from d2l import torch as d2l

"""
线性回归是对n维输入的加权，外加偏差
使用平方损失来衡量预测值和真实值的差异
线性回归有显式解
线性回归可以看作单层神经网络 

超参数：人为指定的值
梯度下降通过不断沿着反梯度的方向更新参数求解
小批量随机梯度下降是深度学习默认的求解方法
两个重要的超参数：批量大小、学习率

"""

# Generate y = Xw + b + noise
def synthetic_data(w, b, num_examples):
    """Create a synthetic linear-regression dataset.

    Args:
        w: 1-D weight tensor; its length sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to generate.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(num_examples, len(w))``
        and ``y`` is reshaped to a single column.
    """
    num_features = len(w)
    # Features are drawn from a standard normal distribution.
    X = torch.normal(0, 1, (num_examples, num_features))
    clean_labels = torch.matmul(X, w) + b
    # Observation noise: zero mean, standard deviation 0.01.
    noise = torch.normal(0, 0.01, clean_labels.shape)
    noisy_labels = clean_labels + noise
    return X, noisy_labels.reshape((-1, 1))
# Ground-truth parameters used to generate the data: w = [2, -3.4], b = 4.2
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Generate 1000 samples
features, labels = synthetic_data(true_w, true_b, 1000)
# Show the first sample as a quick sanity check
print('features:', features[0], '\nlabel:', labels[0])
d2l.set_figsize()
# Scatter the second feature column (index 1) against the labels; the
# trailing 1 is passed as the third positional argument (presumably the
# marker size of matplotlib's scatter -- confirm against d2l.plt)
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
# Uncomment to display the figure when running as a plain script
# d2l.plt.show()

# data_iter takes a batch size, a feature matrix and a label vector and
# yields minibatches of (at most) batch_size samples
def data_iter(batch_size, features, labels):
    """Yield shuffled ``(features, labels)`` minibatches.

    Args:
        batch_size: Number of samples per batch; the final batch is smaller
            when the sample count is not a multiple of it.
        features: Indexable feature tensor; ``len(features)`` is the number
            of samples.
        labels: Label tensor indexed with the same row indices.

    Yields:
        Tuples ``(features[idx], labels[idx])`` for each batch of indices.
    """
    total = len(features)
    # Visit samples in random order by shuffling their row indices.
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end is clamped, so the last batch may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

# Minibatch size used for the preview below and for training
batch_size = 10

# Print only the first minibatch to check what data_iter produces
for X, y in data_iter(batch_size, features,labels):
    print(X, '\n', y)
    break

# Initialise the model parameters
# Weights start at zero here, shape (2, 1), with gradient tracking enabled
w = torch.zeros(size=(2, 1), requires_grad=True)
# Alternative initialisation: normal distribution with mean 0 and
# standard deviation 0.01
# w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
# Bias starts at zero, length 1
b = torch.zeros(1, requires_grad=True)

# Model definition: linear regression
def linreg(X, w, b):
    """Return the linear model output ``Xw + b``.

    Args:
        X: Feature tensor.
        w: Weight tensor multiplied with ``X`` via ``torch.matmul``.
        b: Bias, broadcast-added to the product.
    """
    weighted_sum = torch.matmul(X, w)
    return weighted_sum + b

# Loss definition: squared loss
def squard_loss(y_hat, y):
    """Return the element-wise halved squared error.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so the
    result has the same shape as ``y_hat`` (no reduction is applied).

    Args:
        y_hat: Predicted values.
        y: Target values with the same number of elements as ``y_hat``.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

# Optimiser definition: minibatch stochastic gradient descent
def sgd(params, lr, batch_size):
    """Apply one minibatch SGD step to ``params`` in place.

    Each parameter is moved by ``lr * grad / batch_size`` and its gradient
    is then reset to zero so the next backward pass starts fresh.

    Args:
        params: Iterable of tensors that carry a ``.grad`` attribute.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient (the callers
            backpropagate a summed loss, so this turns it into a mean).
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been accumulated for this parameter (for
                # example it did not take part in the loss); leave it
                # untouched instead of failing on ``lr * None``.
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

# Training
# Learning rate
lr = 0.03
# Number of passes over the whole dataset
num_epochs = 3
# Model to train
net = linreg
# Loss function
loss = squard_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features,labels):
        # w and b are the module-level parameters defined earlier
        l = loss(net(X, w, b), y)
        # l holds one loss value per sample (same shape as the model
        # output), so it is not a scalar
        # Sum all of its elements and backpropagate to get the gradients
        # with respect to [w, b]
        l.sum().backward()
        # Update w and b; sgd divides the summed gradient by batch_size
        sgd([w, b], lr, batch_size)
    # Report the mean loss over the full dataset without tracking gradients
    with torch.no_grad():
        train_loss = loss(net(features, w, b), labels)
        print("epoch {}, loss {}".format(epoch + 1, float(train_loss.mean())))

# Estimation error relative to the ground-truth parameters
print("w的估计误差：{}".format(true_w - w.reshape(true_w.shape)))
print("b的估计误差：{}".format(true_b - b))

1、如果我们将权重初始化为零，会发生什么。算法仍然有效吗？

将权重初始化为零，算法依然有效。但网络层数加深后，在全连接的情况下，反向传播时，由于权重的对称性会导致出现隐藏神经元的对称性，使得多个隐藏神经元的作用就如同一个神经元，影响算法效果。

2、假设你是乔治·西蒙·欧姆，试图为电压和电流的关系建立一个模型。你能使用自动微分来学习模型的参数吗?

点击查看代码

import torch
import random

# Generate the dataset: u = I r + b + noise
def synthetic_data(r, b, num_examples):
    """Create synthetic voltage/current data following ``u = I r + b``.

    Args:
        r: 1-D tensor of true resistance coefficients; its length sets the
            number of current columns.
        b: Offset added to every voltage.
        num_examples: Number of rows to generate.

    Returns:
        A tuple ``(I, u)`` where ``I`` has shape ``(num_examples, len(r))``
        and ``u`` is reshaped to a single column.
    """
    num_inputs = len(r)
    # Currents are drawn from a standard normal distribution.
    current = torch.normal(0, 1, (num_examples, num_inputs))
    clean_voltage = torch.matmul(current, r) + b
    # Measurement noise: zero mean, standard deviation 0.01.
    noise = torch.normal(0, 0.01, clean_voltage.shape)
    voltage = clean_voltage + noise
    return current, voltage.reshape((-1, 1))

# Ground-truth resistance and offset used to generate the data
true_r = torch.tensor([3.3])
true_b = 2.2
# Generate 1000 samples
features, labels = synthetic_data(true_r, true_b, 1000)
# Show the first sample as a quick sanity check
print(features[0])
print(labels[0])

# Read the dataset in minibatches
def data_iter(batch_size, features, labels):
    """Yield shuffled ``(features, labels)`` minibatches.

    Args:
        batch_size: Number of samples per batch; the final batch is smaller
            when the sample count is not a multiple of it.
        features: Indexable feature tensor; ``len(features)`` is the number
            of samples.
        labels: Label tensor indexed with the same row indices.

    Yields:
        Tuples ``(features[idx], labels[idx])`` for each batch of indices.
    """
    total = len(features)
    # Shuffle the row indices so samples are visited in random order.
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end is clamped, so the last batch may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

# Initialise the model parameters
# Resistance estimate: drawn from a normal distribution with mean 0 and
# standard deviation 0.01, shape (1, 1), with gradient tracking enabled
r = torch.normal(0, 0.01, size=(1,1), requires_grad=True)
# Offset starts at zero, length 1
b = torch.zeros(1, requires_grad=True)

# Model definition
def linreg(I, r, b):
    """Return the linear model output ``I r + b``.

    Args:
        I: Input (current) tensor.
        r: Coefficient tensor multiplied with ``I`` via ``torch.matmul``.
        b: Offset, broadcast-added to the product.
    """
    product = torch.matmul(I, r)
    return product + b

# Loss definition
def squared_loss(u_hat, u):
    """Return the element-wise halved squared error.

    ``u`` is reshaped to the shape of ``u_hat`` before subtracting, so the
    result has the same shape as ``u_hat`` (no reduction is applied).

    Args:
        u_hat: Predicted values.
        u: Target values with the same number of elements as ``u_hat``.
    """
    residual = u_hat - u.reshape(u_hat.shape)
    return residual ** 2 / 2

# Optimiser definition
def sgd(params, lr, batch_size):
    """Apply one minibatch SGD step to ``params`` in place.

    Each parameter is moved by ``lr * grad / batch_size`` and its gradient
    is then reset to zero so the next backward pass starts fresh.

    Args:
        params: Iterable of tensors that carry a ``.grad`` attribute.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient (the callers
            backpropagate a summed loss, so this turns it into a mean).
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been accumulated for this parameter (for
                # example it did not take part in the loss); leave it
                # untouched instead of failing on ``lr * None``.
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()

# Training
# Learning rate, number of passes over the data, and minibatch size
lr = 0.05
num_epochs = 5
batch_size = 10
# Model and loss function used by the loop below
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for I, u in data_iter(batch_size, features, labels):
        # Per-sample loss for this minibatch (not a scalar)
        l = loss(net(I, r, b), u)
        # Sum the losses and backpropagate to get gradients for [r, b]
        l.sum().backward()
        # Update r and b; sgd divides the summed gradient by batch_size
        sgd([r, b], lr, batch_size)
    # Report the mean loss over the full dataset without tracking gradients
    with torch.no_grad():
        train_loss = loss(net(features, r, b), labels)
        print("epoch {}, loss {}".format(epoch + 1, float(train_loss.mean())))

# Estimation error relative to the ground-truth parameters.
# The first label names r: the learned parameter here is the resistance r,
# not a weight w.
print("r的估计误差：{}".format(true_r - r.reshape(true_r.shape)))
print("b的估计误差：{}".format(true_b - b))

3、您能基于普朗克定律使用光谱能量密度来确定物体的温度吗？

4、如果你想计算二阶导数可能会遇到什么问题？你会如何解决这些问题？

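二阶导数可能不存在，或无法得到显式的一阶导数。此外，默认情况下一阶导数的计算过程不会被记录到计算图中，因此无法直接对它再次求导。可以在求一阶导数时使用create_graph=True参数（例如torch.autograd.grad(y, x, create_graph=True)），使一阶导数本身也带有计算图，进而求二阶导；仅使用retain_graph=True只会保留原来的计算图，并不足以求二阶导。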

5、为什么在squared_loss函数中需要使用reshape函数？

防止y_hat和y一个是列向量、一个是行向量（或一维向量）。形状不一致时相减会触发广播，例如形状为(n,1)与(n,)的两个张量相减会得到(n,n)的矩阵，损失值就会算错。

6、尝试使用不同的学习率，观察损失函数值下降的快慢。

在能够收敛的范围内，学习率越大，损失函数下降越快。但学习率过大时损失会震荡甚至发散，导致无法收敛；学习率过小则下降很慢，需要更多轮数才能收敛。

7、如果样本个数不能被批量大小整除，data_iter函数的行为会有什么变化？

由于使用了indices[i: min(i + batch_size, num_examples)]，样本个数不能被批量大小整除时data_iter仍能正常工作，只是最后一个批量的样本数会小于batch_size。另外，Python切片越界时会自动截断，因此即使不写min也不会报错，min只是让意图更明确。

posted @ 2022-06-11 14:32 荒北阅读(119) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 0802线性回归简洁实现

· 5501循环神经网络从零开始

· 动手学深度学习01 线性回归从零实现

· 小灰灰深度学习day8——线性回归(从零开始实现与简洁的实现)

· 从0开始实现线性回归模型

阅读排行：
· 无需6万激活码！GitHub神秘组织3小时极速复刻Manus，手把手教你使用OpenManus搭建本
· Manus爆火，是硬核还是营销？
· 终于写完轮子一部分：tcp代理了，记录一下
· 别再用vector＜bool＞了！Google高级工程师：这可能是STL最大的设计失误
· 单元测试从入门到精通

公告

昵称：荒北
园龄： 2年9个月
粉丝： 1
关注： 0

+加关注

2025年3月

日

一

二

三

四

五

六

0801线性回归从零开始

1、如果我们将权重初始化为零，会发生什么。算法仍然有效吗？

2、假设你是乔治·西蒙·欧姆，试图为电压和电流的关系建立一个模型。你能使用自动微分来学习模型的参数吗?

3、您能基于普朗克定律使用光谱能量密度来确定物体的温度吗？

4、如果你想计算二阶导数可能会遇到什么问题？你会如何解决这些问题？

5、为什么在squared_loss函数中需要使用reshape函数？

6、尝试使用不同的学习率，观察损失函数值下降的快慢。

7、如果样本个数不能被批量大小整除，data_iter函数的行为会有什么变化？

公告

搜索

常用链接

随笔分类

随笔档案

阅读排行榜