Practice Problems 1
import torch
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline
1. Generate the dataset
\[\boldsymbol{y} = \boldsymbol{X}\boldsymbol{w} + b + \epsilon
\]
where the noise term \(\epsilon\) follows a normal distribution with mean 0 and standard deviation 0.01.
Some of the values are already given:
num_inputs = 2       # number of features
num_examples = 1000  # number of examples
true_w = [2, -3.4]   # true w
true_b = 4.2         # true b
num_inputs = 2
num_examples = 1000
true_w = np.array([2, -3.4])
true_b = 4.2
X = np.random.randn(num_examples, num_inputs)          # features, shape (1000, 2)
epsilon = np.random.normal(0, 0.01, size=X.shape[0])   # noise term
y = np.dot(X, true_w) + true_b + epsilon               # labels
y = torch.from_numpy(y)
# Visualization: scatter the second feature against the labels
plt.plot(X[:, 1], y.numpy(), 'o')
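The later exercises work with torch tensors, so it can also help to keep float32 tensor copies of the generated data. A small sketch; the names features and labels are my own:

features = torch.tensor(X, dtype=torch.float32)   # float32 copy of the feature matrix
labels = y.float()                                  # float32 copy of the labels
print(features.shape, labels.shape)                 # torch.Size([1000, 2]) torch.Size([1000])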
2. Define a function that returns the features and labels of batch_size random samples at a time.
The signature is already given:
def data_iter(batch_size, features, labels):
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)                     # read the samples in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = indices[i: min(i + batch_size, num_examples)]
        yield features[batch_indices, :], labels[batch_indices]
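A quick way to check data_iter is to pull one batch and look at its shapes; this assumes the features/labels tensors defined above:

batch_size = 10
for X_batch, y_batch in data_iter(batch_size, features, labels):
    print(X_batch.shape, y_batch.shape)   # torch.Size([10, 2]) torch.Size([10])
    break                                  # only inspect the first batch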
3. What are the usual steps in training a model?
- Define the model
- Initialize the parameters
- Define the loss function
- Define the optimization algorithm
- Train the model: load the data and call steps 1-4
4. Initialize the linear model's parameters w and b.
Requirements: w is drawn from a normal distribution with mean 0 and standard deviation 0.01, b is 0, and autograd is enabled for both.
W = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)
# Enable autograd so gradients are tracked for both parameters
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
5. Write the SGD optimization algorithm
def sgd(params, lr, batch_size):
def sgd(params, lr, batch_size):
    for param in params:
        # update on .data so the step itself is not tracked by autograd
        param.data -= lr * param.grad / batch_size
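Putting Exercises 1-5 together, a minimal from-scratch training loop sketch. It assumes the features/labels tensors, data_iter, W, b, and sgd from above; the linreg and squared_loss helpers and the hyperparameter values are introduced here only for illustration:

def linreg(X, W, b):
    # the linear model: y = XW + b
    return torch.mm(X, W) + b

def squared_loss(y_hat, y):
    # squared loss per sample; summed per batch below
    return (y_hat - y.view(y_hat.shape)) ** 2 / 2

lr, num_epochs, batch_size = 0.03, 3, 10
for epoch in range(num_epochs):
    for X_batch, y_batch in data_iter(batch_size, features, labels):
        l = squared_loss(linreg(X_batch, W, b), y_batch).sum()
        l.backward()                   # compute gradients of W and b
        sgd([W, b], lr, batch_size)    # update the parameters
        W.grad.data.zero_()            # reset gradients for the next batch
        b.grad.data.zero_()
    with torch.no_grad():
        epoch_l = squared_loss(linreg(features, W, b), labels).mean()
    print('epoch %d, loss %f' % (epoch + 1, epoch_l.item()))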
6. Build a linear model with nn.Module and with Sequential, respectively.
Input size: 10
Output size: 1
import torch.nn as nn

# Method 1: subclass nn.Module
class LinearNet(nn.Module):
    def __init__(self):
        super(LinearNet, self).__init__()
        self.Linear = nn.Linear(10, 1)

    def forward(self, X):
        return self.Linear(X)

# Method 2: pass the layers to nn.Sequential directly
net = nn.Sequential(
    nn.Linear(10, 1)
)
# Method 3: add named modules one by one
net = nn.Sequential()
net.add_module('Linear', nn.Linear(10, 1))
# Method 4: nn.Sequential with an OrderedDict of named modules
from collections import OrderedDict
net = nn.Sequential(
    OrderedDict([
        ('Linear', nn.Linear(10, 1))
    ])
)
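Any of these constructions can be sanity-checked by running a random batch through the network and inspecting the parameters; a small sketch using the last net defined above:

X_demo = torch.randn(4, 10)           # a batch of 4 samples with 10 features
print(net(X_demo).shape)              # torch.Size([4, 1])
for name, param in net.named_parameters():
    print(name, param.shape)          # Linear.weight: (1, 10), Linear.bias: (1,)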
7. Implement the softmax operation
def softmax(X):
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)   # row-wise normalization constant
    return X_exp / partition                     # broadcasting divides each row by its sum
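A quick check: each row of the output should be a valid probability distribution, i.e. non-negative and summing to 1:

X_demo = torch.rand(2, 5)
probs = softmax(X_demo)
print(probs)
print(probs.sum(dim=1))   # both row sums are 1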
8. Write a function that evaluates the accuracy of the model net on a dataset data_iter.
def evaluate_accuracy(data_iter, net):
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).sum().item()   # count correct predictions
        n += y.shape[0]
    return acc_sum / n
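A quick check with a hand-made batch (the toy scores and labels below are only for illustration):

X_toy = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])   # class scores
y_toy = torch.tensor([0, 1, 1])                               # true labels
toy_iter = [(X_toy, y_toy)]           # any iterable of (X, y) batches works
identity_net = lambda X: X            # a "net" that returns the scores unchanged
print(evaluate_accuracy(toy_iter, identity_net))   # 2 of 3 correct, about 0.667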
9. Implement the cross-entropy loss
def cross_entropy(y_hat, y):
def cross_entropy(y_hat, y):
    # gather along dim 1 picks, for each row, the predicted probability of the true class
    return -torch.log(y_hat.gather(1, y.view(-1, 1)))
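A small example of what gather does here, with hand-made predictions:

y_hat_demo = torch.tensor([[0.1, 0.3, 0.6],
                           [0.3, 0.2, 0.5]])
y_demo = torch.tensor([0, 2])         # true class indices
# gather picks y_hat_demo[0, 0] and y_hat_demo[1, 2], the predicted probabilities
# of the true classes, so the result is -log(0.1) and -log(0.5)
print(cross_entropy(y_hat_demo, y_demo))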
10. Complete the following model training procedure
def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
          optimizer=None):
# Helper from Exercise 8, reused here
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    return acc_sum / n

def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
          optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            optimizer.zero_grad()              # clear all gradients
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
11. The full implementation of softmax regression
- Define the model:
\[\boldsymbol{Y} = \boldsymbol{X}\boldsymbol{W} + \boldsymbol{b}
\]
(the softmax itself is folded into the cross-entropy loss below)
- Initialize the parameters:
num_inputs = 784
num_outputs = 10
- Define the loss function: cross-entropy loss
- Define the optimization algorithm: SGD with lr = 1e-3
- Train the model: load the data and call steps 1-4
Data:
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
from torch import nn, optim

batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10
net = nn.Sequential(
    nn.Flatten(),                      # (batch, 1, 28, 28) -> (batch, 784)
    nn.Linear(num_inputs, num_outputs)
)

loss = nn.CrossEntropyLoss()           # combines log-softmax and negative log-likelihood
optimizer = optim.SGD(net.parameters(), lr=1e-3)

num_epochs = 5
train(net, train_iter, test_iter, loss, num_epochs, batch_size,
      optimizer=optimizer)
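load_data_fashion_mnist is not defined in these exercises. A minimal sketch of such a loader built on torchvision, assuming the standard Fashion-MNIST dataset (the ./data download path is my own choice):

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

def load_data_fashion_mnist(batch_size):
    trans = transforms.ToTensor()      # PIL image -> (1, 28, 28) float tensor in [0, 1]
    train_set = torchvision.datasets.FashionMNIST(
        root='./data', train=True, download=True, transform=trans)
    test_set = torchvision.datasets.FashionMNIST(
        root='./data', train=False, download=True, transform=trans)
    return (DataLoader(train_set, batch_size=batch_size, shuffle=True),
            DataLoader(test_set, batch_size=batch_size, shuffle=False))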
12. The full implementation of a multilayer perceptron (MLP)
1. Define the model:
\[\boldsymbol{H} = \boldsymbol{X}\boldsymbol{W}_1 + \boldsymbol{b}_1 \\
\boldsymbol{Y} = \boldsymbol{H}\boldsymbol{W}_2 + \boldsymbol{b}_2
\]
Activation function: ReLU, applied to the hidden layer \(\boldsymbol{H}\)
2. Initialize the parameters:
num_inputs, num_outputs, num_hiddens = 784, 10, 256
3. Define the loss function:
cross-entropy loss
4. Define the optimization algorithm:
SGD with lr = 1e-3
5. Train the model: load the data and call steps 1-4
Data:
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
from torch import nn, optim

batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

num_inputs, num_outputs, num_hiddens = 784, 10, 256
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs)
)

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3)

num_epochs = 5
train(net, train_iter, test_iter, loss, num_epochs, batch_size,
      optimizer)
13. What are some ways to reduce overfitting?
- Weight decay (L2 regularization)
- Dropout (both are shown with the nn/optim API in the sketch below)
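With the nn/optim API both techniques are available directly: weight decay through the optimizer's weight_decay argument and dropout through nn.Dropout. A minimal sketch; the layer sizes and hyperparameter values are illustrative only:

from torch import nn, optim

net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.5),                  # randomly zero hidden activations during training
    nn.Linear(256, 10)
)
# weight_decay adds an L2 penalty on the parameters to every update
optimizer = optim.SGD(net.parameters(), lr=1e-3, weight_decay=1e-4)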
14. Write the loss function with an L2-norm penalty term added
def Loss(net, X, y, W, b, lambd):
def Loss(net, X, y, W, b, lambd):
    loss = ((net(X, W, b) - y) ** 2).sum()   # squared error on the batch
    loss += lambd * (W ** 2).sum()           # L2-norm penalty on the weights
    return loss
15. Given the dropout probability drop_prob, write the dropout function
def dropout(X, drop_prob):
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return torch.zeros_like(X)                    # everything is dropped
    mask = (torch.rand(X.shape) < keep_prob).float()  # 1 with probability keep_prob
    return mask * X / keep_prob                       # rescale to preserve the expected value
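A quick check of the behaviour on a small tensor:

X_demo = torch.arange(16, dtype=torch.float32).view(2, 8)
print(dropout(X_demo, 0))     # unchanged
print(dropout(X_demo, 0.5))   # roughly half the entries zeroed, the rest scaled by 2
print(dropout(X_demo, 1))     # all zeros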