Practice Problems 1

import torch
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

1. Generate the dataset

\[\boldsymbol{y} = \boldsymbol{X}\boldsymbol{w} + b + \epsilon \]

where the noise term \(\epsilon\) follows a normal distribution with mean 0 and standard deviation 0.01.
Some of the data is already given:

num_inputs = 2                  # number of features
num_examples = 1000             # number of samples
true_w = np.array([2, -3.4])    # true w
true_b = 4.2                    # true b

X = np.random.randn(num_examples, num_inputs)
epsilon = np.random.normal(0, 0.01, size=X.shape[0])   # noise term
y = np.dot(X, true_w) + true_b + epsilon
y = torch.from_numpy(y)

# Visualize the data
plt.plot(X[:, 1], y.numpy(), 'o')

2. Define a function that returns the features and labels of batch_size (the batch size) random samples each time it is called.

The beginning is already given:

def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle so each minibatch is a random sample
    for i in range(0, num_examples, batch_size):
        batch_indices = indices[i: min(i + batch_size, num_examples)]
        yield features[batch_indices, :], labels[batch_indices]
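
A quick way to check the iterator is to pull one minibatch from the synthetic data above (a minimal sketch; batch_size = 10 is just an illustrative choice):

batch_size = 10
for X_batch, y_batch in data_iter(batch_size, X, y):
    print(X_batch.shape, y_batch.shape)  # a (10, 2) feature batch and 10 labels
    break  # only inspect the first minibatch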

3. What are the general steps of training a model?

  1. Define the model
  2. Initialize the parameters
  3. Define the loss function
  4. Define the optimization algorithm
  5. Train the model: load the data and call steps 1-4

4. Initialize the linear model's parameters w and b

Requirements: w is drawn from a normal distribution with mean 0 and standard deviation 0.01, b is 0, and autograd is enabled for both.

W = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
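
For reference, a minimal sketch of the model and loss function these parameters would plug into (linreg and squared_loss are illustrative names in the from-scratch style of these exercises, not functions given by the original problem):

def linreg(X, w, b):
    # Linear regression forward pass: Xw + b
    return torch.mm(X, w) + b

def squared_loss(y_hat, y):
    # Per-sample squared loss; dividing by 2 gives a cleaner gradient
    return (y_hat - y.view(y_hat.size())) ** 2 / 2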

5. Write the sgd optimization algorithm

def sgd(params, lr, batch_size):
    for param in params:
        # Update .data in place so the update itself is not tracked by autograd
        param.data -= lr * param.grad / batch_size
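
Putting exercises 1-5 together, a minimal from-scratch training loop might look like this (a sketch assuming the linreg and squared_loss helpers sketched above; lr = 0.03 and num_epochs = 3 are illustrative choices):

lr = 0.03
num_epochs = 3
batch_size = 10
features = torch.from_numpy(X).float()
labels = y.float()

for epoch in range(num_epochs):
    for X_batch, y_batch in data_iter(batch_size, features, labels):
        l = squared_loss(linreg(X_batch, W, b), y_batch).sum()
        l.backward()                 # compute gradients w.r.t. W and b
        sgd([W, b], lr, batch_size)  # update the parameters
        W.grad.data.zero_()          # reset gradients for the next minibatch
        b.grad.data.zero_()
    with torch.no_grad():
        train_l = squared_loss(linreg(features, W, b), labels).mean()
    print('epoch %d, loss %f' % (epoch + 1, train_l.item()))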

6. Build a linear model with nn.Module and with Sequential, respectively

Input dimension: 10
Output dimension: 1

import torch.nn as nn

# With nn.Module: subclass it and implement forward
class LinearNet(nn.Module):
    def __init__(self):
        super(LinearNet, self).__init__()
        self.Linear = nn.Linear(10, 1)
        
    def forward(self, X):
        return self.Linear(X)

# Sequential, method 1: pass the layers directly
net = nn.Sequential(
    nn.Linear(10, 1)
)

# Sequential, method 2: add modules by name
net = nn.Sequential()
net.add_module('Linear', nn.Linear(10, 1))

# Sequential, method 3: an OrderedDict of named layers
from collections import OrderedDict
net = nn.Sequential(
    OrderedDict([
        ('Linear', nn.Linear(10, 1))
    ])
)
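
A quick shape check works for any of the definitions above (a minimal sketch; the random input is only for verifying dimensions):

X_demo = torch.randn(4, 10)       # a batch of 4 samples with 10 features
print(net(X_demo).shape)          # expect torch.Size([4, 1])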

7. Implement the softmax operation

def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim=1, keepdim=True)  # row-wise normalizer
    return X_exp / partition                    # broadcasts over each row
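
A quick check that each row of the output is a valid probability distribution (a minimal sketch with random inputs):

logits = torch.randn(2, 5)
probs = softmax(logits)
print(probs.sum(dim=1))  # each row sums to 1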

8. Write a function to evaluate the accuracy of the model net on the dataset data_iter

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    return acc_sum / n
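
A small smoke test with random data (a sketch using an untrained 10-class model, whose accuracy should land near 0.1):

net_demo = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
fake_iter = [(torch.randn(64, 1, 28, 28), torch.randint(0, 10, (64,)))]
print(evaluate_accuracy(fake_iter, net_demo))  # roughly 0.1 for an untrained model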

9. Implement cross-entropy loss

def cross_entropy(y_hat, y):
    # gather picks the predicted probability of the true class for each sample
    return -torch.log(y_hat.gather(1, y.view(-1, 1)))
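
As a cross-check, averaging this per-sample loss over a batch should agree with PyTorch's built-in nn.CrossEntropyLoss, which applies softmax internally (a minimal sketch reusing the softmax from exercise 7):

logits = torch.randn(4, 10)
y_true = torch.randint(0, 10, (4,))
manual = cross_entropy(softmax(logits), y_true).mean()
builtin = nn.CrossEntropyLoss()(logits, y_true)
print(manual.item(), builtin.item())  # the two values should agree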

10. Complete the following model training procedure

def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
          optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            optimizer.zero_grad()  # clear the gradients of all parameters
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

11. The complete implementation of softmax regression

  1. Define the model:

\[\boldsymbol{O} = \boldsymbol{X}\boldsymbol{W} + \boldsymbol{b}, \quad \hat{\boldsymbol{Y}} = \mathrm{softmax}(\boldsymbol{O}) \]

  2. Initialize the parameters:
    num_inputs = 784
    num_outputs = 10
  3. Define the loss function:
    cross-entropy loss
  4. Define the optimization algorithm:
    SGD with lr = 1e-3
  5. Train the model: load the data and call steps 1-4

Data:

from torch import nn, optim

batch_size = 256
# load_data_fashion_mnist is the Fashion-MNIST loading helper assumed to be available from earlier chapters
train_iter, test_iter = load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10

net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(num_inputs, num_outputs)
)
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3)

num_epochs = 5
train(net, train_iter, test_iter, loss, num_epochs, batch_size,
              optimizer=optimizer)

12. The complete implementation of a multilayer perceptron (MLP)

  1. Define the model:

\[\boldsymbol{H} = \phi(\boldsymbol{X}\boldsymbol{W}_1 + \boldsymbol{b}_1) \\ \boldsymbol{O} = \boldsymbol{H}\boldsymbol{W}_2 + \boldsymbol{b}_2 \]

    Activation function \(\phi\): ReLU
  2. Initialize the parameters:
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
  3. Define the loss function:
    cross-entropy loss
  4. Define the optimization algorithm:
    SGD with lr = 1e-3
  5. Train the model: load the data and call steps 1-4

Data:

from torch import nn, optim

batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

num_inputs, num_outputs, num_hiddens = 784, 10, 256
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs)
)

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3)

num_epochs = 5
train(net, train_iter, test_iter, loss, num_epochs, batch_size,
         optimizer)

13. What methods can reduce overfitting?

  • Weight decay (L2 regularization)
  • Dropout
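
With the built-in PyTorch API, both techniques take one line each (a minimal sketch; the layer sizes and hyperparameter values are illustrative):

# Dropout as a layer in a built-in MLP (drop probability 0.5 here)
net_drop = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 10)
)

# Weight decay (L2 regularization) via the optimizer's weight_decay argument
optimizer_wd = optim.SGD(net_drop.parameters(), lr=1e-3, weight_decay=1e-4)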

14. Write a loss function with an L2-norm penalty term added

def Loss(net, X, y, W, b, lambd):
    loss = ((net(X, W, b) - y) ** 2).sum()   # squared-error term
    loss += lambd * (W ** 2).sum()           # L2-norm penalty on the weights
    return loss

15. Given a drop probability drop_prob, write the dropout function

def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob

    # If everything is dropped, return all zeros
    if keep_prob == 0:
        return torch.zeros_like(X)

    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob  # rescale so the expected value is unchanged
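
A quick demonstration on a small tensor (a sketch; with drop_prob = 0.5 roughly half of the entries are zeroed and the survivors are scaled by 2):

X_demo = torch.arange(16, dtype=torch.float32).view(2, 8)
print(dropout(X_demo, 0))     # unchanged
print(dropout(X_demo, 0.5))   # roughly half the entries zeroed, the rest scaled by 2
print(dropout(X_demo, 1))     # all zeros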