pytorch(5) 多项式回归模型选择、欠拟合和过拟合 - MKT-porter

随笔- 1525 文章- 2 评论- 66 阅读- 127万

pytorch(5) 多项式回归模型选择、欠拟合和过拟合

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

import math
import numpy as np
import torch
from torch import nn
from d2l import torch as d2l
 
# ---------------------------------------------------------------------------
# Synthetic data for the polynomial-regression experiment.
#
# Ground-truth model (only the first four coefficients are non-zero):
#     y = 5 + 1.2 * x - 3.4 * x^2 / 2! + 5.6 * x^3 / 3! + noise
# with noise drawn from a normal distribution with standard deviation 0.1.
# ---------------------------------------------------------------------------
max_degree = 20  # number of polynomial terms generated per sample (x^0 .. x^19)
n_train, n_test = 100, 100  # sizes of the training and test sets

# Coefficient vector of length max_degree: [5, 1.2, -3.4, 5.6, 0, ..., 0].
true_w = np.zeros(max_degree)
true_w[:4] = [5, 1.2, -3.4, 5.6]

# One standard-normal scalar x per sample, shape (n_train + n_test, 1).
features = np.random.normal(size=(n_train + n_test, 1))
# Shuffle the rows in place.
np.random.shuffle(features)

# Raise every x to the powers 0 .. max_degree - 1 by broadcasting a column
# against a row.  Example with features [[1], [2]]:
#     [[1, 1, 1, 1, ..., 1],
#      [1, 2, 4, 8, ..., 524288]]          -> shape (2, 20)
# Here the result has shape (n_train + n_test, max_degree).
exponents = np.arange(max_degree)[None, :]
poly_features = np.power(features, exponents)

# Rescale column i by 1 / i! so the high-order terms stay in a reasonable
# numeric range: x^i -> x^i / i!.  Note that math.gamma(n) == (n - 1)!, hence
# math.gamma(i + 1) == i!.
factorials = np.array([math.gamma(k + 1) for k in range(max_degree)])
poly_features /= factorials

# Row n of poly_features is now
#     [1, x_n / 1!, x_n^2 / 2!, ..., x_n^19 / 19!]
# and its noise-free label is the dot product with true_w:
#     y_n = w0 + w1 * x_n / 1! + w2 * x_n^2 / 2! + ... + w19 * x_n^19 / 19!
# labels has shape (n_train + n_test,).
labels = poly_features.dot(true_w)
# Add Gaussian observation noise (standard deviation 0.1) to every label.
labels = labels + np.random.normal(scale=0.1, size=labels.shape)

# Convert every NumPy ndarray to a float32 torch tensor.
true_w, features, poly_features, labels = (
    torch.tensor(arr, dtype=torch.float32)
    for arr in (true_w, features, poly_features, labels)
)
 
 
def evaluate_loss(net, data_iter, loss): #@save
    """Return the mean per-element loss of a model over a dataset.

    Args:
        net: Callable model; invoked as ``net(X)`` on every batch.
        data_iter: Iterable yielding ``(X, y)`` batches.
        loss: Callable ``loss(out, y)`` returning a tensor of loss values
            (for example a loss built with ``reduction='none'``).

    Returns:
        float: the sum of all loss values divided by the total number of
        loss elements seen across the batches.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no batches.
    """
    total_loss = 0.0  # running sum of the loss values
    n_elements = 0    # running count of loss elements
    # Pure evaluation: disable autograd so no computation graph is recorded
    # for the forward passes.
    with torch.no_grad():
        for X, y in data_iter:
            out = net(X)
            # Give the targets the same shape as the predictions so the loss
            # is computed element-wise rather than via broadcasting.
            y = y.reshape(out.shape)
            l = loss(out, y)
            total_loss += float(l.sum())
            n_elements += l.numel()
    return total_loss / n_elements
 
 
 
def train(train_features, test_features, train_labels, test_labels,num_epochs=400):
    """Fit a bias-free linear model on the given features and plot its losses.

    Args:
        train_features: Training inputs; the size of the last dimension is
            used as the number of model inputs.
        test_features: Test inputs.
        train_labels: Training targets; reshaped to a column before use.
        test_labels: Test targets; reshaped to a column before use.
        num_epochs: Number of passes over the training data (default 400).

    Returns:
        None. The training and test losses are sent to a ``d2l.Animator``
        and the learned weights are printed at the end.
    """
    # Unreduced squared error: one loss value per sample.
    criterion = nn.MSELoss(reduction='none')
    n_inputs = train_features.shape[-1]
    # No bias term: the constant feature is expected to be supplied as part
    # of the polynomial features.
    net = nn.Sequential(nn.Linear(n_inputs, 1, bias=False))
    # Mini-batches of at most 10 samples.
    batch_size = min(10, train_labels.shape[0])
    train_targets = train_labels.reshape(-1, 1)
    test_targets = test_labels.reshape(-1, 1)
    train_iter = d2l.load_array((train_features, train_targets), batch_size)
    test_iter = d2l.load_array((test_features, test_targets), batch_size,
                               is_train=False)
    # Plain SGD with a fixed learning rate.
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    # Loss curves for the 'train' and 'test' series, requested with
    # yscale='log'.
    animator = d2l.Animator(xlabel='epoch', ylabel='loss', yscale='log',
                            xlim=[1, num_epochs], ylim=[1e-3, 1e2],
                            legend=['train', 'test'])
    for epoch in range(1, num_epochs + 1):
        d2l.train_epoch_ch3(net, train_iter, criterion, trainer)
        # Record the losses after the first epoch and then every 20 epochs.
        if epoch == 1 or epoch % 20 == 0:
            train_loss = evaluate_loss(net, train_iter, criterion)
            test_loss = evaluate_loss(net, test_iter, criterion)
            animator.add(epoch, (train_loss, test_loss))
    print('weight:', net[0].weight.data.numpy())
 
 
# Fit using only the first four polynomial features: 1, x, x^2/2!, x^3/3!.
# With n_train = 100 the arguments are:
#   training inputs : poly_features[:n_train, :4]  (first n_train rows, first 4 columns)
#   test inputs     : poly_features[n_train:, :4]  (remaining rows, first 4 columns)
#   training targets: labels[:n_train]             (noisy labels of the first n_train rows)
#   test targets    : labels[n_train:]             (noisy labels of the remaining rows)
train(
    poly_features[:n_train, :4],
    poly_features[n_train:, :4],
    labels[:n_train],
    labels[n_train:],
)

# Data layout reminder.
#   Model:    y = w0 * 1 + w1 * x/1! + w2 * x^2/2! + ... + w19 * x^19/19!
#   true_w:   [w0, w1, ..., w19]
#   features: [x_1, x_2, ..., x_200]
#   poly_features, shape (200, 20), one row per sample:
#       row 1   (X_1):   1   x_1/1!     x_1^2/2!     ...  x_1^i/i!     ...  x_1^19/19!
#       row 2   (X_2):   1   x_2/1!     x_2^2/2!     ...  x_2^i/i!     ...  x_2^19/19!
#       ...
#       row 200 (X_200): 1   x_200/1!   x_200^2/2!   ...  x_200^i/i!   ...  x_200^19/19!
#   labels, one value per sample (weighted sum of the row plus noise):
#       Y_1   = w0 + w1 * x_1/1!   + w2 * x_1^2/2!   + ... + w19 * x_1^19/19!   + noise
#       ...
#       Y_200 = w0 + w1 * x_200/1! + w2 * x_200^2/2! + ... + w19 * x_200^19/19! + noise

模型复杂度

我们应该选择一个复杂度适当的模型，避免使用数量不足的训练样本。

数据集大小

另一个重要因素是数据集的大小。训练数据集中的样本越少，我们就越有可能（而且更严重地）过拟合。随着训练数据量的增加，泛化误差通常会减小。另外，一般来说，更多的数据不会有什么坏处。对于固定的任务和数据分布，模型复杂度和数据集大小之间通常存在关系。给出更多的数据，我们可以尝试拟合一个更复杂的模型；能够拟合更复杂的模型可能是有益的。如果没有足够的数据，简单的模型可能更有用。对于许多任务，深度学习只有在有数千个训练样本时才优于线性模型。从一定程度上来说，深度学习目前的生机要归功于廉价存储、互联设备以及数字化经济带来的海量数据集。