51序列模型

点击查看代码

import torch
from torch import nn
from d2l import torch as d2l

"""
𝑥𝑡∼𝑃(𝑥𝑡∣𝑥𝑡−1,…,𝑥1)
在时间t观察到𝑥𝑡，那么得到T个不独立的随机变量(𝑥1,…,xT)~p(x)
使用条件概率展开
p(a,b) = p(a)p(b|a) = p(b)p(a|b)

序列模型
当前数据跟之前观察到的数据相关
p(x) = p(x1)p(x2|x1)p(x3|x1,x2)…p(xT|x1,…xT-1)
对条件概率建模
p(xT|x1,…xT-1) = p(xT|f(x1,…xT-1))
                      对见过的数据建模，自回归模型：使用过去数据预测未来
                      
马尔可夫假设
假设当前数据只跟过去τ个数据点相关
p(xt|x1,…xt-1) = p(xt|xt-τ,…xt-1) = p(xt|f(xt-τ,…xt-1))
                                         在过去数据上训练一个MLP模型
                                         
潜变量模型
p(x) = p(x1)p(x2|x1)p(x3|x1,x2)…p(xT|x1,…xT-1)
引入潜变量ht来概括过去信息：ht = f(x1,…,xt-1)
                      此时 xt ~ p(xt|ht)
"""

# Markov assumption + MLP

# Synthetic series: a sine wave with additive Gaussian noise, sampled at the
# integer time steps 1..T.
T = 1000  # total number of generated points
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(mean=0.0, std=0.2, size=(T,))
print('x.shape', x.shape)
# Optional: uncomment to look at the raw series.
# d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))
# d2l.plt.show()

# Turn the series into (window, target) pairs:
#   target  y_t = x_t
#   window  x_t = [x_{t-tau}, ..., x_{t-1}]
tau = 4
# Column k holds the series shifted by k steps, so row r is the window
# x[r], x[r+1], ..., x[r+tau-1]:
#   row 0   -> indices 0 1 2 3
#   row 1   -> indices 1 2 3 4
#   ...
#   row 995 -> indices 995 996 997 998
features = torch.stack(
    [x[shift:T - tau + shift] for shift in range(tau)], dim=1
)
print('features.shape', features.shape)

# The target of row r is the observation right after its window, i.e.
# indices 4 ... 999, stored as a column vector.
labels = x[tau:].unsqueeze(1)
print('labels.shape', labels.shape)

# Mini-batch size, and how many leading (window, target) pairs are used for
# training; the remaining pairs are never shown to the optimiser.
batch_size = 16
n_train = 600
train_arrays = (features[:n_train], labels[:n_train])
train_iter = d2l.load_array(train_arrays, batch_size=batch_size, is_train=True)

# Multilayer perceptron
def init_weights(m):
    """Re-initialise the weight of a plain ``nn.Linear`` with Xavier-uniform.

    Written to be passed to ``nn.Module.apply``. Any module whose type is not
    exactly ``nn.Linear`` is skipped, and biases are never modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

def get_net(num_inputs=4, num_hiddens=10):
    """Build the one-hidden-layer MLP used as the autoregressive model.

    Args:
        num_inputs: Number of past observations fed to the network (the
            window length). Defaults to 4, the value that used to be
            hard-coded, so ``get_net()`` behaves as before.
        num_hiddens: Width of the hidden layer. Defaults to 10.

    Returns:
        An ``nn.Sequential`` of Linear -> ReLU -> Linear with one output,
        after ``init_weights`` has been applied to it.
    """
    net = nn.Sequential(
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, 1),
    )
    # Module.apply(fn) calls fn recursively on every submodule and finally on
    # the container itself.
    net.apply(init_weights)
    return net

# Mean-squared-error criterion for the regression objective.
loss = nn.MSELoss()

# Training
def train(net, train_iter, loss, epchos, lr):
    """Fit ``net`` with Adam and print the loss after every epoch.

    Args:
        net: Model to optimise; its parameters are updated in place.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable computing the loss from ``(net(X), y)``.
        epchos: Number of passes over ``train_iter``.
        lr: Learning rate handed to ``torch.optim.Adam``.

    Returns:
        None. Progress is reported through ``print``.
    """
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    for epoch_number in range(1, epchos + 1):
        for inputs, targets in train_iter:
            optimizer.zero_grad()
            batch_loss = loss(net(inputs), targets)
            batch_loss.backward()
            optimizer.step()
        # Value reported is whatever d2l.evaluate_loss returns for the
        # training iterator after this epoch's updates.
        epoch_loss = d2l.evaluate_loss(net, train_iter, loss)
        print("epoch {}, loss: {}".format(epoch_number, epoch_loss))

net = get_net()
num_epochs = 5
lr = 0.01
params_path = 'mlp51.params'
# Reuse cached weights when they exist. On a first run the file is missing,
# so train from scratch and cache the result instead of crashing.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
try:
    state_dict = torch.load(params_path)
except FileNotFoundError:
    train(net, train_iter, loss, epchos=num_epochs, lr=lr)
    torch.save(net.state_dict(), params_path)
else:
    net.load_state_dict(state_dict)
# One-step-ahead prediction: every window of tau true observations is mapped
# to the next value (4 inputs -> 1 output).
# This is pure inference, so autograd tracking is switched off.
with torch.no_grad():
    onestep_preds = net(features)
print('onestep_preds.shape', onestep_preds.shape)
# Predictions start at time step tau + 1, hence the shifted time axis.
d2l.plot(
    [time, time[tau:]],
    [x.detach().numpy(), onestep_preds.detach().numpy()], 'time', 'x',
    legend=['data', '1-step preds'], xlim=[1, T], figsize=(6, 3)
)
# No d2l.plt.show() here; uncomment to display this figure on its own.
# d2l.plt.show()

# Multi-step prediction: past the training region the model is fed its own
# earlier outputs instead of true observations.
multistep_preds = torch.zeros(T)
# Indices 0..603 are seeded with the observed series.
multistep_preds[:n_train + tau] = x[:n_train + tau]
# Indices 604..999 are filled recursively; each target uses the tau values
# just before it:
#   604 <- 600 601 602 603
#   605 <- 601 602 603 604
#   ...
#   999 <- 995 996 997 998
# Inference only: without no_grad every in-place write would extend an
# autograd graph spanning the whole rollout.
with torch.no_grad():
    for i in range(n_train + tau, T):
        multistep_preds[i] = net(multistep_preds[i - tau:i].reshape(1, -1))

d2l.plot([time, time[tau:], time[n_train + tau:]],
         [x.detach().numpy(), onestep_preds.detach().numpy(),
          multistep_preds[n_train + tau:].detach().numpy()], 'time',
         'x', legend=['data', '1-step preds', 'multistep preds'],
         xlim=[1, T], figsize=(6, 3))
d2l.plt.show()

# 难点：预测远的未来