点击查看代码
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
# Data pipeline hyperparameters: mini-batch size and the number of time
# steps (characters) per subsequence drawn from "The Time Machine" corpus.
batch_size, num_steps = 32, 35
# train_iter yields (X, Y) index batches; vocab maps chars <-> indices.
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)
# Size of the RNN hidden state.
num_hiddens = 256
"""
input_size – The number of expected features in the input x
hidden_size – The number of features in the hidden state h
num_layers – Number of recurrent layers.
E.g., setting num_layers=2 would mean stacking two RNNs
together to form a stacked RNN, with the second RNN taking
in outputs of the first RNN and computing the final results.
Default: 1
nonlinearity – The non-linearity to use.
Can be either 'tanh' or 'relu'.
Default: 'tanh'
bias – If False, then the layer does not use bias weights b_ih and b_hh.
Default: True
batch_first – If True, then the input and output tensors are provided as
(batch, seq, feature) instead of (seq, batch, feature).
Note that this does not apply to hidden or cell states.
See the Inputs/Outputs sections below for details.
Default: False (seq, batch, feature)
dropout – If non-zero, introduces a Dropout layer on the outputs of each RNN layer
except the last layer, with dropout probability equal to dropout.
Default: 0
bidirectional – If True, becomes a bidirectional RNN.
Default: False
"""
# High-level RNN layer: input feature size = vocab size (one-hot encoding),
# 256 hidden units, single layer, unidirectional.
rnn_layer = nn.RNN(len(vocab), num_hiddens)
# Initial hidden state, shape (num_layers * num_directions, batch, hiddens).
state = torch.zeros((1, batch_size, num_hiddens))
# Dummy time-major input (seq, batch, feature) just to probe output shapes.
X = torch.rand(size=(num_steps, batch_size, len(vocab)))
# Y: hidden states at every step; state_new: hidden state after the last step.
Y, state_new = rnn_layer(X, state)
print(Y.shape, state_new.shape)
"""
(seq, batch, feature)
torch.Size([35, 32, 256]) torch.Size([1, 32, 256])
"""
class RNNModel(nn.Module):
    """RNN language model: a recurrent layer (``nn.RNN``/``nn.GRU``/``nn.LSTM``)
    followed by a fully connected layer that maps hidden states to
    per-character logits over the vocabulary.
    """
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        # A bidirectional layer concatenates forward and backward hidden
        # states, doubling the feature size fed into the output layer.
        self.num_directions = 2 if self.rnn.bidirectional else 1
        self.linear = nn.Linear(self.num_hiddens * self.num_directions,
                                self.vocab_size)

    def forward(self, inputs, state):
        # inputs: (batch, steps) integer token indices. Transpose to
        # time-major (steps, batch) — the recurrent layer's default layout —
        # then one-hot encode to (steps, batch, vocab_size).
        encoded = F.one_hot(inputs.T.long(), self.vocab_size).to(torch.float32)
        hiddens, state = self.rnn(encoded, state)
        # Collapse time and batch dims so the linear layer produces logits of
        # shape (steps * batch, vocab_size).
        logits = self.linear(hiddens.reshape((-1, hiddens.shape[-1])))
        return logits, state

    def begin_state(self, device, batch_size=1):
        # Zero state of shape (num_directions * num_layers, batch, hiddens).
        shape = (self.num_directions * self.rnn.num_layers,
                 batch_size, self.num_hiddens)
        if isinstance(self.rnn, nn.LSTM):
            # An LSTM carries a (hidden state, memory cell) pair.
            return (torch.zeros(shape, device=device),
                    torch.zeros(shape, device=device))
        return torch.zeros(shape, device=device)
device = d2l.try_gpu()
net = RNNModel(rnn_layer, vocab_size=len(vocab))
net = net.to(device)
# Prediction before training: the randomly initialized model emits gibberish.
re = d2l.predict_ch8('time traveller', 10, net, vocab, device)
print(re)
num_epochs, lr = 500, 1
# BUG FIX: num_epochs/lr were defined but never used, and no 'rnn.params'
# file was ever written, so the load below would fail. Train the model and
# persist its parameters first.
d2l.train_ch8(net, train_iter, vocab, lr, num_epochs, device)
torch.save(net.state_dict(), 'rnn.params')
clone = RNNModel(rnn_layer, vocab_size=len(vocab))
clone = clone.to(device)
clone.load_state_dict(torch.load('rnn.params'))
# BUG FIX: the original predicted with `net` again here; predicting with
# `clone` actually verifies that the reloaded parameters reproduce the
# trained model's behavior.
re = d2l.predict_ch8('time traveller', 10, clone, vocab, device)
print(re)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)