import torch
import torch.nn as nn
import time
import math
import sys
sys.path.append("/home/kesci/input")
import d2l_jay9460 as d2l
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
The arguments of the forward function are:

input of shape (num_steps, batch_size, input_size): tensor containing the features of the input sequence.
h_0 of shape (num_layers * num_directions, batch_size, hidden_size): tensor containing the initial hidden state for each element in the batch. Defaults to zero if not provided. If the RNN is bidirectional, num_directions should be 2, else it should be 1.
The return values of the forward function are:
output of shape (num_steps, batch_size, num_directions * hidden_size): tensor containing the output features (h_t) from the last layer of the RNN, for each t.
h_n of shape (num_layers * num_directions, batch_size, hidden_size): tensor containing the hidden state for t = num_steps.
Now we construct an nn.RNN instance and use a simple example to check the shape of its outputs.
num_hiddens = 256  # number of hidden units
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)
num_steps, batch_size = 35, 2
X = torch.rand(num_steps, batch_size, vocab_size)
state = None
Y, state_new = rnn_layer(X, state)
print(Y.shape, state_new.shape)
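With num_hiddens = 256 and a single-layer, unidirectional RNN, this should print torch.Size([35, 2, 256]) torch.Size([1, 2, 256]): Y keeps the outputs of the last layer at every time step, while state_new keeps only the hidden state at the final time step.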
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char,
                        char_to_idx):
    state = None
    output = [char_to_idx[prefix[0]]]  # output records the prefix plus the num_chars predicted characters
    for t in range(num_chars + len(prefix) - 1):
        X = torch.tensor([output[-1]], device=device).view(1, 1)
        (Y, state) = model(X, state)  # the forward pass does not need the model parameters passed in
        if t < len(prefix) - 1:
            output.append(char_to_idx[prefix[t + 1]])
        else:
            output.append(Y.argmax(dim=1).item())
    return ''.join([idx_to_char[i] for i in output])
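predict_rnn_pytorch relies on the RNNModel wrapper, which is defined earlier in the tutorial and does not appear in this excerpt. A minimal sketch of such a wrapper, assuming inputs of shape (batch_size, num_steps) that are one-hot encoded before being fed to rnn_layer; the details here follow the d2l convention and should be treated as an assumption:

import torch
import torch.nn as nn
import torch.nn.functional as F

class RNNModel(nn.Module):
    """Wraps an nn.RNN layer with a dense output layer over the vocabulary."""
    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)

    def forward(self, inputs, state):
        # inputs: (batch_size, num_steps) -> one-hot: (num_steps, batch_size, vocab_size)
        X = F.one_hot(inputs.T.long(), self.vocab_size).float()
        hiddens, state = self.rnn(X, state)
        # Flatten to (num_steps * batch_size, hidden_size) before the output layer
        hiddens = hiddens.view(-1, hiddens.shape[-1])
        output = self.dense(hiddens)
        return output, state

The dense layer maps the flattened hidden states to vocabulary logits, so output has shape (num_steps * batch_size, vocab_size), which is what both the prediction and training code expect.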
Let us use the model, whose weights are still random values, to make one prediction.
model = RNNModel(rnn_layer, vocab_size).to(device)
predict_rnn_pytorch('分开',10, model, vocab_size, device, idx_to_char, char_to_idx)
'分开胸呵以轮轮轮轮轮轮轮'
Next we implement the training function; only consecutive sampling is used here.
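Because consecutive sampling keeps adjacent minibatches contiguous in the corpus, the hidden state can be carried over (and detached) across batches in the loop below. For reference, a sketch of what d2l.data_iter_consecutive does; the exact implementation lives in the d2l_jay9460 package, so treat this as an assumption:

def data_iter_consecutive(corpus_indices, batch_size, num_steps, device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus_indices = torch.tensor(corpus_indices, dtype=torch.float32, device=device)
    data_len = len(corpus_indices)
    batch_len = data_len // batch_size
    # Reshape the corpus into batch_size parallel, contiguous streams
    indices = corpus_indices[0: batch_size * batch_len].view(batch_size, batch_len)
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        i = i * num_steps
        X = indices[:, i: i + num_steps]
        Y = indices[:, i + 1: i + num_steps + 1]  # labels are the inputs shifted by one
        yield X, Y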
def train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                                  corpus_indices, idx_to_char, char_to_idx,
                                  num_epochs, num_steps, lr, clipping_theta,
                                  batch_size, pred_period, pred_len, prefixes):
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, device)  # consecutive sampling
        state = None
        for X, Y in data_iter:
            if state is not None:
                # Use detach to separate the hidden state from the computation graph
                if isinstance(state, tuple):  # LSTM, state: (h, c)
                    state[0].detach_()
                    state[1].detach_()
                else:
                    state.detach_()
            (output, state) = model(X, state)
            # output.shape: (num_steps * batch_size, vocab_size)
            y = torch.flatten(Y.T)
            l = loss(output, y.long())

            optimizer.zero_grad()
            l.backward()
            grad_clipping(model.parameters(), clipping_theta, device)
            optimizer.step()

            l_sum += l.item() * y.shape[0]
            n += y.shape[0]

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_pytorch(
                    prefix, pred_len, model, vocab_size, device, idx_to_char,
                    char_to_idx))
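The grad_clipping helper called in the loop is also defined earlier in the tutorial. A sketch of the global-norm clipping it performs, an assumption based on the d2l version; params is materialized into a list here because model.parameters() returns a generator:

def grad_clipping(params, theta, device):
    params = list(params)  # materialize: model.parameters() returns a generator
    norm = torch.tensor([0.0], device=device)
    for param in params:
        norm += (param.grad.data ** 2).sum()
    norm = norm.sqrt().item()
    if norm > theta:
        # Scale all gradients so the global L2 norm equals theta
        for param in params:
            param.grad.data *= (theta / norm)

A hedged usage example of the training function; the hyperparameter values below follow the original tutorial and are illustrative rather than prescribed:

num_epochs, batch_size, lr, clipping_theta = 250, 32, 1e-3, 1e-2
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']
train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                              corpus_indices, idx_to_char, char_to_idx,
                              num_epochs, num_steps, lr, clipping_theta,
                              batch_size, pred_period, pred_len, prefixes)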