PyTorch -- RNN language models (LSTM, BiLSTM) -- "Recurrent neural network based language model"
The paper uses a recurrent neural network to build a language model, i.e. to predict the next word from the preceding words; the code below applies the same idea at word level and at character level.
Paper link: 88888888
Model architecture diagram (figure not reproduced in this text version):
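In place of the figure, here is a minimal sketch of one recurrence step of an RNN language model: the hidden state is updated from the current word's one-hot vector and the previous hidden state, and a softmax over the vocabulary scores the next word. The tanh nonlinearity and the separate U/W/V matrices are illustrative assumptions, not the paper's exact formulation:

# Minimal sketch of one RNN-LM step (illustrative dimensions and tanh/softmax are assumptions)
import torch

vocab_size, hidden_size = 7, 5
U = torch.randn(hidden_size, vocab_size)   # input-to-hidden weights
W = torch.randn(hidden_size, hidden_size)  # hidden-to-hidden (recurrent) weights
V = torch.randn(vocab_size, hidden_size)   # hidden-to-output weights

w_t = torch.eye(vocab_size)[3]             # one-hot vector of the current word
s_prev = torch.zeros(hidden_size)          # previous hidden state s(t-1)

s_t = torch.tanh(U @ w_t + W @ s_prev)     # new hidden state s(t)
y_t = torch.softmax(V @ s_t, dim=0)        # distribution over the next word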
For the underlying theory, refer to the paper; the code and comments below follow https://github.com/graykode/nlp-tutorial:
# -*- coding: utf-8 -*-
# @time : 2019/11/9 15:12

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

sentences = ["i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)

# TextRNN Parameters
batch_size = len(sentences)
n_step = 2  # number of time steps (= number of input words per sample)
n_hidden = 5  # number of hidden units in one cell

def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return input_batch, target_batch

# to torch.Tensor
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.Tensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

class TextRNN(nn.Module):
    def __init__(self):
        super(TextRNN, self).__init__()

        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden, batch_first=True)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, hidden, X):
        if self.rnn.batch_first:
            # X : [batch_size, time_step, word_vector]
            outputs, hidden = self.rnn(X, hidden)

            # outputs : [batch_size, time_step, num_directions(=1) * n_hidden]
            output = outputs[:, -1, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model
        else:
            X = X.transpose(0, 1)  # X : [n_step, batch_size, n_class]
            outputs, hidden = self.rnn(X, hidden)
            # outputs : [n_step, batch_size, num_directions(=1) * n_hidden]
            # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

            output = outputs[-1, :, :]  # [batch_size, num_directions(=1) * n_hidden]
            model = torch.mm(output, self.W) + self.b  # model : [batch_size, n_class]
            return model

model = TextRNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
    optimizer.zero_grad()

    # hidden : [num_layers * num_directions, batch, hidden_size]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))
    # input_batch : [batch_size, n_step, n_class]
    output = model(hidden, input_batch)

    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()


# Predict
hidden_initial = Variable(torch.zeros(1, batch_size, n_hidden))
predict = model(hidden_initial, input_batch).data.max(1, keepdim=True)[1]
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
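As a small follow-up sketch (not part of the original snippet), the trained TextRNN can also be queried with a single new two-word prefix, provided both words already appear in word_dict; the prefix "i hate" below is just an illustrative choice:

# Query the trained model with one prefix (batch_size = 1)
prefix = "i hate".split()
x = torch.Tensor(np.eye(n_class)[[word_dict[w] for w in prefix]]).unsqueeze(0)  # [1, n_step, n_class]
h0 = torch.zeros(1, 1, n_hidden)  # [num_layers * num_directions, batch_size(=1), n_hidden]
pred = model(h0, x).data.max(1)[1]
print(prefix, '->', number_dict[pred.item()])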
RNN model with LSTM units:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

char_arr = [c for c in 'abcdefghijklmnopqrstuvwxyz']
word_dict = {n: i for i, n in enumerate(char_arr)}
number_dict = {i: w for i, w in enumerate(char_arr)}
n_class = len(word_dict)  # number of classes (= vocabulary size)

seq_data = ['make', 'need', 'coal', 'word', 'love', 'hate', 'live', 'home', 'hash', 'star']

# TextLSTM Parameters
n_step = 3
n_hidden = 128


def make_batch(seq_data):
    input_batch, target_batch = [], []

    for seq in seq_data:
        input = [word_dict[n] for n in seq[:-1]]  # 'm', 'a', 'k' is input
        target = word_dict[seq[-1]]  # 'e' is target
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))


class TextLSTM(nn.Module):
    def __init__(self):
        super(TextLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]

        hidden_state = Variable(
            torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
        cell_state = Variable(
            torch.zeros(1, len(X), n_hidden))  # [num_layers(=1) * num_directions(=1), batch_size, n_hidden]

        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden]
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model


input_batch, target_batch = make_batch(seq_data)

model = TextLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(1000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 100 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

inputs = [sen[:3] for sen in seq_data]

predict = model(input_batch).data.max(1, keepdim=True)[1]
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze()])
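One note on the explicit hidden_state/cell_state above: PyTorch's nn.LSTM falls back to zero initial states when no (h_0, c_0) tuple is passed, so they could be omitted. A small standalone check of this behavior (arbitrary sizes, independent of the model above):

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=4, hidden_size=3)
x = torch.randn(5, 2, 4)                           # [seq_len, batch, input_size]
out_default, _ = lstm(x)                           # implicit zero initial states
h0 = torch.zeros(1, 2, 3)
c0 = torch.zeros(1, 2, 3)
out_explicit, _ = lstm(x, (h0, c0))                # explicit zero initial states
print(torch.allclose(out_default, out_explicit))   # True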
BiLSTM RNN model:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

dtype = torch.FloatTensor

sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

word_dict = {w: i for i, w in enumerate(list(set(sentence.split())))}
number_dict = {i: w for i, w in enumerate(list(set(sentence.split())))}
n_class = len(word_dict)
max_len = len(sentence.split())
n_hidden = 5

def make_batch(sentence):
    input_batch = []
    target_batch = []

    words = sentence.split()
    for i, word in enumerate(words[:-1]):
        input = [word_dict[n] for n in words[:(i + 1)]]
        input = input + [0] * (max_len - len(input))
        target = word_dict[words[i + 1]]
        input_batch.append(np.eye(n_class)[input])
        target_batch.append(target)

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        self.W = nn.Parameter(torch.randn([n_hidden * 2, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, X):
        input = X.transpose(0, 1)  # input : [n_step, batch_size, n_class]

        hidden_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1 * 2, len(X), n_hidden))  # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        outputs, (_, _) = self.lstm(input, (hidden_state, cell_state))
        outputs = outputs[-1]  # [batch_size, n_hidden * 2] (forward and backward states concatenated)
        model = torch.mm(outputs, self.W) + self.b  # model : [batch_size, n_class]
        return model

input_batch, target_batch = make_batch(sentence)

model = BiLSTM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(10000):
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predict = model(input_batch).data.max(1, keepdim=True)[1]
print(sentence)
print([number_dict[n.item()] for n in predict.squeeze()])
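For reference, with bidirectional=True the last dimension of outputs concatenates the forward and backward hidden states, which is why self.W above has shape [n_hidden * 2, n_class]. A small standalone sketch of the output layout (arbitrary sizes, independent of the model above):

import torch
import torch.nn as nn

bilstm = nn.LSTM(input_size=4, hidden_size=3, bidirectional=True)
x = torch.randn(6, 2, 4)                          # [seq_len, batch, input_size]
outputs, (h_n, c_n) = bilstm(x)                   # zero initial states by default
print(outputs.shape)                              # torch.Size([6, 2, 6]) = [seq_len, batch, 2 * hidden]
forward_out, backward_out = outputs[..., :3], outputs[..., 3:]  # split the two directions
print(h_n.shape)                                  # torch.Size([2, 2, 3]) = [num_directions, batch, hidden]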