PyTorch LSTM Framework
In PyTorch, we build our own model class by subclassing nn.Module.
Within this class, we need to implement the forward() method, which performs the network's forward pass. Once the class is defined, you can run the forward pass as shown below:
# Define model
model = LSTM(...)

# Forward pass
y_pred = model(X_batch)  # this is the same as model.forward(X_batch)
You could implement the LSTM cell entirely by hand, but here we use the torch.nn.LSTM module instead.
import torch
import torch.nn as nn

# Here we define our model as a class
class LSTM(nn.Module):

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.output_dim = output_dim

        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, self.output_dim)

    def init_hidden(self):
        # This is what we will initialise our hidden state as
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def forward(self, input):
        # Forward pass through LSTM layer
        # shape of lstm_out: [seq_len, batch_size, hidden_dim]
        # shape of self.hidden: (a, b), where a and b both have shape (num_layers, batch_size, hidden_dim)
        lstm_out, self.hidden = self.lstm(input.view(len(input), self.batch_size, -1))

        # Only take the output from the final timestep
        # (you can pass the entirety of lstm_out to the next layer for a seq2seq prediction)
        y_pred = self.linear(lstm_out[-1].view(self.batch_size, -1))
        return y_pred.view(-1)

model = LSTM(lstm_input_size, h1, batch_size=num_train, output_dim=output_dim, num_layers=num_layers)
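The constructor call above references lstm_input_size, h1, num_train, output_dim, num_layers, and (later on) learning_rate and num_epochs, which the original post defines elsewhere. Below is a minimal sketch, assuming a univariate sequence-regression task; all of the placeholder values and the synthetic tensors are assumptions added here for illustration, not part of the original code.

# Hypothetical hyperparameters and synthetic data, only to make the snippets self-contained
lstm_input_size = 1      # one feature per timestep (assumption)
h1 = 32                  # hidden dimension (assumption)
num_train = 100          # number of training sequences (assumption)
output_dim = 1
num_layers = 2
seq_len = 20             # length of each sequence (assumption)
learning_rate = 1e-3     # used by the training loop below (assumption)
num_epochs = 500         # used by the training loop below (assumption)

# nn.LSTM with batch_first=False (the default) expects input of shape [seq_len, batch_size, input_dim]
X_train = torch.randn(seq_len, num_train, lstm_input_size)
y_train = torch.randn(num_train)   # one regression target per sequence

model = LSTM(lstm_input_size, h1, batch_size=num_train, output_dim=output_dim, num_layers=num_layers)
y_pred = model(X_train)
print(y_pred.shape)      # torch.Size([100]) -- one prediction per sequence

With these shapes, the model returns one scalar prediction per sequence, which is exactly what the MSE loss in the training loop below expects.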
Training the Model
import numpy as np

# Training the LSTM
loss_fn = torch.nn.MSELoss(reduction='sum')   # 'sum' reduction, equivalent to the deprecated size_average=False

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

######## Train model ###############
hist = np.zeros(num_epochs)

for t in range(num_epochs):
    # Clear stored gradient
    model.zero_grad()

    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    model.hidden = model.init_hidden()

    # Forward pass
    y_pred = model(X_train)

    loss = loss_fn(y_pred, y_train)
    if t % 100 == 0:
        print('Epoch', t, ', MSE:', loss.item())

    hist[t] = loss.item()

    # Zero out gradients, else they will accumulate between epochs
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()
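After training, the per-epoch losses collected in hist can be plotted, and the fitted model can be reused for prediction. A minimal sketch follows; the matplotlib plot and the no_grad evaluation are additions for illustration, not part of the original tutorial.

import matplotlib.pyplot as plt

# Reuse the trained model for inference; gradients are not needed here
model.eval()
with torch.no_grad():
    y_fit = model(X_train)    # predictions on the training sequences

# Plot the training-loss history collected in hist
plt.plot(hist, label='training loss (sum of squared errors)')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()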