transformers、torch train demo

通过 PyTorch 训练模型的基本逻辑(前向传播 → 计算损失 → 反向传播 → 更新权重)如下:

import torch.nn as nn
import torch
import numpy
#from torch.utils.tensorboard import SummaryWriter
import time
# Vocabulary of the ten decimal digit characters "0".."9".
# NOTE(review): not referenced by any other code visible in this file.
vocabList = list("0123456789")

class TwoLayerNet(nn.Module):
    """Two stacked linear layers with a clamp-at-zero activation in between.

    The module maps inputs whose last dimension is ``dim_in`` to outputs whose
    last dimension is ``dim_hide_2``.

    Args:
        dim_in: Number of input features.
        dim_hide_1: Output width of the first linear layer.
        dim_hide_2: Output width of the second linear layer, which is also the
            width of the value returned by ``forward``.
        dim_out: Accepted for interface compatibility; no layer uses it.
    """

    def __init__(self, dim_in, dim_hide_1,dim_hide_2, dim_out):
        super().__init__()
        self.linear1 = nn.Linear(in_features=dim_in, out_features=dim_hide_1, bias=True)
        self.linear2 = nn.Linear(in_features=dim_hide_1, out_features=dim_hide_2, bias=True)

    def forward(self, x):
        """Return ``linear2(max(linear1(x), 0))`` for the input tensor ``x``."""
        hidden = self.linear1(x)
        # Negative activations are clamped to zero before the second layer.
        activated = torch.clamp(hidden, min=0)
        return self.linear2(activated)

if __name__ == "__main__":
    # Demo: use one TwoLayerNet as a shared encoder for two random inputs and
    # train it so that the cosine similarity of the two encodings regresses
    # onto a random target.

    # Optional TensorBoard logging (needs the SummaryWriter import enabled):
    #writer = SummaryWriter('log')
    N = 5        # batch size
    D_in = 10    # number of input features
    H1 = 10      # output width of the first linear layer
    H2 = 15      # output width of the second linear layer (the encoding size)
    D_out = 1    # passed to the constructor; TwoLayerNet does not use it

    # Training data is randomly initialised.
    x1_data = torch.randn(N, D_in)
    x2_data = torch.randn(N, D_in)
    # cosine_similarity over dim=1 yields one value per row, i.e. shape (N,).
    # The target must have the same shape; an (N, 1) target would silently
    # broadcast inside MSELoss to an N x N comparison.
    y = torch.randn(N)

    model = TwoLayerNet(D_in, H1, H2, D_out)
    loss_fn = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    #writer.add_graph(model, input_to_model = torch.rand(5,10))
    #writer.close()
    for step in range(5):
        print("Start train : ---------- ", step)
        # Clear gradients at the start of the step (not the end) so that the
        # gradients of this step are still available for inspection below.
        optimizer.zero_grad()
        y_query = model(x1_data)     # forward pass, first input
        y_title = model(x2_data)     # forward pass, second input
        logits = torch.cosine_similarity(y_query, y_title)
        loss = loss_fn(logits, y)
        loss.backward()              # back-propagation
        optimizer.step()             # weight update
        print(step, loss.item())
        for name, param in model.named_parameters():
            print(name)
            print(param)
            if param.grad is not None:
                print(param.grad)
                print(param.grad.shape)
                # One numpy array per slice along the first dimension.
                paramGradValue = [grad_row.numpy() for grad_row in param.grad]
                print(paramGradValue)

 

posted on 2024-04-16 18:17  细雨微光  阅读(12)  评论(0)  编辑  收藏  举报