transformers / torch train demo
The logic of training a model with PyTorch:
import torch
import torch.nn as nn
# from torch.utils.tensorboard import SummaryWriter

vocabList = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

class TwoLayerNet(nn.Module):
    def __init__(self, dim_in, dim_hide_1, dim_hide_2, dim_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = nn.Linear(dim_in, dim_hide_1, bias=True)
        # The final output is dim_hide_2-dimensional (dim_out is kept in the
        # signature but unused in this demo).
        self.linear2 = nn.Linear(dim_hide_1, dim_hide_2, bias=True)

    def forward(self, x):
        # clamp(min=0) acts as a ReLU between the two linear layers
        y_predict = self.linear2(self.linear1(x).clamp(min=0))
        return y_predict

if __name__ == "__main__":
    # writer = SummaryWriter('log')
    N = 5      # batch size
    D_in = 10  # input dimension
    H1 = 10    # hidden units in the first layer
    H2 = 15    # hidden units in the second layer (the embedding size)
    D_out = 1  # unused: cosine similarity reduces each pair to a scalar

    # Training data: randomly initialized for this demo
    x1_data = torch.randn(N, D_in)
    x2_data = torch.randn(N, D_in)
    y = torch.randn(N)  # one target per pair, matching cosine_similarity's (N,) output

    model = TwoLayerNet(D_in, H1, H2, D_out)                   # model
    loss_fn = nn.MSELoss(reduction='sum')                      # loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # optimizer

    # writer.add_graph(model, input_to_model=torch.rand(5, 10))
    # writer.close()

    for t in range(5):
        print("Start train : ---------- ", t)
        optimizer.zero_grad()      # clear gradients before the backward pass
        y_query = model(x1_data)   # forward pass, query tower
        y_title = model(x2_data)   # forward pass, title tower
        logits = torch.cosine_similarity(y_query, y_title)  # shape (N,)
        loss = loss_fn(logits, y)  # compute the loss
        loss.backward()            # backward pass
        optimizer.step()           # update the weights
        print(t, loss.item())

    # Inspect the parameters and the gradients left over from the last step
    for name, param in model.named_parameters():
        print(name)
        print(param)
        if param.grad is not None:
            print(param.grad)
            print(param.grad.shape)
            paramGradValue = [row.numpy() for row in param.grad]
            print(paramGradValue)
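The title also mentions transformers. A minimal sketch of the same two-tower cosine-similarity training loop with a Hugging Face transformers encoder might look like the following; the checkpoint name bert-base-chinese, the toy inputs, and the use of the [CLS] vector as the sentence embedding are assumptions for illustration, not part of the original demo:

import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

# Assumption: any BERT-style checkpoint works here; bert-base-chinese is one example.
tokenizer = AutoTokenizer.from_pretrained("bert-base-chinese")
encoder = AutoModel.from_pretrained("bert-base-chinese")

queries = ["今天天气怎么样", "如何训练模型"]   # toy query texts
titles = ["天气预报", "pytorch 训练教程"]      # toy title texts
y = torch.tensor([1.0, 1.0])                   # toy relevance targets, one per pair

loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(encoder.parameters(), lr=1e-5)

def encode(texts):
    # Tokenize and take the [CLS] vector as the sentence embedding (one common choice).
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    return encoder(**batch).last_hidden_state[:, 0]

for step in range(5):
    optimizer.zero_grad()
    logits = torch.cosine_similarity(encode(queries), encode(titles))  # shape (N,)
    loss = loss_fn(logits, y)
    loss.backward()
    optimizer.step()
    print(step, loss.item())

The overall structure is identical to the pure-PyTorch demo above; only the encoder that produces the query and title embeddings is swapped out.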
Category: Natural Language Processing / Deep Learning
Tags: Deep Learning