9. Training with the GPU, and testing the trained model
In the previous post we quickly built and trained a small network, but it had the following problems:
- it only used the CPU; the GPU was never used for training;
- the learning rate was too high, so the accuracy could not climb any further at the end.
Here we solve both problems at once.
Finally, we write a detect module to put the network we built to practical use.
Using the GPU for training in PyTorch
Switching the training device to the GPU is fairly simple in code. There are two main approaches, and both are applied to the same three objects: the model, the data, and the loss function.
Using the .cuda() method
A quick summary of the places that change:
Enable the GPU:

```python
if torch.cuda.is_available():
    images = images.cuda()
    targets = targets.cuda()
    my_model = my_model.cuda()
    my_loss_fn = my_loss_fn.cuda()
```

Lower the learning rate:

```python
learning_rate = 1e-3
```
The full code after these changes:
```python
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time

# prepare the data for training and testing
data_path = "../data_cifar10"
dataset_train = torchvision.datasets.CIFAR10(root=data_path, train=True,
                                             transform=torchvision.transforms.ToTensor(), download=True)
dataset_test = torchvision.datasets.CIFAR10(root=data_path, train=False,
                                            transform=torchvision.transforms.ToTensor(), download=True)
# dataset_train.cuda() does not exist and raises an error
dataloader_train = DataLoader(dataset_train, batch_size=64)
dataloader_test = DataLoader(dataset_test, batch_size=64)
# dataloader_train.cuda() does not exist either

# create the module class
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=1024, out_features=64),
            nn.Linear(in_features=64, out_features=10)
        )

    def forward(self, x):
        return self.model(x)

# create the neural network, loss function and optimizer
logdir = "../logs"
writer = SummaryWriter(log_dir=logdir)
learning_rate = 1e-3
my_model = MyModel()
if torch.cuda.is_available():
    my_model = my_model.cuda()
my_loss_fn = torch.nn.CrossEntropyLoss()
if torch.cuda.is_available():
    my_loss_fn = my_loss_fn.cuda()
my_optimization = torch.optim.SGD(my_model.parameters(), lr=learning_rate)
# the optimizer has no .cuda(); it works on whatever device the parameters live on

max_epoch = 200
train_step = 0
test_step = 0
train_size = len(dataset_train)
test_size = len(dataset_test)

start_time = time.time()
for epoch in range(max_epoch):
    print("-------Epoch {}-------".format(epoch))
    # train
    loss_sum = 0.0
    train_step = 0
    my_model.train()
    for images, targets in dataloader_train:
        if torch.cuda.is_available():
            images = images.cuda()
            targets = targets.cuda()
        output = my_model(images)
        cur_loss = my_loss_fn(output, targets)
        loss_sum += cur_loss.item()  # .item() so we do not keep the autograd graph alive
        # optimize the model parameters
        my_optimization.zero_grad()
        cur_loss.backward()
        my_optimization.step()
        if train_step % 100 == 0:
            print(f"epoch:{epoch}, train_step:{train_step}, cur_loss:{cur_loss}")
        train_step += 1
    writer.add_scalar("epoch:train_loss", loss_sum, epoch)
    print(f"--epoch {epoch}:train_loss {loss_sum}")

    # test
    my_model.eval()
    right_classify_cnt = 0
    loss_sum = 0.0
    with torch.no_grad():
        for images, targets in dataloader_test:
            if torch.cuda.is_available():
                images = images.cuda()
                targets = targets.cuda()
            output = my_model(images)
            cur_loss = my_loss_fn(output, targets)
            loss_sum += cur_loss.item()
            right_classify_cnt += (output.argmax(dim=1) == targets).sum()
    writer.add_scalar("epoch:test_loss", loss_sum, epoch)
    writer.add_scalar("epoch:test_accuracy", right_classify_cnt / test_size, epoch)
    print(f"--epoch {epoch}:test_loss {loss_sum}")
    print(f"--epoch {epoch}:test_accuracy {right_classify_cnt / test_size}")
    end_time = time.time()
    print(f"#### my_time:{end_time - start_time}")
    if epoch % 5 == 0:
        torch.save(my_model.state_dict(), f="./epoch_1_{}.pth".format(epoch))
writer.close()
```
Using the .to() method
This is the most commonly used approach: unlike .cuda(), it does not need to be paired with an if torch.cuda.is_available() check at every call site, which makes it much more convenient.
For example:
```python
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
images = images.to(device)
targets = targets.to(device)
my_model = my_model.to(device)
my_loss_fn = my_loss_fn.to(device)
```
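One detail worth keeping in mind (a minimal sketch, not from the original code): Tensor.to() is out-of-place, so the result must be assigned back, while Module.to() moves the parameters in place and returns the module itself.

```python
import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

t = torch.zeros(2, 2)
t.to(device)      # no lasting effect: Tensor.to() returns a new tensor
t = t.to(device)  # correct: assign the result back

layer = nn.Linear(4, 2)
layer.to(device)  # fine: Module.to() moves the parameters in place
print(t.device, next(layer.parameters()).device)
```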
The full code after this change:
```python
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import time

# prepare the data for training and testing
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_path = "../data_cifar10"
dataset_train = torchvision.datasets.CIFAR10(root=data_path, train=True,
                                             transform=torchvision.transforms.ToTensor(), download=True)
dataset_test = torchvision.datasets.CIFAR10(root=data_path, train=False,
                                            transform=torchvision.transforms.ToTensor(), download=True)
# dataset_train.cuda() does not exist and raises an error
dataloader_train = DataLoader(dataset_train, batch_size=64)
dataloader_test = DataLoader(dataset_test, batch_size=64)
# dataloader_train.cuda() does not exist either

# create the module class
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(in_features=1024, out_features=64),
            nn.Linear(in_features=64, out_features=10)
        )

    def forward(self, x):
        return self.model(x)

# create the neural network, loss function and optimizer
logdir = "../logs"
writer = SummaryWriter(log_dir=logdir)
learning_rate = 1e-3
my_model = MyModel().to(device)
my_loss_fn = torch.nn.CrossEntropyLoss().to(device)
my_optimization = torch.optim.SGD(my_model.parameters(), lr=learning_rate)

max_epoch = 200
train_step = 0
test_step = 0
train_size = len(dataset_train)
test_size = len(dataset_test)

start_time = time.time()
for epoch in range(max_epoch):
    print("-------Epoch {}-------".format(epoch))
    # train
    loss_sum = 0.0
    train_step = 0
    my_model.train()
    for images, targets in dataloader_train:
        images = images.to(device)
        targets = targets.to(device)
        output = my_model(images)
        cur_loss = my_loss_fn(output, targets)
        loss_sum += cur_loss.item()  # .item() so we do not keep the autograd graph alive
        # optimize the model parameters
        my_optimization.zero_grad()
        cur_loss.backward()
        my_optimization.step()
        if train_step % 100 == 0:
            print(f"epoch:{epoch}, train_step:{train_step}, cur_loss:{cur_loss}")
        train_step += 1
    writer.add_scalar("epoch:train_loss", loss_sum, epoch)
    print(f"--epoch {epoch}:train_loss {loss_sum}")

    # test
    my_model.eval()
    right_classify_cnt = 0
    loss_sum = 0.0
    with torch.no_grad():
        for images, targets in dataloader_test:
            images = images.to(device)
            targets = targets.to(device)
            output = my_model(images)
            cur_loss = my_loss_fn(output, targets)
            loss_sum += cur_loss.item()
            right_classify_cnt += (output.argmax(dim=1) == targets).sum()
    writer.add_scalar("epoch:test_loss", loss_sum, epoch)
    writer.add_scalar("epoch:test_accuracy", right_classify_cnt / test_size, epoch)
    print(f"--epoch {epoch}:test_loss {loss_sum}")
    print(f"--epoch {epoch}:test_accuracy {right_classify_cnt / test_size}")
    end_time = time.time()
    print(f"#### my_time:{end_time - start_time}")
    if epoch % 5 == 0:
        torch.save(my_model.state_dict(), f="./epoch_2_{}.pth".format(epoch))
writer.close()
```
Putting the network to use
Those with the time can consider building a graphical interface; here I will just write a plain command-line test.
It boils down to loading the model and then running inference with it.
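As a minimal sketch of the loading step (the checkpoint name here just follows the save pattern of the training script above, so treat it as an assumption):

```python
import torch
from mymodel import MyModel  # the model class from the training script, saved as mymodel.py

my_model = MyModel()
# map_location lets a checkpoint saved on a GPU machine be loaded on a CPU-only one
state_dict = torch.load("./epoch_2_195.pth", map_location=torch.device("cpu"))
my_model.load_state_dict(state_dict)
my_model.eval()  # switch to inference mode before applying the network
```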
But first, the input image has to be normalized into the form the network expects:
- open the image with PIL's Image, and apply convert("RGB") so it has exactly 3 channels;
- resize() it to 32×32 (resize rescales the image, it does not crop);
- turn it into a tensor with ToTensor() and move it to the CPU or GPU with to(device);
- reshape it into a batch of one and feed it to the network;
- finally, map the network's output index back to a class name (a dict is a good fit for this mapping).
It is worth a quick look at what convert and resize actually do; see the sketch below.
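A minimal sketch of the two calls (the file name is hypothetical): convert("RGB") forces a 3-channel image even when the source is grayscale or RGBA, and resize((32, 32)) rescales it to the 32×32 input size our CIFAR-10 network expects.

```python
from PIL import Image

image = Image.open("../test_data/img_0.png")  # hypothetical test image
print(image.mode, image.size)   # e.g. "RGBA" and an arbitrary size

image = image.convert("RGB")    # force 3 channels (drops alpha / expands grayscale)
image = image.resize((32, 32))  # rescale (not crop) to the network's input size
print(image.mode, image.size)   # "RGB" (32, 32)
```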
```python
import torch
import torchvision
import torch.nn as nn
from mymodel import *
from PIL import Image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# load the model
my_model = MyModel()
my_model.to(device)
# map_location avoids a load error when a GPU-saved checkpoint is opened on a CPU-only machine
my_model.load_state_dict(torch.load("./project_models/epoch_1_195.pth",
                                    map_location=torch.device('cpu')))

# load the images to be tested
my_model.eval()
image_class_list = ['airplane', 'frog', 'dog', 'cat']  # ground-truth labels of the 4 test images
idx_to_class = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer',
                5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}

with torch.no_grad():
    for i in range(0, 4):
        image = Image.open(f"../test_data/img_{i}.png")
        image = image.convert("RGB")
        image = image.resize((32, 32))
        trans_totensor = torchvision.transforms.ToTensor()
        image = trans_totensor(image)
        image = image.to(device)  # Tensor.to() is out-of-place: assign the result back
        image = torch.reshape(image, (1, 3, 32, 32))
        output = my_model(image)
        cur_class = output.argmax(dim=1)
        print(f"image:{i} ({image_class_list[i]})-> output:{cur_class}({idx_to_class[cur_class.item()]})")
```
About hardware resources
Many readers may be in the same boat as me: the computer has no GPU, the GPU is not from NVIDIA, or the driver is too old and cannot be updated. Where can you get GPU time then?
google colab
Colab provides free compute resources you can use directly, but it requires a Google account; if registering one is difficult, accounts can be bought on Taobao.
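Once a Colab notebook is open (and the runtime type is switched to GPU under Runtime -> Change runtime type), a quick sanity check that PyTorch can see the GPU might look like this:

```python
import torch

print(torch.cuda.is_available())          # True once the GPU runtime is active
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # name of the GPU Colab allocated
```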