深度学习(LeNet)
业余时间重新学习一下深度学习,先从基础网络开始,一点一点积累。
LeNet网络模型:
下面程序的输入是28*28的单通道灰度图像(原始LeNet-5的输入为32*32),因此各层的特征图尺寸和全连接层的输入维度与原始结构稍有不同。
训练代码:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from PIL import Image


class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 conv layers followed by three linear layers.

    The first linear layer expects 16*4*4 features, which is what the
    conv/pool stack produces for a 1x28x28 input. The output is a tensor of
    10 raw class scores (logits) per sample.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch ``x`` of shape (N, 1, 28, 28)."""
        x = torch.relu(self.conv1(x))   # 1*28*28 -> 6*24*24
        x = torch.max_pool2d(x, 2)      # 6*24*24 -> 6*12*12
        x = torch.relu(self.conv2(x))   # 6*12*12 -> 16*8*8
        x = torch.max_pool2d(x, 2)      # 16*8*8  -> 16*4*4
        x = x.view(x.size(0), -1)       # 16*4*4  -> 256
        x = torch.relu(self.fc1(x))     # 256 -> 120
        x = torch.relu(self.fc2(x))     # 120 -> 84
        return self.fc3(x)              # 84  -> 10


class MNISTDataset(Dataset):
    """Image dataset backed by a folder of ``<index>.bmp`` files and a label file.

    The label file holds one integer label per line; line ``i`` is the label
    of ``<image_folder>/<i>.bmp``.
    """

    def __init__(self, image_folder, label_file, transform=None):
        self.image_folder = image_folder
        self.label_file = label_file
        self.transform = transform
        self.labels = self.load_labels()

    def load_labels(self):
        """Read the label file and return its lines as a list of ints.

        Raises:
            ValueError: if a line cannot be parsed as an integer.
        """
        with open(self.label_file, 'r') as f:
            return [int(line.strip()) for line in f]

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``.

        The image is opened as 8-bit grayscale and passed through
        ``transform`` when one was given.
        """
        label = self.labels[index]
        # Images are assumed to be named sequentially: 0.bmp, 1.bmp, ...
        image_path = os.path.join(self.image_folder, f"{index}.bmp")
        image = Image.open(image_path).convert('L')
        if self.transform is not None:
            image = self.transform(image)
        return image, label


def main():
    """Train LeNet on ``mnist/train`` and save the weights to ``lenet_mnist.pth``."""
    num_epochs = 10

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move the model to its device before building the optimizer so the
    # optimizer is created over the parameters that are actually trained.
    model = LeNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    train_dataset = MNISTDataset(
        'mnist/train', 'mnist/train.txt', transform=ToTensor())
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    for epoch in range(num_epochs):
        model.train()

        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # argmax is not differentiable, so no detach / .data is needed
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {(100 * correct / total):.2f}%")

    print('Training finished.')

    # Only the state_dict is saved; loaders must rebuild LeNet themselves.
    torch.save(model.state_dict(), 'lenet_mnist.pth')


if __name__ == "__main__":
    main()
测试代码:
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from PIL import Image


class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 conv layers followed by three linear layers.

    The layer layout must match the model that produced the checkpoint,
    because only a ``state_dict`` is loaded below.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch ``x`` of shape (N, 1, 28, 28)."""
        x = torch.max_pool2d(torch.relu(self.conv1(x)), 2)
        x = torch.max_pool2d(torch.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten to (N, 16*4*4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


class MNISTDataset(Dataset):
    """Image dataset backed by a folder of ``<index>.bmp`` files and a label file.

    The label file holds one integer label per line; line ``i`` is the label
    of ``<image_folder>/<i>.bmp``.
    """

    def __init__(self, image_folder, label_file, transform=None):
        self.image_folder = image_folder
        self.label_file = label_file
        self.transform = transform
        self.labels = self.load_labels()

    def load_labels(self):
        """Read the label file and return its lines as a list of ints.

        Raises:
            ValueError: if a line cannot be parsed as an integer.
        """
        with open(self.label_file, 'r') as f:
            return [int(line.strip()) for line in f]

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(image, label)`` for sample ``index``.

        The image is opened as 8-bit grayscale and passed through
        ``transform`` when one was given.
        """
        label = self.labels[index]
        # Images are assumed to be named sequentially: 0.bmp, 1.bmp, ...
        image_path = os.path.join(self.image_folder, f"{index}.bmp")
        image = Image.open(image_path).convert('L')
        if self.transform is not None:
            image = self.transform(image)
        return image, label


def main():
    """Load ``lenet_mnist.pth`` and print the accuracy on ``mnist/test``."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location lets a checkpoint saved from a CUDA model load on a
    # CPU-only machine. NOTE: torch.load unpickles the file, so only load
    # checkpoints from a trusted source.
    model = LeNet()
    model.load_state_dict(torch.load('lenet_mnist.pth', map_location=device))
    model.to(device)
    model.eval()

    test_dataset = MNISTDataset(
        'mnist/test', 'mnist/test.txt', transform=ToTensor())
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass; the predicted class is the index of the largest logit
            predicted = model(images).argmax(dim=1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # An empty label file would otherwise end in a ZeroDivisionError.
        print("Test dataset is empty; accuracy is undefined.")
        return

    print(f"Test Accuracy: {(100 * correct / total):.2f}%")


if __name__ == "__main__":
    main()
将pth模型转为onnx模型:
import torch
import torch.nn as nn


class LeNet(nn.Module):
    """LeNet-style CNN: two 5x5 conv layers followed by three linear layers.

    The layer layout must match the model that produced the checkpoint,
    because only a ``state_dict`` is loaded below.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch ``x`` of shape (N, 1, 28, 28)."""
        x = torch.max_pool2d(torch.relu(self.conv1(x)), 2)
        x = torch.max_pool2d(torch.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten to (N, 16*4*4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


def main():
    """Load ``lenet_mnist.pth`` and export the model to ``lenet_mnist.onnx``."""
    # Export runs on the CPU, so map the checkpoint there; without
    # map_location a checkpoint saved from a CUDA model fails to load on a
    # CPU-only machine. NOTE: torch.load unpickles the file, so only load
    # checkpoints from a trusted source.
    model = LeNet()
    model.load_state_dict(torch.load('lenet_mnist.pth', map_location='cpu'))
    model.eval()

    # Dummy input used to trace the graph: one 28*28 single-channel image
    dummy_input = torch.randn(1, 1, 28, 28)

    # Export to ONNX; the tensor names are what inference code must pass
    # as the input/output names.
    onnx_filename = 'lenet_mnist.onnx'
    torch.onnx.export(
        model,
        dummy_input,
        onnx_filename,
        verbose=False,
        input_names=["image"],
        output_names=["class"],
    )

    print(f"Model successfully exported as {onnx_filename}.")


if __name__ == "__main__":
    main()
安装Netron后,可以用它打开导出的ONNX模型并可视化查看网络结构。
下面利用C++(OpenCV + ONNX Runtime)加载导出的ONNX模型做推理测试:
#include <iostream> #include <opencv2/opencv.hpp> #include <onnxruntime_cxx_api.h> std::vector<float> ApplyTransform(const cv::Mat& image) { cv::Mat resized, floatImage; image.convertTo(floatImage, CV_32FC1); float mean = 0.0f; float std = 0.0f; cv::Scalar meanScalar, stdScalar; meanStdDev(floatImage, meanScalar, stdScalar); mean = static_cast<float>(meanScalar.val[0]); std = static_cast<float>(stdScalar.val[0]); std::vector<float> imgData; for (int h = 0; h < image.rows; h++) { for (int w = 0; w < image.cols; w++) { imgData.push_back((floatImage.at<float>(h, w) - mean) / std); } } return imgData; } int main() { // 读取图像 cv::Mat image = cv::imread("4.bmp", cv::IMREAD_GRAYSCALE); if (image.empty()) { std::cerr << "Failed to read image." << std::endl; return 1; } cv::Mat resized_image; cv::resize(image, resized_image, cv::Size(28, 28)); std::vector<float> imgData = ApplyTransform(resized_image); // 加载ONNX模型 Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ONNXModel"); Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); std::wstring onnx_model_path = L"lenet_mnist.onnx"; Ort::Session session(env, onnx_model_path.c_str(), session_options); std::vector<int64_t> inputShape{ 1, 1, resized_image.rows, resized_image.cols }; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, imgData.data(), imgData.size(), inputShape.data(), inputShape.size()); const char* input_names[] = { "image" }; const char* output_names[] = { "class" }; Ort::RunOptions run_options; std::vector<Ort::Value> outputs = session.Run(run_options, input_names, &inputTensor, 1, output_names, 1); std::vector<int64_t> kpshape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); float* kp = outputs[0].GetTensorMutableData<float>(); std::cout << kpshape[0]<<" "<<kpshape[1] << std::endl; for (int i = 0; i < kpshape[1]; i++) { std::cout << kp[i] << 
std::endl; } return 0; }