深度学习(AlexNet)
AlexNet是另外一个比较经典的深度学习网络模型。
模型结构如下:
这里用该模型做了个猫狗大战的训练,测试与C++测试和上一篇类似。
# Train a single-channel AlexNet variant on the cats-vs-dogs data set and
# save the learned weights.
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from PIL import Image

# Training configuration.
NUM_EPOCHS = 10
BATCH_SIZE = 256
LEARNING_RATE = 0.001
TRAIN_DIR = './cat_vs_dog/train'
MODEL_PATH = 'alexnet.pth'


class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for 227x227 images.

    The layer attribute names (``conv1`` .. ``conv5``, ``fc1`` .. ``fc3``)
    are the keys of the saved ``state_dict``; keep them stable so existing
    checkpoints stay loadable.

    Args:
        in_channels: Number of channels of the input image (1 = grayscale).
        num_classes: Number of output logits.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 2) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 96, kernel_size=11, stride=4)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        # The first fully connected layer fixes the spatial input size:
        # only 227x227 inputs produce the 256*6*6 feature map it expects.
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class logits for a batch of images.

        Args:
            x: Batch of shape ``(N, in_channels, 227, 227)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` (no softmax applied).
        """
        x = torch.relu(self.conv1(x))         # 227*227 -> 96*55*55
        x = torch.max_pool2d(x, 3, stride=2)  # 96*55*55 -> 96*27*27
        x = torch.relu(self.conv2(x))         # 96*27*27 -> 256*27*27
        x = torch.max_pool2d(x, 3, stride=2)  # 256*27*27 -> 256*13*13
        x = torch.relu(self.conv3(x))         # 256*13*13 -> 384*13*13
        x = torch.relu(self.conv4(x))         # 384*13*13 -> 384*13*13
        x = torch.relu(self.conv5(x))         # 384*13*13 -> 256*13*13
        x = torch.max_pool2d(x, 3, stride=2)  # 256*13*13 -> 256*6*6
        x = torch.flatten(x, start_dim=1)     # 256*6*6 -> 9216
        x = torch.relu(self.fc1(x))           # 9216 -> 4096
        x = torch.relu(self.fc2(x))           # 4096 -> 4096
        return self.fc3(x)                    # 4096 -> num_classes


class CustomDataset(Dataset):
    """Map-style data set over images named ``1.jpg`` .. ``<num_images>.jpg``.

    The first ``num_cats`` files are labelled 0 (cat); all remaining files
    are labelled 1 (dog).

    Args:
        image_folder: Directory that contains the numbered ``.jpg`` files.
        transform: Optional callable applied to the resized PIL image.
        num_images: Total number of images in the folder.
        num_cats: How many of the leading images are cats.
        image_size: ``(width, height)`` every image is resized to.

    Raises:
        ValueError: If ``num_cats`` is not within ``0 .. num_images``.
    """

    def __init__(self, image_folder, transform=None, num_images=25000,
                 num_cats=12500, image_size=(227, 227)):
        if not 0 <= num_cats <= num_images:
            raise ValueError(
                f"num_cats must be within 0..{num_images}, got {num_cats}"
            )
        self.image_folder = image_folder
        self.transform = transform
        self.num_images = num_images
        self.num_cats = num_cats
        self.image_size = tuple(image_size)

    def __len__(self):
        return self.num_images

    def __getitem__(self, index):
        """Return ``(image, label)`` for the zero-based ``index``.

        Raises:
            IndexError: If ``index`` is outside ``0 .. len(self) - 1``.
        """
        # An explicit IndexError lets plain iteration terminate cleanly
        # instead of trying to open a file that does not exist.
        if not 0 <= index < self.num_images:
            raise IndexError(f"index {index} is out of range")

        # File names are one-based.
        image_path = Path(self.image_folder) / (str(index + 1) + ".jpg")
        # The context manager releases the file handle immediately;
        # convert() returns an independent in-memory copy.
        with Image.open(image_path) as raw:
            image = raw.convert('L').resize(self.image_size)

        if self.transform:
            image = self.transform(image)

        label = 0 if index < self.num_cats else 1  # 0 = cat, 1 = dog
        return image, label


def train(model, loader, criterion, optimizer, device, num_epochs):
    """Train ``model`` in place and print loss/accuracy after every epoch.

    Args:
        model: Network to optimise; must already live on ``device``.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to the parameters of ``model``.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``loader``.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass.
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(
            f"Epoch [{epoch+1}/{num_epochs}], "
            f"Loss: {running_loss/len(loader):.4f}, "
            f"Accuracy: {(100 * correct / total):.2f}%"
        )


def main():
    """Build the model and data pipeline, train, and save the weights."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create the AlexNet model, the optimizer and the loss.
    model = AlexNet()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    # Load the data set.
    train_dataset = CustomDataset(TRAIN_DIR, transform=ToTensor())
    train_loader = DataLoader(
        train_dataset, batch_size=BATCH_SIZE, shuffle=True
    )

    model.to(device)
    train(model, train_loader, criterion, optimizer, device, NUM_EPOCHS)
    print('Training finished.')

    # Save the model weights.
    torch.save(model.state_dict(), MODEL_PATH)


# Guard the entry point so importing this module (e.g. to reuse AlexNet in a
# test or export script) does not start a training run.
if __name__ == "__main__":
    main()