GoogLeNet
GoogLeNet
简介
GoogLeNet在2014年由Google团队提出,斩获当年ImageNet竞赛中Classification Task(分类任务)第一名。
GoogLeNet网络的优点
GoogLeNet模型架构
| type | patch size/stride | output size | depth | #1×1 | #3×3 reduce | #3×3 | #5×5 reduce | #5×5 | pool proj | params | ops |
|---|---|---|---|---|---|---|---|---|---|---|---|
| convolution | 7×7/2 | 112×112×64 | 1 | | | | | | | 2.7K | 34M |
| max pool | 3×3/2 | 56×56×64 | 0 | | | | | | | | |
| convolution | 3×3/1 | 56×56×192 | 2 | | 64 | 192 | | | | 112K | 360M |
| max pool | 3×3/2 | 28×28×192 | 0 | | | | | | | | |
| inception(3a) | | 28×28×256 | 2 | 64 | 96 | 128 | 16 | 32 | 32 | 159K | 128M |
| inception(3b) | | 28×28×480 | 2 | 128 | 128 | 192 | 32 | 96 | 64 | 380K | 304M |
| max pool | 3×3/2 | 14×14×480 | 0 | | | | | | | | |
| inception(4a) | | 14×14×512 | 2 | 192 | 96 | 208 | 16 | 48 | 64 | 364K | 73M |
| inception(4b) | | 14×14×512 | 2 | 160 | 112 | 224 | 24 | 64 | 64 | 437K | 88M |
| inception(4c) | | 14×14×512 | 2 | 128 | 128 | 256 | 24 | 64 | 64 | 463K | 100M |
| inception(4d) | | 14×14×528 | 2 | 112 | 144 | 288 | 32 | 64 | 64 | 580K | 119M |
| inception(4e) | | 14×14×832 | 2 | 256 | 160 | 320 | 32 | 128 | 128 | 840K | 170M |
| max pool | 3×3/2 | 7×7×832 | 0 | | | | | | | | |
| inception(5a) | | 7×7×832 | 2 | 256 | 160 | 320 | 32 | 128 | 128 | 1072K | 54M |
| inception(5b) | | 7×7×1024 | 2 | 384 | 192 | 384 | 48 | 128 | 128 | 1388K | 71M |
| avgpool | 7×7/1 | 1×1×1024 | 0 | | | | | | | | |
| dropout(40%) | | 1×1×1024 | 0 | | | | | | | | |
| linear | | 1×1×1000 | 1 | | | | | | | 1000K | 1M |
| softmax | | 1×1×1000 | 0 | | | | | | | | |
Inception结构
辅助分类器(Auxiliary Classifier)
- An average pooling layer with \(5\times5\) filter size and stride 3, resulting in a \(4\times4\times512\) output for the (4a), and \(4\times4\times528\) for the (4d) stage.
- A \(1\times 1\) convolution with 128 filters for dimension reduction and rectified linear activation.
- A fully connected layer with 1024 units and rectified linear activation.
- A dropout layer with \(70\%\) ratio of dropped outputs.
- A linear layer with softmax loss as the classifier (predicting the same 1000 classes as the main classifier, but removed at inference time).
A schematic view of the resulting network is depicted in Figure 3.
卷积后图像大小:
\(out_{size}=(in_{size}-F_{size}+2P)/S+1\)
GoogLeNet整体架构
使用PyTorch搭建GoogLeNet网络
编写Inception块
# Convolution layer with a built-in activation function
class BasicConv2d(nn.Module):
    """2-D convolution immediately followed by an in-place ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of convolution filters (output channels).
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size,
            stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, then the ReLU, and return the result."""
        features = self.conv(x)
        return self.relu(features)
# Inception block
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        in_channels: channels of the input feature map.
        ch1x1: filters of the 1x1 branch.
        ch3x3red: filters of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: filters of the 3x3 convolution.
        ch5x5red: filters of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: filters of the 5x5 convolution.
        pool_proj: filters of the 1x1 projection after the max pooling.

    Every branch keeps the spatial size of its input, so the output has
    ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels at the input resolution.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super(Inception, self).__init__()
        # Branch 1: plain 1x1 convolution
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
        # Branch 2: 1x1 reduction followed by a 3x3 convolution
        self.branch2 = nn.Sequential(
            BasicConv2d(in_channels, ch3x3red, kernel_size=1),
            # padding='same' keeps the output the same size as the input
            BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding='same')
        )
        # Branch 3: 1x1 reduction followed by a 5x5 convolution
        self.branch3 = nn.Sequential(
            BasicConv2d(in_channels, ch5x5red, kernel_size=1),
            BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding='same')
        )
        # Branch 4: 3x3 max pooling followed by a 1x1 projection.
        # nn.MaxPool2d does not accept padding='same'; with a 3x3 window and
        # stride 1, padding=1 preserves the spatial size.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(in_channels, pool_proj, kernel_size=1)
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate their outputs."""
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        # dim=1 is the channel dimension; the branches are joined there
        return torch.cat([branch1, branch2, branch3, branch4], 1)
编写辅助分类器
class InceptionAux(nn.Module):
    """Auxiliary classifier attached to an intermediate Inception stage.

    Args:
        in_channels: channels of the feature map fed to the classifier.
        num_classes: size of the output layer.

    The first fully connected layer expects 2048 features, i.e. the
    128 x 4 x 4 map produced from a 14 x 14 input.
    """

    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        # Average pooling layer
        self.avgPool = nn.AvgPool2d(kernel_size=5, stride=3)
        # 1x1 convolution reducing the channels to 128
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        # Fully connected layers (input: 128 * 4 * 4 = 2048 features)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return the auxiliary logits of shape ``N x num_classes``."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = self.avgPool(x)
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = torch.dropout(x, 0.7, train=self.training)
        # torch.relu has no ``inplace`` keyword; passing one raises TypeError
        x = torch.relu(self.fc1(x))
        # N x 1024
        x = torch.dropout(x, 0.7, train=self.training)
        x = self.fc2(x)
        # N x num_classes
        return x
编写GoogLeNet网络
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with optional auxiliary heads.

    Args:
        num_classes: size of the output layer.
        aux_logits: when True, build the two auxiliary classifiers; they are
            only evaluated while the module is in training mode.
        init_weights: when True, run ``_initialize_weights`` after building.

    ``forward`` returns ``(logits, aux2_logits, aux1_logits)`` in training
    mode with ``aux_logits=True`` and only ``logits`` otherwise.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits

        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # The main classifier uses 40% dropout in the GoogLeNet architecture;
        # the 70% rate applies to the auxiliary classifiers only.
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Compute the class logits; shape comments assume a 224 x 224 input."""
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        if self.training and self.aux_logits:  # skipped in eval mode
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        if self.training and self.aux_logits:  # skipped in eval mode
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x num_classes
        if self.training and self.aux_logits:  # skipped in eval mode
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Initialise conv weights with Kaiming uniform and linear weights with N(0, 0.01)."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
模型总代码
import torch
import torch.nn as nn
import torch.nn.functional as F
# Convolution layer with a built-in activation function
class BasicConv2d(nn.Module):
    """2-D convolution immediately followed by an in-place ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of convolution filters (output channels).
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (kernel_size,
            stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution, then the ReLU, and return the result."""
        features = self.conv(x)
        return self.relu(features)
# Inception block
class Inception(nn.Module):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        in_channels: channels of the input feature map.
        ch1x1: filters of the 1x1 branch.
        ch3x3red: filters of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: filters of the 3x3 convolution.
        ch5x5red: filters of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: filters of the 5x5 convolution.
        pool_proj: filters of the 1x1 projection after the max pooling.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: plain 1x1 convolution
        self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 conv (padding=1 keeps the size)
        reduce_3x3 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
        conv_3x3 = BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 reduction, then 5x5 conv (padding=2 keeps the size)
        reduce_5x5 = BasicConv2d(in_channels, ch5x5red, kernel_size=1)
        conv_5x5 = BasicConv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: stride-1 max pooling, then 1x1 projection
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = BasicConv2d(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        """Run the four branches on ``x`` and join them on the channel axis."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        # dim=1 is the channel dimension
        return torch.cat([branch(x) for branch in branches], 1)
class InceptionAux(nn.Module):
    """Auxiliary classifier attached to an intermediate Inception stage.

    Args:
        in_channels: channels of the feature map fed to the classifier.
        num_classes: size of the output layer.

    The first fully connected layer expects 2048 features, i.e. the
    128 x 4 x 4 map produced from a 14 x 14 input.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Average pooling, then a 1x1 conv that reduces to 128 channels
        self.avgPool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        # Two fully connected layers
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return the auxiliary logits of shape ``N x num_classes``."""
        # aux1 input: N x 512 x 14 x 14, aux2 input: N x 528 x 14 x 14
        pooled = self.avgPool(x)
        # N x C x 4 x 4 -> N x 128 x 4 x 4
        reduced = self.conv(pooled)
        # N x 2048
        flat = torch.flatten(reduced, 1)
        flat = torch.dropout(flat, 0.7, train=self.training)
        # N x 1024
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.dropout(hidden, 0.7, train=self.training)
        # N x num_classes
        return self.fc2(hidden)
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) classifier with optional auxiliary heads.

    Args:
        num_classes: size of the output layer.
        aux_logits: when True, build the two auxiliary classifiers; they are
            only evaluated while the module is in training mode.
        init_weights: when True, run ``_initialize_weights`` after building.

    ``forward`` returns ``(logits, aux2_logits, aux1_logits)`` in training
    mode with ``aux_logits=True`` and only ``logits`` otherwise.
    """

    def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
        super(GoogLeNet, self).__init__()
        self.aux_logits = aux_logits

        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.conv2 = BasicConv2d(64, 64, kernel_size=1)
        self.conv3 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # The main classifier uses 40% dropout in the GoogLeNet architecture;
        # the 70% rate applies to the auxiliary classifiers only.
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Compute the class logits; shape comments assume a 224 x 224 input."""
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        if self.training and self.aux_logits:  # skipped in eval mode
            aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        if self.training and self.aux_logits:  # skipped in eval mode
            aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x num_classes
        if self.training and self.aux_logits:  # skipped in eval mode
            return x, aux2, aux1
        return x

    def _initialize_weights(self):
        """Initialise conv weights with Kaiming uniform and linear weights with N(0, 0.01)."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
训练模型脚本
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import GoogLeNet
from utils import read_split_data, ImageDataset, compute_mean_std
def main():
    """Train GoogLeNet on the flower dataset and keep the best checkpoint.

    Each epoch runs one pass over the training set (main loss plus the two
    auxiliary losses weighted by 0.3) followed by a validation pass; the
    weights are saved whenever the validation accuracy improves.
    """
    # Select the training device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('using {} device.'.format(device))

    # Dataset location
    image_path = r"D:\卷积神经网络PPT\AlexNet\Alex_tf\data\flower_photos"
    # Split the images into training and validation lists
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root=image_path)
    # Mean / std dictionary computed from the training images
    mean_std = compute_mean_std(train_images_path)

    # Augmentation and preprocessing for the training and validation sets
    data_transform = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),  # random crop, then resize
            transforms.RandomHorizontalFlip(),  # random horizontal flip
            transforms.ToTensor(),
            transforms.Normalize(mean_std['mean'], mean_std['std'])  # standardise
        ]),
        'val': transforms.Compose([
            transforms.Resize((224, 224)),  # resize to the network input size
            transforms.ToTensor(),
            transforms.Normalize(mean_std['mean'], mean_std['std'])
        ])
    }

    # Batch size
    batch_size = 32
    # Number of data-loading worker processes
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('using {} dataloader workers every process'.format(nw))

    # Training set and its loader. DataLoader is the stable loader API;
    # the experimental DataLoader2 is not available in current releases.
    train_dataset = ImageDataset(train_images_path, train_images_label, transform=data_transform['train'])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw)

    # Validation set and its loader
    valid_dataset = ImageDataset(val_images_path, val_images_label, transform=data_transform['val'])
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               pin_memory=True,
                                               num_workers=nw)
    # Number of validation samples
    val_num = len(valid_dataset)

    net = GoogLeNet(num_classes=5, aux_logits=True, init_weights=True)
    # To use the official pretrained weights, load them into the official
    # torchvision model, not into this implementation: the official model
    # uses BN layers and different parameters, so the two cannot be mixed.
    # import torchvision
    # net = torchvision.models.googlenet(num_classes=5)
    # model_dict = net.state_dict()
    # # pretrained weights: https://download.pytorch.org/models/googlenet-1378be20.pth
    # pretrain_model = torch.load("googlenet.pth")
    # del_list = ["aux1.fc2.weight", "aux1.fc2.bias",
    #             "aux2.fc2.weight", "aux2.fc2.bias",
    #             "fc.weight", "fc.bias"]
    # pretrain_dict = {k: v for k, v in pretrain_model.items() if k not in del_list}
    # model_dict.update(pretrain_dict)
    # net.load_state_dict(model_dict)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0003)

    epochs = 30
    best_acc = 0.0
    save_path = './googleNet.pth'
    train_steps = len(train_loader)

    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            # Move the labels once instead of once per loss term
            labels = labels.to(device)
            optimizer.zero_grad()
            logits, aux_logits2, aux_logits1 = net(images.to(device))
            loss0 = loss_function(logits, labels)
            loss1 = loss_function(aux_logits1, labels)
            loss2 = loss_function(aux_logits2, labels)
            # Auxiliary losses are down-weighted by 0.3
            loss = loss0 + loss1 * 0.3 + loss2 * 0.3
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # number of correct predictions in this epoch
        with torch.no_grad():
            val_bar = tqdm(valid_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))  # eval mode returns only the main output
                predict_y = torch.argmax(outputs, 1)
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights with the best validation accuracy so far
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
运行结果:
train epoch[29/30] loss:1.303: 100%|██████████| 92/92 [00:28<00:00, 3.19it/s]
100%|██████████| 23/23 [00:18<00:00, 1.28it/s]
[epoch 29] train_loss: 0.995 val_accuracy: 0.795
train epoch[30/30] loss:1.112: 100%|██████████| 92/92 [00:26<00:00, 3.48it/s]
100%|██████████| 23/23 [00:17<00:00, 1.35it/s]
[epoch 30] train_loss: 0.946 val_accuracy: 0.793
Finished Training
预测脚本
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import GoogLeNet
def main():
    """Classify one image with the trained GoogLeNet and plot the result."""
    device = torch.device('cpu')

    # NOTE(review): the training script normalises with the statistics
    # returned by compute_mean_std, while fixed 0.5 values are used here.
    # Confirm which statistics the saved weights expect.
    data_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    img_path = r'D:/卷积神经网络PPT/AlexNet/Alex_Torch/img.png'
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    # Force three channels: a PNG may be RGBA or grayscale, which would not
    # match the 3-channel Normalize above or the network's first convolution.
    img = Image.open(img_path).convert('RGB')
    plt.imshow(img)
    img = data_transform(img)
    # Add a batch dimension -> [batch_size, channel, height, width]
    img = torch.unsqueeze(img, dim=0)

    # Read the class-index json file
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, 'r') as f:
        class_indict = json.load(f)

    # Build the model without auxiliary classifiers
    model = GoogLeNet(num_classes=5, aux_logits=False)
    # Load the trained weights; strict=False because the checkpoint also
    # holds auxiliary-classifier weights that this model does not have
    weights_path = './googleNet.pth'
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device), strict=False)

    # Switch to evaluation mode
    model.eval()
    # No gradient tracking during inference
    with torch.no_grad():
        outputs = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(outputs, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {} prob:{:.3}".format(class_indict[str(predict_cla)],
                                              predict[predict_cla].numpy())
    plt.grid(False)
    plt.axis(False)
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
运行结果:
class: daisy prob: 0.0728
class: dandelion prob: 0.175
class: roses prob: 0.0419
class: sunflowers prob: 0.0971
class: tulips prob: 0.613
使用TensorFlow搭建GoogLeNet
Inception块
# Inception block
class Inception(layers.Layer):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        ch1x1: filters of the 1x1 branch.
        ch3x3red: filters of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: filters of the 3x3 convolution.
        ch5x5red: filters of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: filters of the 5x5 convolution.
        pool_proj: filters of the 1x1 projection after the max pooling.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation='relu')
        # padding='same' keeps the output the same size as the input
        self.branch2 = Sequential([
            layers.Conv2D(ch3x3red, kernel_size=1, activation='relu'),
            layers.Conv2D(ch3x3, kernel_size=3, padding='same', activation='relu')
        ])
        # The 5x5 conv also needs padding='same'; with the default 'valid'
        # padding its output is smaller than the other branches and the
        # concatenation below fails.
        self.branch3 = Sequential([
            layers.Conv2D(ch5x5red, kernel_size=1, activation='relu'),
            layers.Conv2D(ch5x5, kernel_size=5, padding='same', activation='relu')
        ])
        self.branch4 = Sequential([
            layers.MaxPool2D(pool_size=3, strides=1, padding='same'),
            layers.Conv2D(pool_proj, kernel_size=1, activation='relu')
        ])

    # Forward pass; the branch outputs are concatenated along the channels
    def call(self, x, **kwargs):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        return layers.concatenate([branch1, branch2, branch3, branch4])
辅助分类器
# Auxiliary classifier
class InceptionAux(layers.Layer):
    """Auxiliary classifier attached to an intermediate Inception stage.

    Args:
        num_classes: size of the output layer.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    # Was misspelled ``__int__``, so the constructor never ran and none of
    # the sub-layers existed.
    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        # Named ``averagePool`` because that is the attribute ``call`` reads
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, activation='relu')
        self.fc1 = layers.Dense(1024, activation='relu')
        self.fc2 = layers.Dense(num_classes)
        self.softmax = layers.Softmax()

    # Forward pass (shape comments assume a 14 x 14 input feature map, NHWC)
    def call(self, inputs, **kwargs):
        # aux1: N x 14 x 14 x 512, aux2: N x 14 x 14 x 528
        x = self.averagePool(inputs)
        # aux1: N x 4 x 4 x 512, aux2: N x 4 x 4 x 528
        x = self.conv(x)
        # N x 4 x 4 x 128
        x = layers.Flatten()(x)
        # N x 2048; 70% dropout as specified for the auxiliary classifier
        x = layers.Dropout(rate=0.7)(x)
        x = self.fc1(x)
        # N x 1024
        x = layers.Dropout(rate=0.7)(x)
        x = self.fc2(x)
        # N x num_classes
        x = self.softmax(x)
        return x
GoogLeNet网络
def GoogLeNet(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    """Build the GoogLeNet (Inception v1) Keras functional model.

    Args:
        im_height: input image height.
        im_width: input image width.
        class_num: number of output classes.
        aux_logits: when True the model also outputs the predictions of the
            two auxiliary classifiers.

    Returns:
        A ``models.Model`` whose output is the softmax prediction, or the
        list ``[aux1, aux2, main]`` when ``aux_logits`` is True.

    Shape comments assume the default 224 x 224 input; tensors are NHWC.
    """
    inputs = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    aux_outputs = []

    # Stem: (None, 224, 224, 3) -> (None, 112, 112, 64)
    net = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME", activation="relu", name="conv2d_1")(inputs)
    # -> (None, 56, 56, 64)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_1")(net)
    # -> (None, 56, 56, 64)
    net = layers.Conv2D(64, kernel_size=1, activation="relu", name="conv2d_2")(net)
    # -> (None, 56, 56, 192)
    net = layers.Conv2D(192, kernel_size=3, padding="SAME", activation="relu", name="conv2d_3")(net)
    # -> (None, 28, 28, 192)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_2")(net)

    # Stage 3: -> (None, 28, 28, 256) -> (None, 28, 28, 480)
    net = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(net)
    net = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(net)
    # -> (None, 14, 14, 480)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_3")(net)

    # Stage 4: -> (None, 14, 14, 512)
    net = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(net)
    if aux_logits:
        aux_outputs.append(InceptionAux(class_num, name="aux_1")(net))
    # -> (None, 14, 14, 512)
    net = Inception(160, 112, 224, 24, 64, 64, name="inception_4b")(net)
    # -> (None, 14, 14, 512)
    net = Inception(128, 128, 256, 24, 64, 64, name="inception_4c")(net)
    # -> (None, 14, 14, 528)
    net = Inception(112, 144, 288, 32, 64, 64, name="inception_4d")(net)
    if aux_logits:
        aux_outputs.append(InceptionAux(class_num, name="aux_2")(net))
    # -> (None, 14, 14, 832)
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(net)
    # -> (None, 7, 7, 832)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_4")(net)

    # Stage 5: -> (None, 7, 7, 832) -> (None, 7, 7, 1024)
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(net)
    net = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(net)

    # Classifier head: -> (None, 1, 1, 1024) -> (None, 1024) -> (None, class_num)
    net = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(net)
    net = layers.Flatten(name="output_flatten")(net)
    net = layers.Dropout(rate=0.4, name="output_dropout")(net)
    net = layers.Dense(class_num, name="output_dense")(net)
    main_output = layers.Softmax(name="aux_3")(net)

    if not aux_logits:
        return models.Model(inputs=inputs, outputs=main_output)
    return models.Model(inputs=inputs, outputs=aux_outputs + [main_output])
模型总代码
from tensorflow.keras import layers, models, Model, Sequential
def GoogLeNet(im_height=224, im_width=224, num_classes=1000, aux_logits=False):
    """Build the GoogLeNet (Inception v1) Keras functional model.

    Args:
        im_height: input image height.
        im_width: input image width.
        num_classes: number of output classes.
        aux_logits: when True the model also outputs the predictions of the
            two auxiliary classifiers.

    Returns:
        A ``models.Model`` whose output is the softmax prediction, or the
        list ``[aux1, aux2, main]`` when ``aux_logits`` is True.

    Shape comments assume the default 224 x 224 input; tensors are ordered
    [batch, height, width, channel].
    """
    inputs = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    aux_outputs = []

    # Stem: (None, 224, 224, 3) -> (None, 112, 112, 64)
    net = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME", activation="relu", name="conv2d_1")(inputs)
    # -> (None, 56, 56, 64)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_1")(net)
    # -> (None, 56, 56, 64)
    net = layers.Conv2D(64, kernel_size=1, activation="relu", name="conv2d_2")(net)
    # -> (None, 56, 56, 192)
    net = layers.Conv2D(192, kernel_size=3, padding="SAME", activation="relu", name="conv2d_3")(net)
    # -> (None, 28, 28, 192)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_2")(net)

    # Stage 3: -> (None, 28, 28, 256) -> (None, 28, 28, 480)
    net = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(net)
    net = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(net)
    # -> (None, 14, 14, 480)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_3")(net)

    # Stage 4: -> (None, 14, 14, 512)
    net = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(net)
    if aux_logits:
        aux_outputs.append(InceptionAux(num_classes, name="aux_1")(net))
    # -> (None, 14, 14, 512)
    net = Inception(160, 112, 224, 24, 64, 64, name="inception_4b")(net)
    # -> (None, 14, 14, 512)
    net = Inception(128, 128, 256, 24, 64, 64, name="inception_4c")(net)
    # -> (None, 14, 14, 528)
    net = Inception(112, 144, 288, 32, 64, 64, name="inception_4d")(net)
    if aux_logits:
        aux_outputs.append(InceptionAux(num_classes, name="aux_2")(net))
    # -> (None, 14, 14, 832)
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(net)
    # -> (None, 7, 7, 832)
    net = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_4")(net)

    # Stage 5: -> (None, 7, 7, 832) -> (None, 7, 7, 1024)
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(net)
    net = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(net)

    # Classifier head: -> (None, 1, 1, 1024) -> (None, 1024) -> (None, num_classes)
    net = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(net)
    net = layers.Flatten(name="output_flatten")(net)
    net = layers.Dropout(rate=0.4, name="output_dropout")(net)
    net = layers.Dense(num_classes, name="output_dense")(net)
    main_output = layers.Softmax(name="aux_3")(net)

    if not aux_logits:
        return models.Model(inputs=inputs, outputs=main_output)
    return models.Model(inputs=inputs, outputs=aux_outputs + [main_output])
class Inception(layers.Layer):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        ch1x1: filters of the 1x1 branch.
        ch3x3red: filters of the 1x1 reduction in front of the 3x3 conv.
        ch3x3: filters of the 3x3 convolution.
        ch5x5red: filters of the 1x1 reduction in front of the 5x5 conv.
        ch5x5: filters of the 5x5 convolution.
        pool_proj: filters of the 1x1 projection after the max pooling.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).

    All branches use "SAME" padding or 1x1 kernels, so each output keeps
    the spatial size of the input.
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super().__init__(**kwargs)

        # Branch 1: plain 1x1 convolution
        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation="relu")

        # Branch 2: 1x1 reduction followed by a 3x3 convolution
        reduce_3x3 = layers.Conv2D(ch3x3red, kernel_size=1, activation="relu")
        conv_3x3 = layers.Conv2D(ch3x3, kernel_size=3, padding="SAME", activation="relu")
        self.branch2 = Sequential([reduce_3x3, conv_3x3])

        # Branch 3: 1x1 reduction followed by a 5x5 convolution
        reduce_5x5 = layers.Conv2D(ch5x5red, kernel_size=1, activation="relu")
        conv_5x5 = layers.Conv2D(ch5x5, kernel_size=5, padding="SAME", activation="relu")
        self.branch3 = Sequential([reduce_5x5, conv_5x5])

        # Branch 4: max pooling followed by a 1x1 projection.
        # strides=1 is explicit because the default stride equals pool_size.
        pool = layers.MaxPool2D(pool_size=3, strides=1, padding="SAME")
        projection = layers.Conv2D(pool_proj, kernel_size=1, activation="relu")
        self.branch4 = Sequential([pool, projection])

    def call(self, inputs, **kwargs):
        """Run the four branches on ``inputs`` and concatenate the results."""
        branch_layers = (self.branch1, self.branch2, self.branch3, self.branch4)
        return layers.concatenate([branch(inputs) for branch in branch_layers])
class InceptionAux(layers.Layer):
    """Auxiliary classifier attached to an intermediate Inception stage.

    Args:
        num_classes: size of the output layer.
        **kwargs: forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, activation="relu")
        self.fc1 = layers.Dense(1024, activation="relu")
        self.fc2 = layers.Dense(num_classes)
        self.softmax = layers.Softmax()

    def call(self, inputs, **kwargs):
        """Return the softmax prediction of shape ``N x num_classes``.

        Shape comments assume a 14 x 14 input feature map.
        """
        # aux1 input has 512 channels, aux2 input has 528 channels
        pooled = self.averagePool(inputs)
        # 4 x 4 spatial positions, reduced to 128 channels
        reduced = self.conv(pooled)
        # N x 2048
        flat = layers.Flatten()(reduced)
        # N x 1024
        hidden = self.fc1(layers.Dropout(rate=0.7)(flat))
        # N x num_classes
        logits = self.fc2(layers.Dropout(rate=0.7)(hidden))
        return self.softmax(logits)
训练脚本
import matplotlib.pyplot as plt
from model import GoogLeNet
import tensorflow as tf
import json
import os
import time
import random
from utils import read_split_data
# Order CUDA devices by PCI bus ID
os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
# Make only device 0 visible to this process
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
def main():
    """Train the Keras GoogLeNet with a custom loop and save the best weights.

    Each epoch runs ``train_num // batch_size`` training steps and
    ``valid_num // batch_size`` validation steps (at least one of each);
    weights are written whenever the validation loss improves.
    """
    # Enable on-demand GPU memory allocation
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
            exit(-1)

    image_path = r"D:\卷积神经网络PPT\AlexNet\Alex_tf\data\flower_photos"
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(root=image_path,
                                                                                               val_rate=0.2)

    if not os.path.exists('weights'):
        os.mkdir('weights')

    train_num = len(train_images_path)
    valid_num = len(val_images_path)

    im_height = 224
    im_width = 224
    batch_size = 64
    epochs = 30

    # class dict
    with open('./class_indices.json') as f:
        cla_dict = json.load(f)
    num_classes = len(cla_dict)

    print('using {} images for training, {} images for validation'.format(len(train_images_path), len(val_images_path)))

    def process_train_img(img_path, label):
        """Load, resize, randomly flip and scale a training image to [-1, 1]."""
        label = tf.one_hot(label, depth=num_classes)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = tf.image.random_flip_left_right(image)
        image = (image - 0.5) / 0.5
        return image, label

    def process_valid_img(img_path, label):
        """Load, resize and scale a validation image to [-1, 1]."""
        label = tf.one_hot(label, depth=num_classes)
        image = tf.io.read_file(img_path)
        image = tf.image.decode_jpeg(image)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [im_height, im_width])
        image = (image - 0.5) / 0.5
        return image, label

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    # Training data pipeline
    train_ds = tf.data.Dataset.from_tensor_slices((train_images_path, train_images_label))
    train_ds = train_ds.shuffle(buffer_size=train_num) \
        .map(process_train_img, num_parallel_calls=AUTOTUNE) \
        .repeat().batch(batch_size).prefetch(AUTOTUNE)

    # Validation data pipeline
    valid_ds = tf.data.Dataset.from_tensor_slices((val_images_path, val_images_label))
    valid_ds = valid_ds.map(process_valid_img, num_parallel_calls=AUTOTUNE) \
        .repeat() \
        .batch(batch_size)

    # Build the model from the configured input size and class count
    model = GoogLeNet(im_height=im_height, im_width=im_width, num_classes=num_classes, aux_logits=True)
    model.summary()

    # Train with the low-level API
    criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_acc = tf.keras.metrics.CategoricalAccuracy(name='train_acc')

    valid_loss = tf.keras.metrics.Mean(name='valid_loss')
    valid_acc = tf.keras.metrics.CategoricalAccuracy(name='valid_acc')

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            aux1, aux2, output = model(images, training=True)
            loss1 = criterion(labels, aux1)
            loss2 = criterion(labels, aux2)
            loss3 = criterion(labels, output)
            # Auxiliary losses are down-weighted by 0.3
            loss = loss1 * 0.3 + loss2 * 0.3 + loss3
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_acc(labels, output)

    @tf.function
    def valid_step(images, labels):
        _, _, output = model(images, training=False)
        v_loss = criterion(labels, output)

        valid_loss(v_loss)
        valid_acc(labels, output)

    best_valid_loss = float('inf')

    # Both datasets repeat forever, so every epoch must be bounded
    # explicitly; max(1, ...) still runs one step for tiny datasets.
    train_step_num = max(1, train_num // batch_size)
    valid_step_num = max(1, valid_num // batch_size)
    for epoch in range(1, epochs + 1):
        train_loss.reset_states()
        train_acc.reset_states()
        valid_loss.reset_states()
        valid_acc.reset_states()

        t1 = time.perf_counter()
        # take() bounds the epoch. The previous ``idx + 1 % n == 0`` test
        # parsed as ``idx + (1 % n)`` and never stopped the loop.
        for images, labels in train_ds.take(train_step_num):
            train_step(images, labels)
        print(time.perf_counter() - t1)

        for images, labels in valid_ds.take(valid_step_num):
            valid_step(images, labels)

        template = 'Epoch\t{}\tTrain Loss\t{}Train Acc\t{}\tValid Loss\t{}\tValid Acc{}'
        print(template.format(epoch,
                              train_loss.result(),
                              train_acc.result() * 100,
                              valid_loss.result(),
                              valid_acc.result() * 100))

        # Record the new best loss so later, worse epochs do not overwrite
        # the saved weights
        epoch_valid_loss = float(valid_loss.result())
        if epoch_valid_loss < best_valid_loss:
            best_valid_loss = epoch_valid_loss
            model.save_weights('./weights/myGoogLeNet.ckpt', save_format='tf')


if __name__ == '__main__':
    main()
预测脚本
import os
import json
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from model import GoogLeNet
def main():
    """Classify one image with the trained Keras GoogLeNet and plot the result."""
    im_height = 224
    im_width = 224

    # Read the image and preprocess it
    img_path = r'D:/卷积神经网络PPT/AlexNet/Alex_Torch/img.png'
    img = Image.open(img_path).convert("RGB")
    # PIL expects the target size as (width, height)
    img = img.resize((im_width, im_height))
    plt.imshow(img)

    # Scale to [0, 1] and then to [-1, 1], matching the (image - 0.5) / 0.5
    # step of the training pipeline
    img = np.asarray(img) / 255.
    img = (img - 0.5) / 0.5
    # Add the batch dimension
    img = np.expand_dims(img, 0)

    # Read the index-to-class mapping
    json_path = './class_indices.json'
    with open(json_path, 'r') as f:
        class_dict = json.load(f)

    # Build the model and load the weights
    model = GoogLeNet(im_height=224, im_width=224, num_classes=5, aux_logits=False)
    weights_path = './weights/myGoogLeNet.ckpt'
    model.load_weights(weights_path)

    # Drop the batch dimension
    result = np.squeeze(model.predict(img))
    predict_class = np.argmax(result)  # index of the highest probability

    # Show the predicted class and its probability
    print_res = "class: {} prob: {:.3}".format(class_dict[str(predict_class)],
                                               result[predict_class])
    plt.title(print_res)
    for i in range(len(result)):
        print("class: {:10} prob: {:.3}".format(class_dict[str(i)],
                                                result[i]))
    plt.show()


if __name__ == '__main__':
    main()