J4、ResNet与DenseNet结合探索
- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍦 参考文章:365天深度学习训练营-第J4周:ResNet与DenseNet结合探索
- 🍖 原作者:K同学啊|接辅导、项目定制
学习要求¶
任务类型:自主探索 任务难度:偏难 任务描述:
- 根据J1~J3周的内容自由探索ResNet与DenseNet结合的可能性
- 是否可以根据两种的特性构建一个新的模型框架?
- 用之前的任一图像识别任务验证改进后模型的效果
ResNet模型的核心是通过建立前面层与后面层之间的“短路连接”(shortcuts,skip connection),进而训练出更深的CNN网络。
DenseNet模型的基本思路与ResNet一致,但是它建立的是前面所有层与后面层的密集连接(dense connection),它的名称也是由此而来。DenseNet的另一大特色是通过特征在channel上的连接来实现特征重用(feature reuse)。
可以说DenseNet是ResNet的扩展,但是使用中很容易炸显存,简单看一下二者的区别
一、设计网络¶
想自己设计一个的,课题太多实在是没法逐渐完善,搜了一下,发现已经有人对这二者进行了整合,叫做DPN,与ResNet和DenseNet的对比图如下:
DPN的理念是结合ResNet和DenseNet各自的优势:
- ResNet特征重用(因为前面特征被sum到了后面层上面)
- DenseNet容易发现新特征(将前面特征均进行了concat组合)
Dual Path Architecture(DPA)以ResNet为主要框架,保证了特征的低冗余度,并在其基础上添加了一个非常小的DenseNet分支,用于生成新的特征。DPA的具体结构可以参见下文的代码实现。
二、实现DPN¶
In [8]:
# PyTorch搭建DPN
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
__all__ = ['DPN', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'dpns']
def dpn92(num_classes=1000):
    """Build a DPN-92 model (cardinality 32, depths 3/4/20/3)."""
    cfg = dict(
        num_init_features=64,
        k_R=96,
        G=32,
        k_sec=(3, 4, 20, 3),
        inc_sec=(16, 32, 24, 128),
        num_classes=num_classes,
    )
    return DPN(**cfg)
def dpn98(num_classes=1000):
    """Build a DPN-98 model (cardinality 40, depths 3/6/20/3)."""
    cfg = dict(
        num_init_features=96,
        k_R=160,
        G=40,
        k_sec=(3, 6, 20, 3),
        inc_sec=(16, 32, 32, 128),
        num_classes=num_classes,
    )
    return DPN(**cfg)
def dpn131(num_classes=1000):
    """Build a DPN-131 model (cardinality 40, depths 4/8/28/3)."""
    cfg = dict(
        num_init_features=128,
        k_R=160,
        G=40,
        k_sec=(4, 8, 28, 3),
        inc_sec=(16, 32, 32, 128),
        num_classes=num_classes,
    )
    return DPN(**cfg)
def dpn107(num_classes=1000):
    """Build a DPN-107 model (cardinality 50, depths 4/8/20/3)."""
    cfg = dict(
        num_init_features=128,
        k_R=200,
        G=50,
        k_sec=(4, 8, 20, 3),
        inc_sec=(20, 64, 64, 128),
        num_classes=num_classes,
    )
    return DPN(**cfg)
# Registry mapping a model name string to its constructor, so callers can
# build a variant by name, e.g. dpns['dpn92'](num_classes=10).
dpns = {
    'dpn92': dpn92,
    'dpn98': dpn98,
    'dpn107': dpn107,
    'dpn131': dpn131,
}
class DualPathBlock(nn.Module):
    """DPN building block with a residual (sum) path and a dense (concat) path.

    The block computes a bottleneck 1x1 -> grouped 3x3 -> 1x1 stack; the first
    ``num_1x1_c`` output channels are added to the residual path (ResNet
    behaviour), the remaining ``inc`` channels are concatenated onto the dense
    path (DenseNet behaviour).

    Args:
        in_chs: total input channels (sum of both paths when input is a list).
        num_1x1_a: output channels of the first 1x1 bottleneck conv.
        num_3x3_b: output channels of the grouped 3x3 conv.
        num_1x1_c: width of the residual path in the block output.
        inc: channel increment this block appends to the dense path.
        G: group count for the 3x3 conv (ResNeXt-style cardinality).
        _type: 'proj' (projection shortcut, stride 1), 'down' (projection
            shortcut, stride 2) or 'normal' (identity shortcut, stride 1).

    Raises:
        ValueError: if ``_type`` is not one of the three recognised values.
            (The original code used three independent ``if`` statements, so an
            unknown ``_type`` left ``key_stride``/``has_proj`` undefined and
            failed later with a confusing UnboundLocalError.)
    """
    def __init__(self, in_chs, num_1x1_a, num_3x3_b, num_1x1_c, inc, G, _type='normal'):
        super(DualPathBlock, self).__init__()
        self.num_1x1_c = num_1x1_c
        if _type == 'proj':
            key_stride = 1
            self.has_proj = True
        elif _type == 'down':
            key_stride = 2
            self.has_proj = True
        elif _type == 'normal':
            key_stride = 1
            self.has_proj = False
        else:
            raise ValueError(
                "_type must be 'proj', 'down' or 'normal', got {!r}".format(_type))
        if self.has_proj:
            # A single projection conv produces both shortcut parts: the first
            # num_1x1_c channels for the residual path and 2*inc channels to
            # seed the dense path.
            self.c1x1_w = self.BN_ReLU_Conv(
                in_chs=in_chs, out_chs=num_1x1_c + 2 * inc,
                kernel_size=1, stride=key_stride)
        self.layers = nn.Sequential(OrderedDict([
            ('c1x1_a', self.BN_ReLU_Conv(in_chs=in_chs, out_chs=num_1x1_a,
                                         kernel_size=1, stride=1)),
            ('c3x3_b', self.BN_ReLU_Conv(in_chs=num_1x1_a, out_chs=num_3x3_b,
                                         kernel_size=3, stride=key_stride,
                                         padding=1, groups=G)),
            ('c1x1_c', self.BN_ReLU_Conv(in_chs=num_3x3_b, out_chs=num_1x1_c + inc,
                                         kernel_size=1, stride=1)),
        ]))

    def BN_ReLU_Conv(self, in_chs, out_chs, kernel_size, stride, padding=0, groups=1):
        """Pre-activation unit: BatchNorm -> ReLU -> Conv (bias-free conv)."""
        return nn.Sequential(OrderedDict([
            ('norm', nn.BatchNorm2d(in_chs)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(in_chs, out_chs, kernel_size, stride, padding,
                               groups=groups, bias=False)),
        ]))

    def forward(self, x):
        # x is either a raw tensor (first block of the network) or a
        # [residual, dense] list produced by the previous DualPathBlock.
        data_in = torch.cat(x, dim=1) if isinstance(x, list) else x
        if self.has_proj:
            data_o = self.c1x1_w(data_in)
            data_o1 = data_o[:, :self.num_1x1_c, :, :]  # residual shortcut
            data_o2 = data_o[:, self.num_1x1_c:, :, :]  # dense shortcut
        else:
            # 'normal' blocks keep both paths as-is; x must be a list here.
            data_o1 = x[0]
            data_o2 = x[1]
        out = self.layers(data_in)
        # Residual path: element-wise sum (ResNet-style feature reuse).
        summ = data_o1 + out[:, :self.num_1x1_c, :, :]
        # Dense path: channel concat (DenseNet-style new-feature discovery).
        dense = torch.cat([data_o2, out[:, self.num_1x1_c:, :, :]], dim=1)
        return [summ, dense]
class DPN(nn.Module):
    """Dual Path Network: a 7x7 stem followed by four stages of DualPathBlocks.

    Each stage s uses a fixed residual-path width bw (256/512/1024/2048), a
    dense-path increment inc_sec[s], k_sec[s] blocks, and bottleneck width
    R = k_R * bw / 256. The first block of stage 1 uses a 'proj' shortcut
    (stride 1); the first block of later stages uses 'down' (stride 2).

    Args:
        num_init_features: output channels of the stem conv.
        k_R: bottleneck width scale (R = k_R * bw / 256 per stage).
        G: group count for all grouped 3x3 convs.
        k_sec: number of blocks per stage.
        inc_sec: dense-path channel increment per stage.
        num_classes: size of the final classifier.
    """
    def __init__(self, num_init_features=64, k_R=96, G=32,
                 k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128), num_classes=1000):
        super(DPN, self).__init__()
        blocks = OrderedDict()
        # Stem: 7x7/2 conv + 3x3/2 max-pool (4x spatial reduction).
        blocks['conv1'] = nn.Sequential(
            nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(num_init_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        # Build the four stages with one loop instead of four copy-pasted
        # sections; the per-stage arithmetic is identical to the original.
        in_chs = num_init_features
        for stage, (bw, inc, depth) in enumerate(zip((256, 512, 1024, 2048), inc_sec, k_sec), start=2):
            R = int((k_R * bw) / 256)
            # Stage conv2 keeps the resolution ('proj'); later stages halve it.
            first_type = 'proj' if stage == 2 else 'down'
            blocks['conv{}_1'.format(stage)] = DualPathBlock(in_chs, R, R, bw, inc, G, first_type)
            # After the first block: residual path bw + dense path 3*inc
            # (2*inc from the projection shortcut + inc from the block).
            in_chs = bw + 3 * inc
            for i in range(2, depth + 1):
                blocks['conv{}_{}'.format(stage, i)] = DualPathBlock(in_chs, R, R, bw, inc, G, 'normal')
                in_chs += inc  # each 'normal' block grows the dense path by inc
        self.features = nn.Sequential(blocks)
        self.classifier = nn.Linear(in_chs, num_classes)

    def forward(self, x):
        # The stages output [residual, dense]; fuse them for classification.
        features = torch.cat(self.features(x), dim=1)
        # Global average pooling. adaptive_avg_pool2d(..., 1) equals the
        # original avg_pool2d(kernel_size=7) for 224x224 inputs (7x7 final
        # map) but also accepts other input resolutions.
        out = F.adaptive_avg_pool2d(features, 1).view(features.size(0), -1)
        out = self.classifier(out)
        return out
In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
Using cuda device
In [10]:
model = DPN().to(device)
model
Out[10]:
DPN(
(features): Sequential(
(conv1): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
(conv2_1): DualPathBlock(
(c1x1_w): Sequential(
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(64, 288, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv2_2): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(304, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(304, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv2_3): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(320, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv3_1): DualPathBlock(
(c1x1_w): Sequential(
(norm): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(336, 576, kernel_size=(1, 1), stride=(2, 2), bias=False)
)
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(336, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv3_2): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(608, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(608, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv3_3): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(640, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv3_4): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_1): DualPathBlock(
(c1x1_w): Sequential(
(norm): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(704, 1072, kernel_size=(1, 1), stride=(2, 2), bias=False)
)
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(704, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_2): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1096, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_3): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1120, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_4): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1144, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_5): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1168, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1168, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_6): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1192, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_7): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1216, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_8): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1240, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_9): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1264, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1264, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_10): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1288, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_11): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1312, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1312, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_12): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1336, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_13): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1360, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1360, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_14): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_15): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1408, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1408, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_16): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1432, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1432, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_17): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1456, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1456, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_18): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1480, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_19): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1504, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1504, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv4_20): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1528, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1528, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv5_1): DualPathBlock(
(c1x1_w): Sequential(
(norm): BatchNorm2d(1552, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1552, 2304, kernel_size=(1, 1), stride=(2, 2), bias=False)
)
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(1552, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(1552, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv5_2): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(2432, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(2432, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
(conv5_3): DualPathBlock(
(layers): Sequential(
(c1x1_a): Sequential(
(norm): BatchNorm2d(2560, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(2560, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
(c3x3_b): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
)
(c1x1_c): Sequential(
(norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
)
)
)
)
(classifier): Linear(in_features=2688, out_features=1000, bias=True)
)
三、训练结果¶
In [ ]:
四、总结¶
DPN的结构并没有太复杂:它通过类似Inception的多分支思路,把残差(sum)路径与密集连接(concat)路径合并在同一个block中,本质上是一种模型集成的方式。