目标检测——SSD模型搭建
SSD采用改造的VGG16加额外4个特征层,实现总体的特征提取,生成不同大小的feature_map
改造FC6 和FC7,采用卷积而不是全连接,生成19*19 的feature_map, 额外增加4个卷积层,生成10*10, 5*5, 3*3, 1*1 的feature_map,
其中conv4_3, FC7, conv6_2, conv7_2, conv8_2, conv9_2作为特征提取层,用于目标框的回归和类别的识别。
def SSD300(input_shape, num_classes=21): # 300,300,3 input_tensor = Input(shape=input_shape) img_size = (input_shape[1], input_shape[0]) # SSD结构,net字典 net = VGG16(input_tensor) #-----------------------将提取到的主干特征进行处理---------------------------# # 对conv4_3进行处理 38,38,512 net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3']) num_priors = 4 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 net['conv4_3_norm_mbox_loc'] = Conv2D(num_priors * 4, kernel_size=(3,3), padding='same', name='conv4_3_norm_mbox_loc')(net['conv4_3_norm']) net['conv4_3_norm_mbox_loc_flat'] = Flatten(name='conv4_3_norm_mbox_loc_flat')(net['conv4_3_norm_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 net['conv4_3_norm_mbox_conf'] = Conv2D(num_priors * num_classes, kernel_size=(3,3), padding='same',name='conv4_3_norm_mbox_conf')(net['conv4_3_norm']) net['conv4_3_norm_mbox_conf_flat'] = Flatten(name='conv4_3_norm_mbox_conf_flat')(net['conv4_3_norm_mbox_conf']) priorbox = PriorBox(img_size, 30.0,max_size = 60.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2], name='conv4_3_norm_mbox_priorbox') net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm']) # 对fc7层进行处理 num_priors = 6 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 net['fc7_mbox_loc'] = Conv2D(num_priors * 4, kernel_size=(3,3),padding='same',name='fc7_mbox_loc')(net['fc7']) net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(net['fc7_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes, kernel_size=(3,3),padding='same',name='fc7_mbox_conf')(net['fc7']) net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(net['fc7_mbox_conf']) priorbox = PriorBox(img_size, 60.0, max_size=111.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='fc7_mbox_priorbox') net['fc7_mbox_priorbox'] = priorbox(net['fc7']) # 对conv6_2进行处理 num_priors = 6 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 x = Conv2D(num_priors * 4, kernel_size=(3,3), padding='same',name='conv6_2_mbox_loc')(net['conv6_2']) net['conv6_2_mbox_loc'] = x net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(net['conv6_2_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 x = Conv2D(num_priors * num_classes, kernel_size=(3,3), padding='same',name='conv6_2_mbox_conf')(net['conv6_2']) net['conv6_2_mbox_conf'] = x net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(net['conv6_2_mbox_conf']) priorbox = PriorBox(img_size, 111.0, max_size=162.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv6_2_mbox_priorbox') net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2']) # 对conv7_2进行处理 num_priors = 6 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 x = Conv2D(num_priors * 4, kernel_size=(3,3), padding='same',name='conv7_2_mbox_loc')(net['conv7_2']) net['conv7_2_mbox_loc'] = x net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(net['conv7_2_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 x = Conv2D(num_priors * num_classes, kernel_size=(3,3), padding='same',name='conv7_2_mbox_conf')(net['conv7_2']) net['conv7_2_mbox_conf'] = x net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(net['conv7_2_mbox_conf']) priorbox = PriorBox(img_size, 162.0, max_size=213.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv7_2_mbox_priorbox') net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2']) # 对conv8_2进行处理 num_priors = 4 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 x = Conv2D(num_priors * 4, kernel_size=(3,3), padding='same',name='conv8_2_mbox_loc')(net['conv8_2']) net['conv8_2_mbox_loc'] = x net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(net['conv8_2_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 x = Conv2D(num_priors * num_classes, kernel_size=(3,3), padding='same',name='conv8_2_mbox_conf')(net['conv8_2']) net['conv8_2_mbox_conf'] = x net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(net['conv8_2_mbox_conf']) priorbox = PriorBox(img_size, 213.0, max_size=264.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2], name='conv8_2_mbox_priorbox') net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2']) # 对conv9_2进行处理 num_priors = 4 # 预测框的处理 # num_priors表示每个网格点先验框的数量,4是x,y,h,w的调整 x = Conv2D(num_priors * 4, kernel_size=(3,3), padding='same',name='conv9_2_mbox_loc')(net['conv9_2']) net['conv9_2_mbox_loc'] = x net['conv9_2_mbox_loc_flat'] = Flatten(name='conv9_2_mbox_loc_flat')(net['conv9_2_mbox_loc']) # num_priors表示每个网格点先验框的数量,num_classes是所分的类 x = Conv2D(num_priors * num_classes, kernel_size=(3,3), padding='same',name='conv9_2_mbox_conf')(net['conv9_2']) net['conv9_2_mbox_conf'] = x net['conv9_2_mbox_conf_flat'] = Flatten(name='conv9_2_mbox_conf_flat')(net['conv9_2_mbox_conf']) priorbox = PriorBox(img_size, 264.0, max_size=315.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2], name='conv9_2_mbox_priorbox') net['conv9_2_mbox_priorbox'] = priorbox(net['conv9_2']) # 将所有结果进行堆叠 net['mbox_loc'] = Concatenate(axis=1, name='mbox_loc')([net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'], net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'], net['conv8_2_mbox_loc_flat'], net['conv9_2_mbox_loc_flat']]) net['mbox_conf'] = Concatenate(axis=1, name='mbox_conf')([net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'], net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'], net['conv8_2_mbox_conf_flat'], net['conv9_2_mbox_conf_flat']]) net['mbox_priorbox'] = Concatenate(axis=1, name='mbox_priorbox')([net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'], net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'], net['conv8_2_mbox_priorbox'], net['conv9_2_mbox_priorbox']]) # 8732,4 net['mbox_loc'] = Reshape((-1, 4),name='mbox_loc_final')(net['mbox_loc']) # 8732,21 net['mbox_conf'] = Reshape((-1, num_classes),name='mbox_conf_logits')(net['mbox_conf'])
# 转化为概率信息 net['mbox_conf'] = Activation('softmax',name='mbox_conf_final')(net['mbox_conf']) net['predictions'] = Concatenate(axis=2, name='predictions')([net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']]) model = Model(net['input'], net['predictions']) return model
将各层输出的信息进行堆叠,维度为(None, 8732, 4+num_classes)
先验框信息也作为网络的输出,用于最后预测的时候根据对应的先验框信息回归
所以model 的输出维度为(None, 8732, 4+num_classes+8),最后一维的前四个代表数代表框回归偏差信息,后8个数代表先验框信息, 中间的数代表类别