fc层转conv
其实fc层可以用conv层取代
以lenet为例,将lenet.prototxt修改为lenet_conv.prototxt。第一个fc层转conv时的kernel_size等于其输入特征图的空间尺寸(可在加载模型时根据权重维度计算得到,此处pool2输出为4×4,故kernel_size为4);其后各fc层转conv的kernel_size为1
name: "LeNet"

# Input: batch of 64 single-channel 28x28 images.
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# Replaces the original "ip1" InnerProduct layer. pool2's output is
# spatially 4x4 here, so kernel_size: 4 makes this convolution
# numerically equivalent to the fully-connected layer.
layer {
  name: "ip1_conv"
  type: "Convolution"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 500
    stride: 1
    kernel_size: 4
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}

# Replaces the original "ip2" InnerProduct layer; its input is already
# 1x1 spatially, so a 1x1 convolution is the equivalent form.
layer {
  name: "ip2_conv"
  type: "Convolution"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 10
    stride: 1
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}
跑代码
# -*- coding: utf-8 -*-
# --------------------------------------------------------
# convert_fc2fullconv
# Copyright (c) 2018 Tsinghua
# Licensed under company License
# Written by Dezan Zhao
# --------------------------------------------------------
import numpy as np
import math
import sys

# Root of the Caffe checkout; its 'python' subdirectory holds pycaffe.
# BUG FIX: the original set caffe_root = '../../python' and then appended
# 'python' again, putting the nonexistent path '../../pythonpython' on
# sys.path, so `import caffe` could never resolve through it.
caffe_root = '../../'
sys.path.insert(0, caffe_root + 'python')
import caffe

# Fully-connected layer names in the source net and the corresponding
# convolution layer names in the converted net (positionally paired).
params = ['ip1', 'ip2']
params_full_conv = ['ip1_conv', 'ip2_conv']


def convert_full_conv(model_define, model_weight, model_define_fc, model_weight_fc):
    '''
    @breif: convert fc to conv
    @param: model_define, src_prototxt
    @param: model_weight, src_model
    @param: model_define_fc, dst_prototxt
    @param: model_weight_fc, dst_model
    @return: the converted caffe.Net (also saved to model_weight_fc)
    '''
    # Source net: read the fc layers' weights/biases.
    net = caffe.Net(model_define, model_weight, caffe.TEST)
    fc_params = {pr: (net.params[pr][0].data, net.params[pr][1].data)
                 for pr in params}
    # Destination net: same weights file, but the prototxt declares the
    # fc layers as convolutions, so their blobs have conv-shaped params.
    net_fc = caffe.Net(model_define_fc, model_weight, caffe.TEST)
    conv_params = {pr: (net_fc.params[pr][0].data, net_fc.params[pr][1].data)
                   for pr in params_full_conv}
    for fc in params:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(
            fc, fc_params[fc][0].shape, fc_params[fc][1].shape))
    for conv in params_full_conv:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(
            conv, conv_params[conv][0].shape, conv_params[conv][1].shape))
    # An fc weight matrix (out, in) holds exactly the same values as the
    # equivalent conv kernel (out, c, h, w) in the same order, so copying
    # through .flat reshapes without reordering.
    for pr, pr_conv in zip(params, params_full_conv):
        conv_params[pr_conv][0].flat = fc_params[pr][0].flat  # flat unrolls the arrays
        conv_params[pr_conv][1][...] = fc_params[pr][1]
    net_fc.save(model_weight_fc)
    print('convert done!')
    return net_fc


if __name__ == '__main__':
    src_proto = 'lenet.prototxt'  # original prototxt (avoid shadowing builtin `file`)
    conv_proto = 'lenet_conv.prototxt'  # copy of the original with fc layers rewritten as conv (params -> params_full_conv)
    src_model = 'lenet_iter_10000.caffemodel'  # original caffemodel
    conv_model = './lenet_conv.caffemodel'  # converted output
    convert_full_conv(src_proto, src_model, conv_proto, conv_model)
具体参考官方文档
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb