Converting fc layers to conv layers

In fact, a fully connected (fc) layer can be replaced by a conv layer.

Take LeNet as an example: copy lenet.prototxt to lenet_conv.prototxt and edit it so each InnerProduct layer becomes a Convolution layer. The kernel_size of the first converted fc layer equals the spatial size of its input blob, which you can work out when loading the model (see the short sketch below); every fc layer after it uses kernel_size: 1, as in the definition that follows.
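That kernel size is just the spatial size of pool2's output. For LeNet's 28x28 input it can be worked out by hand; a minimal sketch of the arithmetic (layer parameters taken from the prototxt below):

# Spatial-size bookkeeping for LeNet on a 28x28 input
# (valid convolution: out = (in - kernel) // stride + 1; the 2x2/stride-2 pooling halves the size here).
size = 28
size = (size - 5) // 1 + 1   # conv1: 5x5, stride 1 -> 24
size = size // 2             # pool1: 2x2, stride 2 -> 12
size = (size - 5) // 1 + 1   # conv2: 5x5, stride 1 -> 8
size = size // 2             # pool2: 2x2, stride 2 -> 4
print(size)  # 4 -> ip1_conv needs kernel_size: 4; every later fc becomes a 1x1 conv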

name: "LeNet"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1_conv"
  type: "Convolution"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 500
    stride: 1
    kernel_size: 4
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2_conv"
  type: "Convolution"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 10
    stride: 1
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}

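A point worth noting before running the conversion: the renamed layers hold exactly as many parameters as the fc layers they replace. ip1's fc weights have shape (500, 800), and 800 = 50 x 4 x 4 is just pool2's output flattened, so the same values can be viewed as ip1_conv weights of shape (500, 50, 4, 4). A small NumPy illustration of that correspondence (shapes read off the two LeNet definitions; this is not the conversion script itself):

import numpy as np

# ip1 as an InnerProduct layer stores weights as (num_output, flattened input) = (500, 800).
fc_weights = np.arange(500 * 800, dtype=np.float32).reshape(500, 800)

# Viewed as a convolution blob, the same data becomes (num_output, channels, height, width).
conv_weights = fc_weights.reshape(500, 50, 4, 4)
print(conv_weights.shape)  # (500, 50, 4, 4): identical values, conv layout

# This is why the flat copy in the script below works without any transpose.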

Run the conversion script:

# -*- coding: utf-8 -*-
# --------------------------------------------------------
# convert_fc2fullconv
# Copyright (c) 2018 Tsinghua
# Licensed under company License
# Written by Dezan Zhao
# --------------------------------------------------------

import sys

import numpy as np

caffe_root = '../../'  # path to the Caffe root; pycaffe lives in its python/ subdirectory
sys.path.insert(0, caffe_root + 'python')
import caffe

params           = ['ip1', 'ip2']
params_full_conv = ['ip1_conv', 'ip2_conv']
def convert_full_conv(model_define, model_weight, model_define_fc, model_weight_fc):
    '''
    @brief: convert fc layers to conv layers
    @param: model_define, src_prototxt
    @param: model_weight, src_model
    @param: model_define_fc, dst_prototxt
    @param: model_weight_fc, dst_model
    '''
    # Load the original net (with fc layers) and the fully convolutional net.
    # Loading model_weight into the conv-only definition copies every layer whose
    # name matches (conv1, conv2); the renamed ip*_conv layers keep their filler
    # initialization and are filled in below.
    net = caffe.Net(model_define, model_weight, caffe.TEST)
    fc_params = {pr: (net.params[pr][0].data, net.params[pr][1].data) for pr in params}
    net_fc = caffe.Net(model_define_fc, model_weight, caffe.TEST)
    conv_params = {pr: (net_fc.params[pr][0].data, net_fc.params[pr][1].data) for pr in params_full_conv}
    for fc in params:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(fc, fc_params[fc][0].shape, fc_params[fc][1].shape))
    for conv in params_full_conv:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(conv, conv_params[conv][0].shape, conv_params[conv][1].shape))
    # Copy fc weights into the conv blobs: the element counts match, so a flat copy
    # reshapes (num_output, input_dim) into (num_output, channels, height, width).
    for pr, pr_conv in zip(params, params_full_conv):
        conv_params[pr_conv][0].flat = fc_params[pr][0].flat  # flat unrolls the arrays
        conv_params[pr_conv][1][...] = fc_params[pr][1]
    net_fc.save(model_weight_fc)
    print('convert done!')
    return net_fc

if __name__ == '__main__':

    file = 'lenet.prototxt'                 # original prototxt
    conv_file = 'lenet_conv.prototxt'       # copy of the original with the fc layers rewritten as conv layers (params -> params_full_conv)
    model = 'lenet_iter_10000.caffemodel'   # original caffemodel
    conv_model = './lenet_conv.caffemodel'  # converted caffemodel written by the script

    convert_full_conv(file, model, conv_file, conv_model)

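To sanity-check the result, you can push the same batch through both nets and compare their 'prob' outputs; the conv net just produces a trailing 1x1 spatial dimension. A minimal sketch, assuming the file names used above and that lenet.prototxt is the standard Caffe MNIST deploy definition (64x1x28x28 Input, Softmax top named 'prob'):

import numpy as np
import caffe

caffe.set_mode_cpu()
net = caffe.Net('lenet.prototxt', 'lenet_iter_10000.caffemodel', caffe.TEST)
net_conv = caffe.Net('lenet_conv.prototxt', 'lenet_conv.caffemodel', caffe.TEST)

# Feed the same random batch to both nets (64 matches the Input shape above).
data = np.random.rand(64, 1, 28, 28).astype(np.float32)
net.blobs['data'].data[...] = data
net_conv.blobs['data'].data[...] = data

prob = net.forward()['prob']            # shape (64, 10)
prob_conv = net_conv.forward()['prob']  # shape (64, 10, 1, 1)
print(np.allclose(prob, prob_conv.reshape(64, 10)))  # expected: True
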
For details, see the official net surgery notebook:

http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb
