caffe——全连接层inner_product_layer
在 Caffe 中,全连接层的类型名为 "InnerProduct"(对应源文件 inner_product_layer),而在 TensorFlow 中通常称为 fully_connected 层。
1、prototxt中的定义
# InnerProduct (fully connected) layer named "fc8": reads blob "fc7", writes blob "fc8".
layer {
bottom: "fc7"
top: "fc8"
name: "fc8"
type: "InnerProduct"
param { # learning settings for the weights
lr_mult: 10 # learning-rate multiplier for the weights
decay_mult: 1 # weight-decay multiplier for the weights
}
param { # learning settings for the bias
lr_mult: 20 # the bias learning rate is commonly set to twice the weight learning rate
decay_mult: 0 # weight-decay multiplier for the bias
}
inner_product_param {
num_output: 1000 # number of output units
weight_filler { # initialization method for the weights
type: "gaussian"
std: 0.005
}
bias_filler { # initialization method for the bias
type: "constant"
value: 0.1
}
}
}
2、caffe.proto中的定义
// LayerParameter field that carries the InnerProduct layer's settings
// (any other fields of this message are not shown in this excerpt).
message LayerParameter {
optional InnerProductParameter inner_product_param = 117;
}
// Configuration options for the InnerProduct (fully connected) layer.
message InnerProductParameter {
optional uint32 num_output = 1; // number of outputs of the layer
optional bool bias_term = 2 [default = true]; // whether to include a bias term
optional FillerParameter weight_filler = 3; // filler for the weights
optional FillerParameter bias_filler = 4; // filler for the bias
// First axis to be folded into the single inner-product computation.
// -1 denotes the last axis.
optional int32 axis = 5 [default = 1];
// Whether the weight matrix is transposed.
optional bool transpose = 6 [default = false];
}
3、inner_product_layer.hpp
#ifndef CAFFE_INNER_PRODUCT_LAYER_HPP_
#define CAFFE_INNER_PRODUCT_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
 * @brief Also known as a "fully-connected" layer, computes an inner product
 *        with a set of learned weights, and (optionally) adds biases.
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
template <typename Dtype>
class InnerProductLayer : public Layer<Dtype> {
public:
explicit InnerProductLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "InnerProduct"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const