Caffe源码-Net类(上)
Net类简介
Net类主要处理各个Layer之间的输入输出数据和参数数据共享等的关系。由于Net类的代码较多,本次主要介绍网络初始化部分的代码。Net类在初始化的时候将各个Layer的输出blob都统一保存在变量blobs_中,利用各个层的输入输出数据在blobs_的位置,可以方便地确定层之间数据传递的关系。Net中还利用类似的方法来存储各个层之间的学习参数,管理层之间的参数共享关系。
net.cpp部分源码
// Constructs a net from an already-populated NetParameter message.
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param) {
  // All construction work is delegated to Init().
  this->Init(param);
}
// Constructs a net from a text-format proto file.
//   param_file: name of the text proto file holding the net parameters.
//   phase:      net state field (TRAIN or TEST).
//   level:      net state field.
//   stages:     optional list of stage strings; may be NULL.
// phase/level/stages are written into the NetState carried by the
// NetParameter before Init() runs; FilterNet()/StateMeetsRule() compare that
// state against each layer's include/exclude rules.
template <typename Dtype>
Net<Dtype>::Net(const string& param_file, Phase phase,
                const int level, const vector<string>* stages) {
  NetParameter param;
  // Read the net parameters from the text file (aborts on failure, per the
  // helper's "OrDie" name).
  ReadNetParamsFromTextFileOrDie(param_file, &param);
  // Set phase, stages and level
  param.mutable_state()->set_phase(phase);
  if (stages != NULL) {
    for (size_t i = 0; i < stages->size(); ++i) {
      param.mutable_state()->add_stage((*stages)[i]);
    }
  }
  param.mutable_state()->set_level(level);
  Init(param);
}
// Initializes the net from in_param:
//   1. filters layers against the current NetState (FilterNet),
//   2. inserts split layers where needed (InsertSplits),
//   3. creates every layer and wires up its bottom/top/param blobs,
//   4. walks the net backwards to decide which layers/blobs need backward,
//   5. applies force_backward if requested,
//   6. records the remaining unconsumed tops as net outputs and builds the
//      name -> index lookup tables.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  // Set phase from the state.
  phase_ = in_param.state().phase();
  // Filter layers based on their include/exclude rules and
  // the current NetState.
  NetParameter filtered_param;
  FilterNet(in_param, &filtered_param);
  LOG_IF(INFO, Caffe::root_solver())
      << "Initializing net from parameters: " << std::endl
      << filtered_param.DebugString();
  // Create a copy of filtered_param with splits added where necessary.
  NetParameter param;
  InsertSplits(filtered_param, &param);
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  // Top blob name -> index in blobs_. Only named top blobs are recorded.
  map<string, int> blob_name_to_idx;
  // Top blobs that have not yet been consumed as a bottom. AppendBottom()
  // erases a name once it is used as input; AppendTop() inserts each new top.
  // Whatever is left at the end is treated as a net output.
  set<string> available_blobs;
  memory_used_ = 0;
  // For each layer, set up its input and output
  bottom_vecs_.resize(param.layer_size());
  top_vecs_.resize(param.layer_size());
  bottom_id_vecs_.resize(param.layer_size());
  param_id_vecs_.resize(param.layer_size());
  top_id_vecs_.resize(param.layer_size());
  bottom_need_backward_.resize(param.layer_size());
  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
    // Inherit phase from net if unset.
    if (!param.layer(layer_id).has_phase()) {
      param.mutable_layer(layer_id)->set_phase(phase_);
    }
    // Setup layer.
    const LayerParameter& layer_param = param.layer(layer_id);
    if (layer_param.propagate_down_size() > 0) {
      // propagate_down must be given once per bottom blob, or not at all.
      CHECK_EQ(layer_param.propagate_down_size(),
          layer_param.bottom_size())
          << "propagate_down param must be specified "
          << "either 0 or bottom_size times ";
    }
    layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
    layer_names_.push_back(layer_param.name());
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating Layer " << layer_param.name();
    // Becomes true if any bottom blob or any param blob needs backward.
    bool need_backward = false;

    // Figure out this layer's input and output
    for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
         ++bottom_id) {
      const int blob_id = AppendBottom(param, layer_id, bottom_id,
                                       &available_blobs, &blob_name_to_idx);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
    }
    int num_top = layer_param.top_size();
    for (int top_id = 0; top_id < num_top; ++top_id) {
      AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
      // Collect Input layer tops as Net inputs.
      if (layer_param.type() == "Input") {
        const int blob_id = blobs_.size() - 1;
        net_input_blob_indices_.push_back(blob_id);
        net_input_blobs_.push_back(blobs_[blob_id].get());
      }
    }
    // If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
    // specified fewer than the required number (as specified by
    // ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
    Layer<Dtype>* layer = layers_[layer_id].get();
    if (layer->AutoTopBlobs()) {
      const int needed_num_top =
          std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
      for (; num_top < needed_num_top; ++num_top) {
        // Add "anonymous" top blobs -- do not modify available_blobs or
        // blob_name_to_idx as we don't want these blobs to be usable as input
        // to other layers. (They are still stored in blobs_ by AppendTop().)
        AppendTop(param, layer_id, num_top, NULL, NULL);
      }
    }
    // After this layer is connected, set it up.
    layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
    LOG_IF(INFO, Caffe::root_solver())
        << "Setting up " << layer_names_[layer_id];
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      // Grow blob_loss_weights_ (zero-filled) so it can be indexed by this
      // top's blob id.
      if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
        blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
      }
      blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
      LOG_IF(INFO, Caffe::root_solver())
          << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
      if (layer->loss(top_id)) {
        LOG_IF(INFO, Caffe::root_solver())
            << "    with loss weight " << layer->loss(top_id);
      }
      // NOTE(review): for in-place tops AppendTop() pushes the existing blob
      // pointer into top_vecs_ again, so that blob's count is added here a
      // second time -- confirm whether this over-count is intended.
      memory_used_ += top_vecs_[layer_id][top_id]->count();
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Memory required for data: " << memory_used_ * sizeof(Dtype);
    // Number of ParamSpec entries configured vs. number of param blobs the
    // layer actually has; there may not be more specs than blobs.
    const int param_size = layer_param.param_size();
    const int num_param_blobs = layers_[layer_id]->blobs().size();
    CHECK_LE(param_size, num_param_blobs)
        << "Too many params specified for layer " << layer_param.name();
    // Used for param blobs that have no explicit ParamSpec.
    ParamSpec default_param_spec;
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      const ParamSpec* param_spec = (param_id < param_size) ?
          &layer_param.param(param_id) : &default_param_spec;
      // A param blob needs backward iff its lr_mult is non-zero.
      const bool param_need_backward = param_spec->lr_mult() != 0;
      need_backward |= param_need_backward;
      layers_[layer_id]->set_param_propagate_down(param_id,
                                                  param_need_backward);
    }
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      AppendParam(param, layer_id, param_id);
    }
    // Finally, set the backward flag
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      // All tops of a layer that needs backward are marked as needing it too.
      for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
        blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
      }
    }
  }
  // Go through the net backwards to determine which blobs contribute to the
  // loss.  We can skip backward computation for blobs that don't contribute
  // to the loss.
  // Also checks if all bottom blobs don't need backward computation (possible
  // because the skip_propagate_down param) and so we can skip backward
  // computation for the entire layer
  set<string> blobs_under_loss;  // names of blobs that contribute to the loss
  set<string> blobs_skip_backp;  // names of blobs whose backward can be skipped
  for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {
    bool layer_contributes_loss = false;
    bool layer_skip_propagate_down = true;
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
      // A top contributes if it has a non-zero loss weight or a later layer
      // already placed it in blobs_under_loss.
      if (layers_[layer_id]->loss(top_id) ||
          (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
        layer_contributes_loss = true;
      }
      if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) {
        layer_skip_propagate_down = false;
      }
      // Both flags have reached their final value; no need to look at the
      // remaining tops.
      if (layer_contributes_loss && !layer_skip_propagate_down)
        break;
    }
    // If this layer can skip backward computation, also all his bottom blobs
    // don't need backpropagation
    if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
      layer_need_backward_[layer_id] = false;
      for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
               ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
    }
    // Only the layer flag is cleared here; the bottom flags are cleared in
    // the loop over bottoms below.
    if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }
    if (Caffe::root_solver()) {
      if (layer_need_backward_[layer_id]) {
        LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
      } else {
        LOG(INFO) << layer_names_[layer_id]
            << " does not need backward computation.";
      }
    }
    for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
         ++bottom_id) {
      if (layer_contributes_loss) {
        // Record the bottom's name so the earlier layer producing it is
        // recognised as contributing to the loss as well.
        const string& blob_name =
            blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_under_loss.insert(blob_name);
      } else {
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
      if (!bottom_need_backward_[layer_id][bottom_id]) {
        // Record the bottom's name so the earlier layer producing it can
        // skip backward for that top.
        const string& blob_name =
                   blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_skip_backp.insert(blob_name);
      }
    }
  }
  // Handle force_backward if needed.
  if (param.force_backward()) {
    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
      layer_need_backward_[layer_id] = true;
      for (int bottom_id = 0;
           bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
        // A bottom needs backward if it already did, or if the layer allows
        // forced backward for it.
        bottom_need_backward_[layer_id][bottom_id] =
            bottom_need_backward_[layer_id][bottom_id] ||
            layers_[layer_id]->AllowForceBackward(bottom_id);
        // Propagate the decision to the per-blob flag, indexed by blob id.
        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
            bottom_need_backward_[layer_id][bottom_id];
      }
      // With force_backward every param blob is marked for propagate-down.
      for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
           ++param_id) {
        layers_[layer_id]->set_param_propagate_down(param_id, true);
      }
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG_IF(INFO, Caffe::root_solver())
        << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
    net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  }
  // Build name -> index lookups for blobs and layers.
  for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
    blob_names_index_[blob_names_[blob_id]] = blob_id;
  }
  for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) {
    layer_names_index_[layer_names_[layer_id]] = layer_id;
  }
  ShareWeights();
  debug_info_ = param.debug_info();
  LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
}
// Copies param into param_filtered, keeping only the layers whose
// include/exclude NetStateRules are satisfied by the net's NetState.
template <typename Dtype>
void Net<Dtype>::FilterNet(const NetParameter& param,
                           NetParameter* param_filtered) {
  NetState net_state(param.state());
  // Start from a full copy, then rebuild the layer list from scratch.
  param_filtered->CopyFrom(param);
  param_filtered->clear_layer();
  const int total_layers = param.layer_size();
  for (int idx = 0; idx < total_layers; ++idx) {
    const LayerParameter& candidate = param.layer(idx);
    const string& candidate_name = candidate.name();
    // A layer may carry include rules or exclude rules, never both.
    CHECK(candidate.include_size() == 0 || candidate.exclude_size() == 0)
          << "Specify either include rules or exclude rules; not both.";
    // With no include rules the layer is kept by default and dropped as soon
    // as one exclude rule matches; with include rules it is dropped by
    // default and kept as soon as one include rule matches.
    bool keep_layer = (candidate.include_size() == 0);
    if (keep_layer) {
      for (int r = 0; r < candidate.exclude_size(); ++r) {
        if (StateMeetsRule(net_state, candidate.exclude(r), candidate_name)) {
          keep_layer = false;
          break;
        }
      }
    } else {
      for (int r = 0; r < candidate.include_size(); ++r) {
        if (StateMeetsRule(net_state, candidate.include(r), candidate_name)) {
          keep_layer = true;
          break;
        }
      }
    }
    if (!keep_layer) {
      continue;
    }
    param_filtered->add_layer()->CopyFrom(candidate);
  }
}
// Returns true iff the NetState `state` satisfies every condition in `rule`:
//   - if the rule sets a phase, the state's phase must equal it;
//   - the state's level must lie within [min_level, max_level] for whichever
//     of those bounds the rule sets;
//   - the state must contain ALL of the rule's stage strings;
//   - the state must contain NONE of the rule's not_stage strings.
// On the first violated condition a message naming `layer_name` is logged
// (only when Caffe::root_solver() is true) and false is returned.
template <typename Dtype>
bool Net<Dtype>::StateMeetsRule(const NetState& state,
    const NetStateRule& rule, const string& layer_name) {
  // Check whether the rule is broken due to phase.
  if (rule.has_phase()) {
      if (rule.phase() != state.phase()) {
        LOG_IF(INFO, Caffe::root_solver())
            << "The NetState phase (" << state.phase()
            << ") differed from the phase (" << rule.phase()
            << ") specified by a rule in layer " << layer_name;
        return false;
      }
  }
  // Check whether the rule is broken due to min level.
  if (rule.has_min_level()) {
    if (state.level() < rule.min_level()) {
      // The level is too low here, so the message must say "below".
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is below the min_level (" << rule.min_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to max level.
  if (rule.has_max_level()) {
    if (state.level() > rule.max_level()) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is above the max_level (" << rule.max_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to stage. The NetState must
  // contain ALL of the rule's stages to meet it.
  for (int i = 0; i < rule.stage_size(); ++i) {
    // Check that the NetState contains the rule's ith stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (!has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState did not contain stage '" << rule.stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to not_stage. The NetState must
  // contain NONE of the rule's not_stages to meet it.
  for (int i = 0; i < rule.not_stage_size(); ++i) {
    // Check that the NetState contains the rule's ith not_stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.not_stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState contained a not_stage '" << rule.not_stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  return true;
}
// Helper for Net::Init: add a new top blob to the net.
//
// Registers the top_id-th top of layer layer_id. For an in-place top (same
// name as the bottom at the same position) no new blob is created and the
// existing entry in blobs_ is reused; otherwise a new empty Blob is appended
// to blobs_. In both cases the blob pointer is pushed onto
// top_vecs_[layer_id] and its index in blobs_ onto top_id_vecs_[layer_id].
//
//   available_blobs:  if non-NULL, the blob name is inserted so later layers
//                     may consume it as a bottom.
//   blob_name_to_idx: if non-NULL, used to detect in-place/duplicate tops and
//                     updated with the new blob's index. Passing NULL for
//                     both creates an "anonymous" top.
template <typename Dtype>
void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
                           const int top_id, set<string>* available_blobs,
                           map<string, int>* blob_name_to_idx) {
  // Only read access is needed, so reference the layer's parameters instead
  // of heap-allocating a deep copy (consistent with AppendBottom()).
  const LayerParameter& layer_param = param.layer(layer_id);
  // Tops beyond those listed in the LayerParameter get a placeholder name.
  const string blob_name = (layer_param.top_size() > top_id) ?
      layer_param.top(top_id) : "(automatic)";
  // Check if we are doing in-place computation
  if (blob_name_to_idx && layer_param.bottom_size() > top_id &&
      blob_name == layer_param.bottom(top_id)) {
    // In-place computation
    LOG_IF(INFO, Caffe::root_solver())
        << layer_param.name() << " -> " << blob_name << " (in-place)";
    // Reuse the blob already registered under this name.
    const int blob_id = (*blob_name_to_idx)[blob_name];
    top_vecs_[layer_id].push_back(blobs_[blob_id].get());
    top_id_vecs_[layer_id].push_back(blob_id);
  } else if (blob_name_to_idx &&
             blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) {
    // If we are not doing in-place computation but have duplicated blobs,
    // raise an error.
    LOG(FATAL) << "Top blob '" << blob_name
               << "' produced by multiple sources.";
  } else {
    // Normal output.
    if (Caffe::root_solver()) {
      LOG(INFO) << layer_param.name() << " -> " << blob_name;
    }
    shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
    const int blob_id = blobs_.size();
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    // Starts as "no backward needed"; Init() may flip it later.
    blob_need_backward_.push_back(false);
    if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
    top_id_vecs_[layer_id].push_back(blob_id);
    top_vecs_[layer_id].push_back(blob_pointer.get());
  }
  // Make the blob available as a bottom for subsequent layers.
  if (available_blobs) { available_blobs->insert(blob_name); }
}
// Helper for Net::Init: add a new bottom blob to the net.
//
// Wires the bottom_id-th bottom of layer layer_id to the previously produced
// top blob of the same name, removes that name from available_blobs, records
// whether the bottom needs backward, and returns the blob's index in blobs_.
// Aborts via LOG(FATAL) if the name is not in available_blobs.
template <typename Dtype>
int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
    const int bottom_id, set<string>* available_blobs,
    map<string, int>* blob_name_to_idx) {
  const LayerParameter& cur_layer = param.layer(layer_id);
  const string& bottom_name = cur_layer.bottom(bottom_id);
  // The bottom must match a top that no earlier layer has consumed yet.
  if (available_blobs->count(bottom_name) == 0) {
    LOG(FATAL) << "Unknown bottom blob '" << bottom_name << "' (layer '"
               << cur_layer.name() << "', bottom index " << bottom_id << ")";
  }
  const int blob_id = (*blob_name_to_idx)[bottom_name];
  LOG_IF(INFO, Caffe::root_solver())
      << layer_names_[layer_id] << " <- " << bottom_name;

  // Default to the producing blob's flag; an explicit propagate_down entry
  // for this bottom overrides it.
  bool wants_backward;
  if (cur_layer.propagate_down_size() > 0) {
    wants_backward = cur_layer.propagate_down(bottom_id);
  } else {
    wants_backward = blob_need_backward_[blob_id];
  }

  // One entry is pushed per call, so position bottom_id in each vector
  // corresponds to this bottom.
  bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
  bottom_id_vecs_[layer_id].push_back(blob_id);
  bottom_need_backward_[layer_id].push_back(wants_backward);
  // The blob is now consumed and no longer counts as available.
  available_blobs->erase(bottom_name);
  return blob_id;
}
// Helper for Net::Init: registers the param_id-th param blob of layer
// layer_id with the net.
//
// Every call appends one entry to params_, param_id_vecs_[layer_id],
// param_layer_indices_, param_display_names_, param_owners_ and
// learnable_param_ids_. A param that is unnamed, or whose name is seen for
// the first time, is an "owner" and is also appended to learnable_params_
// together with its lr/decay multipliers. A param whose name was seen before
// shares the owner's learnable slot after shape and multiplier checks.
template <typename Dtype>
void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
                             const int param_id) {
  const LayerParameter& layer_param = layers_[layer_id]->layer_param();
  // True when the layer has an explicit ParamSpec for this param blob.
  const bool has_spec = layer_param.param_size() > param_id;
  const string param_name =
      has_spec ? layer_param.param(param_id).name() : "";

  // Display name: the configured name, or the param index when unnamed.
  if (param_name.empty()) {
    ostringstream index_as_name;
    index_as_name << param_id;
    param_display_names_.push_back(index_as_name.str());
  } else {
    param_display_names_.push_back(param_name);
  }

  const int net_param_id = params_.size();
  params_.push_back(layers_[layer_id]->blobs()[param_id]);
  param_id_vecs_[layer_id].push_back(net_param_id);
  param_layer_indices_.push_back(make_pair(layer_id, param_id));

  ParamSpec default_param_spec;
  const ParamSpec* param_spec =
      has_spec ? &layer_param.param(param_id) : &default_param_spec;

  // This layer "owns" this parameter blob -- it is either anonymous
  // (i.e., not given a param_name) or explicitly given a name that we
  // haven't already seen.
  const bool owns_param = param_name.empty() ||
      param_names_index_.find(param_name) == param_names_index_.end();
  if (owns_param) {
    param_owners_.push_back(-1);
    if (!param_name.empty()) {
      // Remember where the named owner lives so later sharers can find it.
      param_names_index_[param_name] = net_param_id;
    }
    const int learnable_param_id = learnable_params_.size();
    learnable_params_.push_back(params_[net_param_id].get());
    learnable_param_ids_.push_back(learnable_param_id);
    has_params_lr_.push_back(param_spec->has_lr_mult());
    has_params_decay_.push_back(param_spec->has_decay_mult());
    params_lr_.push_back(param_spec->lr_mult());
    params_weight_decay_.push_back(param_spec->decay_mult());
    return;
  }

  // Named param blob with name we've seen before: share params
  const int owner_net_param_id = param_names_index_[param_name];
  param_owners_.push_back(owner_net_param_id);
  const pair<int, int>& owner_index =
      param_layer_indices_[owner_net_param_id];
  const int owner_layer_id = owner_index.first;
  const int owner_param_id = owner_index.second;
  LOG_IF(INFO, Caffe::root_solver()) << "Sharing parameters '" << param_name
      << "' owned by "
      << "layer '" << layer_names_[owner_layer_id] << "', param "
      << "index " << owner_param_id;
  Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
  Blob<Dtype>* owner_blob =
      layers_[owner_layer_id]->blobs()[owner_param_id].get();
  const bool permissive = has_spec &&
      (layer_param.param(param_id).share_mode() ==
       ParamSpec_DimCheckMode_PERMISSIVE);
  if (permissive) {
    // Permissive dimension checking -- only check counts are the same.
    CHECK_EQ(this_blob->count(), owner_blob->count())
        << "Cannot share param '" << param_name << "' owned by layer '"
        << layer_names_[owner_layer_id] << "' with layer '"
        << layer_names_[layer_id] << "'; count mismatch.  Owner layer param "
        << "shape is " << owner_blob->shape_string() << "; sharing layer "
        << "shape is " << this_blob->shape_string();
  } else {
    // Strict dimension checking -- all dims must be the same.
    CHECK(this_blob->shape() == owner_blob->shape())
        << "Cannot share param '" << param_name << "' owned by layer '"
        << layer_names_[owner_layer_id] << "' with layer '"
        << layer_names_[layer_id] << "'; shape mismatch.  Owner layer param "
        << "shape is " << owner_blob->shape_string() << "; sharing layer "
        << "expects shape " << this_blob->shape_string();
  }

  // learnable_param_ids_ receives one entry per call, like params_, so it
  // can be indexed with the owner's params_ index.
  const int learnable_param_id = learnable_param_ids_[owner_net_param_id];
  learnable_param_ids_.push_back(learnable_param_id);

  // lr_mult: must agree with the owner's if both set it; otherwise the
  // sharer's value is adopted for the shared slot.
  if (param_spec->has_lr_mult()) {
    if (!has_params_lr_[learnable_param_id]) {
      has_params_lr_[learnable_param_id] = true;
      params_lr_[learnable_param_id] = param_spec->lr_mult();
    } else {
      CHECK_EQ(param_spec->lr_mult(), params_lr_[learnable_param_id])
          << "Shared param '" << param_name << "' has mismatched lr_mult.";
    }
  }
  // decay_mult: handled the same way as lr_mult.
  if (param_spec->has_decay_mult()) {
    if (!has_params_decay_[learnable_param_id]) {
      has_params_decay_[learnable_param_id] = true;
      params_weight_decay_[learnable_param_id] = param_spec->decay_mult();
    } else {
      CHECK_EQ(param_spec->decay_mult(),
               params_weight_decay_[learnable_param_id])
          << "Shared param '" << param_name << "' has mismatched decay_mult.";
    }
  }
}
net.hpp源码
/// @brief The network name
string name_; // set from param.name() in Init()
/// @brief The phase: TRAIN or TEST
Phase phase_; // set from the NetState's phase in Init()
/// @brief Individual layers in the net
vector<shared_ptr<Layer<Dtype> > > layers_; // indexed by layer_id
vector<string> layer_names_; // layer_names_[layer_id] is the name of layers_[layer_id]
map<string, int> layer_names_index_; // layer name -> layer_id (index into layers_)
vector<bool> layer_need_backward_; // per layer: whether backward computation is needed
/// @brief the blobs storing intermediate results between the layer.
vector<shared_ptr<Blob<Dtype> > > blobs_; // all top blobs of the net, indexed by blob_id
vector<string> blob_names_; // blob_names_[blob_id] is the name of blobs_[blob_id]
map<string, int> blob_names_index_; // blob name -> blob_id (index into blobs_)
vector<bool> blob_need_backward_; // per blob in blobs_: whether backward is needed
/// bottom_vecs stores the vectors containing the input for each layer.
/// They don't actually host the blobs (blobs_ does), so we simply store
/// pointers.
vector<vector<Blob<Dtype>*> > bottom_vecs_; // bottom_vecs_[i][j]: pointer to the j-th bottom blob of layer i (owned by blobs_)
vector<vector<int> > bottom_id_vecs_; // bottom_id_vecs_[i][j]: blob_id of the j-th bottom of layer i
vector<vector<bool> > bottom_need_backward_; // bottom_need_backward_[i][j]: whether the j-th bottom of layer i needs backward
/// top_vecs stores the vectors containing the output for each layer
vector<vector<Blob<Dtype>*> > top_vecs_; // top_vecs_[i][j]: pointer to the j-th top blob of layer i (owned by blobs_)
vector<vector<int> > top_id_vecs_; // top_id_vecs_[i][j]: blob_id of the j-th top of layer i
/// Vector of weight in the loss (or objective) function of each net blob, indexed by blob_id.
vector<Dtype> blob_loss_weights_; // loss weight of blobs_[blob_id]
vector<vector<int> > param_id_vecs_; // param_id_vecs_[i][j]: index in params_ of the j-th param blob of layer i
// param_owners_[i] describes who owns params_[i]: -1 when the param is owned
// by its own layer, otherwise the params_ index of the owning param it shares.
vector<int> param_owners_;
// param_display_names_[i] is the display name of params_[i]: the configured
// (non-empty) name, or the param's index within its layer when unnamed.
// Index-based names can repeat across layers, so this vector must not be used
// to look up the owner of a shared param.
vector<string> param_display_names_;
vector<pair<int, int> > param_layer_indices_; // param_layer_indices_[i]: (layer_id, param_id) of params_[i]
// Param name -> index in params_. A non-empty name is stored only the first
// time it appears (its owner); later params with that name are sharers, so
// this map is what locates the owner of a shared param.
map<string, int> param_names_index_;
/// blob indices for the input and the output of the net
vector<int> net_input_blob_indices_; // blob_ids of the net's input blobs
vector<int> net_output_blob_indices_; // blob_ids of the net's output blobs
vector<Blob<Dtype>*> net_input_blobs_; // pointers to the net's input blobs
vector<Blob<Dtype>*> net_output_blobs_; // pointers to the net's output blobs (tops never consumed as a bottom)
/// The parameters in the network.
vector<shared_ptr<Blob<Dtype> > > params_; // every param blob in the net, owners and sharers alike
vector<Blob<Dtype>*> learnable_params_; // learnable params; only owner params are stored here
/**
* The mapping from params_ -> learnable_params_: we have
* learnable_param_ids_.size() == params_.size(),
* and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
* if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer
* and learnable_params_[learnable_param_ids_[i]] gives its owner.
*/
vector<int> learnable_param_ids_; // learnable_param_ids_[i]: index in learnable_params_ for params_[i]
/// the learning rate multipliers for learnable_params_
vector<float> params_lr_; // params_lr_[i]: lr multiplier of learnable_params_[i]
vector<bool> has_params_lr_; // has_params_lr_[i]: whether an lr multiplier was explicitly set for learnable_params_[i]
/// the weight decay multipliers for learnable_params_
vector<float> params_weight_decay_; // params_weight_decay_[i]: weight decay multiplier of learnable_params_[i]
vector<bool> has_params_decay_; // has_params_decay_[i]: whether a decay multiplier was explicitly set for learnable_params_[i]
/// The bytes of memory used by this net
// NOTE(review): Init() accumulates top blob element counts (count()) here and
// multiplies by sizeof(Dtype) only when logging, so the stored value appears
// to be an element count rather than bytes -- confirm.
size_t memory_used_;
/// Whether to compute and display debug info for the net.
bool debug_info_; // set from param.debug_info() in Init()
// Callbacks
// NOTE(review): presumably invoked before/after the forward and backward
// passes, as the names suggest; their use is not visible in this excerpt.
vector<Callback*> before_forward_;
vector<Callback*> after_forward_;
vector<Callback*> before_backward_;
vector<Callback*> after_backward_;
小结
- 初始化函数中的`available_blobs`,是指目前还未被当成输入blob使用的输出blob。在经过InsertSplits()操作后,除去部分被用于计算网络loss的输出blob外,剩余的输出blob与输入blob是一一对应的。所以可使用该变量快速确定某层的输入blob是否有效,以及网络的所有输出blob。
- 网络的所有输出blob都会保存在`blobs_`中,网络各层之间的数据传递依赖此变量进行。但初始化的时候只会创建一个空的blob对象,实际训练时才会将数据填充进去。
- 网络的各Layer是否需要反传,首先是根据输入数据和参数数据的设置来判断该层是否需要反传,然后是根据该层的输出数据是否参与了网络loss的计算来判断,最后是根据是否设置了强制需要反传参数来确定,越往后的判断的优先级越高。
Caffe的源码笔者是第一次阅读,一边阅读一边记录,对代码的理解和分析可能会存在错误或遗漏,希望各位读者批评指正,谢谢支持!