Caffe源码-Net类(上)

Net类简介

Net类主要处理各个Layer之间的输入输出数据和参数数据共享等的关系。由于Net类的代码较多,本次主要介绍网络初始化部分的代码。Net类在初始化的时候将各个Layer的输出blob都统一保存在变量blobs_中,利用各个层的输入输出数据在blobs_的位置,可以方便地确定层之间数据传递的关系。Net中还利用类似的方法来存储各个层之间的学习参数,管理层之间的参数共享关系。

net.cpp部分源码

// Constructs a net directly from an already-populated NetParameter; all of the
// actual work is delegated to Init().
template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param) {
  this->Init(param);
}

// Constructs a net from a text-format parameter file.
//   param_file: path handed to ReadNetParamsFromTextFileOrDie().
//   phase:      phase written into the net state before initialization.
//   level:      level written into the net state before initialization.
//   stages:     optional list of stage names appended to the net state;
//               may be NULL, in which case no stage is added.
// phase/level/stages together form the net state that StateMeetsRule() later
// compares against each layer's include/exclude rules.
template <typename Dtype>
Net<Dtype>::Net(const string& param_file, Phase phase,
    const int level, const vector<string>* stages) {
  NetParameter param;
  ReadNetParamsFromTextFileOrDie(param_file, &param);

  // Overwrite the state stored in the file with the caller's phase, stages
  // and level (applied in that order).
  NetState* net_state = param.mutable_state();
  net_state->set_phase(phase);
  if (stages != NULL) {
    for (vector<string>::const_iterator stage = stages->begin();
         stage != stages->end(); ++stage) {
      net_state->add_stage(*stage);
    }
  }
  net_state->set_level(level);

  Init(param);
}

// Initializes the net from in_param. In order, this function:
//   1. filters layers against the net state (FilterNet) and inserts splits
//      (InsertSplits);
//   2. for every remaining layer: creates it, wires its bottom and top blobs
//      (AppendBottom / AppendTop), calls its SetUp(), records loss weights and
//      registers its parameter blobs (AppendParam);
//   3. walks the layers backwards to decide which layers and bottoms actually
//      need backward computation;
//   4. applies force_backward if requested;
//   5. records the remaining unconsumed top blobs as net outputs, builds the
//      name -> index lookup maps and calls ShareWeights().
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  // Set phase from the state.
  phase_ = in_param.state().phase();    // phase of the net (TRAIN or TEST)
  // Filter layers based on their include/exclude rules and
  // the current NetState.
  NetParameter filtered_param;
  FilterNet(in_param, &filtered_param);   // keep only the layers whose rules accept the current net state
  LOG_IF(INFO, Caffe::root_solver()) << "Initializing net from parameters: " << std::endl
      << filtered_param.DebugString();
  // Create a copy of filtered_param with splits added where necessary.
  NetParameter param;
  InsertSplits(filtered_param, &param);   // defined outside this view (presumably insert_splits.cpp -- confirm)
  // Basically, build all the layers and set up their connections.
  name_ = param.name();                   // name of the network
  map<string, int> blob_name_to_idx;    // top blob name -> index in blobs_; only named top blobs are recorded
  
  // AppendTop() inserts every named top blob into this set and AppendBottom()
  // erases a blob once it has been consumed as a bottom. Whatever is left at
  // the end is treated as an output of the net.
  set<string> available_blobs;    // top blobs that have not been used as a bottom yet
  
  memory_used_ = 0;   // running total of top blob element counts, starts at 0
  // For each layer, set up its input and output
  bottom_vecs_.resize(param.layer_size());    // one entry per layer in each of these per-layer tables
  top_vecs_.resize(param.layer_size());
  bottom_id_vecs_.resize(param.layer_size());
  param_id_vecs_.resize(param.layer_size());
  top_id_vecs_.resize(param.layer_size());
  bottom_need_backward_.resize(param.layer_size());

  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {   // process the layers in order
    // Inherit phase from net if unset.
    if (!param.layer(layer_id).has_phase()) {
      param.mutable_layer(layer_id)->set_phase(phase_);   // layer has no phase of its own, so use the net's
    }
    // Setup layer.
    const LayerParameter& layer_param = param.layer(layer_id);    // parameters of the current layer
    if (layer_param.propagate_down_size() > 0) {    // propagate_down was specified explicitly
      CHECK_EQ(layer_param.propagate_down_size(),
          layer_param.bottom_size())
          << "propagate_down param must be specified "
          << "either 0 or bottom_size times ";    // must have exactly one entry per bottom blob
    }
    layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));    // create the layer from its parameters
    layer_names_.push_back(layer_param.name());   // remember the layer name
    LOG_IF(INFO, Caffe::root_solver()) << "Creating Layer " << layer_param.name();  // logged only when Caffe::root_solver() is true
    
    bool need_backward = false;   // becomes true if any bottom blob or any parameter blob of this layer needs backward

    // Figure out this layer's input and output
    for (int bottom_id = 0; bottom_id < layer_param.bottom_size(); ++bottom_id) {   // wire the bottom blobs
      const int blob_id = AppendBottom(param, layer_id, bottom_id,
                                       &available_blobs, &blob_name_to_idx);  // registers bottom #bottom_id and returns its index in blobs_
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];  // a bottom that needs backward makes the layer need backward
    }
    int num_top = layer_param.top_size();
    for (int top_id = 0; top_id < num_top; ++top_id) {    // wire the top blobs
      AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);  // registers top #top_id with the net
      // Collect Input layer tops as Net inputs.
      if (layer_param.type() == "Input") {    // tops of "Input" layers are the net's inputs
        const int blob_id = blobs_.size() - 1;
        net_input_blob_indices_.push_back(blob_id);         // index of that blob in blobs_
        net_input_blobs_.push_back(blobs_[blob_id].get());  // raw pointer to that blob
      }
    }
    // If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
    // specified fewer than the required number (as specified by
    // ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
    Layer<Dtype>* layer = layers_[layer_id].get();
    // When AutoTopBlobs() is true, missing top blobs are created anonymously
    // until the required count is reached.
    if (layer->AutoTopBlobs()) {
      const int needed_num_top =
          std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());  // number of top blobs the layer requires
      for (; num_top < needed_num_top; ++num_top) {
        // Add "anonymous" top blobs -- do not modify available_blobs or
        // blob_name_to_idx as we don't want these blobs to be usable as input
        // to other layers.
        // (They are still stored in blobs_ by AppendTop().)
        AppendTop(param, layer_id, num_top, NULL, NULL);    // NULL, NULL: do not register the name anywhere
      }
    }
    // After this layer is connected, set it up.
    layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);  // layer-specific setup (presumably validates blob counts and shapes the tops -- confirm in layer.hpp)
    LOG_IF(INFO, Caffe::root_solver()) << "Setting up " << layer_names_[layer_id];  // progress log
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {  // weight table is too short for this blob index
        blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));  // grow it to cover the index; new entries are 0
      }
      blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);   // copy the loss weight reported by the layer
      LOG_IF(INFO, Caffe::root_solver())
          << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();        // log the blob's shape
      if (layer->loss(top_id)) {
        LOG_IF(INFO, Caffe::root_solver()) << "    with loss weight " << layer->loss(top_id); // only logged for non-zero loss weights
      }
      // TODO(review): for in-place computation AppendTop() pushes the same blob
      // pointer into top_vecs_ again, so this sum presumably counts such a
      // blob more than once -- confirm.
      memory_used_ += top_vecs_[layer_id][top_id]->count();   // accumulate the element count (converted to bytes only in the log below)
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Memory required for data: " << memory_used_ * sizeof(Dtype);
    const int param_size = layer_param.param_size();    // number of ParamSpec entries given in the layer definition
    const int num_param_blobs = layers_[layer_id]->blobs().size();  // number of parameter blobs the layer actually has
    CHECK_LE(param_size, num_param_blobs)
        << "Too many params specified for layer " << layer_param.name();  // cannot specify more ParamSpecs than blobs
    ParamSpec default_param_spec;   // default-constructed ParamSpec, used for blobs without an explicit spec
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      const ParamSpec* param_spec = (param_id < param_size) ?
          &layer_param.param(param_id) : &default_param_spec; // explicit spec if given, otherwise the default
      const bool param_need_backward = param_spec->lr_mult() != 0;  // a non-zero learning-rate multiplier means the blob needs gradients
      need_backward |= param_need_backward;   // a parameter that needs backward makes the layer need backward
      layers_[layer_id]->set_param_propagate_down(param_id,
                                                  param_need_backward); // store the per-parameter flag in the layer
    }
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      AppendParam(param, layer_id, param_id); // register the parameter blob with the net
    }
    // Finally, set the backward flag
    // (true if any bottom blob or any parameter blob of this layer needs it)
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
        // the layer's top blobs then need backward as well
        blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
      }
    }
  }
  // Go through the net backwards to determine which blobs contribute to the
  // loss.  We can skip backward computation for blobs that don't contribute
  // to the loss.
  // Also checks if all bottom blobs don't need backward computation (possible
  // because the skip_propagate_down param) and so we can skip backward
  // computation for the entire layer
  set<string> blobs_under_loss;     // names of blobs that contribute to the loss
  set<string> blobs_skip_backp;     // names of blobs for which backward can be skipped
  for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {    // last layer first
    bool layer_contributes_loss = false;    // assume no contribution until a top blob proves otherwise
    bool layer_skip_propagate_down = true;
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];  // name of top #top_id of this layer
      if (layers_[layer_id]->loss(top_id) ||
          (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) { // non-zero loss weight, or consumed by a layer that contributes
        layer_contributes_loss = true;      // one contributing top is enough
      }
      if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) { // this top is not marked as skippable
        layer_skip_propagate_down = false;  // so the layer cannot be skipped
      }
      // Both flags have reached their final value; the remaining tops cannot
      // change them.
      if (layer_contributes_loss && !layer_skip_propagate_down)
        break;    // no need to look at the remaining top blobs
    }
    // If this layer can skip backward computation, also all his bottom blobs don't need backpropagation
    // (the layer was marked as needing backward earlier, but every one of its
    // tops is in blobs_skip_backp)
    if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
      layer_need_backward_[layer_id] = false;   // the skip decision takes priority
      for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
               ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] = false;   // none of the bottoms need backward either
      }
    }
    // A layer that does not contribute to the loss needs no backward; its
    // bottoms are cleared in the loop further below.
    if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }

    if (Caffe::root_solver()) {   // report the decision for this layer
      if (layer_need_backward_[layer_id]) {
        LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
      } else {
        LOG(INFO) << layer_names_[layer_id]
            << " does not need backward computation.";
      }
    }
    for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size(); ++bottom_id) {
      if (layer_contributes_loss) {   // this layer contributes to the loss
        // Record every bottom blob name in blobs_under_loss so that the
        // earlier layer producing that blob is recognised as contributing too.
        const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_under_loss.insert(blob_name);
      } else {
        bottom_need_backward_[layer_id][bottom_id] = false;   // otherwise this bottom needs no backward
      }
      if (!bottom_need_backward_[layer_id][bottom_id]) {    // this bottom does not need backward
        // Record its name in blobs_skip_backp so that the earlier layer
        // producing the blob may skip backward for it.
        const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_skip_backp.insert(blob_name);
      }
    }
  }
  // Handle force_backward if needed.
  // Every layer is marked as needing backward; bottoms are additionally
  // enabled where the layer's AllowForceBackward() permits it.
  if (param.force_backward()) {
    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
      layer_need_backward_[layer_id] = true;    // forced on for every layer
      for (int bottom_id = 0; bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
        // A bottom needs backward if it already did, or if the layer allows
        // forcing backward for this bottom.
        bottom_need_backward_[layer_id][bottom_id] =
            bottom_need_backward_[layer_id][bottom_id] ||
            layers_[layer_id]->AllowForceBackward(bottom_id);
        // Propagate the result to the per-blob flag, indexed by the bottom's
        // position in blobs_.
        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
            bottom_need_backward_[layer_id][bottom_id];
      }
      for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); ++param_id) {
        layers_[layer_id]->set_param_propagate_down(param_id, true);  // every parameter blob is forced to propagate down
      }
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  // (i.e. every top blob still in available_blobs, never consumed as a bottom)
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {    // blobs left over in available_blobs
    LOG_IF(INFO, Caffe::root_solver())
        << "This network produces output " << *it;    // report each net output
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); // raw pointer to the output blob
    net_output_blob_indices_.push_back(blob_name_to_idx[*it]);        // its index in blobs_
  }
  for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
    blob_names_index_[blob_names_[blob_id]] = blob_id;  // blob name -> index in blobs_
  }
  for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) {
    layer_names_index_[layer_names_[layer_id]] = layer_id;  // layer name -> index in layers_
  }
  ShareWeights();     // defined outside this view; presumably links shared parameters to their owners -- confirm
  debug_info_ = param.debug_info();   // copy the debug_info setting from the net parameters
  LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";   // initialization finished
}

// Copies `param` into `param_filtered`, keeping only the layers whose
// include/exclude rules accept the net state stored in `param`.
//   - A layer with no include rules is kept unless one of its exclude rules
//     is met.
//   - A layer with include rules is kept only if one of them is met.
// Specifying both include and exclude rules on one layer is a fatal error.
template <typename Dtype>
void Net<Dtype>::FilterNet(const NetParameter& param,
    NetParameter* param_filtered) {
  NetState net_state(param.state());
  // Start from a full copy, then rebuild the layer list from scratch.
  param_filtered->CopyFrom(param);
  param_filtered->clear_layer();

  const int total_layers = param.layer_size();
  for (int idx = 0; idx < total_layers; ++idx) {
    const LayerParameter& candidate = param.layer(idx);
    const string& candidate_name = candidate.name();

    CHECK(candidate.include_size() == 0 || candidate.exclude_size() == 0)
          << "Specify either include rules or exclude rules; not both.";

    bool keep_layer;
    if (candidate.include_size() == 0) {
      // Included by default; the first exclude rule that is met drops it.
      keep_layer = true;
      for (int r = 0; r < candidate.exclude_size(); ++r) {
        if (StateMeetsRule(net_state, candidate.exclude(r), candidate_name)) {
          keep_layer = false;
          break;
        }
      }
    } else {
      // Excluded by default; the first include rule that is met keeps it.
      keep_layer = false;
      for (int r = 0; r < candidate.include_size(); ++r) {
        if (StateMeetsRule(net_state, candidate.include(r), candidate_name)) {
          keep_layer = true;
          break;
        }
      }
    }

    if (keep_layer) {
      param_filtered->add_layer()->CopyFrom(candidate);
    }
  }
}

// Returns true when the net state `state` satisfies every condition set in
// `rule`, and false as soon as one condition fails. Conditions are checked in
// this order:
//   1. phase:     if the rule sets a phase, it must equal state.phase();
//   2. min_level: if set, state.level() must be >= rule.min_level();
//   3. max_level: if set, state.level() must be <= rule.max_level();
//   4. stage:     every stage listed in the rule must appear in the state;
//   5. not_stage: no not_stage listed in the rule may appear in the state.
// `layer_name` is only used in the log messages. The reason for a failed
// match is logged at INFO level when Caffe::root_solver() is true.
template <typename Dtype>
bool Net<Dtype>::StateMeetsRule(const NetState& state,
    const NetStateRule& rule, const string& layer_name) {
  // Check whether the rule is broken due to phase.
  if (rule.has_phase()) {
    if (rule.phase() != state.phase()) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState phase (" << state.phase()
          << ") differed from the phase (" << rule.phase()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to min level.
  if (rule.has_min_level()) {
    if (state.level() < rule.min_level()) {
      // The level is too low here, so the message must say "below"; the
      // original text said "above", contradicting the condition.
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is below the min_level (" << rule.min_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to max level.
  if (rule.has_max_level()) {
    if (state.level() > rule.max_level()) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is above the max_level (" << rule.max_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to stage. The NetState must
  // contain ALL of the rule's stages to meet it.
  for (int i = 0; i < rule.stage_size(); ++i) {
    // Check that the NetState contains the rule's ith stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (!has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState did not contain stage '" << rule.stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to not_stage. The NetState must
  // contain NONE of the rule's not_stages to meet it.
  for (int i = 0; i < rule.not_stage_size(); ++i) {
    // Check whether the NetState contains the rule's ith not_stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.not_stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState contained a not_stage '" << rule.not_stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  return true;  // every condition in the rule is satisfied
}

// Helper for Net::Init: add a new top blob to the net.
//
// Registers top blob #top_id of layer #layer_id:
//   - in-place (the top has the same name as the bottom at the same position):
//     no blob is created; the existing blob's pointer and index are appended
//     to top_vecs_ / top_id_vecs_;
//   - a name that is already in blob_name_to_idx but is not in-place: fatal
//     error, because two sources would produce the same blob;
//   - otherwise: a new empty Blob is created, appended to blobs_ (with its
//     name and a "needs backward" flag of false) and recorded in
//     top_vecs_ / top_id_vecs_.
// When top_id is beyond the tops listed in the layer definition, the blob is
// named "(automatic)".
// available_blobs and blob_name_to_idx may be NULL; Init() passes NULL for
// anonymous blobs so that they cannot be used as input by other layers.
template <typename Dtype>
void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
                           const int top_id, set<string>* available_blobs,
                           map<string, int>* blob_name_to_idx) {
  // Only read access is needed, so bind a reference instead of heap-allocating
  // a deep copy of the whole LayerParameter on every call.
  const LayerParameter& layer_param = param.layer(layer_id);
  const string blob_name = (layer_param.top_size() > top_id) ?
      layer_param.top(top_id) : "(automatic)";
  // Check if we are doing in-place computation
  if (blob_name_to_idx && layer_param.bottom_size() > top_id &&
      blob_name == layer_param.bottom(top_id)) {
    // In-place computation
    LOG_IF(INFO, Caffe::root_solver())
        << layer_param.name() << " -> " << blob_name << " (in-place)";
    // Look the index up once and reuse it for both tables.
    const int blob_id = (*blob_name_to_idx)[blob_name];
    top_vecs_[layer_id].push_back(blobs_[blob_id].get());
    top_id_vecs_[layer_id].push_back(blob_id);
  } else if (blob_name_to_idx &&
             blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) {
    // If we are not doing in-place computation but have duplicated blobs,
    // raise an error.
    LOG(FATAL) << "Top blob '" << blob_name << "' produced by multiple sources.";
  } else {
    // Normal output.
    if (Caffe::root_solver()) {
      LOG(INFO) << layer_param.name() << " -> " << blob_name;
    }
    shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());  // new, empty blob
    const int blob_id = blobs_.size();    // index the new blob will have in blobs_
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(false); // starts out as not needing backward
    if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
    top_id_vecs_[layer_id].push_back(blob_id);
    top_vecs_[layer_id].push_back(blob_pointer.get());
  }
  // Make the blob available as a bottom for later layers; AppendBottom()
  // erases it again once it has been consumed.
  if (available_blobs) { available_blobs->insert(blob_name); }
}

// Helper for Net::Init: add a new bottom blob to the net.
//
// Wires bottom blob #bottom_id of layer #layer_id to the top blob of the same
// name produced earlier. It is a fatal error if that name is not in
// available_blobs. The blob's pointer, its index in blobs_ and its
// "needs backward" flag are appended to bottom_vecs_, bottom_id_vecs_ and
// bottom_need_backward_ for this layer, and the name is erased from
// available_blobs. Returns the blob's index in blobs_.
template <typename Dtype>
int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
    const int bottom_id, set<string>* available_blobs,
    map<string, int>* blob_name_to_idx) {
  const LayerParameter& layer_param = param.layer(layer_id);
  const string& blob_name = layer_param.bottom(bottom_id);

  // The bottom must name a top blob that is still available.
  if (available_blobs->count(blob_name) == 0) {
    LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '"
               << layer_param.name() << "', bottom index " << bottom_id << ")";
  }

  const int blob_id = (*blob_name_to_idx)[blob_name];
  LOG_IF(INFO, Caffe::root_solver()) << layer_names_[layer_id] << " <- " << blob_name;

  // This function is called once per bottom, in order, so the entries pushed
  // here end up at position [layer_id][bottom_id].
  bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
  bottom_id_vecs_[layer_id].push_back(blob_id);
  available_blobs->erase(blob_name);

  // An explicit propagate_down setting on the layer overrides the flag
  // inherited from the blob.
  bool propagate;
  if (layer_param.propagate_down_size() > 0) {
    propagate = layer_param.propagate_down(bottom_id);
  } else {
    propagate = blob_need_backward_[blob_id];
  }
  bottom_need_backward_[layer_id].push_back(propagate);

  return blob_id;
}

// Registers parameter blob #param_id of layer #layer_id with the net.
//
// Every call appends one entry to params_, param_display_names_,
// param_id_vecs_[layer_id], param_layer_indices_, param_owners_ and
// learnable_param_ids_. What happens beyond that depends on the name given in
// the layer's ParamSpec:
//   - no name, or a name seen for the first time: the layer owns the blob. It
//     is appended to learnable_params_ together with its lr/decay settings,
//     and a non-empty name is recorded in param_names_index_.
//   - a name seen before: the blob is shared with the owner. Its shape (or
//     only its count in PERMISSIVE mode) must match the owner's, and its
//     lr_mult/decay_mult must agree with the owner's if both are set; if only
//     the sharer sets them, they are stored on the owner's entry.
template <typename Dtype>
void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
                             const int param_id) {
  const LayerParameter& layer_param = layers_[layer_id]->layer_param();
  // True when the layer definition carries a ParamSpec for this blob.
  const bool has_spec = layer_param.param_size() > param_id;
  const string param_name =
      has_spec ? layer_param.param(param_id).name() : "";

  // Display name: the explicit name, or the blob's index within the layer.
  if (param_name.empty()) {
    ostringstream index_as_name;
    index_as_name << param_id;
    param_display_names_.push_back(index_as_name.str());
  } else {
    param_display_names_.push_back(param_name);
  }

  const int net_param_id = params_.size();  // index this blob gets in params_
  params_.push_back(layers_[layer_id]->blobs()[param_id]);
  param_id_vecs_[layer_id].push_back(net_param_id);
  param_layer_indices_.push_back(make_pair(layer_id, param_id));

  // Explicit spec if present, otherwise a default-constructed one.
  ParamSpec default_param_spec;
  const ParamSpec* spec = &default_param_spec;
  if (has_spec) {
    spec = &layer_param.param(param_id);
  }

  // An unnamed blob is always owned by its layer; a named blob is owned only
  // the first time the name is seen.
  const bool owns_blob = param_name.empty() ||
      param_names_index_.find(param_name) == param_names_index_.end();
  if (owns_blob) {
    // This layer "owns" this parameter blob -- it is either anonymous
    // (i.e., not given a param_name) or explicitly given a name that we
    // haven't already seen.
    param_owners_.push_back(-1);
    if (!param_name.empty()) {
      param_names_index_[param_name] = net_param_id;
    }
    const int learnable_param_id = learnable_params_.size();
    learnable_params_.push_back(params_[net_param_id].get());
    learnable_param_ids_.push_back(learnable_param_id);
    has_params_lr_.push_back(spec->has_lr_mult());
    has_params_decay_.push_back(spec->has_decay_mult());
    params_lr_.push_back(spec->lr_mult());
    params_weight_decay_.push_back(spec->decay_mult());
    return;
  }

  // Named param blob with name we've seen before: share params
  const int owner_net_param_id = param_names_index_[param_name];
  param_owners_.push_back(owner_net_param_id);
  const pair<int, int>& owner_index = param_layer_indices_[owner_net_param_id];
  const int owner_layer_id = owner_index.first;
  const int owner_param_id = owner_index.second;
  LOG_IF(INFO, Caffe::root_solver()) << "Sharing parameters '" << param_name
      << "' owned by "
      << "layer '" << layer_names_[owner_layer_id] << "', param "
      << "index " << owner_param_id;

  Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
  Blob<Dtype>* owner_blob =
      layers_[owner_layer_id]->blobs()[owner_param_id].get();
  const bool permissive = has_spec &&
      (layer_param.param(param_id).share_mode() ==
       ParamSpec_DimCheckMode_PERMISSIVE);
  if (permissive) {
    // Permissive dimension checking -- only check counts are the same.
    CHECK_EQ(this_blob->count(), owner_blob->count())
        << "Cannot share param '" << param_name << "' owned by layer '"
        << layer_names_[owner_layer_id] << "' with layer '"
        << layer_names_[layer_id] << "'; count mismatch.  Owner layer param "
        << "shape is " << owner_blob->shape_string() << "; sharing layer "
        << "shape is " << this_blob->shape_string();
  } else {
    // Strict dimension checking -- all dims must be the same.
    CHECK(this_blob->shape() == owner_blob->shape())
        << "Cannot share param '" << param_name << "' owned by layer '"
        << layer_names_[owner_layer_id] << "' with layer '"
        << layer_names_[layer_id] << "'; shape mismatch.  Owner layer param "
        << "shape is " << owner_blob->shape_string() << "; sharing layer "
        << "expects shape " << this_blob->shape_string();
  }

  // learnable_param_ids_ receives exactly one entry per call, just like
  // params_, so an index into params_ is also valid for learnable_param_ids_.
  // The sharer reuses the owner's slot in learnable_params_.
  const int learnable_param_id = learnable_param_ids_[owner_net_param_id];
  learnable_param_ids_.push_back(learnable_param_id);

  if (spec->has_lr_mult()) {
    if (has_params_lr_[learnable_param_id]) {
      // Both set it: the values must agree.
      CHECK_EQ(spec->lr_mult(), params_lr_[learnable_param_id])
          << "Shared param '" << param_name << "' has mismatched lr_mult.";
    } else {
      // Only the sharer set it: store the value on the owner's entry.
      has_params_lr_[learnable_param_id] = true;
      params_lr_[learnable_param_id] = spec->lr_mult();
    }
  }
  if (spec->has_decay_mult()) {
    if (has_params_decay_[learnable_param_id]) {
      CHECK_EQ(spec->decay_mult(),
               params_weight_decay_[learnable_param_id])
          << "Shared param '" << param_name << "' has mismatched decay_mult.";
    } else {
      has_params_decay_[learnable_param_id] = true;
      params_weight_decay_[learnable_param_id] = spec->decay_mult();
    }
  }
}

net.hpp源码

  /// @brief The network name
  string name_;   // set from param.name() in Init()
  /// @brief The phase: TRAIN or TEST
  Phase phase_;   // set from the net state's phase in Init()
  /// @brief Individual layers in the net
  vector<shared_ptr<Layer<Dtype> > > layers_;   // all layers of the net, indexed by layer_id
  vector<string> layer_names_;                  // layer_names_[layer_id] is the name of layers_[layer_id]
  map<string, int> layer_names_index_;          // layer name -> layer_id (index in layers_)
  vector<bool> layer_need_backward_;            // per layer: whether it needs backward computation

  /// @brief the blobs storing intermediate results between the layer.
  vector<shared_ptr<Blob<Dtype> > > blobs_;     // every top blob created by AppendTop(), indexed by blob_id
  vector<string> blob_names_;                   // blob_names_[blob_id] is the name of blobs_[blob_id]
  map<string, int> blob_names_index_;           // blob name -> blob_id (index in blobs_)
  vector<bool> blob_need_backward_;             // per blob in blobs_: whether it needs backward computation
  /// bottom_vecs stores the vectors containing the input for each layer.
  /// They don't actually host the blobs (blobs_ does), so we simply store
  /// pointers.
  vector<vector<Blob<Dtype>*> > bottom_vecs_;   // bottom_vecs_[i][j]: pointer to bottom blob j of layer i (owned by blobs_)
  vector<vector<int> > bottom_id_vecs_;         // bottom_id_vecs_[i][j]: blob_id of bottom blob j of layer i
  vector<vector<bool> > bottom_need_backward_;  // bottom_need_backward_[i][j]: whether bottom blob j of layer i needs backward
  /// top_vecs stores the vectors containing the output for each layer
  vector<vector<Blob<Dtype>*> > top_vecs_;      // top_vecs_[i][j]: pointer to top blob j of layer i (owned by blobs_)
  vector<vector<int> > top_id_vecs_;            // top_id_vecs_[i][j]: blob_id of top blob j of layer i
  /// Vector of weight in the loss (or objective) function of each net blob, indexed by blob_id.
  vector<Dtype> blob_loss_weights_;             // loss weight of blobs_[blob_id]
  
  vector<vector<int> > param_id_vecs_;          // param_id_vecs_[i][j]: index in params_ of parameter blob j of layer i
  // param_owners_[i] describes who owns params_[i]: -1 when the blob is owned
  // by its own layer, otherwise the index in params_ of the owning blob that
  // this one is shared with.
  vector<int> param_owners_;
  // param_display_names_[i] is the display name of params_[i]: the name from
  // the ParamSpec if one was given, otherwise the blob's index within its
  // layer. Index-based names can repeat across layers, so this vector must not
  // be used to locate the owner of a shared parameter.
  vector<string> param_display_names_;
  vector<pair<int, int> > param_layer_indices_;   // param_layer_indices_[i]: (layer_id, param_id) of params_[i]
  // Parameter name -> index in params_. A non-empty name is inserted only the
  // first time it is seen (by its owner); later uses of the same name are
  // sharers, so this map is what AppendParam() uses to find the owner.
  map<string, int> param_names_index_;
  /// blob indices for the input and the output of the net
  vector<int> net_input_blob_indices_;    // blob_ids of the net's input blobs (tops of "Input" layers)
  vector<int> net_output_blob_indices_;   // blob_ids of the net's output blobs
  vector<Blob<Dtype>*> net_input_blobs_;  // pointers to the net's input blobs
  vector<Blob<Dtype>*> net_output_blobs_; // pointers to the net's output blobs (top blobs never consumed as a bottom)
  /// The parameters in the network.
  vector<shared_ptr<Blob<Dtype> > > params_;    // every parameter blob, owners and sharers alike
  vector<Blob<Dtype>*> learnable_params_;       // one entry per owned parameter blob; sharers add nothing here
  /**
   * The mapping from params_ -> learnable_params_: we have
   * learnable_param_ids_.size() == params_.size(),
   * and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
   * if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer
   * and learnable_params_[learnable_param_ids_[i]] gives its owner.
   */
  vector<int> learnable_param_ids_;     // learnable_param_ids_[i]: index in learnable_params_ for params_[i]
  /// the learning rate multipliers for learnable_params_
  vector<float> params_lr_;             // params_lr_[i]: lr_mult of learnable_params_[i]
  vector<bool> has_params_lr_;          // has_params_lr_[i]: whether lr_mult was explicitly set for learnable_params_[i]
  /// the weight decay multipliers for learnable_params_
  vector<float> params_weight_decay_;   // params_weight_decay_[i]: decay_mult of learnable_params_[i]
  vector<bool> has_params_decay_;       // has_params_decay_[i]: whether decay_mult was explicitly set for learnable_params_[i]
  /// The bytes of memory used by this net
  // NOTE: Init() accumulates the element count of every top blob here and
  // multiplies by sizeof(Dtype) only when logging, so the stored value is a
  // count of elements rather than bytes.
  size_t memory_used_;
  /// Whether to compute and display debug info for the net.
  bool debug_info_;   // set from param.debug_info() in Init()
  // Callbacks
  // NOTE(review): judging by the names these are invoked around the forward
  // and backward passes; the call sites are not part of this excerpt.
  vector<Callback*> before_forward_;    // presumably run before the forward pass
  vector<Callback*> after_forward_;     // presumably run after the forward pass
  vector<Callback*> before_backward_;   // presumably run before the backward pass
  vector<Callback*> after_backward_;    // presumably run after the backward pass

小结

  1. 初始化函数中的available_blobs,是指目前还未被当成输入blob使用的输出blob。在经过InsertSplits()操作后,除去部分被用于计算网络loss的输出blob外,剩余的输出blob与输入blob是一一对应的。所以可使用该变量快速确定某层的输入blob是否有效,以及网络的所有输出blob。
  2. 网络的所有输出blob都会保存在blobs_中,网络各层之间的数据传递依赖此变量进行。但初始化时AppendTop()只会创建一个空的Blob对象(并非空指针),其形状在各层调用SetUp()时确定,实际的数据要到前向计算时才会填充进去。
  3. 网络的各Layer是否需要反传,首先是根据输入数据和参数数据的设置来判断该层是否需要反传,然后是根据该层的输出数据是否参与了网络loss的计算来判断,最后是根据是否设置了强制需要反传参数来确定,越往后的判断的优先级越高。

Caffe的源码笔者是第一次阅读,一边阅读一边记录,对代码的理解和分析可能会存在错误或遗漏,希望各位读者批评指正,谢谢支持!

posted @ 2019-12-13 00:50  Rule110  阅读(464)  评论(0)  编辑  收藏  举报