An overview of net_->ForwardBackward()
The net_->ForwardBackward() method is defined in net.hpp:
Dtype ForwardBackward() {
  Dtype loss;
  Forward(&loss);
  Backward();
  return loss;
}
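For context, the caller of ForwardBackward() is the solver: each training iteration runs one combined forward/backward pass over the whole net and then applies the parameter update. Below is a hedged sketch of that call site; SolverStepSketch is a hypothetical helper loosely modeled on Solver<Dtype>::Step in solver.cpp, with details such as callbacks and diff clearing omitted.

// Hedged sketch of the call site (SolverStepSketch is hypothetical, not the
// verbatim Solver<Dtype>::Step): each iteration accumulates the loss over
// iter_size forward/backward passes, averages it, and the solver then applies
// the parameter update computed from the diffs that Backward() filled in.
template <typename Dtype>
Dtype SolverStepSketch(Net<Dtype>* net, int iter_size) {
  Dtype loss = 0;
  for (int i = 0; i < iter_size; ++i) {
    loss += net->ForwardBackward();  // one full forward + backward pass
  }
  loss /= iter_size;                 // loss reported for this iteration
  // ... the real solver then calls ApplyUpdate() to adjust learnable params.
  return loss;
}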
First, step into Forward(&loss):
net.cpp
template <typename Dtype>
const vector<Blob<Dtype>*>& Net<Dtype>::Forward(Dtype* loss) {
  if (loss != NULL) {
    *loss = ForwardFromTo(0, layers_.size() - 1);
  } else {
    ForwardFromTo(0, layers_.size() - 1);
  }
  return net_output_blobs_;
}
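This same overload is also how a deployed net is run for inference: the caller passes an optional loss pointer and reads the returned net_output_blobs_. A hedged usage sketch follows; the net variable and the interpretation of the first output blob are assumptions that depend on the specific network definition.

// Hedged usage sketch: run a forward pass on an already-initialized net and
// read the output blobs returned by Forward(). What outputs[0] contains
// (e.g. softmax probabilities) depends on the network's prototxt.
float loss = 0;
const std::vector<caffe::Blob<float>*>& outputs = net->Forward(&loss);
const float* probs = outputs[0]->cpu_data();  // pointer to the first output blob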
Next, step into the call *loss = ForwardFromTo(0, layers_.size() - 1):
net.cpp
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    for (int c = 0; c < before_forward_.size(); ++c) {
      before_forward_[c]->run(i);
    }
    // Forward-propagate layer by layer: bottom_vecs_[i] holds pointers to
    // layer i's input blobs, top_vecs_[i] holds pointers to its output blobs.
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }
    for (int c = 0; c < after_forward_.size(); ++c) {
      after_forward_[c]->run(i);
    }
  }
  return loss;
}
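The before_forward_ and after_forward_ vectors hold Net::Callback hooks that are run around each layer's forward pass; before_backward_ and after_backward_ play the same role in the backward pass shown later. As a hedged illustration, assuming the Callback interface and the add_before_forward() registration helper from net.hpp, a logging hook might look like this (the ForwardLogger class itself is hypothetical):

// Hedged sketch (ForwardLogger is hypothetical): a hook that logs which layer
// is about to run; ForwardFromTo() calls run(i) before each layer's forward.
class ForwardLogger : public caffe::Net<float>::Callback {
 protected:
  // run() is the protected virtual that Net invokes around each layer.
  virtual void run(int layer) {
    LOG(INFO) << "about to forward layer " << layer;
  }
};

// Registration, assuming an already-initialized net:
//   ForwardLogger logger;
//   net->add_before_forward(&logger);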
Then step into Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]). This first enters the Forward wrapper of the Layer base class:
layer.hpp
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype loss = 0;
  Reshape(bottom, top);
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // Forward_cpu is a virtual function of the Layer class; each derived
    // class provides its own implementation, so this call dispatches to the
    // Forward_cpu of the concrete layer type (the same applies to
    // Forward_gpu below).
    Forward_cpu(bottom, top);
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
#ifndef CPU_ONLY
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->gpu_data();
      const Dtype* loss_weights = top[top_id]->gpu_diff();
      Dtype blob_loss = 0;
      caffe_gpu_dot(count, data, loss_weights, &blob_loss);
      loss += blob_loss;
    }
#endif
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  return loss;
}
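The loss accumulation here relies on a Caffe convention: for a top blob that contributes to the loss, its diff is pre-filled during layer setup with the loss_weight from the prototxt, so caffe_cpu_dot(count, data, loss_weights) is simply the weighted sum of the blob's values. The sketch below shows what that dot product computes in plain C++; it is illustrative only, not the BLAS-backed implementation in math_functions.cpp.

// Illustrative equivalent of loss += caffe_cpu_dot(count, data, loss_weights):
// a weighted sum of the top blob's values, where the weights come from the
// layer's loss_weight (stored in the top blob's diff during setup).
template <typename Dtype>
Dtype WeightedLoss(int count, const Dtype* data, const Dtype* loss_weights) {
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss += data[i] * loss_weights[i];
  }
  return loss;  // typical loss layer: 1 element with weight 1.0 -> the raw loss
}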
Next, look at the Backward() call in ForwardBackward():
net.cpp
template <typename Dtype>
void Net<Dtype>::Backward() {
  // Back-propagate starting from the last layer.
  BackwardFromTo(layers_.size() - 1, 0);
  if (debug_info_) {
    Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0;
    for (int i = 0; i < learnable_params_.size(); ++i) {
      asum_data += learnable_params_[i]->asum_data();
      asum_diff += learnable_params_[i]->asum_diff();
      sumsq_data += learnable_params_[i]->sumsq_data();
      sumsq_diff += learnable_params_[i]->sumsq_diff();
    }
    const Dtype l2norm_data = std::sqrt(sumsq_data);
    const Dtype l2norm_diff = std::sqrt(sumsq_diff);
    LOG(ERROR) << "    [Backward] All net params (data, diff): "
               << "L1 norm = (" << asum_data << ", " << asum_diff << "); "
               << "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")";
  }
}
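The debug branch only aggregates statistics over all learnable parameters: asum_* is a sum of absolute values (an L1 norm) and sumsq_* a sum of squares, so the logged L2 norm is the square root of the total sum of squares. A small sketch of what those quantities mean over a raw buffer; assuming a plain float array, whereas the real Blob methods delegate to caffe_cpu_asum / caffe_cpu_dot or their GPU counterparts.

#include <cmath>
#include <cstddef>

// Sketch of the statistics behind the [Backward] debug log line:
// L1 norm = sum of |x|, L2 norm = sqrt(sum of x^2) over all parameter values.
float L1Norm(const float* x, size_t n) {
  float s = 0;
  for (size_t i = 0; i < n; ++i) s += std::fabs(x[i]);
  return s;
}

float L2Norm(const float* x, size_t n) {
  float s = 0;
  for (size_t i = 0; i < n; ++i) s += x[i] * x[i];
  return std::sqrt(s);
}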
Step into BackwardFromTo(layers_.size() - 1, 0):
net.cpp
template <typename Dtype>
void Net<Dtype>::BackwardFromTo(int start, int end) {
  CHECK_GE(end, 0);
  CHECK_LT(start, layers_.size());
  for (int i = start; i >= end; --i) {
    for (int c = 0; c < before_backward_.size(); ++c) {
      before_backward_[c]->run(i);
    }
    if (layer_need_backward_[i]) {
      // During back-propagation, top_vecs_[i] holds pointers to layer i's
      // input (gradient) blobs and bottom_vecs_[i] to its output blobs --
      // the opposite roles they play in the forward pass.
      layers_[i]->Backward(
          top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]);
      if (debug_info_) { BackwardDebugInfo(i); }
    }
    for (int c = 0; c < after_backward_.size(); ++c) {
      after_backward_[c]->run(i);
    }
  }
}
Step into layers_[i]->Backward(top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]):
layer.hpp
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  switch (Caffe::mode()) {
  case Caffe::CPU:
    // As in the forward pass, the virtual Backward_cpu (or Backward_gpu)
    // dispatches to the concrete layer's implementation of back-propagation.
    Backward_cpu(top, propagate_down, bottom);
    break;
  case Caffe::GPU:
    Backward_gpu(top, propagate_down, bottom);
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
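To make the virtual dispatch concrete, here is a hedged sketch of a trivial derived layer: an element-wise "scale by a constant" layer whose Forward_cpu computes y = k * x and whose Backward_cpu multiplies the incoming gradient by k, honoring propagate_down. ScaleByConstLayer is purely illustrative and not an actual Caffe layer; passing k through the constructor is a simplification (real layers read their parameters from the LayerParameter protobuf) and real layers also implement LayerSetUp, type(), GPU variants, etc. The usual Caffe headers (layer.hpp, blob.hpp) are assumed to be included.

// Purely illustrative derived layer (not part of Caffe): y = k * x.
template <typename Dtype>
class ScaleByConstLayer : public Layer<Dtype> {
 public:
  explicit ScaleByConstLayer(const LayerParameter& param, Dtype k)
      : Layer<Dtype>(param), k_(k) {}

  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
                       const vector<Blob<Dtype>*>& top) {
    top[0]->ReshapeLike(*bottom[0]);  // output has the same shape as the input
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top) {
    const Dtype* x = bottom[0]->cpu_data();
    Dtype* y = top[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) {
      y[i] = k_ * x[i];  // forward: scale every element
    }
  }

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom) {
    if (!propagate_down[0]) { return; }  // skip if no gradient is needed below
    const Dtype* dy = top[0]->cpu_diff();
    Dtype* dx = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < top[0]->count(); ++i) {
      dx[i] = k_ * dy[i];  // backward: chain rule for y = k * x
    }
  }

  Dtype k_;  // the constant scale factor
};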
The concrete forward and backward implementations of the individual layer types are covered in the next section.