caffe.proto中DataParameter部分
// Parameters of the data layer (the DataParameter message of caffe.proto).
message DataParameter {
  // Database backend types that can hold the input data.
  enum DB {
    LEVELDB = 0;  // Use LevelDB.
    LMDB = 1;     // Use LMDB.
  }
  // Specify the data source (path of the source database).
  optional string source = 1;
  // Specify the batch size (number of images contained in one batch).
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Database backend used for the input data; defaults to LEVELDB.
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels.
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training).
  // Number of batches to prefetch; defaults to 4.
  optional uint32 prefetch = 10 [default = 4];
}
include/caffe/layers/base_data_layer.hpp
1 #ifndef CAFFE_DATA_LAYERS_HPP_ 2 #define CAFFE_DATA_LAYERS_HPP_ 3 4 #include <vector> 5 6 #include "caffe/blob.hpp" 7 #include "caffe/data_transformer.hpp" 8 #include "caffe/internal_thread.hpp" 9 #include "caffe/layer.hpp" 10 #include "caffe/proto/caffe.pb.h" 11 #include "caffe/util/blocking_queue.hpp" 12 13 namespace caffe { 14 15 /** 16 * @brief Provides base for data layers that feed blobs to the Net. 17 * 18 * TODO(dox): thorough documentation for Forward and proto params. 19 */ 20 //基本数据层,派生于Layer 21 template <typename Dtype> 22 class BaseDataLayer : public Layer<Dtype> { 23 public: 24 explicit BaseDataLayer(const LayerParameter& param); 25 // LayerSetUp: implements common data layer setup functionality, and calls 26 // DataLayerSetUp to do special data layer setup for individual layer types. 27 // This method may not be overridden except by the BasePrefetchingDataLayer. 28 //通用层配置功能。之后调用DataLayerSetUp进行数据读取层的特殊配置 29 virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, 30 const vector<Blob<Dtype>*>& top); 31 virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, 32 const vector<Blob<Dtype>*>& top) {} 33 // Data layers have no bottoms, so reshaping is trivial. 
34 //数据读取层没有输入Bottom Blob,变形操作不是很重要 35 virtual void Reshape(const vector<Blob<Dtype>*>& bottom, 36 const vector<Blob<Dtype>*>& top) {} 37 //反向传播函数不需要做任何事情 38 virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, 39 const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {} 40 virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, 41 const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {} 42 43 protected: 44 TransformationParameter transform_param_;//数据预处理变换器参数 45 shared_ptr<DataTransformer<Dtype> > data_transformer_;//数据预处理变换器 46 bool output_labels_;//是否输出标签数据 47 }; 48 49 //批量数据,用于存放数据读取层输出 50 template <typename Dtype> 51 class Batch { 52 public: 53 Blob<Dtype> data_, label_;//两个Blob分别用来存储图片数据和标签 54 }; 55 56 //带预取功能的数据读取层,派生于BaseDataLayer和InternalThread 57 template <typename Dtype> 58 class BasePrefetchingDataLayer : 59 public BaseDataLayer<Dtype>, public InternalThread { 60 public: 61 explicit BasePrefetchingDataLayer(const LayerParameter& param); 62 // LayerSetUp: implements common data layer setup functionality, and calls 63 // DataLayerSetUp to do special data layer setup for individual layer types. 64 // This method may not be overridden.层设置 65 void LayerSetUp(const vector<Blob<Dtype>*>& bottom, 66 const vector<Blob<Dtype>*>& top); 67 68 virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, 69 const vector<Blob<Dtype>*>& top);//前向 70 virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, 71 const vector<Blob<Dtype>*>& top); 72 73 protected: 74 virtual void InternalThreadEntry();//内部线程入口 75 virtual void load_batch(Batch<Dtype>* batch) = 0;//载入批量数据,纯虚函数 76 77 vector<shared_ptr<Batch<Dtype> > > prefetch_;//抓取 78 BlockingQueue<Batch<Dtype>*> prefetch_free_;//空闲Batch队列 79 BlockingQueue<Batch<Dtype>*> prefetch_full_;//已加载Batch队列 80 Batch<Dtype>* prefetch_current_; 81 82 Blob<Dtype> transformed_data_;//变换后的数据 83 }; 84 85 } // namespace caffe 86 87 #endif // CAFFE_DATA_LAYERS_HPP_
src/caffe/layers/base_data_layer.cpp
1 #include <boost/thread.hpp> 2 #include <vector> 3 4 #include "caffe/blob.hpp" 5 #include "caffe/data_transformer.hpp" 6 #include "caffe/internal_thread.hpp" 7 #include "caffe/layer.hpp" 8 #include "caffe/layers/base_data_layer.hpp" 9 #include "caffe/proto/caffe.pb.h" 10 #include "caffe/util/blocking_queue.hpp" 11 12 namespace caffe { 13 14 //构造函数。初始化Layer参数、数据变换器参数 15 template <typename Dtype> 16 BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param) 17 : Layer<Dtype>(param), 18 transform_param_(param.transform_param()) { 19 } 20 21 //BaseDataLayer层设置 22 template <typename Dtype> 23 void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, 24 const vector<Blob<Dtype>*>& top) { 25 if (top.size() == 1) {//判断输出Blob数目。1则只输出data,2则输出data和label 26 output_labels_ = false; 27 } else { 28 output_labels_ = true; 29 } 30 //初始化数据变换器对象 31 data_transformer_.reset( 32 new DataTransformer<Dtype>(transform_param_, this->phase_)); 33 data_transformer_->InitRand(); 34 // The subclasses should setup the size of bottom and top 35 DataLayerSetUp(bottom, top);//子类负责设置Top Blob形状 36 } 37 38 //BasePrefetchingDataLayer构造函数 39 template <typename Dtype> 40 BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer( 41 const LayerParameter& param) 42 : BaseDataLayer<Dtype>(param), 43 prefetch_(param.data_param().prefetch()), 44 prefetch_free_(), prefetch_full_(), prefetch_current_() { 45 for (int i = 0; i < prefetch_.size(); ++i) { 46 prefetch_[i].reset(new Batch<Dtype>()); 47 prefetch_free_.push(prefetch_[i].get());//将batch对象都放入空闲队列 48 } 49 } 50 51 //BasePrefetchingDataLayer层配置函数 52 template <typename Dtype> 53 void BasePrefetchingDataLayer<Dtype>::LayerSetUp( 54 const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { 55 BaseDataLayer<Dtype>::LayerSetUp(bottom, top); 56 57 // Before starting the prefetch thread, we make cpu_data and gpu_data 58 // calls so that the prefetch thread does not accidentally make simultaneous 59 // cudaMalloc calls when the 
main thread is running. In some GPUs this 60 // seems to cause failures if we do not so. 61 //在开启数据预取线程前,通过调用Blob相应函数先进行cudaMalloc, 62 //避免多线程情况下同时进行cudaMalloc,会导致cuda API调用失败 63 for (int i = 0; i < prefetch_.size(); ++i) { 64 prefetch_[i]->data_.mutable_cpu_data(); 65 if (this->output_labels_) { 66 prefetch_[i]->label_.mutable_cpu_data(); 67 } 68 } 69 //GPU 70 #ifndef CPU_ONLY 71 if (Caffe::mode() == Caffe::GPU) { 72 for (int i = 0; i < prefetch_.size(); ++i) { 73 prefetch_[i]->data_.mutable_gpu_data(); 74 if (this->output_labels_) { 75 prefetch_[i]->label_.mutable_gpu_data(); 76 } 77 } 78 } 79 #endif 80 DLOG(INFO) << "Initializing prefetch"; 81 this->data_transformer_->InitRand(); 82 StartInternalThread();//开启内部预取线程 83 DLOG(INFO) << "Prefetch initialized."; 84 } 85 86 //内部线程入口 87 template <typename Dtype> 88 void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() { 89 //创建CUDA Stream,非阻塞类型 90 #ifndef CPU_ONLY 91 cudaStream_t stream; 92 if (Caffe::mode() == Caffe::GPU) { 93 CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); 94 } 95 #endif 96 97 try { 98 while (!must_stop()) {//循环载入批量数据 99 Batch<Dtype>* batch = prefetch_free_.pop();//得到一个空闲batch 100 load_batch(batch);//载入批量数据 101 #ifndef CPU_ONLY 102 if (Caffe::mode() == Caffe::GPU) { 103 batch->data_.data().get()->async_gpu_push(stream); 104 if (this->output_labels_) { 105 batch->label_.data().get()->async_gpu_push(stream); 106 } 107 CUDA_CHECK(cudaStreamSynchronize(stream));//同步到GPU 108 } 109 #endif 110 prefetch_full_.push(batch);//加入到带负载的Batch队列中 111 } 112 } catch (boost::thread_interrupted&) {//捕获到异常则退出循环 113 // Interrupted exception is expected on shutdown 114 } 115 #ifndef CPU_ONLY 116 if (Caffe::mode() == Caffe::GPU) { 117 CUDA_CHECK(cudaStreamDestroy(stream));//销毁CUDA Stream 118 } 119 #endif 120 } 121 122 //前向传播函数 123 template <typename Dtype> 124 void BasePrefetchingDataLayer<Dtype>::Forward_cpu( 125 const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { 126 if 
(prefetch_current_) { 127 prefetch_free_.push(prefetch_current_); 128 } 129 //从带负载的Batch中取出一个Batch对象 130 prefetch_current_ = prefetch_full_.pop("Waiting for data"); 131 // Reshape to loaded data.输出Top Blob根据Batch形状进行变形 132 top[0]->ReshapeLike(prefetch_current_->data_); 133 top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data()); 134 if (this->output_labels_) {//如果需要输出便签数据 135 // Reshape to loaded labels. 136 top[1]->ReshapeLike(prefetch_current_->label_);//Top Blob根据Batch中lable_形状进行变形 137 top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data()); 138 } 139 } 140 141 #ifdef CPU_ONLY 142 STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward); 143 #endif 144 145 INSTANTIATE_CLASS(BaseDataLayer); 146 INSTANTIATE_CLASS(BasePrefetchingDataLayer); 147 148 } // namespace caffe
以上内容摘抄并参考自赵永科《21天实战Caffe》。