
// Parameters for data layers that read their input from a database.
message DataParameter {
  // Database backends that can hold the input data.
  enum DB {
    LEVELDB = 0;  // Use LevelDB.
    LMDB = 1;     // Use LMDB.
  }
  // Specify the data source (path to the source data).
  optional string source = 1;
  // Specify the batch size (number of images contained in one batch).
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Database type used for the input data. Defaults to LEVELDB.
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels.
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training).
  // Number of batches loaded ahead of time; defaults to 4 batches.
  optional uint32 prefetch = 10 [default = 4];
}


 4 #include <vector>
 6 #include "caffe/blob.hpp"
 7 #include "caffe/data_transformer.hpp"
 8 #include "caffe/internal_thread.hpp"
 9 #include "caffe/layer.hpp"
10 #include "caffe/proto/caffe.pb.h"
11 #include "caffe/util/blocking_queue.hpp"
13 namespace caffe {
15 /**
16  * @brief Provides base for data layers that feed blobs to the Net.
17  *
18  * TODO(dox): thorough documentation for Forward and proto params.
19  */
20 //基本数据层,派生于Layer
21 template <typename Dtype>
22 class BaseDataLayer : public Layer<Dtype> {
23  public:
24   explicit BaseDataLayer(const LayerParameter& param);
25   // LayerSetUp: implements common data layer setup functionality, and calls
26   // DataLayerSetUp to do special data layer setup for individual layer types.
27   // This method may not be overridden except by the BasePrefetchingDataLayer.
28   //通用层配置功能。之后调用DataLayerSetUp进行数据读取层的特殊配置
29   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
30       const vector<Blob<Dtype>*>& top);
31   virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
32       const vector<Blob<Dtype>*>& top) {}
33   // Data layers have no bottoms, so reshaping is trivial.
34   //数据读取层没有输入Bottom Blob,变形操作不是很重要
35   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
36       const vector<Blob<Dtype>*>& top) {}
37   //反向传播函数不需要做任何事情
38   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
39       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
40   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
41       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
43  protected:
44   TransformationParameter transform_param_;//数据预处理变换器参数
45   shared_ptr<DataTransformer<Dtype> > data_transformer_;//数据预处理变换器
46   bool output_labels_;//是否输出标签数据
47 };
49 //批量数据,用于存放数据读取层输出
50 template <typename Dtype>
51 class Batch {
52  public:
53   Blob<Dtype> data_, label_;//两个Blob分别用来存储图片数据和标签
54 };
56 //带预取功能的数据读取层,派生于BaseDataLayer和InternalThread
57 template <typename Dtype>
58 class BasePrefetchingDataLayer :
59     public BaseDataLayer<Dtype>, public InternalThread {
60  public:
61   explicit BasePrefetchingDataLayer(const LayerParameter& param);
62   // LayerSetUp: implements common data layer setup functionality, and calls
63   // DataLayerSetUp to do special data layer setup for individual layer types.
64   // This method may not be overridden.层设置
65   void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
66       const vector<Blob<Dtype>*>& top);
68   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
69       const vector<Blob<Dtype>*>& top);//前向
70   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
71       const vector<Blob<Dtype>*>& top);
73  protected:
74   virtual void InternalThreadEntry();//内部线程入口
75   virtual void load_batch(Batch<Dtype>* batch) = 0;//载入批量数据,纯虚函数
77   vector<shared_ptr<Batch<Dtype> > > prefetch_;//抓取
78   BlockingQueue<Batch<Dtype>*> prefetch_free_;//空闲Batch队列
79   BlockingQueue<Batch<Dtype>*> prefetch_full_;//已加载Batch队列
80   Batch<Dtype>* prefetch_current_;
82   Blob<Dtype> transformed_data_;//变换后的数据
83 };
85 }  // namespace caffe
87 #endif  // CAFFE_DATA_LAYERS_HPP_


  1 #include <boost/thread.hpp>
  2 #include <vector>
  4 #include "caffe/blob.hpp"
  5 #include "caffe/data_transformer.hpp"
  6 #include "caffe/internal_thread.hpp"
  7 #include "caffe/layer.hpp"
  8 #include "caffe/layers/base_data_layer.hpp"
  9 #include "caffe/proto/caffe.pb.h"
 10 #include "caffe/util/blocking_queue.hpp"
 12 namespace caffe {
 14 //构造函数。初始化Layer参数、数据变换器参数
 15 template <typename Dtype>
 16 BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
 17     : Layer<Dtype>(param),
 18       transform_param_(param.transform_param()) {
 19 }
 21 //BaseDataLayer层设置
 22 template <typename Dtype>
 23 void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 24       const vector<Blob<Dtype>*>& top) {
 25   if (top.size() == 1) {//判断输出Blob数目。1则只输出data,2则输出data和label
 26     output_labels_ = false;
 27   } else {
 28     output_labels_ = true;
 29   }
 30   //初始化数据变换器对象
 31   data_transformer_.reset(
 32       new DataTransformer<Dtype>(transform_param_, this->phase_));
 33   data_transformer_->InitRand();
 34   // The subclasses should setup the size of bottom and top
 35   DataLayerSetUp(bottom, top);//子类负责设置Top Blob形状
 36 }
 38 //BasePrefetchingDataLayer构造函数
 39 template <typename Dtype>
 40 BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
 41     const LayerParameter& param)
 42     : BaseDataLayer<Dtype>(param),
 43       prefetch_(param.data_param().prefetch()),
 44       prefetch_free_(), prefetch_full_(), prefetch_current_() {
 45   for (int i = 0; i < prefetch_.size(); ++i) {
 46     prefetch_[i].reset(new Batch<Dtype>());
 47     prefetch_free_.push(prefetch_[i].get());//将batch对象都放入空闲队列
 48   }
 49 }
 51 //BasePrefetchingDataLayer层配置函数
 52 template <typename Dtype>
 53 void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
 54     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
 55   BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
 57   // Before starting the prefetch thread, we make cpu_data and gpu_data
 58   // calls so that the prefetch thread does not accidentally make simultaneous
 59   // cudaMalloc calls when the main thread is running. In some GPUs this
 60   // seems to cause failures if we do not so.
 61   //在开启数据预取线程前,通过调用Blob相应函数先进行cudaMalloc,
 62   //避免多线程情况下同时进行cudaMalloc,会导致cuda API调用失败
 63   for (int i = 0; i < prefetch_.size(); ++i) {
 64     prefetch_[i]->data_.mutable_cpu_data();
 65     if (this->output_labels_) {
 66       prefetch_[i]->label_.mutable_cpu_data();
 67     }
 68   }
 69   //GPU
 70 #ifndef CPU_ONLY
 71   if (Caffe::mode() == Caffe::GPU) {
 72     for (int i = 0; i < prefetch_.size(); ++i) {
 73       prefetch_[i]->data_.mutable_gpu_data();
 74       if (this->output_labels_) {
 75         prefetch_[i]->label_.mutable_gpu_data();
 76       }
 77     }
 78   }
 79 #endif
 80   DLOG(INFO) << "Initializing prefetch";
 81   this->data_transformer_->InitRand();
 82   StartInternalThread();//开启内部预取线程
 83   DLOG(INFO) << "Prefetch initialized.";
 84 }
 86 //内部线程入口
 87 template <typename Dtype>
 88 void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
 89 //创建CUDA Stream,非阻塞类型
 90 #ifndef CPU_ONLY
 91   cudaStream_t stream;
 92   if (Caffe::mode() == Caffe::GPU) {
 93     CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 94   }
 95 #endif
 97   try {
 98     while (!must_stop()) {//循环载入批量数据
 99       Batch<Dtype>* batch = prefetch_free_.pop();//得到一个空闲batch
100       load_batch(batch);//载入批量数据
101 #ifndef CPU_ONLY
102       if (Caffe::mode() == Caffe::GPU) {
103         batch->>async_gpu_push(stream);
104         if (this->output_labels_) {
105           batch->>async_gpu_push(stream);
106         }
107         CUDA_CHECK(cudaStreamSynchronize(stream));//同步到GPU
108       }
109 #endif
110       prefetch_full_.push(batch);//加入到带负载的Batch队列中
111     }
112   } catch (boost::thread_interrupted&) {//捕获到异常则退出循环
113     // Interrupted exception is expected on shutdown
114   }
115 #ifndef CPU_ONLY
116   if (Caffe::mode() == Caffe::GPU) {
117     CUDA_CHECK(cudaStreamDestroy(stream));//销毁CUDA Stream
118   }
119 #endif
120 }
122 //前向传播函数
123 template <typename Dtype>
124 void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
125     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
126   if (prefetch_current_) {
127     prefetch_free_.push(prefetch_current_);
128   }
129   //从带负载的Batch中取出一个Batch对象
130   prefetch_current_ = prefetch_full_.pop("Waiting for data");
131   // Reshape to loaded data.输出Top Blob根据Batch形状进行变形
132   top[0]->ReshapeLike(prefetch_current_->data_);
133   top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
134   if (this->output_labels_) {//如果需要输出便签数据
135     // Reshape to loaded labels.
136     top[1]->ReshapeLike(prefetch_current_->label_);//Top Blob根据Batch中lable_形状进行变形
137     top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
138   }
139 }
141 #ifdef CPU_ONLY
142 STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
143 #endif
146 INSTANTIATE_CLASS(BasePrefetchingDataLayer);
148 }  // namespace caffe






