First, a small example of using Blob (the run output below shows what the relevant functions do):
#include <vector>
#include <caffe/blob.hpp>
#include <caffe/util/io.hpp>  // disk I/O
#include <iostream>
using namespace std;
using namespace caffe;

int main() {
  Blob<float> a;
  cout << "Size: " << a.shape_string() << endl;
  a.Reshape(1, 2, 3, 4);
  cout << "Size: " << a.shape_string() << endl;
  a.Reshape(1, 1, 1, 4);
  cout << "Size: " << a.shape_string() << endl;

  float* p = a.mutable_cpu_data();
  float* q = a.mutable_cpu_diff();
  for (int i = 0; i < a.count(); i++) {
    p[i] = i;                  // data: 0, 1, 2, ...
    q[i] = a.count() - 1 - i;  // diff: count-1, ..., 1, 0
  }
  cout << "L1: " << a.asum_data() << endl;   // L1 norm (sum of absolute values) of data
  cout << "L2: " << a.sumsq_data() << endl;  // sum of squares of data
  // a.Update();

  // Disk I/O
  BlobProto bp;
  a.ToProto(&bp, true);  // serialize a, including the diff (not written by default)
  WriteProtoToBinaryFile(bp, "a.blob");
  BlobProto bp2;
  ReadProtoFromBinaryFileOrDie("a.blob", &bp2);
  Blob<float> b;
  b.FromProto(bp2, true);  // reconstruct b from the serialized object (shape included)
  b.Update();              // data = data - diff
  cout << "L1: " << b.asum_data() << endl;
  cout << "L2: " << b.sumsq_data() << endl;
  return 0;
}
Compile:
export LD_LIBRARY_PATH=./build/lib/:$LD_LIBRARY_PATH
g++ -o app ./bambootry/try.cpp -I ./include/ -D CPU_ONLY \
    -I ./.build_release/src/ \
    -L ./.build_release/lib/ -lcaffe -lglog -lboost_system
Run output:
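The captured console output is not reproduced here, but the values can be worked out by hand from the code above, assuming it runs as written: the empty Blob prints "Size: (0)", and the two Reshape calls print "Size: 1 2 3 4 (24)" and "Size: 1 1 1 4 (4)". With data = {0, 1, 2, 3}, the first pair is L1: 6 and L2: 14. After FromProto and b.Update() (which computes data[i] = data[i] - diff[i], giving {-3, -1, 1, 3}), the second pair should be L1: 8 and L2: 20.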
Now to the main topic: annotated source code.
The Blob-related part of src/caffe/proto/caffe.proto:
// Specifies the shape (dimensions) of a Blob, i.e. the size of each axis.
message BlobShape {
  repeated int64 dim = 1 [packed = true];  // "packed" means the values are laid out contiguously, with no gaps
}

// This message describes the serialized, on-disk form of a Blob.
message BlobProto {
  optional BlobShape shape = 7;                     // optional; one BlobShape object
  repeated float data = 5 [packed = true];          // float elements holding data or weights; the element count is determined by shape or (num, channels, height, width)
  repeated float diff = 6 [packed = true];          // float elements holding the gradient (increment) information; same size as data
  repeated double double_data = 8 [packed = true];  // data, double precision
  repeated double double_diff = 9 [packed = true];  // diff, double precision

  // 4D dimensions -- deprecated. Use "shape" instead.
  // Optional legacy dimension fields; new code should use shape.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
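To make the field layout concrete, here is a minimal sketch (the file name and values are made up for illustration) that fills a BlobProto by hand through the protoc-generated API; these are the same calls Blob::ToProto/FromProto use further below, and it can be compiled with the g++ command shown above:

#include <caffe/proto/caffe.pb.h>  // generated by protoc from caffe.proto
#include <caffe/util/io.hpp>       // WriteProtoToBinaryFile
using namespace caffe;

int main() {
  BlobProto proto;
  // Shape 1x1x1x4 via the recommended "shape" field.
  const int dims[4] = {1, 1, 1, 4};
  for (int i = 0; i < 4; ++i) proto.mutable_shape()->add_dim(dims[i]);
  // Four data elements and four diff elements, matching the shape.
  for (int i = 0; i < 4; ++i) {
    proto.add_data(static_cast<float>(i));
    proto.add_diff(static_cast<float>(3 - i));
  }
  WriteProtoToBinaryFile(proto, "by_hand.blob");  // hypothetical output path
  return 0;
}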
include/caffe/blob.hpp (it uses the SyncedMemory class, which is covered in a separate file below). The Blob serves as the basic computational unit for Layer, Net, Solver, etc.
#ifndef CAFFE_BLOB_HPP_
#define CAFFE_BLOB_HPP_

#include <algorithm>
#include <string>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"  // header generated by protoc; declares BlobProto, BlobShape and the other structures defined in caffe.proto
#include "caffe/syncedmem.hpp"     // CPU/GPU shared memory class, used for data synchronization

const int kMaxBlobAxes = 32;  // maximum number of axes a Blob may have

namespace caffe {

/**
 * @brief A wrapper around SyncedMemory holders serving as the basic
 *        computational unit through which Layer%s, Net%s, and Solver%s
 *        interact.
 *
 * TODO(dox): more thorough description.
 */
template <typename Dtype>
class Blob {
 public:
  Blob()
       : data_(), diff_(), count_(0), capacity_(0) {}  // default constructor

  /// @brief Deprecated; use <code>Blob(const vector<int>& shape)</code>.
  explicit Blob(const int num, const int channels, const int height,
      const int width);  // explicit constructors, to avoid implicit type conversion
  explicit Blob(const vector<int>& shape);

  /// @brief Deprecated; use <code>Reshape(const vector<int>& shape)</code>.
  // Reshape functions: change the Blob's shape according to the arguments,
  // reallocating memory when necessary.
  void Reshape(const int num, const int channels, const int height,
      const int width);
  /**
   * @brief Change the dimensions of the blob, allocating new memory if
   *        necessary.
   *
   * This function can be called both to create an initial allocation
   * of memory, and to adjust the dimensions of a top blob during Layer::Reshape
   * or Layer::Forward. When changing the size of blob, memory will only be
   * reallocated if sufficient memory does not already exist, and excess memory
   * will never be freed.
   *
   * Note that reshaping an input blob and immediately calling Net::Backward is
   * an error; either Net::Forward or Net::Reshape need to be called to
   * propagate the new input shape to higher layers.
   */
  void Reshape(const vector<int>& shape);
  void Reshape(const BlobShape& shape);
  void ReshapeLike(const Blob& other);
  // Build a human-readable shape string for logging; see the Caffe run log,
  // e.g.: Top Shape: 100 1 28 28 (78400)
  inline string shape_string() const {
    ostringstream stream;
    for (int i = 0; i < shape_.size(); ++i) {
      stream << shape_[i] << " ";
    }
    stream << "(" << count_ << ")";
    return stream.str();
  }
  // Return the Blob's shape.
  inline const vector<int>& shape() const { return shape_; }
  /**
   * @brief Returns the dimension of the index-th axis (or the negative index-th
   *        axis from the end, if index is negative).
   *
   * @param index the axis index, which may be negative as it will be
   *        "canonicalized" using CanonicalAxisIndex.
   *        Dies on out of range index.
   */
  // Return the size of one axis.
  inline int shape(int index) const {
    return shape_[CanonicalAxisIndex(index)];
  }
  // Return the number of axes.
  inline int num_axes() const { return shape_.size(); }
  // Return the total number of elements in the Blob.
  inline int count() const { return count_; }

  /**
   * @brief Compute the volume of a slice; i.e., the product of dimensions
   *        among a range of axes.
   *
   * @param start_axis The first axis to include in the slice.
   *
   * @param end_axis The first axis to exclude from the slice.
   */
  // Return the number of elements covered by a subset of the axes.
  inline int count(int start_axis, int end_axis) const {
    CHECK_LE(start_axis, end_axis);    // require start_axis <= end_axis
    CHECK_GE(start_axis, 0);           // require start_axis >= 0
    CHECK_GE(end_axis, 0);             // require end_axis >= 0
    CHECK_LE(start_axis, num_axes());  // require start_axis <= number of axes
    CHECK_LE(end_axis, num_axes());    // require end_axis <= number of axes
    int count = 1;
    for (int i = start_axis; i < end_axis; ++i) {
      count *= shape(i);
    }
    return count;
  }
  /**
   * @brief Compute the volume of a slice spanning from a particular first
   *        axis to the final axis.
   *
   * @param start_axis The first axis to include in the slice.
   */
  // Number of elements counted from a given axis through the last axis.
  inline int count(int start_axis) const {
    return count(start_axis, num_axes());
  }

  /**
   * @brief Returns the 'canonical' version of a (usually) user-specified axis,
   *        allowing for negative indexing (e.g., -1 for the last axis).
   *
   * @param axis_index the axis index.
   *        If 0 <= index < num_axes(), return index.
   *        If -num_axes <= index <= -1, return (num_axes() - (-index)),
   *        e.g., the last axis index (num_axes() - 1) if index == -1,
   *        the second to last if index == -2, etc.
   *        Dies on out of range index.
   */
  // Canonicalize an axis index.
  inline int CanonicalAxisIndex(int axis_index) const {
    CHECK_GE(axis_index, -num_axes())  // require axis_index >= -num_axes()
        << "axis " << axis_index << " out of range for " << num_axes()
        << "-D Blob with shape " << shape_string();
    CHECK_LT(axis_index, num_axes())   // require axis_index < num_axes()
        << "axis " << axis_index << " out of range for " << num_axes()
        << "-D Blob with shape " << shape_string();
    if (axis_index < 0) {
      return axis_index + num_axes();  // a negative index counts from the end, e.g. -1 is the last axis, i.e. N-1 as a positive index
    }
    return axis_index;
  }

  /// @brief Deprecated legacy shape accessor num: use shape(0) instead.
  inline int num() const { return LegacyShape(0); }
  /// @brief Deprecated legacy shape accessor channels: use shape(1) instead.
  inline int channels() const { return LegacyShape(1); }
  /// @brief Deprecated legacy shape accessor height: use shape(2) instead.
  inline int height() const { return LegacyShape(2); }
  /// @brief Deprecated legacy shape accessor width: use shape(3) instead.
  inline int width() const { return LegacyShape(3); }
  inline int LegacyShape(int index) const {
    CHECK_LE(num_axes(), 4)
        << "Cannot use legacy accessors on Blobs with > 4 axes.";
    CHECK_LT(index, 4);
    CHECK_GE(index, -4);
    if (index >= num_axes() || index < -num_axes()) {
      // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse
      // indexing) -- this special case simulates the one-padding used to fill
      // extraneous axes of legacy blobs.
      return 1;
    }
    return shape(index);
  }
  // offset() computes the linear offset of an element from its indices.
  inline int offset(const int n, const int c = 0, const int h = 0,
      const int w = 0) const {
    CHECK_GE(n, 0);
    CHECK_LE(n, num());
    CHECK_GE(channels(), 0);
    CHECK_LE(c, channels());
    CHECK_GE(height(), 0);
    CHECK_LE(h, height());
    CHECK_GE(width(), 0);
    CHECK_LE(w, width());
    return ((n * channels() + c) * height() + h) * width() + w;
  }

  inline int offset(const vector<int>& indices) const {
    CHECK_LE(indices.size(), num_axes());
    int offset = 0;
    for (int i = 0; i < num_axes(); ++i) {
      offset *= shape(i);
      if (indices.size() > i) {
        CHECK_GE(indices[i], 0);
        CHECK_LT(indices[i], shape(i));
        offset += indices[i];
      }
    }
    return offset;
  }
  /**
   * @brief Copy from a source Blob.
   *
   * @param source the Blob to copy from
   * @param copy_diff if false, copy the data; if true, copy the diff
   * @param reshape if false, require this Blob to be pre-shaped to the shape
   *        of other (and die otherwise); if true, Reshape this Blob to other's
   *        shape if necessary
   */
  // Copy another Blob into this one.
  void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
      bool reshape = false);
  // A set of element accessors follows.
  inline Dtype data_at(const int n, const int c, const int h,
      const int w) const {
    return cpu_data()[offset(n, c, h, w)];
  }

  inline Dtype diff_at(const int n, const int c, const int h,
      const int w) const {
    return cpu_diff()[offset(n, c, h, w)];
  }

  inline Dtype data_at(const vector<int>& index) const {
    return cpu_data()[offset(index)];
  }

  inline Dtype diff_at(const vector<int>& index) const {
    return cpu_diff()[offset(index)];
  }

  inline const shared_ptr<SyncedMemory>& data() const {
    CHECK(data_);
    return data_;
  }

  inline const shared_ptr<SyncedMemory>& diff() const {
    CHECK(diff_);
    return diff_;
  }

  const Dtype* cpu_data() const;   // read-only access to the CPU data
  void set_cpu_data(Dtype* data);  // set the CPU data pointer
  const int* gpu_shape() const;
  const Dtype* gpu_data() const;   // read-only access to the GPU data
  void set_gpu_data(Dtype* data);  // set the GPU data pointer
  const Dtype* cpu_diff() const;   // read-only access to the CPU diff
  const Dtype* gpu_diff() const;   // read-only access to the GPU diff
  Dtype* mutable_cpu_data();       // read-write access
  Dtype* mutable_gpu_data();       // read-write access
  Dtype* mutable_cpu_diff();       // read-write access
  Dtype* mutable_gpu_diff();       // read-write access
  void Update();  // update operation on the Blob (data = data - diff)
  void FromProto(const BlobProto& proto, bool reshape = true);    // deserialization: restore a Blob object from a BlobProto
  void ToProto(BlobProto* proto, bool write_diff = false) const;  // serialization: save the in-memory Blob object into a BlobProto

  /// @brief Compute the sum of absolute values (L1 norm) of the data.
  Dtype asum_data() const;   // L1 norm of data, i.e. sum of absolute values
  /// @brief Compute the sum of absolute values (L1 norm) of the diff.
  Dtype asum_diff() const;   // L1 norm of diff, i.e. sum of absolute values
  /// @brief Compute the sum of squares (L2 norm squared) of the data.
  Dtype sumsq_data() const;  // sum of squares of data (squared L2 norm)
  /// @brief Compute the sum of squares (L2 norm squared) of the diff.
  Dtype sumsq_diff() const;  // sum of squares of diff (squared L2 norm)

  /// @brief Scale the blob data by a constant factor.
  void scale_data(Dtype scale_factor);  // multiply data by a scalar
  /// @brief Scale the blob diff by a constant factor.
  void scale_diff(Dtype scale_factor);  // multiply diff by a scalar

  /**
   * @brief Set the data_ shared_ptr to point to the SyncedMemory holding the
   *        data_ of Blob other -- useful in Layer%s which simply perform a copy
   *        in their Forward pass.
   *
   * This deallocates the SyncedMemory holding this Blob's data_, as
   * shared_ptr calls its destructor when reset with the "=" operator.
   */
  void ShareData(const Blob& other);
  /**
   * @brief Set the diff_ shared_ptr to point to the SyncedMemory holding the
   *        diff_ of Blob other -- useful in Layer%s which simply perform a copy
   *        in their Forward pass.
   *
   * This deallocates the SyncedMemory holding this Blob's diff_, as
   * shared_ptr calls its destructor when reset with the "=" operator.
   */
  void ShareDiff(const Blob& other);  // share another Blob's diff_

  bool ShapeEquals(const BlobProto& other);

 protected:
  shared_ptr<SyncedMemory> data_;  // pointer to the data storage
  shared_ptr<SyncedMemory> diff_;  // pointer to the diff storage
  shared_ptr<SyncedMemory> shape_data_;
  vector<int> shape_;
  int count_;     // number of valid elements
  int capacity_;  // capacity of the underlying storage

  DISABLE_COPY_AND_ASSIGN(Blob);  // disable copy constructor and assignment operator
};  // class Blob

}  // namespace caffe

#endif  // CAFFE_BLOB_HPP_
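As a quick check of the indexing helpers declared above, here is a small sketch (shape chosen arbitrarily) showing how shape(), count() and offset() relate; it compiles with the same g++ command as the first example:

#include <iostream>
#include <caffe/blob.hpp>
using namespace caffe;

int main() {
  Blob<float> a(2, 3, 4, 5);                    // num=2, channels=3, height=4, width=5
  std::cout << a.shape_string() << std::endl;   // "2 3 4 5 (120)"
  std::cout << a.count() << std::endl;          // 2*3*4*5 = 120
  std::cout << a.count(1) << std::endl;         // 3*4*5 = 60 (volume from axis 1 to the end)
  std::cout << a.count(1, 3) << std::endl;      // 3*4 = 12 (axes 1 and 2 only)
  std::cout << a.shape(-1) << std::endl;        // 5: negative indices count from the last axis
  // Row-major offset: ((n*channels + c)*height + h)*width + w
  std::cout << a.offset(1, 2, 3, 4) << std::endl;  // ((1*3+2)*4+3)*5+4 = 119, the last element
  return 0;
}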
include/caffe/syncedmem.hpp
#ifndef CAFFE_SYNCEDMEM_HPP_
#define CAFFE_SYNCEDMEM_HPP_

#include <cstdlib>

#ifdef USE_MKL
#include "mkl.h"
#endif

#include "caffe/common.hpp"

namespace caffe {

// If CUDA is available and in GPU mode, host memory will be allocated pinned,
// using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
// The improvement in performance seems negligible in the single GPU case,
// but might be more significant for parallel training. Most importantly,
// it improved stability for large models on many GPUs.
// In GPU mode with CUDA enabled, host memory is allocated as pinned
// (page-locked) memory via cudaMallocHost(). The gain is small for a single
// GPU but significant for multi-GPU training.
inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaMallocHost(ptr, size));
    *use_cuda = true;
    return;
  }
#endif
#ifdef USE_MKL
  *ptr = mkl_malloc(size ? size : 1, 64);
#else
  *ptr = malloc(size);
#endif
  *use_cuda = false;
  CHECK(*ptr) << "host allocation of size " << size << " failed";
}
// Counterpart of CaffeMallocHost.
inline void CaffeFreeHost(void* ptr, bool use_cuda) {
#ifndef CPU_ONLY
  if (use_cuda) {
    CUDA_CHECK(cudaFreeHost(ptr));
    return;
  }
#endif
#ifdef USE_MKL
  mkl_free(ptr);
#else
  free(ptr);
#endif
}


/**
 * @brief Manages memory allocation and synchronization between the host (CPU)
 *        and device (GPU).
 *
 * TODO(dox): more thorough description.
 */
// This class is responsible for allocating storage and synchronizing it
// between host and device.
class SyncedMemory {
 public:
  SyncedMemory();
  explicit SyncedMemory(size_t size);
  ~SyncedMemory();
  const void* cpu_data();         // read-only
  void set_cpu_data(void* data);  // setter
  const void* gpu_data();         // read-only
  void set_gpu_data(void* data);  // setter
  void* mutable_cpu_data();       // read-write
  void* mutable_gpu_data();       // read-write
  // State machine with four states: uninitialized, CPU data valid, GPU data valid, synced.
  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  SyncedHead head() { return head_; }  // current state machine value
  size_t size() { return size_; }      // current storage size

#ifndef CPU_ONLY
  void async_gpu_push(const cudaStream_t& stream);
#endif

 private:
  void check_device();

  void to_cpu();   // synchronize data to the CPU
  void to_gpu();   // synchronize data to the GPU
  void* cpu_ptr_;  // pointer to data on the CPU
  void* gpu_ptr_;  // pointer to data on the GPU
  size_t size_;    // storage size
  SyncedHead head_;           // state machine variable
  bool own_cpu_data_;         // whether this object owns the CPU data (false means it is shared from another object)
  bool cpu_malloc_use_cuda_;
  bool own_gpu_data_;         // whether this object owns the GPU data
  int device_;

  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
};  // class SyncedMemory

}  // namespace caffe

#endif  // CAFFE_SYNCEDMEM_HPP_
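A minimal sketch of how the state machine moves (CPU_ONLY build assumed; the size and value are invented for illustration). Note that streaming head() prints the enum as its integer value (UNINITIALIZED = 0, HEAD_AT_CPU = 1):

#include <iostream>
#include <caffe/syncedmem.hpp>
using namespace caffe;

int main() {
  SyncedMemory mem(16 * sizeof(float));  // room for 16 floats; allocation is lazy
  std::cout << mem.head() << std::endl;  // 0: UNINITIALIZED, nothing allocated yet
  float* p = static_cast<float*>(mem.mutable_cpu_data());  // triggers CPU allocation
  p[0] = 3.14f;                          // write through the mutable pointer
  std::cout << mem.head() << std::endl;  // 1: HEAD_AT_CPU, the CPU copy is now the valid one
  const float* q = static_cast<const float*>(mem.cpu_data());
  std::cout << q[0] << " " << mem.size() << std::endl;  // 3.14 and 64 (bytes)
  return 0;
}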
src/caffe/blob.cpp
#include <climits>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
// Reshape helper: converts (num, channels, height, width) into a vector<int>
// and calls the overloaded Reshape.
template <typename Dtype>
void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
    const int width) {
  vector<int> shape(4);
  shape[0] = num;
  shape[1] = channels;
  shape[2] = height;
  shape[3] = width;
  Reshape(shape);
}
// The real Reshape.
template <typename Dtype>
void Blob<Dtype>::Reshape(const vector<int>& shape) {
  CHECK_LE(shape.size(), kMaxBlobAxes);  // the number of axes must not exceed kMaxBlobAxes
  count_ = 1;  // will hold the total number of elements, num*channels*height*width
  shape_.resize(shape.size());  // resize the shape_ member
  if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) {
    shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int)));
  }
  int* shape_data = static_cast<int*>(shape_data_->mutable_cpu_data());
  for (int i = 0; i < shape.size(); ++i) {
    CHECK_GE(shape[i], 0);
    if (count_ != 0) {  // guard against integer overflow of count_
      CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX";
    }
    count_ *= shape[i];
    shape_[i] = shape[i];  // store into the member variables
    shape_data[i] = shape[i];
  }
  if (count_ > capacity_) {  // if the new count_ exceeds the currently allocated capacity
    capacity_ = count_;      // grow: reallocate data_ and diff_
    data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
    diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
  }
}

template <typename Dtype>
void Blob<Dtype>::Reshape(const BlobShape& shape) {
  CHECK_LE(shape.dim_size(), kMaxBlobAxes);
  vector<int> shape_vec(shape.dim_size());
  for (int i = 0; i < shape.dim_size(); ++i) {
    shape_vec[i] = shape.dim(i);
  }
  Reshape(shape_vec);
}

template <typename Dtype>
void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
  Reshape(other.shape());
}
// Constructors.
template <typename Dtype>
Blob<Dtype>::Blob(const int num, const int channels, const int height,
    const int width)
  // capacity_ must be initialized before calling Reshape;
  // calling Reshape with capacity_ uninitialized leads to unpredictable results
  : capacity_(0) {
  Reshape(num, channels, height, width);
}

template <typename Dtype>
Blob<Dtype>::Blob(const vector<int>& shape)
  // capacity_ must be initialized before calling Reshape
  : capacity_(0) {
  Reshape(shape);
}

template <typename Dtype>
const int* Blob<Dtype>::gpu_shape() const {
  CHECK(shape_data_);
  return (const int*)shape_data_->gpu_data();
}
// Read-only CPU data pointer.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_data() const {
  CHECK(data_);  // data_ must not be null
  return (const Dtype*)data_->cpu_data();
}
// Replace the CPU data pointer.
template <typename Dtype>
void Blob<Dtype>::set_cpu_data(Dtype* data) {
  CHECK(data);
  // Make sure CPU and GPU sizes remain equal
  size_t size = count_ * sizeof(Dtype);
  if (data_->size() != size) {
    data_.reset(new SyncedMemory(size));
    diff_.reset(new SyncedMemory(size));
  }
  data_->set_cpu_data(data);  // point the member at the caller-supplied buffer
}
// Read-only GPU data pointer.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_data() const {
  CHECK(data_);  // must not be null
  return (const Dtype*)data_->gpu_data();
}
// Replace the GPU data pointer.
template <typename Dtype>
void Blob<Dtype>::set_gpu_data(Dtype* data) {
  CHECK(data);
  // Make sure CPU and GPU sizes remain equal
  size_t size = count_ * sizeof(Dtype);
  if (data_->size() != size) {
    data_.reset(new SyncedMemory(size));
    diff_.reset(new SyncedMemory(size));
  }
  data_->set_gpu_data(data);
}
// Read-only CPU diff pointer.
template <typename Dtype>
const Dtype* Blob<Dtype>::cpu_diff() const {
  CHECK(diff_);
  return (const Dtype*)diff_->cpu_data();
}
// Read-only GPU diff pointer.
template <typename Dtype>
const Dtype* Blob<Dtype>::gpu_diff() const {
  CHECK(diff_);
  return (const Dtype*)diff_->gpu_data();
}
// Read-write access to the CPU data.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_data() {
  CHECK(data_);
  return static_cast<Dtype*>(data_->mutable_cpu_data());
}
// Read-write access to the GPU data.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_data() {
  CHECK(data_);
  return static_cast<Dtype*>(data_->mutable_gpu_data());
}
// Read-write access to the CPU diff.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_cpu_diff() {
  CHECK(diff_);
  return static_cast<Dtype*>(diff_->mutable_cpu_data());
}
// Read-write access to the GPU diff.
template <typename Dtype>
Dtype* Blob<Dtype>::mutable_gpu_diff() {
  CHECK(diff_);
  return static_cast<Dtype*>(diff_->mutable_gpu_data());
}
// Share another Blob's data pointer.
template <typename Dtype>
void Blob<Dtype>::ShareData(const Blob& other) {
  CHECK_EQ(count_, other.count());
  data_ = other.data();
}
// Share another Blob's diff pointer.
template <typename Dtype>
void Blob<Dtype>::ShareDiff(const Blob& other) {
  CHECK_EQ(count_, other.count());
  diff_ = other.diff();
}

// The "update" method is used for parameter blobs in a Net, which are stored
// as Blob<float> or Blob<double> -- hence we do not define it for
// Blob<int> or Blob<unsigned int>.
// Update() is used to update the parameter Blobs of a Net. It is only
// implemented for Blob<float> and Blob<double>; the int and unsigned int
// specializations are deliberately left unimplemented.
template <> void Blob<unsigned int>::Update() { NOT_IMPLEMENTED; }
template <> void Blob<int>::Update() { NOT_IMPLEMENTED; }

template <typename Dtype>
void Blob<Dtype>::Update() {
  // We will perform update based on where the data is located.
  // The update happens wherever the data currently lives.
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    // perform computation on CPU
    // CPU path: data[i] = data[i] - diff[i], for i = 0..count-1
    caffe_axpy<Dtype>(count_, Dtype(-1),
        static_cast<const Dtype*>(diff_->cpu_data()),
        static_cast<Dtype*>(data_->mutable_cpu_data()));
    break;
  case SyncedMemory::HEAD_AT_GPU:  // data lives on the GPU, or CPU/GPU are already in sync
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    // perform computation on GPU
    // GPU path: data[i] = data[i] - diff[i], for i = 0..count-1
    caffe_gpu_axpy<Dtype>(count_, Dtype(-1),
        static_cast<const Dtype*>(diff_->gpu_data()),
        static_cast<Dtype*>(data_->mutable_gpu_data()));
#else
    NO_GPU;  // when compiled with CPU_ONLY, the GPU path is disabled
#endif
    break;
  default:
    LOG(FATAL) << "Syncedmem not initialized.";
  }
}

template <> unsigned int Blob<unsigned int>::asum_data() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <> int Blob<int>::asum_data() const {
  NOT_IMPLEMENTED;
  return 0;
}
// L1 norm of data, i.e. the sum of absolute values.
template <typename Dtype>
Dtype Blob<Dtype>::asum_data() const {
  if (!data_) { return 0; }
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_data());  // asum on the CPU
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
  {
    Dtype asum;
    caffe_gpu_asum(count_, gpu_data(), &asum);  // asum on the GPU
    return asum;
  }
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return 0;
}

template <> unsigned int Blob<unsigned int>::asum_diff() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <> int Blob<int>::asum_diff() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <typename Dtype>
Dtype Blob<Dtype>::asum_diff() const {
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    return caffe_cpu_asum(count_, cpu_diff());
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
  {
    Dtype asum;
    caffe_gpu_asum(count_, gpu_diff(), &asum);
    return asum;
  }
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
  return 0;
}

template <> unsigned int Blob<unsigned int>::sumsq_data() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <> int Blob<int>::sumsq_data() const {
  NOT_IMPLEMENTED;
  return 0;
}
// Sum of squares of data (used for the L2 norm).
template <typename Dtype>
Dtype Blob<Dtype>::sumsq_data() const {
  Dtype sumsq;
  const Dtype* data;
  if (!data_) { return 0; }
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    data = cpu_data();
    sumsq = caffe_cpu_dot(count_, data, data);
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    data = gpu_data();
    caffe_gpu_dot(count_, data, data, &sumsq);
#else
    NO_GPU;
#endif
    break;
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return sumsq;
}

template <> unsigned int Blob<unsigned int>::sumsq_diff() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <> int Blob<int>::sumsq_diff() const {
  NOT_IMPLEMENTED;
  return 0;
}

template <typename Dtype>
Dtype Blob<Dtype>::sumsq_diff() const {
  Dtype sumsq;
  const Dtype* diff;
  if (!diff_) { return 0; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    diff = cpu_diff();
    sumsq = caffe_cpu_dot(count_, diff, diff);
    break;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    diff = gpu_diff();
    caffe_gpu_dot(count_, diff, diff, &sumsq);
    break;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return 0;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return sumsq;
}

template <> void Blob<unsigned int>::scale_data(unsigned int scale_factor) {
  NOT_IMPLEMENTED;
}

template <> void Blob<int>::scale_data(int scale_factor) {
  NOT_IMPLEMENTED;
}
// Scale data_ by a constant factor.
template <typename Dtype>
void Blob<Dtype>::scale_data(Dtype scale_factor) {
  Dtype* data;
  if (!data_) { return; }
  switch (data_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    data = mutable_cpu_data();
    caffe_scal(count_, scale_factor, data);
    return;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    data = mutable_gpu_data();
    caffe_gpu_scal(count_, scale_factor, data);
    return;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
}

template <> void Blob<unsigned int>::scale_diff(unsigned int scale_factor) {
  NOT_IMPLEMENTED;
}

template <> void Blob<int>::scale_diff(int scale_factor) {
  NOT_IMPLEMENTED;
}

template <typename Dtype>
void Blob<Dtype>::scale_diff(Dtype scale_factor) {
  Dtype* diff;
  if (!diff_) { return; }
  switch (diff_->head()) {
  case SyncedMemory::HEAD_AT_CPU:
    diff = mutable_cpu_diff();
    caffe_scal(count_, scale_factor, diff);
    return;
  case SyncedMemory::HEAD_AT_GPU:
  case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    diff = mutable_gpu_diff();
    caffe_gpu_scal(count_, scale_factor, diff);
    return;
#else
    NO_GPU;
#endif
  case SyncedMemory::UNINITIALIZED:
    return;
  default:
    LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head();
  }
}
// Check whether this Blob's shape matches the one stored in a BlobProto.
template <typename Dtype>
bool Blob<Dtype>::ShapeEquals(const BlobProto& other) {
  if (other.has_num() || other.has_channels() ||
      other.has_height() || other.has_width()) {
    // Using deprecated 4D Blob dimensions -- compare axis by axis;
    // shape is (num, channels, height, width).
    // Note: we do not use the normal Blob::num(), Blob::channels(), etc.
    // methods as these index from the beginning of the blob shape, where legacy
    // parameter blobs were indexed from the end of the blob shape (e.g., bias
    // Blob shape (1 x 1 x 1 x N), IP layer weight Blob shape (1 x 1 x M x N)).
    return shape_.size() <= 4 &&
           LegacyShape(-4) == other.num() &&
           LegacyShape(-3) == other.channels() &&
           LegacyShape(-2) == other.height() &&
           LegacyShape(-1) == other.width();
  }
  // Otherwise compare the shape vectors directly.
  vector<int> other_shape(other.shape().dim_size());
  for (int i = 0; i < other.shape().dim_size(); ++i) {
    other_shape[i] = other.shape().dim(i);
  }
  return shape_ == other_shape;
}
// Copy data (optionally diff) from another Blob, reshaping if required.
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
  if (source.count() != count_ || source.shape() != shape_) {
    if (reshape) {
      ReshapeLike(source);
    } else {
      LOG(FATAL) << "Trying to copy blobs of different sizes.";
    }
  }
  switch (Caffe::mode()) {
  case Caffe::GPU:
    if (copy_diff) {
      caffe_copy(count_, source.gpu_diff(),
          static_cast<Dtype*>(diff_->mutable_gpu_data()));
    } else {
      caffe_copy(count_, source.gpu_data(),
          static_cast<Dtype*>(data_->mutable_gpu_data()));
    }
    break;
  case Caffe::CPU:
    if (copy_diff) {
      caffe_copy(count_, source.cpu_diff(),
          static_cast<Dtype*>(diff_->mutable_cpu_data()));
    } else {
      caffe_copy(count_, source.cpu_data(),
          static_cast<Dtype*>(data_->mutable_cpu_data()));
    }
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
}
// Load a Blob from a BlobProto.
template <typename Dtype>
void Blob<Dtype>::FromProto(const BlobProto& proto, bool reshape) {
  if (reshape) {  // recover the shape information
    vector<int> shape;
    if (proto.has_num() || proto.has_channels() ||
        proto.has_height() || proto.has_width()) {
      // Using deprecated 4D Blob dimensions --
      // shape is (num, channels, height, width).
      shape.resize(4);
      shape[0] = proto.num();
      shape[1] = proto.channels();
      shape[2] = proto.height();
      shape[3] = proto.width();
    } else {
      shape.resize(proto.shape().dim_size());
      for (int i = 0; i < proto.shape().dim_size(); ++i) {
        shape[i] = proto.shape().dim(i);
      }
    }
    Reshape(shape);  // reshape this Blob
  } else {
    CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)";
  }
  // copy data
  Dtype* data_vec = mutable_cpu_data();
  if (proto.double_data_size() > 0) {  // if the stored data is double precision
    CHECK_EQ(count_, proto.double_data_size());
    for (int i = 0; i < count_; ++i) {
      data_vec[i] = proto.double_data(i);  // load double data
    }
  } else {
    CHECK_EQ(count_, proto.data_size());
    for (int i = 0; i < count_; ++i) {
      data_vec[i] = proto.data(i);  // otherwise load float data
    }
  }
  if (proto.double_diff_size() > 0) {  // if the stored diff is double precision
    CHECK_EQ(count_, proto.double_diff_size());
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.double_diff(i);  // load double diff
    }
  } else if (proto.diff_size() > 0) {
    CHECK_EQ(count_, proto.diff_size());
    Dtype* diff_vec = mutable_cpu_diff();
    for (int i = 0; i < count_; ++i) {
      diff_vec[i] = proto.diff(i);  // otherwise load float diff
    }
  }
}

template <>
void Blob<double>::ToProto(BlobProto* proto, bool write_diff) const {
  proto->clear_shape();
  for (int i = 0; i < shape_.size(); ++i) {
    proto->mutable_shape()->add_dim(shape_[i]);
  }
  proto->clear_double_data();
  proto->clear_double_diff();
  const double* data_vec = cpu_data();
  for (int i = 0; i < count_; ++i) {
    proto->add_double_data(data_vec[i]);
  }
  if (write_diff) {
    const double* diff_vec = cpu_diff();
    for (int i = 0; i < count_; ++i) {
      proto->add_double_diff(diff_vec[i]);
    }
  }
}
// Export the Blob's data (optionally diff) into a BlobProto structure,
// so it can be written to a disk file.
template <>
void Blob<float>::ToProto(BlobProto* proto, bool write_diff) const {
  proto->clear_shape();  // reset the proto's shape so it matches this Blob
  for (int i = 0; i < shape_.size(); ++i) {
    proto->mutable_shape()->add_dim(shape_[i]);
  }
  proto->clear_data();  // clear data
  proto->clear_diff();  // clear diff
  const float* data_vec = cpu_data();  // export data into the proto
  for (int i = 0; i < count_; ++i) {
    proto->add_data(data_vec[i]);
  }
  if (write_diff) {  // if the diff was requested as well
    const float* diff_vec = cpu_diff();  // export diff into the proto
    for (int i = 0; i < count_; ++i) {
      proto->add_diff(diff_vec[i]);
    }
  }
}

INSTANTIATE_CLASS(Blob);  // instantiate the Blob class template for float and double
template class Blob<int>;
template class Blob<unsigned int>;

}  // namespace caffe
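To make the sharing semantics of ShareData/ShareDiff concrete, here is a hedged sketch (shape and values invented for illustration). Since ShareData simply assigns the data_ shared_ptr, both Blobs refer to the same SyncedMemory, so writes through one are visible through the other:

#include <iostream>
#include <caffe/blob.hpp>
using namespace caffe;

int main() {
  Blob<float> a(1, 1, 1, 4);
  Blob<float> b(1, 1, 1, 4);   // same count as a, otherwise ShareData's CHECK_EQ fails
  b.ShareData(a);              // b.data_ now points at a's SyncedMemory

  float* pa = a.mutable_cpu_data();
  for (int i = 0; i < a.count(); ++i) pa[i] = i;  // write through a only

  // Reading through b sees the same underlying buffer.
  std::cout << b.data_at(0, 0, 0, 2) << std::endl;  // expected: 2
  std::cout << b.asum_data() << std::endl;          // expected: 0+1+2+3 = 6
  return 0;
}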
Beginner's notes; I am working through this slowly. To be updated……
Excerpted from and referencing Zhao Yongke (赵永科), 《深度学习 21天实战caffe》 (Deep Learning: 21 Days of Hands-on Caffe).