解析模型参数
解析模型参数
Caffe Model
caffemodel是使用protobuf进行保存的。
1. 有prototxt
依赖于caffe。
解析的时候如果有prototxt,caffemodel两个文件,可以使用caffe提供的接口load网络,然后再解析网络。
weigth是net.params[param_name][0].data
,bias需要判断net.params[param_name]
的长度,如果大于1则有bias,是net.params[param_name][1].data
,否则没有bias参数。
#!/usr/bin/env python
import caffe
import numpy as np
# 使输出的参数完全显示
# 若没有这一句,因为参数太多,中间会以省略号“……”的形式代替
np.set_printoptions(threshold='nan')
# deploy文件
MODEL_FILE = 'caffe_deploy.prototxt'
# 预先训练好的caffe模型
PRETRAIN_FILE = 'caffe_iter_10000.caffemodel'
# 保存参数的文件
params_txt = 'params.txt'
pf = open(params_txt, 'w')
# 让caffe以测试模式读取网络参数
net = caffe.Net(MODEL_FILE, PRETRAIN_FILE, caffe.TEST)
# 遍历每一层
for param_name in net.params.keys():
# 权重参数
weight = net.params[param_name][0].data
# 偏置参数
if len(net.params[param_name]) == 2:
bias = net.params[param_name][1].data
# 该层在prototxt文件中对应“top”的名称
pf.write(param_name)
pf.write('\n')
# 写权重参数
pf.write('\n' + param_name + '_weight:\n\n')
# 权重参数是多维数组,为了方便输出,转为单列数组
weight.shape = (-1, 1)
for w in weight:
pf.write('%ff, ' % w)
# 写偏置参数
if len(net.params[param_name]) == 2:
pf.write('\n\n' + param_name + '_bias:\n\n')
# 偏置参数是多维数组,为了方便输出,转为单列数组
bias.shape = (-1, 1)
for b in bias:
pf.write('%ff, ' % b)
pf.write('\n\n')
pf.close
参考:
http://blog.csdn.net/u011762313/article/details/49851795
https://www.cnblogs.com/denny402/p/5686257.html
2. 没有prototxt
依赖于caffe.proto.caffe_pb2。
如果不存在prototxt文件,那么就需要直接调用caffe_pb2解析caffemodel文件,成员需要参照caffe.proto文件,在caffe工程的src/caffe/proto下面,使用protobuf语法,利用protoc编译生成caffe.pb.cc, caffe.pb.h两个文件供C++调用。
weight参数位置:
NetParameter -> LayerParameter -> BlobProto -> data
model -> layers -> blobs[0] -> data
bias参数位置(如果有):
NetParameter -> LayerParameter -> BlobProto -> data
model -> layers -> blobs[1] -> data
如下是caffe.proto文件定义的参数:
NetParameter主要包含name, input, input_shape, layer。
LayerParameter主要包含name, type, bottom, top, blobs, param(学习率), transform_param,convolution_param等。
message NetParameter {
optional string name = 1; // consider giving the network a name
// DEPRECATED. See InputParameter. The input blobs to the network.
repeated string input = 3;
// DEPRECATED. See InputParameter. The shape of the input blobs.
repeated BlobShape input_shape = 8;
// 4D input dimensions -- deprecated. Use "input_shape" instead.
// If specified, for each input blob there should be four
// values specifying the num, channels, height and width of the input blob.
// Thus, there should be a total of (4 * #input) numbers.
repeated int32 input_dim = 4;
// Whether the network will force every layer to carry out backward operation.
// If set False, then whether to carry out backward is determined
// automatically according to the net structure and learning rates.
optional bool force_backward = 5 [default = false];
// The current "state" of the network, including the phase, level, and stage.
// Some layers may be included/excluded depending on this state and the states
// specified in the layers' include and exclude fields.
optional NetState state = 6;
// Print debugging information about results while running Net::Forward,
// Net::Backward, and Net::Update.
optional bool debug_info = 7 [default = false];
// The layers that make up the net. Each of their configurations, including
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
}
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies whether to backpropagate to each bottom. If unspecified,
// Caffe will automatically infer whether each input needs backpropagation
// to compute parameter gradients. If set to true for some inputs,
// backpropagation to those inputs is forced; if set false for some inputs,
// backpropagation to those inputs is skipped.
//
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 144;
optional DataParameter data_param = 107;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional InputParameter input_param = 143;
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional RecurrentParameter recurrent_param = 146;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129;
}
BlobProto里面则是存储了相关的训练参数,重要的两个成员是shape, data:
packed = true表示采用连续存储的方式,在前面先写一个字节长度,再在下面逐行记录每个数据;如果不用packed模式,需要在每个数据前都声明是data这个字段的数据,消耗空间。默认repeated字段就是使用packed模式。
message BlobProto {
optional BlobShape shape = 7;
repeated float data = 5 [packed = true];
repeated float diff = 6 [packed = true];
repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [packed = true];
}
以下代码可以用来读取caffemodel各字段:
import caffe.proto.caffe_pb2 as caffe_pb2
import pdb
caffemodel_filename = '/home/gr/deepwork/HyperLPR/lpr.caffemodel'
model = caffe_pb2.NetParameter()
f=open(caffemodel_filename, 'rb')
model.ParseFromString(f.read())
f.close()
layers = model.layer
print 'name: ' + model.name
pdb.set_trace()
layer_id=-1
for layer in layers:
print layer.name + ':'
if len(layer.blobs) > 0:
print '\tweight filter ' + str(layer.blobs[0].shape.dim) + ':' + str(layer.blobs[0].data[0])
if len(layer.blobs) > 1:
print '\tbias filter ' + str(layer.blobs[1].shape.dim) + ':' + str(layer.blobs[1].data[0])
else:
print '\tequal 0'
另外也可以解析生成对应的prototxt文件。
参考:
https://www.cnblogs.com/zjutzz/p/6185452.html
http://blog.csdn.net/jiongnima/article/details/72904526
http://blog.csdn.net/seven_first/article/details/47418887#message-layerparameter
https://www.cnblogs.com/autyinjing/p/6495103.html
3. 使用c++解析
依赖于protobuf。
同样需要对照caffe.proto进行解析:
weight参数位置:
NetParameter -> LayerParameter -> BlobProto -> data
msg -> layer -> blobs[0] -> data
bias参数位置(如果有):
NetParameter -> LayerParameter -> BlobProto -> data
msg -> layer -> blobs[1] -> data
protoc生成的C++代码,每个字段都提供了同名访问接口,但如果是repeated字段,则在名字前面加上mutable_
,如下:
通过string name = student->name();
取得学生姓名。
通过RepeatedPtrField<string>* classes = student->mutable_classes();
取得所有课程名, string first_class = classes->Get(0);
取得第一门课程,而提供的classes()
函数就是调用Get()
。
message Student {
# 名字
option string name = 1;
# 课程
repeated string classes = 2;
}
对于一个repeated float data
字段,会对该字段生成如下成员函数:
inline int data_size() const;
inline void clear_data();
static const int kDataFieldNumber = 5;
inline float data(int index) const;
inline void set_data(int index, float value);
inline void add_data(float value);
inline const ::google::protobuf::RepeatedField< float >& data() const;
inline ::google::protobuf::RepeatedField< float >* mutable_data();
提取参数代码:
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include "caffe.pb.h"
using namespace std;
using namespace caffe;
int main(int argc, char* argv[])
{
caffe::NetParameter msg;
fstream input("/home/gr/deepwork/caffe-tensorflow/examples/mnist/lenet_iter_10000.caffemodel", ios::in | ios::binary);
if (!msg.ParseFromIstream(&input))
{
cerr << "Failed to parse address book." << endl;
return -1;
}
::google::protobuf::RepeatedPtrField< LayerParameter >* layers = msg.mutable_layer();
::google::protobuf::RepeatedPtrField< LayerParameter >::iterator it = layers->begin();
for (; it != layers->end(); ++it)
{
cout << it->name() << endl;
cout << it->type() << endl;
::google::protobuf::RepeatedPtrField< BlobProto >* blobs = it->mutable_blobs();
for (int i = 0; i < blobs->size(); ++i) {
BlobProto blob = blobs->Get(i);
::google::protobuf::RepeatedField< float >* datas = blob.mutable_data();
for (int j = 0; j < datas->size(); ++j) {
cout << datas->Get(j) << " ";
}
cout << endl;
}
}
return 0;
}
http://blog.csdn.net/zr459927180/article/details/50904938
http://blog.csdn.net/dachao_xu/article/details/50899534
二、TensorFlow
conv2d:
if
normalizer_fn
is None and abiases_initializer
is provided then abiases
variable would be created and added the activations.
如果没有提供normalizer_fn
,并且提供了normalizer_fn
就会创建初始化biases
,否则其它情况都没有偏置。
TensorFlow如果使用高级接口slim,而不是自己定义Variable,可以通过trainable_variables()获取需要训练的变量:
params=tf.trainable_variables()
for idx, v in enumerate(params):
print(" param {:15}: {:15} {}".format(idx, str(v.get_shape()), v.name))
结果:
可以利用tf.train.Saver
load训练好的权重,之后取得每个参数:
saver = tf.train.Saver()
params = tf.trainable_variables()
fp = open('mnist_model.txt', 'w')
with tf.Session() as sess:
saver.restore(sess, './tmp/mnist_model.ckpt')
for param in params:
v = sess.run(param)
fp.write(param.name)
fp.write(v)
fp.write('\n')
fp.close()