5 - Training and Predicting MNIST Handwritten Digits with LeNet-5

1. Download MNIST

From the Caffe root directory, run the following script:

./data/mnist/get_mnist.sh

The script's contents are as follows:

#!/usr/bin/env sh
# This script downloads the mnist data and unzips it.

DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"

echo "Downloading..."
# Download four files: train-images-idx3-ubyte, train-labels-idx1-ubyte,
# t10k-images-idx3-ubyte and t10k-labels-idx1-ubyte
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    if [ ! -e $fname ]; then
        # Download the file
        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
        # Unzip it
        gunzip ${fname}.gz
    fi
done

Download complete.

Shell command breakdown

 wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz

The --no-check-certificate flag tells wget not to verify the server's certificate when requesting an HTTPS site, so the download proceeds even if certificate validation would fail.

 gunzip ${fname}.gz

gunzip decompresses the downloaded .gz archives.

 

2. Data Format Conversion

From the Caffe root directory, run:

./examples/mnist/create_mnist.sh

The raw data is a binary (IDX) file and must be converted to LevelDB or LMDB before Caffe can read it.
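Before converting, you can peek at the raw IDX header directly; a quick check using the standard od utility (the 16-byte header holds, in big-endian order, the magic number, item count, row count, and column count, exactly as parsed by the converter source shown further below):

# Dump the first 16 bytes of the image file as hex:
# magic 0x00000803 (2051), then item count, rows, cols
od -A d -t x1 -N 16 data/mnist/train-images-idx3-ubyte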

create_mnist.sh
#!/usr/bin/env sh
# This script converts the mnist data into lmdb/leveldb format,
# depending on the value assigned to $BACKEND.
set -e

EXAMPLE=examples/mnist
DATA=data/mnist
BUILD=build/examples/mnist
# Set the DB backend to lmdb
BACKEND="lmdb"
# Log progress
echo "Creating ${BACKEND}..."

rm -rf $EXAMPLE/mnist_train_${BACKEND}
rm -rf $EXAMPLE/mnist_test_${BACKEND}
# Call convert_mnist_data.bin to convert the raw data
$BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \
  $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} --backend=${BACKEND}
$BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \
  $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} --backend=${BACKEND}

echo "Done."

convert_mnist_data.bin is built from convert_mnist_data.cpp; the source is shown below.

// This program converts the MNIST dataset into lmdb (the default) or
// leveldb (set --backend=leveldb) format so that Caffe can load the data.
// Usage:
//    convert_mnist_data [FLAGS] input_image_file input_label_file output_db_file
// The MNIST dataset could be downloaded at
//    http://yann.lecun.com/exdb/mnist/

#include <gflags/gflags.h>  // gflags handles command-line flags
#include <glog/logging.h>   // glog: lightweight logging library
#include <google/protobuf/text_format.h>  // protobuf serializes the network definition; more compact and faster than JSON or XML

#if defined(USE_LEVELDB) && defined(USE_LMDB)
#include <leveldb/db.h>  // LevelDB: an embedded key-value store offering unordered_map-style lookups
#include <leveldb/write_batch.h>
#include <lmdb.h>  // LMDB: a key-value store; ImageNet is stored this way as label/raw-image-byte pairs
#endif

#include <stdint.h>    // fixed-width integer types from C
#include <sys/stat.h>  // C file-status API for querying file attributes

#include <fstream>  // NOLINT(readability/streams) C++ stream-based file I/O
#include <string>

#include "boost/scoped_ptr.hpp"  // like shared_ptr, but ownership cannot be transferred
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"

#if defined(USE_LEVELDB) && defined(USE_LMDB)

using namespace caffe;  // NOLINT(build/namespaces)
using boost::scoped_ptr;
using std::string;

DEFINE_string(backend, "lmdb", "The backend for storing the result");

// Byte-order conversion: MNIST files store integers big-endian
uint32_t swap_endian(uint32_t val) {
    val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
    return (val << 16) | (val >> 16);
}

void convert_dataset(const char* image_filename, const char* label_filename,
        const char* db_path, const string& db_backend) {
  // Open files
  std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
  std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
  CHECK(image_file) << "Unable to open file " << image_filename;
  CHECK(label_file) << "Unable to open file " << label_filename;
  // Read the magic and the meta data
  uint32_t magic;
  uint32_t num_items;
  uint32_t num_labels;
  uint32_t rows;
  uint32_t cols;

  image_file.read(reinterpret_cast<char*>(&magic), 4);
  magic = swap_endian(magic);
  CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
  label_file.read(reinterpret_cast<char*>(&magic), 4);
  magic = swap_endian(magic);
  CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
  image_file.read(reinterpret_cast<char*>(&num_items), 4);
  num_items = swap_endian(num_items);
  label_file.read(reinterpret_cast<char*>(&num_labels), 4);
  num_labels = swap_endian(num_labels);
  CHECK_EQ(num_items, num_labels);
  image_file.read(reinterpret_cast<char*>(&rows), 4);
  rows = swap_endian(rows);
  image_file.read(reinterpret_cast<char*>(&cols), 4);
  cols = swap_endian(cols);

  scoped_ptr<db::DB> db(db::GetDB(db_backend));
  db->Open(db_path, db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  char label;
  char* pixels = new char[rows * cols];
  int count = 0;
  string value;

  Datum datum;
  datum.set_channels(1);
  datum.set_height(rows);
  datum.set_width(cols);
  LOG(INFO) << "A total of " << num_items << " items.";
  LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
  for (int item_id = 0; item_id < num_items; ++item_id) {
    // Read one image and its label
    image_file.read(pixels, rows * cols);
    label_file.read(&label, 1);
    // Store the raw pixels and the label in a Datum
    datum.set_data(pixels, rows*cols);
    datum.set_label(label);
    // Format the current sample's id as an 8-digit string key
    string key_str = caffe::format_int(item_id, 8);
    // Serialize the image data and label
    datum.SerializeToString(&value);
    // Put the record into the transaction
    txn->Put(key_str, value);

    if (++count % 1000 == 0) {
      // Commit in batches of 1000
      txn->Commit();
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
      txn->Commit();
  }
  LOG(INFO) << "Processed " << count << " files.";
  delete[] pixels;
  db->Close();
}

int main(int argc, char** argv) {
#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  FLAGS_alsologtostderr = 1;

  gflags::SetUsageMessage("This script converts the MNIST dataset to\n"
        "the lmdb/leveldb format used by Caffe to load data.\n"
        "Usage:\n"
        "    convert_mnist_data [FLAGS] input_image_file input_label_file "
        "output_db_file\n"
        "The MNIST dataset could be downloaded at\n"
        "    http://yann.lecun.com/exdb/mnist/\n"
        "You should gunzip them after downloading,"
        "or directly use data/mnist/get_mnist.sh\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  const string& db_backend = FLAGS_backend;

  if (argc != 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0],
        "examples/mnist/convert_mnist_data");
  } else {
    // Initialize the logging library
    google::InitGoogleLogging(argv[0]);
    convert_dataset(argv[1], argv[2], argv[3], db_backend);
  }
  return 0;
}
#else
int main(int argc, char** argv) {
  LOG(FATAL) << "This example requires LevelDB and LMDB; " <<
  "compile with USE_LEVELDB and USE_LMDB.";
}
#endif  // USE_LEVELDB and USE_LMDB
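Because the backend flag defaults to lmdb, the converter can also be invoked by hand to produce a LevelDB database instead; a sketch following the usage string above (the output path here is chosen for illustration):

# Convert the training set into LevelDB instead of LMDB
./build/examples/mnist/convert_mnist_data.bin \
    data/mnist/train-images-idx3-ubyte \
    data/mnist/train-labels-idx1-ubyte \
    examples/mnist/mnist_train_leveldb --backend=leveldb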

3. LeNet-5 Model Analysis

The LeNet-5 in the Caffe examples differs slightly from the original paper's version.

./examples/mnist/lenet_train_test.prototxt

name: "LeNet"     //网络名称LeNet
layer 
{ //定义第一曾layer name: "mnist" //层名称为mnist type: "Data" //层类型为数据层 如果是Data,
top: "data" //层输出两个,分别为data和label top: "label" include
{ phase: TRAIN //该层面只在训练层有效 } transform_param
{ scale: 0.00390625 //数据变换使用的放缩因子 } data_param { //数据参数层 source: "examples/mnist/mnist_train_lmdb" //LMDB路径 batch_size: 64 //批量数据,一次取64张图片 backend: LMDB //后台数据为LMDB } } layer
{ //定一个参数曾,但是该这里参数只在TEST阶段有效 name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "examples/mnist/mnist_test_lmdb" batch_size: 100 backend: LMDB } } layer
{ //定义第一个卷积层conv1, name: "conv1" //名称 type: "Convolution" //数据类型为卷积 bottom: "data" //输入数据为data top: "conv1" //输出数据为conv1 param { lr_mult: 1 //权值学习速率倍乘因子,1表示与全局参数一致 } param { lr_mult: 2 //bias学习速率倍乘因子,1表示与全局参数一致 } convolution_param { //卷积计算参数 num_output: 20 //输出feature-map数量为20 kernel_size: 5 //卷积核为5X5 stride: 1 //卷积输出的跳跃间隔:1表示连续输出,无跳跃 weight_filler { //权值使用xavier填充 type: "xavier" } bias_filler { //bias使用常数填充,默认为0 type: "constant" } } } layer { //定义下下采样层pool1,输入blob为conv1,输出blob为pool1 name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { //参数 pool: MAX //使用最大值下采样方法 kernel_size: 2 //下采样窗口尺寸2×2 stride: 2 //下采样输出跳跃区间2×2 } } layer { //同上 name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 50 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //同上 name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { //全链接层 name: "ip1" type: "InnerProduct" bottom: "pool2" top: "ip1" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { //全连接层参数 num_output: 500 //该层输入参数为500 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //非线性层,使用RelU方法 name: "relu1" type: "ReLU" bottom: "ip1" top: "ip1" } layer { name: "ip2" type: "InnerProduct" bottom: "ip1" top: "ip2" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 10 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //分类准确率层,只在test阶段有效,输入ip2以及label,输出accuracy name: "accuracy" type: "Accuracy" bottom: "ip2" bottom: "label" top: "accuracy" include { phase: TEST } } layer { //损失层,损失函数采用SoftmaxWithLoss,输入ip2以及label,输出loss name: "loss" type: "SoftmaxWithLoss" bottom: "ip2" bottom: "label" top: "loss" }

The model can be visualized with the Netscope tool: http://ethereon.github.io/netscope/#/editor

 

Network analysis:

The data layer mnist supplies the image blob data and the label blob label from the LMDB database produced during conversion.

The image data is then fed into the CNN for processing.

The CNN consists of alternating convolution (conv1, conv2) and pooling (pool1, pool2) feature layers, followed by the fully connected layers ip1 and ip2 (similar to a multilayer perceptron); the feature-map sizes are traced below.

Comparing the output of ip2 against the label data yields the classification accuracy as well as the loss.
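As a sanity check on the architecture (a worked calculation, not from the original post, assuming the 28×28 MNIST input), the blob shapes follow from the standard output-size formula $o = (i - k)/s + 1$:

conv1: (28 − 5)/1 + 1 = 24  →  20 × 24 × 24
pool1: (24 − 2)/2 + 1 = 12  →  20 × 12 × 12
conv2: (12 − 5)/1 + 1 = 8   →  50 × 8 × 8
pool2: (8 − 2)/2 + 1 = 4    →  50 × 4 × 4

so ip1 sees 50 · 4 · 4 = 800 inputs and maps them to 500 outputs.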

 

4. Training the Model

The model is trained with the script

examples/mnist/train_lenet.sh

which runs:

 

./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt 

 

The earlier build produced the build/tools/caffe.bin binary; the --solver=examples/mnist/lenet_solver.prototxt argument points to the file that defines the training hyper-parameters.

Machine-learning models generally involve two kinds of parameters. One kind is learned and estimated from the data: the model parameters, i.e. the parameters of the model itself. For example, the weight (slope) and bias (intercept) of a linear-regression line are model parameters. The other kind are the tuning parameters of the learning algorithm, which must be set by hand and are called hyper-parameters; "tuning" a model means adjusting these, e.g. the learning rate α of gradient descent, the number of epochs, and the batch size.

lenet_solver.prototxt reads:
# The train/test net protocol buffer definition
net: "examples/mnist/lenet_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy (decay schedule)
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display (log) every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results every 5000 iterations
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# solver mode: CPU or GPU (here: GPU)
solver_mode: GPU
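With the "inv" policy, Caffe decays the learning rate as a function of the iteration count t according to $lr(t) = base\_lr \cdot (1 + \gamma t)^{-power}$. A worked example with the values above:

lr(0) = 0.01
lr(10000) = 0.01 × (1 + 0.0001 × 10000)^(−0.75) = 0.01 × 2^(−0.75) ≈ 0.0059

which matches the gradually decaying learning rate visible in the training logs.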

Go to the Caffe root directory

cd caffe

and run the script:

./examples/mnist/train_lenet.sh

 

Training process (the original post illustrated each step with log screenshots, omitted here):

1. Initialize the train net from the prototxt file.

2. Determine which layers need backward computation.

3. Generate the test net, which goes through the same net creation and backward-layer analysis.

4. Run one initial accuracy computation, then start training.

A log entry is printed every 100 iterations; the loss value steadily decreases while the learning rate gradually decays.

At iteration 10,000, the final accuracy and loss are reported.

5. Model Prediction

 

./build/tools/caffe

Running caffe.bin with no arguments prints its command-line usage and available flags (screenshots omitted).

./build/tools/caffe test -model examples/mnist/lenet_train_test.prototxt -weights examples/mnist/lenet_iter_10000.caffemodel -iterations 100
Here test puts caffe in prediction mode, -model gives the network definition, -weights points to the pretrained weights file from the iteration-10,000 snapshot, and -iterations 100 runs 100 forward passes; with the test batch size of 100, that covers the full 10,000-image test set.

 
