5 - Training and Predicting MNIST Handwritten Digits with LeNet-5

1. Download MNIST

From the Caffe root directory, run the following script:

./data/mnist/get_mnist.sh

The script's contents are as follows:

#!/usr/bin/env sh
# This script downloads the mnist data and unzips it.

DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"

echo "Downloading..."
# Download four files: train-images-idx3-ubyte, train-labels-idx1-ubyte,
# t10k-images-idx3-ubyte and t10k-labels-idx1-ubyte
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    if [ ! -e $fname ]; then
        # Download the file
        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
        # Unzip it
        gunzip ${fname}.gz
    fi
done

Download complete.

Shell command breakdown

 wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz

The --no-check-certificate flag tells wget not to verify the server's certificate when requesting an HTTPS site, so the download proceeds even if certificate validation would fail.

 gunzip ${fname}.gz

gunzip decompresses the downloaded .gz archives.

 

2. Data Format Conversion

From the Caffe root directory, run:

./examples/mnist/create_mnist.sh

The raw data is a binary (IDX) file and must be converted to LevelDB or LMDB before Caffe can read it.
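Before converting, you can peek at the raw IDX header directly; a quick check using the standard od utility (the 16-byte header holds, in big-endian order, the magic number, item count, row count, and column count, exactly as parsed by the converter source shown further below):

# Dump the first 16 bytes of the image file as hex:
# magic 0x00000803 (2051), then item count, rows, cols
od -A d -t x1 -N 16 data/mnist/train-images-idx3-ubyte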

create_mnist.sh
#!/usr/bin/env sh
# This script converts the mnist data into lmdb/leveldb format,
# depending on the value assigned to $BACKEND.
set -e

EXAMPLE=examples/mnist
DATA=data/mnist
BUILD=build/examples/mnist
# Set the DB backend to lmdb
BACKEND="lmdb"
# Log progress
echo "Creating ${BACKEND}..."

rm -rf $EXAMPLE/mnist_train_${BACKEND}
rm -rf $EXAMPLE/mnist_test_${BACKEND}
# Call convert_mnist_data.bin to convert the raw data
$BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \
  $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} --backend=${BACKEND}
$BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \
  $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} --backend=${BACKEND}

echo "Done."

convert_mnist_data.bin is built from convert_mnist_data.cpp; the source is shown below.

// This program converts the MNIST dataset into lmdb (the default) or
// leveldb (set --backend=leveldb) format so that Caffe can load the data.
// Usage:
//    convert_mnist_data [FLAGS] input_image_file input_label_file output_db_file
// The MNIST dataset could be downloaded at
//    http://yann.lecun.com/exdb/mnist/

#include <gflags/gflags.h>  // gflags handles command-line flags
#include <glog/logging.h>   // glog: lightweight logging library
#include <google/protobuf/text_format.h>  // protobuf serializes the network definition; more compact and faster than JSON or XML

#if defined(USE_LEVELDB) && defined(USE_LMDB)
#include <leveldb/db.h>  // LevelDB: an embedded key-value store offering unordered_map-style lookups
#include <leveldb/write_batch.h>
#include <lmdb.h>  // LMDB: a key-value store; ImageNet is stored this way as label/raw-image-byte pairs
#endif

#include <stdint.h>    // fixed-width integer types from C
#include <sys/stat.h>  // C file-status API for querying file attributes

#include <fstream>  // NOLINT(readability/streams) C++ stream-based file I/O
#include <string>

#include "boost/scoped_ptr.hpp"  // like shared_ptr, but ownership cannot be transferred
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"

#if defined(USE_LEVELDB) && defined(USE_LMDB)

using namespace caffe;  // NOLINT(build/namespaces)
using boost::scoped_ptr;
using std::string;

DEFINE_string(backend, "lmdb", "The backend for storing the result");

// Byte-order conversion: MNIST files store integers big-endian
uint32_t swap_endian(uint32_t val) {
    val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
    return (val << 16) | (val >> 16);
}

void convert_dataset(const char* image_filename, const char* label_filename,
        const char* db_path, const string& db_backend) {
  // Open files
  std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
  std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
  CHECK(image_file) << "Unable to open file " << image_filename;
  CHECK(label_file) << "Unable to open file " << label_filename;
  // Read the magic and the meta data
  uint32_t magic;
  uint32_t num_items;
  uint32_t num_labels;
  uint32_t rows;
  uint32_t cols;

  image_file.read(reinterpret_cast<char*>(&magic), 4);
  magic = swap_endian(magic);
  CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
  label_file.read(reinterpret_cast<char*>(&magic), 4);
  magic = swap_endian(magic);
  CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
  image_file.read(reinterpret_cast<char*>(&num_items), 4);
  num_items = swap_endian(num_items);
  label_file.read(reinterpret_cast<char*>(&num_labels), 4);
  num_labels = swap_endian(num_labels);
  CHECK_EQ(num_items, num_labels);
  image_file.read(reinterpret_cast<char*>(&rows), 4);
  rows = swap_endian(rows);
  image_file.read(reinterpret_cast<char*>(&cols), 4);
  cols = swap_endian(cols);

  scoped_ptr<db::DB> db(db::GetDB(db_backend));
  db->Open(db_path, db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  char label;
  char* pixels = new char[rows * cols];
  int count = 0;
  string value;

  Datum datum;
  datum.set_channels(1);
  datum.set_height(rows);
  datum.set_width(cols);
  LOG(INFO) << "A total of " << num_items << " items.";
  LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
  for (int item_id = 0; item_id < num_items; ++item_id) {
    // Read one image and its label
    image_file.read(pixels, rows * cols);
    label_file.read(&label, 1);
    // Store the raw pixels and the label in a Datum
    datum.set_data(pixels, rows*cols);
    datum.set_label(label);
    // Format the current sample's id as an 8-digit string key
    string key_str = caffe::format_int(item_id, 8);
    // Serialize the image data and label
    datum.SerializeToString(&value);
    // Put the record into the transaction
    txn->Put(key_str, value);

    if (++count % 1000 == 0) {
      // Commit in batches of 1000
      txn->Commit();
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
      txn->Commit();
  }
  LOG(INFO) << "Processed " << count << " files.";
  delete[] pixels;
  db->Close();
}

int main(int argc, char** argv) {
#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  FLAGS_alsologtostderr = 1;

  gflags::SetUsageMessage("This script converts the MNIST dataset to\n"
        "the lmdb/leveldb format used by Caffe to load data.\n"
        "Usage:\n"
        "    convert_mnist_data [FLAGS] input_image_file input_label_file "
        "output_db_file\n"
        "The MNIST dataset could be downloaded at\n"
        "    http://yann.lecun.com/exdb/mnist/\n"
        "You should gunzip them after downloading,"
        "or directly use data/mnist/get_mnist.sh\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  const string& db_backend = FLAGS_backend;

  if (argc != 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0],
        "examples/mnist/convert_mnist_data");
  } else {
    // Initialize the logging library
    google::InitGoogleLogging(argv[0]);
    convert_dataset(argv[1], argv[2], argv[3], db_backend);
  }
  return 0;
}
#else
int main(int argc, char** argv) {
  LOG(FATAL) << "This example requires LevelDB and LMDB; " <<
  "compile with USE_LEVELDB and USE_LMDB.";
}
#endif  // USE_LEVELDB and USE_LMDB
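Because the backend flag defaults to lmdb, the converter can also be invoked by hand to produce a LevelDB database instead; a sketch following the usage string above (the output path here is chosen for illustration):

# Convert the training set into LevelDB instead of LMDB
./build/examples/mnist/convert_mnist_data.bin \
    data/mnist/train-images-idx3-ubyte \
    data/mnist/train-labels-idx1-ubyte \
    examples/mnist/mnist_train_leveldb --backend=leveldb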

3. LeNet-5 Model Analysis

The LeNet-5 in the Caffe examples differs slightly from the original paper's version.

./examples/mnist/lenet_train_test.prototxt

name: "LeNet"     //网络名称LeNet
layer 
{ //定义第一曾layer name: "mnist" //层名称为mnist type: "Data" //层类型为数据层 如果是Data,
top: "data" //层输出两个,分别为data和label top: "label" include
{ phase: TRAIN //该层面只在训练层有效 } transform_param
{ scale: 0.00390625 //数据变换使用的放缩因子 } data_param { //数据参数层 source: "examples/mnist/mnist_train_lmdb" //LMDB路径 batch_size: 64 //批量数据,一次取64张图片 backend: LMDB //后台数据为LMDB } } layer
{ //定一个参数曾,但是该这里参数只在TEST阶段有效 name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "examples/mnist/mnist_test_lmdb" batch_size: 100 backend: LMDB } } layer
{ //定义第一个卷积层conv1, name: "conv1" //名称 type: "Convolution" //数据类型为卷积 bottom: "data" //输入数据为data top: "conv1" //输出数据为conv1 param { lr_mult: 1 //权值学习速率倍乘因子,1表示与全局参数一致 } param { lr_mult: 2 //bias学习速率倍乘因子,1表示与全局参数一致 } convolution_param { //卷积计算参数 num_output: 20 //输出feature-map数量为20 kernel_size: 5 //卷积核为5X5 stride: 1 //卷积输出的跳跃间隔:1表示连续输出,无跳跃 weight_filler { //权值使用xavier填充 type: "xavier" } bias_filler { //bias使用常数填充,默认为0 type: "constant" } } } layer { //定义下下采样层pool1,输入blob为conv1,输出blob为pool1 name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { //参数 pool: MAX //使用最大值下采样方法 kernel_size: 2 //下采样窗口尺寸2×2 stride: 2 //下采样输出跳跃区间2×2 } } layer { //同上 name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 50 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //同上 name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { //全链接层 name: "ip1" type: "InnerProduct" bottom: "pool2" top: "ip1" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { //全连接层参数 num_output: 500 //该层输入参数为500 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //非线性层,使用RelU方法 name: "relu1" type: "ReLU" bottom: "ip1" top: "ip1" } layer { name: "ip2" type: "InnerProduct" bottom: "ip1" top: "ip2" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 10 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { //分类准确率层,只在test阶段有效,输入ip2以及label,输出accuracy name: "accuracy" type: "Accuracy" bottom: "ip2" bottom: "label" top: "accuracy" include { phase: TEST } } layer { //损失层,损失函数采用SoftmaxWithLoss,输入ip2以及label,输出loss name: "loss" type: "SoftmaxWithLoss" bottom: "ip2" bottom: "label" top: "loss" }

The model can be visualized with the Netscope tool: http://ethereon.github.io/netscope/#/editor

 

Network analysis:

The data layer mnist supplies the image blob data and the label blob label from the LMDB database produced during conversion.

The image data is then fed into the CNN for processing.

The CNN consists of alternating convolution (conv1, conv2) and pooling (pool1, pool2) feature layers, followed by the fully connected layers ip1 and ip2 (similar to a multilayer perceptron); the feature-map sizes are traced below.

Comparing the output of ip2 against the label data yields the classification accuracy as well as the loss.
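As a sanity check on the architecture (a worked calculation, not from the original post, assuming the 28×28 MNIST input), the blob shapes follow from the standard output-size formula $o = (i - k)/s + 1$:

conv1: (28 − 5)/1 + 1 = 24  →  20 × 24 × 24
pool1: (24 − 2)/2 + 1 = 12  →  20 × 12 × 12
conv2: (12 − 5)/1 + 1 = 8   →  50 × 8 × 8
pool2: (8 − 2)/2 + 1 = 4    →  50 × 4 × 4

so ip1 sees 50 · 4 · 4 = 800 inputs and maps them to 500 outputs.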

 

4. Training the Model

The model is trained with the script

examples/mnist/train_lenet.sh

which runs:

 

./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt 

 

The earlier build produced the build/tools/caffe.bin binary; the --solver=examples/mnist/lenet_solver.prototxt argument points to the file that defines the training hyper-parameters.

Machine-learning models generally involve two kinds of parameters. One kind is learned and estimated from the data: the model parameters, i.e. the parameters of the model itself. For example, the weight (slope) and bias (intercept) of a linear-regression line are model parameters. The other kind are the tuning parameters of the learning algorithm, which must be set by hand and are called hyper-parameters; "tuning" a model means adjusting these, e.g. the learning rate α of gradient descent, the number of epochs, and the batch size.

lenet_solver.prototxt reads:
# The train/test net protocol buffer definition
net: "examples/mnist/lenet_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy (decay schedule)
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display (log) every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results every 5000 iterations
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# solver mode: CPU or GPU (here: GPU)
solver_mode: GPU
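With the "inv" policy, Caffe decays the learning rate as a function of the iteration count t according to $lr(t) = base\_lr \cdot (1 + \gamma t)^{-power}$. A worked example with the values above:

lr(0) = 0.01
lr(10000) = 0.01 × (1 + 0.0001 × 10000)^(−0.75) = 0.01 × 2^(−0.75) ≈ 0.0059

which matches the gradually decaying learning rate visible in the training logs.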

Go to the Caffe root directory

cd caffe

and run the script:

./examples/mnist/train_lenet.sh

 

Training process (the original post illustrated each step with log screenshots, omitted here):

1. Initialize the train net from the prototxt file.

2. Determine which layers need backward computation.

3. Generate the test net, which goes through the same net creation and backward-layer analysis.

4. Run one initial accuracy computation, then start training.

A log entry is printed every 100 iterations; the loss value steadily decreases while the learning rate gradually decays.

At iteration 10,000, the final accuracy and loss are reported.

5. Model Prediction

 

./build/tools/caffe

Running caffe.bin with no arguments prints its command-line usage and available flags (screenshots omitted).

./build/tools/caffe test -model examples/mnist/lenet_train_test.prototxt -weights examples/mnist/lenet_iter_10000.caffemodel -iterations 100
Here test puts caffe in prediction mode, -model gives the network definition, -weights points to the pretrained weights file from the iteration-10,000 snapshot, and -iterations 100 runs 100 forward passes; with the test batch size of 100, that covers the full 10,000-image test set.

 
