Building DNN, CNN, and RNN handwritten-digit recognizers with TensorFlow, Keras, and PyTorch
The MNIST handwritten digit dataset
MNIST is a handwritten-digit recognition dataset built from handwriting samples collected in the United States. The handwritten content is the digits 0–9, and the training set alone contains 60,000 image samples (plus 10,000 test samples). It can be downloaded for free from the MNIST website as four compressed files with a .gz suffix; the files contain raw binary data.
| File name | Size | Purpose |
| --- | --- | --- |
| train-images-idx3-ubyte.gz | 9.45 MB | training images |
| train-labels-idx1-ubyte.gz | 0.03 MB | training labels |
| t10k-images-idx3-ubyte.gz | 1.57 MB | test images |
| t10k-labels-idx1-ubyte.gz | 4.4 KB | test labels |
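Because the four files are raw idx/ubyte binaries, they can also be read without any deep-learning framework. Below is a minimal sketch, assuming the four .gz files have been downloaded into a local `./mnist/` directory (see the download methods below); the helper names are my own.

```python
import gzip
import numpy as np

def load_idx_images(path):
    # idx3-ubyte layout: magic (4 bytes), count, rows, cols, then raw pixel bytes
    with gzip.open(path, "rb") as f:
        header = np.frombuffer(f.read(16), dtype=">i4")  # big-endian int32
        count, rows, cols = header[1], header[2], header[3]
        pixels = np.frombuffer(f.read(), dtype=np.uint8)
    return pixels.reshape(count, rows, cols)

def load_idx_labels(path):
    # idx1-ubyte layout: magic (4 bytes), count, then one byte per label
    with gzip.open(path, "rb") as f:
        f.read(8)  # skip magic number and item count
        return np.frombuffer(f.read(), dtype=np.uint8)

train_x = load_idx_images("./mnist/train-images-idx3-ubyte.gz")  # (60000, 28, 28)
train_y = load_idx_labels("./mnist/train-labels-idx1-ubyte.gz")  # (60000,)
```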
Downloading the MNIST dataset
Method 1: download from the official website (four .gz files; when read with TensorFlow's `input_data` loader, pixel values are scaled to 0–1)
Method 2: download from Google (a single .npz file; pixel values range from 0 to 255)
Method 3: fetch it with TensorFlow or Keras code, as shown below
```python
# TensorFlow (before 1.7)
from tensorflow.examples.tutorials.mnist import input_data
# Read MNIST from the given directory; the data is downloaded automatically if it is missing
mnist = input_data.read_data_sets("./mnist/", one_hot=True)

# TensorFlow (1.7 and later)
import tensorflow as tf
(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data(path='mnist.npz')

# Keras
from keras.datasets import mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Reading the .npz file directly with numpy
import numpy as np
f = np.load(path)  # path = local path of the downloaded mnist.npz
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
f.close()
```
If you fetch MNIST through code and cannot get past the firewall (no proxy), the download may fail, so I recommend downloading the files manually first and then loading them through code.
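A related workaround, as far as I can tell from Keras's cache layout: `mnist.load_data()` looks for the file under `~/.keras/datasets/` before trying to download, so a manually downloaded `mnist.npz` placed there will be loaded offline. A hedged sketch (the source path is an example):

```python
import os
import shutil

src = "./downloads/mnist.npz"  # hypothetical local path of the manually downloaded file
cache_dir = os.path.expanduser("~/.keras/datasets")
os.makedirs(cache_dir, exist_ok=True)
shutil.copy(src, os.path.join(cache_dir, "mnist.npz"))

from keras.datasets import mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()  # now reads the cached copy
```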
MNIST images
The training set contains 60,000 samples and the test set contains 10,000 samples. Each image in MNIST consists of 28 x 28 (= 784) pixels, and each pixel is a single grayscale value.
The Python code below downloads MNIST, peeks at how the dataset is split internally, and shows what a handwritten digit looks like.
```python
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Read MNIST from ./mnist; the data is downloaded automatically if it is missing
mnist = input_data.read_data_sets('./mnist', one_hot=True)

print(mnist.train.images.shape)       # training images (55000, 784)
print(mnist.train.labels.shape)       # training labels (55000, 10)
print(mnist.test.images.shape)        # test images (10000, 784)
print(mnist.test.labels.shape)        # test labels (10000, 10)
print(mnist.validation.images.shape)  # validation images (5000, 784)
print(mnist.validation.labels.shape)  # validation labels (5000, 10)
print(mnist.train.labels[1])          # [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]

# Reshape one flattened sample back into image form and display it
image = mnist.train.images[1].reshape(28, 28)
fig = plt.figure("image display")
plt.imshow(image, cmap='gray')
plt.axis('off')  # hide the axes
plt.show()
```
While drawing the digits, we also pull out their labels.
```python
from tensorflow.examples.tutorials.mnist import input_data
import math
import matplotlib.pyplot as plt
import numpy as np

mnist = input_data.read_data_sets('./mnist', one_hot=True)

# Draw a single MNIST digit
def drawdigit(position, image, title):
    plt.subplot(*position)  # unpack the (rows, cols, index) tuple
    plt.imshow(image, cmap='gray_r')
    plt.axis('off')
    plt.title(title)

# Take one batch of data and draw batch_size digits as subplots on one figure
def batchDraw(batch_size):
    images, labels = mnist.train.next_batch(batch_size)
    row_num = math.ceil(batch_size ** 0.5)  # round up
    column_num = row_num
    plt.figure(figsize=(row_num, column_num))  # figure size in inches
    for i in range(row_num):
        for j in range(column_num):
            index = i * column_num + j
            if index < batch_size:
                position = (row_num, column_num, index + 1)
                image = images[index].reshape(28, 28)
                # argmax returns the index of the largest value, i.e. the digit
                title = 'actual:%d' % (np.argmax(labels[index]))
                drawdigit(position, image, title)

if __name__ == '__main__':
    batchDraw(16)
    plt.show()
```
Notes on the code:
```python
mnist = input_data.read_data_sets("./mnist/", one_hot=True, reshape=False)
```
A color image is made of three RGB channel arrays, while a grayscale image has only one of them. An image consists of pixels, each with a value between 0 and 255, and every MNIST digit has 28 * 28 = 784 pixel values. In the call above, if reshape=True (the default), the MNIST data has shape (?, 784); with reshape=False it has shape (?, 28, 28, 1).
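The two layouts are just reshapes of the same data; a quick numpy sketch of the conversion (the zero array stands in for the real images):

```python
import numpy as np

flat = np.zeros((55000, 784), dtype=np.float32)  # reshape=True layout: one 784-dim row per image
images = flat.reshape(-1, 28, 28, 1)             # reshape=False layout: height x width x 1 channel
print(images.shape)                              # (55000, 28, 28, 1)
print(images.reshape(-1, 784).shape)             # back to (55000, 784)
```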
Keras
DNN network
```python
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras import regularizers
from keras.optimizers import Adam
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("mnist/", one_hot=True)
x_train = mnist.train.images  # training images (55000, 784)
y_train = mnist.train.labels  # training labels (55000, 10)
x_test = mnist.test.images
y_test = mnist.test.labels

# DNN architecture
inputs = Input(shape=(784,))
h1 = Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)  # L2 regularization on the weight matrix
h1 = Dropout(0.2)(h1)
h2 = Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))(h1)
h2 = Dropout(0.2)(h2)
h3 = Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))(h2)
h3 = Dropout(0.2)(h3)
outputs = Dense(10, activation='softmax', kernel_regularizer=regularizers.l2(0.01))(h3)
model = Model(inputs=inputs, outputs=outputs)

# Compile the model
opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08)  # epsilon is the fuzz factor
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])  # cross-entropy loss

# Train
model.fit(x=x_train, y=y_train, validation_split=0.1, batch_size=128, epochs=4)
model.save('k_DNN.h5')
```
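After training, the held-out test split can be used as a sanity check. A short sketch, assuming the script above has just been run so `model`, `x_test` and `y_test` are still in scope:

```python
# evaluate on the 10,000 test images (x_test has shape (10000, 784))
test_loss, test_acc = model.evaluate(x=x_test, y=y_test, batch_size=128)
print("test loss:", test_loss)
print("test accuracy:", test_acc)
```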
CNN network
```python
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense
from keras import regularizers
from keras.optimizers import Adam
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist/", one_hot=True, reshape=False)
x_train = mnist.train.images  # training images (55000, 28, 28, 1)
y_train = mnist.train.labels  # training labels (55000, 10)
x_test = mnist.test.images
y_test = mnist.test.labels

# CNN architecture
inputs = Input(shape=(28, 28, 1))
h1 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(inputs)
h1 = MaxPooling2D(pool_size=2, strides=2, padding='valid')(h1)
h1 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(h1)
h1 = MaxPooling2D()(h1)
h1 = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(h1)
h1 = Reshape((16 * 7 * 7,))(h1)  # h1.shape (?, 16*7*7)
outputs = Dense(10, activation="softmax", kernel_regularizer=regularizers.l2(0.01))(h1)
model = Model(inputs=inputs, outputs=outputs)
model.summary()

# Compile the model
opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

# Train
model.fit(x=x_train, y=y_train, validation_split=0.1, epochs=5)
model.save('k_CNN.h5')
```
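For reference, here is the shape bookkeeping that the `Reshape((16 * 7 * 7,))` layer relies on: 'same' padding keeps the spatial size, and each 2x2 pooling halves it (a comment-only sketch).

```python
# input:                       (batch, 28, 28, 1)
# Conv2D(64, 3x3, same):       (batch, 28, 28, 64)
# MaxPooling2D(2, stride 2):   (batch, 14, 14, 64)
# Conv2D(32, 3x3, same):       (batch, 14, 14, 32)
# MaxPooling2D() default 2x2:  (batch, 7, 7, 32)
# Conv2D(16, 3x3, same):       (batch, 7, 7, 16)
# Reshape((16 * 7 * 7,)):      (batch, 784)
```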
RNN network
```python
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras import regularizers
from keras.optimizers import Adam
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist/", one_hot=True)
x_train = mnist.train.images           # (55000, 784)
x_train = x_train.reshape(-1, 28, 28)  # treat each image as 28 time steps of 28 features
y_train = mnist.train.labels

# RNN architecture
inputs = Input(shape=(28, 28))
h1 = LSTM(64, activation='relu', return_sequences=True, dropout=0.2)(inputs)
h2 = LSTM(64, activation='relu', dropout=0.2)(h1)
outputs = Dense(10, activation='softmax', kernel_regularizer=regularizers.l2(0.01))(h2)
model = Model(inputs=inputs, outputs=outputs)

# Compile the model
opt = Adam(lr=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Train
model.fit(x=x_train, y=y_train, validation_split=0.1, batch_size=128, epochs=5)
model.save('k_RNN.h5')
```
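The same row-as-timestep reshaping has to be applied to the test images before evaluating. A minimal sketch, assuming the training script above has just been run:

```python
x_test = mnist.test.images.reshape(-1, 28, 28)  # 28 time steps of 28 features per digit
y_test = mnist.test.labels
test_loss, test_acc = model.evaluate(x=x_test, y=y_test, batch_size=128)
print("test accuracy:", test_acc)
```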
TensorFlow
DNN network
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist", one_hot=True)
# train image shape: (55000, 784)
# train label shape: (55000, 10)
# val   image shape: (5000, 784)
# test  image shape: (10000, 784)

epochs = 2
output_size = 10
input_size = 784
hidden1_size = 512
hidden2_size = 256
batch_size = 1000
learning_rate_base = 0.005
unit_list = [784, 512, 256, 10]
batch_num = mnist.train.labels.shape[0] // batch_size

# One fully connected layer
def dense(x, w, b, keepprob):
    linear = tf.matmul(x, w) + b
    activation = tf.nn.relu(linear)
    y = tf.nn.dropout(activation, keepprob)
    return y

def DNNModel(image, w, b, keepprob):
    dense1 = dense(image, w[0], b[0], keepprob)
    dense2 = dense(dense1, w[1], b[1], keepprob)
    output = tf.matmul(dense2, w[2]) + b[2]
    return output

# Generate the weights of the network
def gen_weights(unit_list):
    w = []
    b = []
    # iterate over the layers
    for i in range(len(unit_list) - 1):
        sub_w = tf.Variable(tf.random_normal(shape=[unit_list[i], unit_list[i + 1]]))
        sub_b = tf.Variable(tf.random_normal(shape=[unit_list[i + 1]]))
        w.append(sub_w)
        b.append(sub_b)
    return w, b

x = tf.placeholder(tf.float32, [None, 784])
y_true = tf.placeholder(tf.float32, [None, 10])
keepprob = tf.placeholder(tf.float32)
global_step = tf.Variable(0)

w, b = gen_weights(unit_list)
y_pre = DNNModel(x, w, b, keepprob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pre, labels=y_true))
tf.summary.scalar("loss", loss)  # collect the scalar
opt = tf.train.AdamOptimizer(0.001).minimize(loss, global_step=global_step)

# argmax returns the index of the largest value per row; compare prediction and label
predict = tf.equal(tf.argmax(y_pre, axis=1), tf.argmax(y_true, axis=1))
acc = tf.reduce_mean(tf.cast(predict, tf.float32))
tf.summary.scalar("acc", acc)  # collect the scalar

merged = tf.summary.merge_all()           # merge the summaries
saver = tf.train.Saver()                  # save and restore the model
init = tf.global_variables_initializer()  # initialize the global variables

with tf.Session() as sess:
    sess.run(init)
    writer = tf.summary.FileWriter("./logs/tensorboard", tf.get_default_graph())  # TensorBoard event file
    for i in range(batch_num * epochs):
        x_train, y_train = mnist.train.next_batch(batch_size)
        summary, _ = sess.run([merged, opt], feed_dict={x: x_train, y_true: y_train, keepprob: 0.75})
        writer.add_summary(summary, i)  # write this iteration's summaries to the event file
        # evaluate the recognition rate on the validation set
        if i % 50 == 0:
            feeddict = {x: mnist.validation.images, y_true: mnist.validation.labels, keepprob: 1.}
            valloss, accuracy = sess.run([loss, acc], feed_dict=feeddict)
            print(i, 'th batch val loss:', valloss, ', accuracy:', accuracy)
    saver.save(sess, './checkpoints/tfdnn.ckpt')  # save the model
    print('test set accuracy:', sess.run(acc, feed_dict={x: mnist.test.images, y_true: mnist.test.labels, keepprob: 1.}))
    writer.close()
```
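Because the script saves `./checkpoints/tfdnn.ckpt` with `tf.train.Saver`, the trained weights can be restored later for inference. A minimal sketch, assuming the graph-building code above has been executed in the new process so the variables match the checkpoint:

```python
with tf.Session() as sess:
    saver.restore(sess, './checkpoints/tfdnn.ckpt')  # restore the trained variables
    test_acc = sess.run(acc, feed_dict={x: mnist.test.images,
                                        y_true: mnist.test.labels,
                                        keepprob: 1.})
    print('restored model test accuracy:', test_acc)
```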
CNN network
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

epochs = 10
batch_size = 100
mnist = input_data.read_data_sets("mnist/", one_hot=True, reshape=False)
batch_nums = mnist.train.labels.shape[0] // batch_size

# Convolution
def conv2d(x, w, b):
    # x = (?, 28, 28, 1)
    # filter = [filter_height, filter_width, in_channels, out_channels]
    # strides follow the data format [batch, height, width, channels];
    # the first and last entries must be 1
    return tf.nn.conv2d(x, filter=w, strides=[1, 1, 1, 1], padding='SAME') + b

def pool(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Network definition
def cnn_net(x, keepprob):
    # x has shape (?, 28, 28, 1) because reshape=False
    w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
    b1 = tf.Variable(tf.random_normal([64]))
    w2 = tf.Variable(tf.random_normal([5, 5, 64, 32]))
    b2 = tf.Variable(tf.random_normal([32]))
    w3 = tf.Variable(tf.random_normal([7 * 7 * 32, 10]))
    b3 = tf.Variable(tf.random_normal([10]))
    hidden1 = pool(conv2d(x, w1, b1))
    hidden1 = tf.nn.dropout(hidden1, keepprob)
    hidden2 = pool(conv2d(hidden1, w2, b2))
    hidden2 = tf.reshape(hidden2, [-1, 7 * 7 * 32])
    hidden2 = tf.nn.dropout(hidden2, keepprob)
    output = tf.matmul(hidden2, w3) + b3
    return output

# Placeholders
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y_true = tf.placeholder(tf.float32, [None, 10])
keepprob = tf.placeholder(tf.float32)

# Decay the learning rate gradually during training; the op returns the decayed rate
global_step = tf.Variable(0)
learning_rate = tf.train.exponential_decay(0.01, global_step, 100, 0.96, staircase=True)

# Loss and optimizer
logits = cnn_net(x, keepprob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_true))
opt = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

# Evaluation
predict = tf.equal(tf.argmax(logits, 1), tf.argmax(y_true, 1))  # per-sample correctness
accuracy = tf.reduce_mean(tf.cast(predict, tf.float32))         # accuracy

init = tf.global_variables_initializer()

# Training
with tf.Session() as sess:
    sess.run(init)
    for k in range(epochs):
        for i in range(batch_nums):
            train_x, train_y = mnist.train.next_batch(batch_size)
            sess.run(opt, {x: train_x, y_true: train_y, keepprob: 0.75})
            # evaluate the recognition rate on the validation set
            if i % 50 == 0:
                acc = sess.run(accuracy, {x: mnist.validation.images[:1000],
                                          y_true: mnist.validation.labels[:1000],
                                          keepprob: 1.})
                print(k, 'epochs, ', i, 'iters, ', ', acc :', acc)
```
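With staircase=True, `tf.train.exponential_decay` computes 0.01 * 0.96 ** (global_step // 100), so the rate drops in steps every 100 iterations. A quick pure-Python check of the schedule used above:

```python
# print a few points of the decayed learning-rate schedule
for step in (0, 100, 500, 1000, 5000):
    lr = 0.01 * 0.96 ** (step // 100)
    print(step, round(lr, 6))
```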
RNN network
```python
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

epochs = 10
batch_size = 1000
mnist = input_data.read_data_sets("mnist/", one_hot=True)
batch_nums = mnist.train.labels.shape[0] // batch_size

# Network definition
def RNN_Model(x, batch_size, keepprob):
    # build one LSTM cell per layer (reusing a single cell object would share its weights)
    def lstm_cell():
        cell = tf.nn.rnn_cell.LSTMCell(28)
        return tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keepprob)
    # stack several RNN cells into a multi-layer RNN cell
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(2)])
    initial_state = multi_rnn_cell.zero_state(batch_size, tf.float32)
    # build the recurrent network specified by the cell and unroll it dynamically over the inputs
    outputs, states = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                        inputs=x,
                                        dtype=tf.float32,
                                        initial_state=initial_state)
    # outputs has shape [batch_size, max_time, 28]; classify from the last time step
    w = tf.Variable(tf.random_normal([28, 10]))
    b = tf.Variable(tf.random_normal([10]))
    output = tf.matmul(outputs[:, -1, :], w) + b
    return output, states

# Placeholders
x = tf.placeholder(tf.float32, [None, 28, 28])
y_true = tf.placeholder(tf.float32, [None, 10])
keepprob = tf.placeholder(tf.float32)

global_step = tf.Variable(0)
# Decay the learning rate gradually during training; the op returns the decayed rate
learning_rate = tf.train.exponential_decay(0.01, global_step, 10, 0.96, staircase=True)

# Loss and optimizer
y_pred, states = RNN_Model(x, batch_size, keepprob)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true))
opt = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)  # minimize the loss

predict = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))  # per-sample correctness
acc = tf.reduce_mean(tf.cast(predict, tf.float32))              # accuracy

init = tf.global_variables_initializer()

# Training
with tf.Session() as sess:
    sess.run(init)
    for k in range(epochs):
        for i in range(batch_nums):
            train_x, train_y = mnist.train.next_batch(batch_size)
            sess.run(opt, {x: train_x.reshape((-1, 28, 28)), y_true: train_y, keepprob: 0.8})
            # evaluate the recognition rate on one validation batch
            if i % 50 == 0:
                val_x, val_y = mnist.validation.next_batch(batch_size)
                val_loss, accuracy = sess.run([loss, acc],
                                              {x: val_x.reshape((-1, 28, 28)),
                                               y_true: val_y,
                                               keepprob: 1.})
                print('val_loss is :', val_loss, ', accuracy is :', accuracy)
```
PyTorch
DNN network
```python
import torch
import torch.nn as nn

class MNISTNet(nn.Module):
    def __init__(self, input_dims, n_hiddens, n_class):
        super(MNISTNet, self).__init__()
        self.block_1 = nn.Sequential(
            nn.Linear(in_features=input_dims, out_features=n_hiddens),
            nn.ReLU(),
            nn.Dropout(p=0.2)
        )
        self.block_2 = nn.Sequential(
            nn.Linear(in_features=n_hiddens, out_features=n_hiddens),
            nn.ReLU(),
            nn.Dropout(p=0.2)
        )
        self.last_Linear = nn.Linear(in_features=n_hiddens, out_features=n_class)

    def forward(self, inputs):
        x = inputs.view(inputs.size(0), -1)  # flatten to (batch, 28*28)
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.last_Linear(x)
        return x

def test_MNISTNet():
    inputs = torch.randn(64, 1, 28, 28)
    model = MNISTNet(input_dims=784, n_hiddens=256, n_class=10)
    outputs = model(inputs)
    print("outputs", outputs.shape)
```
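A quick smoke test of the model plus a trainable-parameter count (my own snippet, not part of the original script):

```python
if __name__ == "__main__":
    test_MNISTNet()  # expects output shape torch.Size([64, 10])
    model = MNISTNet(input_dims=784, n_hiddens=256, n_class=10)
    # sum of 784*256+256, 256*256+256 and 256*10+10 parameters
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("trainable parameters:", n_params)
```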
PyTorch model training
```python
# -*- coding:utf-8 -*-
# Author: 凌逆战 | Never
# Date: 2023/1/19
"""
Reference: https://github.com/aaron-xichen/pytorch-playground
"""
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # restrict training to the first GPU
import argparse
import torch
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from model.demo_model import MNISTNet
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F


def parse_args():
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument("--model_class", type=str, default="MNISTNet", help="model class name")
    parser.add_argument('--train_tag', default="Mask(IAM)_Loss(mask_MAE)", help='training tag')
    parser.add_argument("--model_name", type=str, default=None, help="checkpoint to resume from, e.g. '10.pth', or None")
    parser.add_argument("--batch_size", type=int, default=64, help="")
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument('--lr', type=float, default=3e-4, help='learning rate (default: 3e-4)')
    parser.add_argument('--train_log_dir', default="./train_log", help='directory for training logs')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    print("GPU available:", torch.cuda.is_available())  # True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # instantiate the Dataset
    # preprocessing
    normalize = transforms.Normalize(mean=[.5], std=[.5])
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    # download and load the data
    train_dataset = torchvision.datasets.MNIST(root='./mnist/', train=True, transform=transform, download=True)
    test_dataset = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transform, download=False)
    # encapsulate them into dataloader form
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, drop_last=True)

    # ########### checkpoint directory (created if it does not exist) ############
    args.checkpoints_dir = os.path.join(args.train_log_dir, args.model_class + "_{}".format(args.train_tag), "checkpoints")
    if not os.path.exists(args.checkpoints_dir):
        os.makedirs(args.checkpoints_dir)

    ################################
    #       instantiate model      #
    ################################
    model = MNISTNet(input_dims=784, n_hiddens=256, n_class=10).to(device)

    ################################
    #         loss function        #
    ################################

    ###############################
    #       create optimizer      #
    ###############################
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # ########### TensorBoard summary ############
    event_dir = os.path.join(args.train_log_dir, args.model_class + "_{}".format(args.train_tag), "event_files")
    writer = SummaryWriter(event_dir)  # create the event file

    # ########### load a checkpoint, if requested ############
    start_epoch = 0
    if args.model_name:
        print("loading model:", args.checkpoints_dir + "/", args.model_name)
        checkpoint = torch.load(os.path.join(args.checkpoints_dir, args.model_name))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        start_epoch = checkpoint['epoch']

    for epoch in range(start_epoch, args.epochs):
        model.train()  # training mode
        correct = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(device)      # torch.Size([64, 1, 28, 28])
            target = target.to(device)  # torch.Size([64])
            optimizer.zero_grad()
            output = model(data)        # torch.Size([64, 10])
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            pred = output.data.max(1)[1]  # index of the max log-probability
            correct += pred.eq(target).sum()

        # ########### console logging ############
        acc = 100. * correct / len(train_loader.dataset)
        print('Train Epoch: {} Loss: {:.6f} Accuracy: {:.4f}%'.format(epoch, loss, acc))

        # ########### TensorBoard summary ############
        # print('learning rate:', optimizer.state_dict()['param_groups'][0]['lr'])
        # writer.add_scalar(tag="lr", scalar_value=optimizer.state_dict()['param_groups'][0]['lr'],
        #                   global_step=epoch + 1)
        writer.add_scalar(tag="train/train_loss", scalar_value=loss, global_step=epoch + 1)
        writer.add_scalar(tag="train/train_acc", scalar_value=acc, global_step=epoch + 1)
        writer.flush()

        # performance of the network on the test set
        model.eval()  # evaluation mode
        test_loss = 0
        correct = 0
        # no gradients are needed at test time
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                # print(output.dtype, target.dtype)  # torch.float32 torch.int64
                test_loss += F.cross_entropy(output, target).data
                pred = output.data.max(1)[1]  # index of the max log-probability
                correct += pred.eq(target).sum()

        # ########### console logging ############
        test_loss = test_loss / len(test_loader)  # average over the number of mini-batches
        acc = 100. * correct / len(test_loader.dataset)
        print('\tTest set: Average loss: {:.4f}, Accuracy: {:.0f}%'.format(test_loss, acc))

        #######################
        #  update TensorBoard #
        #######################
        writer.add_scalar(tag="val/test_loss", scalar_value=test_loss, global_step=epoch + 1)
        writer.add_scalar(tag="val/test_acc", scalar_value=acc, global_step=epoch + 1)
        writer.flush()

        # ########### save a checkpoint ############
        if (epoch + 1) % 10 == 0:
            print("saving model")
            checkpoint = {
                "model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch + 1,
            }
            torch.save(checkpoint, '%s/%d.pth' % (args.checkpoints_dir, epoch + 1))


if __name__ == "__main__":
    main()
```
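Since the training script stores each checkpoint as a dict with "model", "optimizer" and "epoch" keys, loading one back for inference looks roughly like this, mirroring the Keras loading example in the next section (a sketch; the checkpoint path is only an example):

```python
import torch
from model.demo_model import MNISTNet

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MNISTNet(input_dims=784, n_hiddens=256, n_class=10).to(device)

checkpoint = torch.load("100.pth", map_location=device)  # example path under checkpoints_dir
model.load_state_dict(checkpoint["model"])
model.eval()

with torch.no_grad():
    dummy = torch.randn(1, 1, 28, 28, device=device)  # replace with a real, normalized image tensor
    pred = model(dummy).argmax(dim=1)
    print("predicted digit:", pred.item())
```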
Loading a saved model
Training a deep model takes a long time, and we cannot afford to retrain from scratch every time we want a prediction. The solution is to save the model, i.e. the trained parameters, and load it back when needed.
```python
import numpy as np
from keras.models import load_model
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("./mnist/", one_hot=True, reshape=False)  # (?, 28, 28, 1)
x_test = mnist.test.images  # (10000, 28, 28, 1)
y_test = mnist.test.labels  # (10000, 10)
print(y_test[1])  # [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]

model = load_model('k_CNN.h5')  # load the saved model

# Evaluate the model
evl = model.evaluate(x=x_test, y=y_test)
evl_name = model.metrics_names
for i in range(len(evl)):
    print(evl_name[i], ':\t', evl[i])
    # loss : 0.19366768299341203
    # acc : 0.9691

# Predict a single image
test = x_test[1].reshape(1, 28, 28, 1)
y_predict = model.predict(test)  # (1, 10)
print(y_predict)
# [[1.6e-06 6.0e-09 9.9e-01 5.8e-10 4.0e-07 2.5e-08 1.72e-06 1.2e-09 2.1e-07 8.5e-08]]
y_true = 'actual:%d' % (np.argmax(y_test[1]))  # actual:2
pre = 'predict:%d' % (np.argmax(y_predict))    # predict:2
```