Handwritten Digit Recognition with a Neural Network

Initialize a Network object. The list sizes holds the number of neurons in each layer. Suppose we create a three-layer network with 2 neurons in the first layer, 3 in the second layer, and 4 in the third layer.

The initialization code is as follows:
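A minimal sketch of the constructor (the same __init__ appears in the full listing below), instantiated for the [2, 3, 4] example:

import numpy as np

class Network(object):
    def __init__(self, sizes):
        # sizes lists the number of neurons per layer, e.g. [2, 3, 4]
        self.num_layers = len(sizes)
        self.sizes = sizes
        # one column vector of biases per non-input layer
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # the weight matrix between layer l and layer l+1 has shape (sizes[l+1], sizes[l])
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

net = Network([2, 3, 4])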

The resulting biases and weights look like this:
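The actual values are random draws from N(0, 1), so only the shapes are reproducible; for Network([2, 3, 4]) they are:

print([b.shape for b in net.biases])   # [(3, 1), (4, 1)]
print([w.shape for w in net.weights])  # [(3, 2), (4, 3)]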


Random sampling

numpy.random.rand(d0,d1,...,dn) draws random samples uniformly from [0, 1).

numpy.random.randn(d0,d1,...,dn) returns samples from the standard normal distribution with mean 0 and standard deviation 1, written N(0, 1); most samples fall roughly between -1.96 and +1.96, and values outside that range occur with low probability.
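A quick illustration of the two sampling functions (the shapes here are chosen arbitrarily):

print(np.random.rand(2, 3))   # 2x3 array of uniform samples in [0, 1)
print(np.random.randn(3, 1))  # 3x1 column vector of N(0, 1) samples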

The sigmoid function: σ(z) = 1 / (1 + e^(-z))

The derivative of the sigmoid function: σ'(z) = σ(z) * (1 - σ(z))
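A small numeric check of both formulas, using the sigmoid and sigmoid_prime functions from the full listing below:

print(sigmoid(0))        # 1 / (1 + e^0) = 0.5
print(sigmoid_prime(0))  # 0.5 * (1 - 0.5) = 0.25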

Forward propagation: each layer computes a' = σ(wa + b) and feeds its activations into the next layer.
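A minimal sketch of the forward pass (the same loop appears in the feedforward method of the full listing below):

    def feedforward(self, a):
        # a is the input column vector; apply each layer's weights and bias in turn
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a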


numpy.dot(a, b, out=None)

Computes the product of two arrays. For 2-D arrays, dot() is equivalent to matrix multiplication. For 1-D arrays, it computes the inner (dot) product of the two vectors. For N-dimensional arrays, it is a sum-product over the last axis of a and the second-to-last axis of b: dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
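Two small examples (arrays chosen arbitrarily):

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print(np.dot(a, b))            # matrix product: [[19 22] [43 50]]
print(np.dot([1, 2], [3, 4]))  # inner product of two vectors: 11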


Difference between * and dot():

The * operator performs element-wise multiplication: the two arrays must have the same shape (or be broadcast-compatible), otherwise an error is raised; each position of the result is the product of the corresponding elements, so the result keeps the same shape.
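A small comparison, reusing a and b from the example above:

print(a * b)         # element-wise product: [[ 5 12] [21 32]]
print(np.dot(a, b))  # matrix product:       [[19 22] [43 50]]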


Stochastic gradient descent (arguments: training data, number of epochs, mini-batch size, learning rate, and an optional test set):

The parameters are updated once for each mini_batch, repeating until the whole training_data has been traversed:
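The mini-batch slicing inside SGD works like this (n is the length of training_data; this is the same list comprehension as in the full listing below):

mini_batches = [
    training_data[k:k+mini_batch_size]
    for k in range(0, n, mini_batch_size)]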

Backpropagation:

Derivative of the cost function with respect to the output activations a: for the quadratic cost used here, ∂C/∂a = (output_activations - y).
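The output-layer error is this derivative multiplied element-wise by σ'(z), exactly as computed at the start of the backward pass in backprop:

        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())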

Evaluation function: count how many test samples the network classifies correctly.
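The predicted digit is the index of the largest output activation, which is compared against the label; a standalone equivalent of the evaluate method (assuming a trained net and a list test_data of (x, y) pairs):

correct = sum(int(np.argmax(net.feedforward(x)) == y)
              for (x, y) in test_data)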


The complete code is as follows:

import numpy as np
import random
import os,struct
from array import array as pyarray
from numpy import append,array,int8,uint8,zeros
class Network(object):
    def __init__(self, sizes):# the list sizes holds the number of neurons in each layer
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        # numpy.random.randn(d0,d1,...,dn) returns samples drawn from the standard normal distribution
    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)# np.dot() returns the dot product of the two arrays
        return a
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)# shuffle all elements of the sequence in place
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))# str.format() formats the string
            else:
                print("Epoch {0} complete".format(j))
    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.biases]# zero-filled arrays with the same shapes as the biases
        nabla_w = [np.zeros(w.shape) for w in self.weights]# zero-filled arrays with the same shapes as the weights
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]
    # Backpropagation
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)
    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]# index of the largest output activation, i.e. the predicted digit
        return sum(int(x == y) for (x, y) in test_results)# count how many predictions match the labels

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)
    def predict(self,data):
        value=self.feedforward(data)
        return value.tolist().index(max(value))
    def save(self):
        pass
    def load(self):
        pass
# Sigmoid function (S-shaped curve)
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))
# Derivative of the sigmoid function
def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
def load_mnist(dataset="training_data", digits=np.arange(10), path=r"E:\mnist"):  # raw string so the Windows path is not mangled by escape sequences
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images.idx3-ubyte')
        fname_label = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_label = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    flbl = open(fname_label, 'rb')
    magic_nr, size = struct.unpack(">II", flbl.read(8))
    lbl = pyarray("b", flbl.read())
    flbl.close()

    fimg = open(fname_image, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    img = pyarray("B", fimg.read())
    fimg.close()

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(len(ind)):
        images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]
    return images, labels

def load_samples(dataset="training_data"):
    image, label = load_mnist(dataset)

    X = [np.reshape(x, (28 * 28, 1)) for x in image]
    X = [x / 255.0 for x in X]  # grayscale values are in the range 0-255; scale them to 0-1

    def vectorized_Y(y):
        e = np.zeros((10, 1))
        e[y] = 1.0
        return e
    # convert the label Y into the network's output format (a 10x1 one-hot vector)

    if dataset == "training_data":
        Y = [vectorized_Y(y) for y in label]
        pair = list(zip(X, Y))
        return pair
    elif dataset == 'testing_data':
        pair = list(zip(X, label))
        return pair
    else:
        print('Something wrong')
if __name__=='__main__':
    INPUT=28*28
    OUTPUT=10
    net=Network([INPUT, 40, OUTPUT])
    train_set = load_samples(dataset='training_data')
    test_set = load_samples(dataset='testing_data')
    net.SGD(train_set, 30, 10, 3.0, test_data=test_set)
    correct = 0
    for test_feature in test_set:
        if net.predict(test_feature[0]) == test_feature[1][0]:
            correct += 1
    print("Accuracy: ", correct / len(test_set))


Experimental results:

A three-layer network: 28*28 inputs, 10 outputs, and a hidden layer of 40 neurons.

INPUT=28*28
OUTPUT=10
net=Network([INPUT, 40, OUTPUT])

Training on the MNIST dataset for 13 epochs with a mini-batch size of 100 and a learning rate of 3.0, the final accuracy is about 93%.

net.SGD(train_set, 13, 100, 3.0, test_data=test_set)


Training on the MNIST dataset for 13 epochs with a mini-batch size of 10 and a learning rate of 3.0, the final accuracy reaches 95.67%.

net.SGD(train_set, 13, 10, 3.0, test_data=test_set)


In these experiments, the smaller the mini-batch size, the faster the convergence and the higher the final accuracy.
