An Introduction to Neural Network Algorithms

Derivatives of the sigmoid function and the loss function: https://blog.csdn.net/zhishengqianjun/article/details/75303820

Derivative of the softmax cross-entropy loss: https://blog.csdn.net/qian99/article/details/78046329

Loss Functions

Cross-entropy:

  https://blog.csdn.net/rtygbwwwerr/article/details/50778098

  https://blog.csdn.net/fengxueniu/article/details/73824318

Entropy, cross-entropy, and KL divergence: https://www.cnblogs.com/silent-stranger/p/7987708.html
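
As a quick illustration of how these three quantities relate (a minimal NumPy sketch; the distributions p and q here are made up):

import numpy as np

p = np.array([0.7, 0.2, 0.1])          # "true" distribution (made up)
q = np.array([0.5, 0.3, 0.2])          # model distribution (made up)

H_p  = -np.sum(p * np.log(p))          # entropy H(p)
H_pq = -np.sum(p * np.log(q))          # cross-entropy H(p, q)
KL   = np.sum(p * np.log(p / q))       # KL divergence D_KL(p || q)
print(np.isclose(H_pq, H_p + KL))      # True: H(p, q) = H(p) + D_KL(p || q)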

Activation Functions

The ReLU (Rectified Linear Units) activation function: http://www.cnblogs.com/neopenx/p/4453161.html

From ReLU to Sinc: visualizing 26 neural-network activation functions: https://www.jianshu.com/p/36c377941b26

  What activation functions are for: https://www.zhihu.com/question/22334626
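
A minimal sketch for visualizing a couple of these activations with matplotlib (the plotting range and styling are my own choices):

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-5, 5, 200)
plt.plot(x, 1 / (1 + np.exp(-x)), label='sigmoid')  # squashes to (0, 1)
plt.plot(x, np.maximum(x, 0), label='ReLU')         # zero for negative inputs
plt.legend()
plt.show()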

Preventing Overfitting

Weight decay: http://blog.sina.cn/dpool/blog/s/blog_a89e19440102x1el.html

Dropout: https://yq.aliyun.com/articles/110002
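
Both techniques appear in the training code later in this post; as a standalone sketch (all shapes and hyperparameter values here are illustrative):

import numpy as np

lr, weight_decay, p_drop = 0.01, 1e-4, 0.5
w = np.random.randn(100, 50)
grad_w = np.random.randn(100, 50)            # stand-in for a real gradient

# Weight decay: the L2 penalty adds weight_decay * w to the gradient.
w -= lr * (grad_w + weight_decay * w)

# Dropout: zero each unit with probability p_drop during training.
a = np.random.randn(100, 32)                 # stand-in activations
mask = np.random.rand(*a.shape) >= p_drop
a_train = mask * a                           # at test time, scale activations by (1 - p_drop) instead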

Other Tutorials

Backpropagation: http://speech.ee.ntu.edu.tw/~tlkagk/courses/MLDS_2015_2/Lecture/DNN%20backprop.ecm.mp4/index.html

A tutorial on the main ideas of unsupervised feature learning and deep learning (UFLDL): http://ufldl.stanford.edu/wiki/index.php/UFLDL%E6%95%99%E7%A8%8B

Ideas worth spreading (Fei-Fei Li's TED talk on teaching computers to understand pictures): https://www.ted.com/talks/fei_fei_li_how_we_re_teaching_computers_to_understand_pictures#t-825443

Deep learning from scratch (3) - neural networks and the backpropagation algorithm: https://www.zybuluo.com/hanbingtao/note/476663

A gentle introduction to neural networks: http://www.cnblogs.com/subconscious/p/5058741.html

Understanding the roles of convolutional and fully connected layers: https://blog.csdn.net/m0_37407756/article/details/80904580

Shiyanlou (hands-on Python courses): https://www.shiyanlou.com/courses/?category=%E5%90%8E%E7%AB%AF%E5%BC%80%E5%8F%91&course_type=all&tag=Python&fee=all

Derivatives of the Sigmoid Function and the Loss Function

Derivative of the sigmoid function

The derivation of the sigmoid derivative goes as follows. Starting from the definition $\sigma(x) = \frac{1}{1 + e^{-x}}$,

$$\sigma'(x) = \frac{e^{-x}}{(1 + e^{-x})^2} = \frac{1}{1 + e^{-x}} \cdot \frac{e^{-x}}{1 + e^{-x}} = \sigma(x)\left(1 - \sigma(x)\right).$$

This is why the derivative can be computed from the forward-pass output alone, without re-evaluating the exponential.
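
A quick numerical sanity check of this identity (a minimal sketch; the test points and step size are arbitrary):

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = np.linspace(-5, 5, 11)
h = 1e-6
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)  # central finite difference
analytic = sigmoid(x) * (1 - sigmoid(x))               # sigma(x) * (1 - sigma(x))
print(np.max(np.abs(numeric - analytic)))              # very small, ~1e-10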

Derivative of the softmax cross-entropy loss

With softmax outputs $y_i = \frac{e^{z_i}}{\sum_j e^{z_j}}$ and a one-hot target $t$, the cross-entropy loss is $L = -\sum_i t_i \log y_i$. Differentiating with respect to the logits, the softmax Jacobian and the loss combine into the remarkably simple form

$$\frac{\partial L}{\partial z_i} = y_i - t_i,$$

which is exactly the `netparams.gradZ[l] = -(labels - netparams.a[l])` step in the backward pass of the code below.
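
The same kind of finite-difference check confirms the $y - t$ form (a minimal sketch; the random logits and target index are arbitrary):

import numpy as np

np.random.seed(0)
z = np.random.randn(10)                    # logits for one sample
t = np.zeros(10); t[3] = 1.0               # one-hot target

def loss(z):
    y = np.exp(z - z.max()); y /= y.sum()  # numerically stable softmax
    return -np.sum(t * np.log(y))

y = np.exp(z - z.max()); y /= y.sum()
numeric = np.zeros(10)
h = 1e-6
for i in range(10):
    e = np.zeros(10); e[i] = h
    numeric[i] = (loss(z + e) - loss(z - e)) / (2 * h)
print(np.max(np.abs(numeric - (y - t))))   # very small: the gradient is y - t

The full NumPy implementation below puts these pieces together: a small fully connected network trained on MNIST (it uses the python-mnist package for `from mnist import MNIST`).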

import matplotlib.pyplot as plt
import numpy as np
from mnist import MNIST
from scipy import sparse


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def relu(x):
    return np.maximum(x, 0)


def softmax(x):
    x = np.exp(x - np.max(x, axis=0, keepdims=True))  # subtract the column max for numerical stability
    y = x / np.sum(x, 0)
    return y


def cross_entropy_loss(y, labels):
    '''Cross-entropy loss.
           labels: one-hot ground-truth labels
           y: predicted class probabilities'''
    per_sample = np.sum(labels * np.log(y + 1e-12), 0)  # shape (60000,); y and labels have shape (10, 60000); epsilon guards against log(0)
    loss = -np.mean(per_sample)
    return loss


def accuracy(y, labels):
    '''Fraction of samples whose argmax prediction matches the integer label.'''
    y_hat = np.argmax(y, 0)
    acc = np.sum(y_hat == labels) / len(labels)
    return acc


def initialize(layers, batchsize, dropout):
    '''Create and initialize the network parameters.'''
    netparams = NetParams(len(layers) - 1)
    # Index 0 holds placeholders so that layer i's tensors sit at list index i.
    netparams.z.append(0)
    netparams.a.append(0)
    netparams.gradZ.append(0)
    netparams.mask.append(1)
    netparams.dropout = dropout
    # Hidden layers: small random weights, bias 1, and a placeholder dropout mask.
    for i in range(1, len(layers) - 1):
        w = 0.01 * np.random.randn(layers[i], layers[i - 1])
        netparams.w.append(w)
        netparams.gradW.append(np.zeros(np.shape(w)))
        b = np.ones((layers[i], 1))
        netparams.b.append(b)
        netparams.gradB.append(np.zeros(np.shape(b)))
        netparams.z.append(0)
        netparams.a.append(0)
        netparams.gradZ.append(np.zeros((layers[i], batchsize)))
        netparams.mask.append(np.random.rand(layers[i], 1) > 0.5)
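    # Output (softmax) layer.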
    w = 0.01 * np.random.randn(layers[-1], layers[-2])
    netparams.w.append(w)
    netparams.gradW.append(np.zeros(np.shape(w)))
    b = np.ones((layers[-1], 1))
    netparams.b.append(b)
    netparams.gradB.append(np.zeros(np.shape(b)))
    netparams.z.append(0)
    netparams.gradZ.append(np.zeros((layers[-1], batchsize)))
    netparams.a.append(0)
    netparams.mask.append(1)
    return netparams


def net(layers, x):
    '''Standalone demo of a forward pass with freshly sampled random weights; unused by training.'''
    h = x
    for i in range(1, len(layers) - 1):
        w = np.random.randn(layers[i], layers[i - 1])
        b = np.ones((layers[i], 1))
        h = sigmoid(np.dot(w, h) + b)
    w = np.random.randn(layers[-1], layers[-2])
    b = np.ones((layers[-1], 1))
    h = softmax(np.dot(w, h) + b)
    return h


def forward(x, labels, netparams):
    '''Forward pass: compute activations and the cross-entropy loss.'''
    netparams.a[0] = netparams.z[0] = x
    l = netparams.l
    for i in range(l - 1):
        # Resample the dropout mask each pass; a unit survives with probability 1 - dropout.
        fan_out = netparams.b[i].shape[0]  # avoids relying on the global `layers` list
        netparams.mask[i + 1] = np.random.rand(fan_out, 1) >= netparams.dropout[i + 1]
        netparams.z[i + 1] = netparams.mask[i + 1] * (np.dot(netparams.w[i], netparams.a[i]) + netparams.b[i])
        netparams.a[i + 1] = relu(netparams.z[i + 1])
    netparams.z[l] = np.dot(netparams.w[l - 1], netparams.a[l - 1]) + netparams.b[l - 1]
    netparams.a[l] = softmax(netparams.z[l])
    loss = cross_entropy_loss(netparams.a[l], labels)
    return loss, netparams.a[l]


def backward(netparams, labels, weight_decay, moment, lr):
    '''Backward pass: compute momentum-smoothed update steps for all layers.'''
    # weight_decay: L2 weight-decay coefficient
    numcases = np.size(netparams.z[-1], 1)
    l = netparams.l
    netparams.gradZ[l] = -(labels - netparams.a[l])  # softmax cross-entropy gradient: y - t
    # moment * gradW is the momentum term: the previous update scaled by the momentum coefficient.
    netparams.gradW[l - 1] = moment * netparams.gradW[l - 1] + lr * (np.dot(netparams.gradZ[l], netparams.a[l - 1].T) / numcases + weight_decay * netparams.w[l - 1])
    netparams.gradB[l - 1] = moment * netparams.gradB[l - 1] + lr * np.sum(netparams.gradZ[l], 1,
                                                                           keepdims=True) / numcases
    for i in range(l - 1, 0, -1):
        netparams.gradZ[i] = np.dot(netparams.w[i].T, netparams.gradZ[i + 1])  # gradient w.r.t. the activation a_i
        netparams.gradZ[i] = netparams.gradZ[i] * (netparams.z[i] >= 0) * netparams.mask[i]  # ReLU derivative, then the dropout mask
        # Units dropped this pass keep their previous update; surviving units take a momentum step.
        netparams.gradW[i - 1] = (1 - netparams.mask[i]) * netparams.gradW[i - 1] + netparams.mask[i] * (
                moment * netparams.gradW[i - 1] + lr * (
                np.dot(netparams.gradZ[i], netparams.a[i - 1].T) / numcases + weight_decay * netparams.w[
            i - 1]))
        netparams.gradB[i - 1] = (1 - netparams.mask[i]) * netparams.gradB[i - 1] + netparams.mask[i] * (
                moment * netparams.gradB[i - 1] + lr * np.sum(netparams.gradZ[i], 1, keepdims=True) / numcases)


def update(netparams):
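    '''Apply the update steps; the learning rate is already folded into gradW and gradB.'''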
    for i in range(netparams.l):
        netparams.w[i] = netparams.w[i] - netparams.mask[i + 1] * netparams.gradW[i]
        netparams.b[i] = netparams.b[i] - netparams.mask[i + 1] * netparams.gradB[i]


def train(netparams, x, labels, batchsize, weight_decay, max_iters, moment, lr, stepsize, gamma):
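    '''Mini-batch SGD with momentum, L2 weight decay, dropout, and stepped learning-rate decay.'''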
    outdims, numcases = np.shape(labels)
    numbatches = numcases // batchsize
    losses = []
    x_shuffle = x
    labels_shuffle = labels
    for i in range(max_iters):
        idx = i % numbatches
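        # Reshuffle once per epoch by permuting the columns of the stacked (x; labels) matrix.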
        if (idx == 0):
            data = np.concatenate((x, labels), axis=0)
            np.random.shuffle(data.T)
            x_shuffle = data[:-outdims, :]
            labels_shuffle = data[-outdims:, :]
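        # Step decay: shrink the learning rate by gamma every stepsize iterations.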
        if (i > 0 and i % stepsize == 0):
            lr = gamma * lr
        batch_data = x_shuffle[:, idx * batchsize:(idx + 1) * batchsize]
        batch_labels = labels_shuffle[:, idx * batchsize:(idx + 1) * batchsize]
        loss, _ = forward(batch_data, batch_labels, netparams)
        backward(netparams, batch_labels, weight_decay, moment, lr)
        update(netparams)
        losses.append(loss)
    return losses


def inference(netparams, x):
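    '''Forward pass at test time: no dropout masks; activations are scaled by the keep probability instead.'''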
    netparams.a[0] = netparams.z[0] = x
    l = netparams.l
    for i in range(l - 1):
        netparams.z[i + 1] = np.dot(netparams.w[i], netparams.a[i]) + netparams.b[i]
        netparams.a[i + 1] = relu(netparams.z[i + 1]) * (1 - netparams.dropout[i + 1])  # scale by the keep probability
    netparams.z[l] = np.dot(netparams.w[l - 1], netparams.a[l - 1]) + netparams.b[l - 1]
    netparams.a[l] = softmax(netparams.z[l])
    return netparams.a[l]


class NetParams:
    def __init__(self, l):
        self.l = l  # number of weight layers
        self.w = []  # weights
        self.b = []  # biases
        self.gradW = []  # weight update steps (momentum folded in)
        self.gradB = []  # bias update steps
        self.gradZ = []  # gradients of the pre-activations
        self.z = []  # pre-activations
        self.a = []  # activations
        self.mask = []  # dropout masks
        self.dropout = 0  # per-layer dropout probabilities
        self.kwargs = {
            'batchsize': 100, 'weight_decay': 0.0001,
            'max_iters': 600, 'moment': 0.9,
            'lr': 0.001, 'stepsize': 600, 'gamma': 0.96
        }


if __name__ == "__main__":
    mndata = MNIST('mnist')  # python-mnist loader; expects the MNIST data files in ./mnist
    images, labels = mndata.load_training()
    images = np.array(images).transpose()  # shape (784, 60000): one column per image
    layers = [784, 500, 500, 100, 10]
    # dropout = np.random.rand(len(layers))
    dropout = np.array([0, 0, 0, 0, 0])  # per-layer dropout probabilities (disabled here)
    batchsize = 100
    netparams = initialize(layers, batchsize, dropout=dropout)
    one_hot = sparse.coo_matrix(([1] * 60000, (labels, list(range(60000)))), (10, 60000)).toarray()  # (10, 60000) one-hot labels
    losses = train(netparams, images, one_hot, **netparams.kwargs)

    # Evaluate on the training set and plot the training loss curve.
    y = inference(netparams, images)
    acc = accuracy(y, labels)
    print("on train:", acc)

    plt.plot(range(len(losses)), losses)
    plt.show()

    # Evaluate on the held-out test set.
    images, labels = mndata.load_testing()
    images = np.array(images).transpose()
    y = inference(netparams, images)
    acc = accuracy(y, labels)
    print("on test:", acc)

    # Classify a single hand-drawn digit (001.png is assumed to exist next to the script).
    from PIL import Image
    img = Image.open('001.png')
    gray = img.convert('L')
    WHITE, BLACK = 255, 0
    img_new = gray.point(lambda x: WHITE if x < 128 else BLACK)  # invert-threshold so strokes are bright, as in MNIST
    img_new = img_new.resize((28, 28), Image.ANTIALIAS)  # Image.ANTIALIAS is named Image.LANCZOS in newer Pillow
    img_new = np.array(img_new)
    x = np.reshape(img_new, (784, 1))
    y = inference(netparams, x)
    print(np.argmax(y))
