神经网络算法入门
Sigmoid函数与损失函数求导:https://blog.csdn.net/zhishengqianjun/article/details/75303820
softmax交叉熵损失函数求导:https://blog.csdn.net/qian99/article/details/78046329
损失函数
交叉熵:
https://blog.csdn.net/rtygbwwwerr/article/details/50778098
https://blog.csdn.net/fengxueniu/article/details/73824318
熵(Entropy),交叉熵(Cross-Entropy),KL散度(KL Divergence) : https://www.cnblogs.com/silent-stranger/p/7987708.html
激活函数
ReLu(Rectified Linear Units)激活函数:http://www.cnblogs.com/neopenx/p/4453161.html
从ReLU到Sinc,26种神经网络激活函数可视化:https://www.jianshu.com/p/36c377941b26
激活函数的作用:https://www.zhihu.com/question/22334626
防止过拟合
权重衰减:http://blog.sina.cn/dpool/blog/s/blog_a89e19440102x1el.html
随机丢弃:https://yq.aliyun.com/articles/110002
其他教程
反向传播:http://speech.ee.ntu.edu.tw/~tlkagk/courses/MLDS_2015_2/Lecture/DNN%20backprop.ecm.mp4/index.html
本教程将阐述无监督特征学习和深度学习的主要观点: http://ufldl.stanford.edu/wiki/index.php/UFLDL%E6%95%99%E7%A8%8B
Ideas worth spreading: https://www.ted.com/talks/fei_fei_li_how_we_re_teaching_computers_to_understand_pictures#t-825443
零基础入门深度学习(3) - 神经网络和反向传播算法:https://www.zybuluo.com/hanbingtao/note/476663
神经网络浅讲:http://www.cnblogs.com/subconscious/p/5058741.html
深入理解卷积层,全连接层的作用意义:https://blog.csdn.net/m0_37407756/article/details/80904580
Sigmoid函数与损失函数求导
sigmoid函数求导
sigmoid导数具体的推导过程如下:

softmax交叉熵损失函数求导



"""Minimal fully-connected neural network for MNIST.

Trained with mini-batch SGD using momentum, L2 weight decay and
per-layer dropout masks.

Data layout convention: feature matrices are (dims, numcases), i.e. one
column per sample; labels are one-hot arrays of shape (10, numcases).
"""
import numpy as np


def sigmoid(x):
    """Element-wise logistic function 1 / (1 + e^-x)."""
    return 1 / (1 + np.exp(-x))


def relu(x):
    """Element-wise rectified linear unit max(x, 0)."""
    return np.maximum(x, 0)


def softmax(x):
    """Column-wise softmax of x (normalizes over axis 0).

    Shifts by the per-column max before exponentiating so that large
    logits do not overflow np.exp; the result is mathematically
    unchanged (fix over the original, which exponentiated x directly).
    """
    e = np.exp(x - np.max(x, 0, keepdims=True))
    return e / np.sum(e, 0)


def cross_entropy_loss(y, labels):
    """Mean cross-entropy loss over a batch.

    labels: one-hot ground truth, shape (classes, numcases)
    y:      predicted class probabilities, same shape
    """
    # per-sample loss, shape (numcases,); renamed from `sum`, which
    # shadowed the builtin in the original
    per_sample = np.sum(labels * np.log(y), 0)
    return -np.mean(per_sample)


def accuracy(y, labels):
    """Fraction of columns of y whose argmax equals the integer label.

    y:      (classes, numcases) scores or probabilities
    labels: length-numcases vector of integer class ids
    """
    y_hat = np.argmax(y, 0)
    return np.sum(y_hat == labels) / len(labels)


def initialize(layers, batchsize, dropout):
    """Create and initialize the network described by `layers`.

    layers:    list of layer widths, e.g. [784, 500, 10]
    batchsize: mini-batch size (fixes the shape of the gradZ buffers)
    dropout:   per-layer drop probabilities, indexed like `layers`
    """
    netparams = NetParams(len(layers) - 1)
    netparams.dropout = dropout
    # placeholders for the input layer (index 0)
    netparams.z.append(0)
    netparams.a.append(0)
    netparams.gradZ.append(0)
    netparams.mask.append(1)
    for i in range(1, len(layers)):
        # the last weight layer feeds the softmax output and is never
        # dropped; all earlier layers are hidden relu layers
        _append_layer(netparams, layers[i], layers[i - 1], batchsize,
                      hidden=(i < len(layers) - 1))
    return netparams


def _append_layer(netparams, fan_out, fan_in, batchsize, hidden):
    """Append one weight layer (fan_in -> fan_out) plus its buffers."""
    w = 0.01 * np.random.randn(fan_out, fan_in)
    netparams.w.append(w)
    netparams.gradW.append(np.zeros(w.shape))
    b = np.ones((fan_out, 1))
    netparams.b.append(b)
    netparams.gradB.append(np.zeros(b.shape))
    netparams.z.append(0)
    netparams.a.append(0)
    netparams.gradZ.append(np.zeros((fan_out, batchsize)))
    # hidden layers carry a dropout mask (redrawn in forward());
    # the output layer uses the constant 1 so nothing is masked
    netparams.mask.append(np.random.rand(fan_out, 1) > 0.5 if hidden else 1)


def net(layers, x):
    """Forward pass with freshly drawn random weights.

    Demo/debug helper only — it is unrelated to the trained parameters
    held in NetParams (weights are re-sampled on every call).
    """
    h = x
    for i in range(1, len(layers) - 1):
        w = np.random.randn(layers[i], layers[i - 1])
        b = np.ones((layers[i], 1))
        h = sigmoid(np.dot(w, h) + b)
    w = np.random.randn(layers[-1], layers[-2])
    b = np.ones((layers[-1], 1))
    return softmax(np.dot(w, h) + b)


def forward(x, labels, netparams):
    """Forward propagation: compute activations and the batch loss.

    A fresh dropout mask is drawn for every hidden layer on each call.
    BUG FIX: the mask shape is now taken from netparams.b[i] instead of
    the module-level global `layers`, so the function no longer depends
    on script state.  Returns (loss, output probabilities).
    """
    netparams.a[0] = netparams.z[0] = x
    l = netparams.l
    for i in range(l - 1):
        # keep a unit with probability (1 - dropout[i + 1])
        netparams.mask[i + 1] = (np.random.rand(*netparams.b[i].shape)
                                 >= netparams.dropout[i + 1])
        netparams.z[i + 1] = netparams.mask[i + 1] * (
            np.dot(netparams.w[i], netparams.a[i]) + netparams.b[i])
        netparams.a[i + 1] = relu(netparams.z[i + 1])
    netparams.z[l] = np.dot(netparams.w[l - 1], netparams.a[l - 1]) + netparams.b[l - 1]
    netparams.a[l] = softmax(netparams.z[l])
    loss = cross_entropy_loss(netparams.a[l], labels)
    return loss, netparams.a[l]


def backward(netparams, labels, weight_decay, moment, lr):
    """Backward propagation: accumulate momentum-smoothed gradients.

    weight_decay: L2 penalty coefficient added to the weight gradients
    moment:       momentum coefficient applied to the stored gradient
    lr:           learning rate folded into the stored gradient, so
                  update() can subtract gradW/gradB directly
    """
    numcases = np.size(netparams.z[-1], 1)
    l = netparams.l
    # combined softmax + cross-entropy gradient w.r.t. the output
    # pre-activation: dL/dz = a - labels
    netparams.gradZ[l] = -(labels - netparams.a[l])
    # moment * gradW[l - 1] is the momentum carried from the previous step
    netparams.gradW[l - 1] = moment * netparams.gradW[l - 1] + lr * (
        np.dot(netparams.gradZ[l], netparams.a[l - 1].T) / numcases
        + weight_decay * netparams.w[l - 1])
    netparams.gradB[l - 1] = moment * netparams.gradB[l - 1] + lr * np.sum(
        netparams.gradZ[l], 1, keepdims=True) / numcases
    for i in range(l - 1, 0, -1):
        # gradient w.r.t. the activations a[i]
        netparams.gradZ[i] = np.dot(netparams.w[i].T, netparams.gradZ[i + 1])
        # back through the relu ((z >= 0) is its derivative) and the mask
        netparams.gradZ[i] = netparams.gradZ[i] * (netparams.z[i] >= 0) * netparams.mask[i]
        # dropped units (the (1 - mask) rows) keep their previous stored
        # gradient untouched; kept units get momentum + lr * (gradient
        # + weight decay) as above
        netparams.gradW[i - 1] = (1 - netparams.mask[i]) * netparams.gradW[i - 1] + netparams.mask[i] * (
            moment * netparams.gradW[i - 1] + lr * (
                np.dot(netparams.gradZ[i], netparams.a[i - 1].T) / numcases
                + weight_decay * netparams.w[i - 1]))
        netparams.gradB[i - 1] = (1 - netparams.mask[i]) * netparams.gradB[i - 1] + netparams.mask[i] * (
            moment * netparams.gradB[i - 1]
            + lr * np.sum(netparams.gradZ[i], 1, keepdims=True) / numcases)


def update(netparams):
    """Apply the stored (lr-scaled) gradients; masked-out units are frozen."""
    for i in range(netparams.l):
        netparams.w[i] = netparams.w[i] - netparams.mask[i + 1] * netparams.gradW[i]
        netparams.b[i] = netparams.b[i] - netparams.mask[i + 1] * netparams.gradB[i]


def train(netparams, x, labels, batchsize, weight_decay, max_iters, moment, lr,
          stepsize, gamma):
    """Mini-batch SGD training loop.

    x:      (features, numcases) training data, one column per sample
    labels: (classes, numcases) one-hot targets
    Reshuffles the whole set at the start of each epoch and multiplies
    lr by `gamma` every `stepsize` iterations.  Returns the list of
    per-iteration losses.
    """
    outdims, numcases = np.shape(labels)
    numbatches = numcases // batchsize
    losses = []
    x_shuffle = x
    labels_shuffle = labels
    for i in range(max_iters):
        idx = i % numbatches
        if idx == 0:
            # new epoch: shuffle sample columns of data and labels together
            data = np.concatenate((x, labels), axis=0)
            np.random.shuffle(data.T)
            x_shuffle = data[:-outdims, :]
            labels_shuffle = data[-outdims:, :]
        if i > 0 and i % stepsize == 0:
            lr = gamma * lr  # step learning-rate decay
        batch_data = x_shuffle[:, idx * batchsize:(idx + 1) * batchsize]
        batch_labels = labels_shuffle[:, idx * batchsize:(idx + 1) * batchsize]
        loss, _ = forward(batch_data, batch_labels, netparams)
        backward(netparams, batch_labels, weight_decay, moment, lr)
        update(netparams)
        losses.append(loss)
    return losses


def inference(netparams, x):
    """Forward pass without dropout.

    Hidden activations are scaled by (1 - dropout) to compensate for
    the masking applied during training.  Returns the (classes,
    numcases) output probabilities.
    """
    netparams.a[0] = netparams.z[0] = x
    l = netparams.l
    for i in range(l - 1):
        netparams.z[i + 1] = np.dot(netparams.w[i], netparams.a[i]) + netparams.b[i]
        netparams.a[i + 1] = relu(netparams.z[i + 1]) * (1 - netparams.dropout[i + 1])
    netparams.z[l] = np.dot(netparams.w[l - 1], netparams.a[l - 1]) + netparams.b[l - 1]
    netparams.a[l] = softmax(netparams.z[l])
    return netparams.a[l]


class NetParams:
    """Container for all per-layer parameters and training buffers.

    Lists are indexed by layer: index 0 holds placeholders for the
    input layer, index l corresponds to the output layer.
    """

    def __init__(self, l):
        self.l = l        # number of weight layers
        self.w = []       # weights
        self.b = []       # biases
        self.gradW = []   # weight gradients (momentum folded in)
        self.gradB = []   # bias gradients
        self.gradZ = []   # pre-activation gradients
        self.z = []       # pre-activation values
        self.a = []       # activations
        self.mask = []    # per-layer dropout masks
        self.dropout = 0  # per-layer drop probabilities
        # default training hyper-parameters consumed via train(**kwargs)
        self.kwargs = {
            'batchsize': 100,
            'weight_decay': 0.0001,
            'max_iters': 600,
            'moment': 0.9,
            'lr': 0.001,
            'stepsize': 600,
            'gamma': 0.96,
        }


if __name__ == "__main__":
    # heavy / optional dependencies are only needed when run as a script;
    # importing them here keeps the module importable without them
    import matplotlib.pyplot as plt
    from mnist import MNIST
    from PIL import Image
    from scipy import sparse

    mndata = MNIST('mnist')
    images, labels = mndata.load_training()
    images = np.array(images).transpose()
    layers = [784, 500, 500, 100, 10]
    dropout = np.array([0, 0, 0, 0, 0])  # dropout disabled by default
    batchsize = 100
    netparams = initialize(layers, batchsize, dropout=dropout)
    # one-hot encode the 60000 training labels as a (10, 60000) array
    one_hot = sparse.coo_matrix(
        ([1] * 60000, (labels, list(range(60000)))), (10, 60000)).toarray()
    losses = train(netparams, images, one_hot, **netparams.kwargs)

    # optional evaluation (disabled in the original as well):
    # y = inference(netparams, images)
    # print("on train:", accuracy(y, labels))
    # plt.plot(range(len(losses)), losses)
    # plt.show()
    # images, labels = mndata.load_testing()
    # images = np.array(images).transpose()
    # y = inference(netparams, images)
    # print("on test:", accuracy(y, labels))

    img = Image.open('001.png')
    gray = img.convert('L')
    WHITE, BLACK = 255, 0
    img_new = gray.point(lambda v: WHITE if v < 128 else BLACK)
    # NOTE(review): the thresholded image above is immediately
    # overwritten below, exactly as in the original code — presumably
    # leftover experimentation; confirm before removing.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img_new = gray.resize((28, 28), Image.LANCZOS)
    x = np.reshape(np.array(img_new), (784, 1))
    y = inference(netparams, x)
    print(np.argmax(y))

浙公网安备 33010602011771号