卷积神经网络CNN——常用于图像识别
(1)卷积层
· 卷积——通过对图像进行卷积运算,可以对图像的某个特征进行选择性的增强或减弱
· 图像的局部性——各个像素点与其附近的像素点之间具有强关联——卷积层利用此对图像的特征进行检测
· 图像的张数——RGB就是三个,即通道数,单色图像通道数为1
· 通常的图片使用多个过滤器对图像进行卷积处理
· 过滤器的数量和偏置的数量是相同的
在这一层:输入--激励函数(卷积之和+偏置)--输出
(2)池化层
· 池化:将图像的各个区域进行划分,并将各个区域的代表特征抽取出来重新排列组合成一幅新的图像
· 最大池化:CNN中最常用,以每个区域最大值作为该区域的代表
这一层相当于对图像进行模糊处理,降低了网络整体计算量
(3)全连接层
普通神经网络的网络层
(4)填充
在图像周围环绕一圈,目的是保持图像大小不变;也能更多地获取图像边缘的特征
(5)步长
过滤器每次移动的间隔距离
变量一览:

| 变量 | 含义 |
| --- | --- |
| B | 批次大小 |
| M | 过滤器数量 |
| Ih | 图像高度 |
| Fw | 过滤器宽度 |
| Iw | 图像宽度 |
| Fh | 过滤器高度 |
| C | 通道数 |
| Oh | 输出图像的高度 |
| P | 池化区大小 |
| Ow | 输出图像的宽度 |
两种重要算法:可以避免过多的嵌套循环导致耗时
im2col(正向传播)
生成的过滤器矩阵为(M,CxFhxFw)阶矩阵
生成的图像矩阵为(BxOhxOw,CxFhxFw)阶矩阵
简单实现:
import numpy as np

def im2col(image, flt_h, flt_w, out_h, out_w):
    """Unfold a single-channel 2-D image into an im2col matrix.

    image: 2-D array of shape (img_h, img_w).
    flt_h, flt_w: filter height and width.
    out_h, out_w: output height and width (stride 1, no padding).
    Returns a (flt_h*flt_w, out_h*out_w) matrix; each column is one
    flattened sliding window.
    """
    cols = np.zeros((flt_h, flt_w, out_h, out_w))
    for h in range(flt_h):
        h_lim = h + out_h
        for w in range(flt_w):
            w_lim = w + out_w
            # Slice + flatten the sliding region.
            # BUGFIX: read the `image` parameter (the original read the
            # module-level `img`) and assign to the single (h, w) slot
            # (the original wrote the slice `cols[h, w:, :]`).
            cols[h, w, :, :] = image[h:h_lim, w:w_lim]
    return cols.reshape(flt_h*flt_w, out_h*out_w)

img = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]])
cols = im2col(img, 2, 2, 3, 3)
print(cols)
多批次、多通道情况下:
import numpy as np

## Multi-batch, multi-channel im2col with padding and stride.
def im2col_1(images, flt_h, flt_w, out_h, out_w, stride, pad):
    """Unfold a batch of images into an im2col matrix.

    images: 4-D array of shape (batch, channels, height, width).
    Returns a (channels*flt_h*flt_w, batch*out_h*out_w) matrix.
    """
    n_bt, n_ch, img_h, img_w = images.shape
    # Zero-pad only the two spatial axes.
    img_pad = np.pad(images, [(0,0),(0,0),(pad,pad),(pad,pad)], "constant")
    cols = np.zeros((n_bt, n_ch, flt_h, flt_w, out_h, out_w))
    for h in range(flt_h):
        h_lim = h + stride*out_h
        for w in range(flt_w):
            w_lim = w + stride*out_w
            # Slice + flatten the sliding region.
            # BUGFIX: read the padded local `img_pad` (the original read the
            # unpadded module-level `img`, so `pad` had no effect) and assign
            # to the single (h, w) slot (the original wrote `cols[:,:,h,w:,:]`).
            cols[:, :, h, w, :, :] = img_pad[:, :, h:h_lim:stride, w:w_lim:stride]
    cols = cols.transpose(1,2,3,0,4,5).reshape(n_ch*flt_h*flt_w, n_bt*out_h*out_w)
    return cols

# BUGFIX: the function unpacks four dimensions, so the 2-D demo image must be
# reshaped to (batch=1, channels=1, h, w); the original call crashed.
img = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]).reshape(1, 1, 4, 4)
cols = im2col_1(img, 2, 2, 3, 3, 1, 0)
print(cols)
col2im(反向传播)
def col2im(cols, img_shape, flt_h, flt_w, out_h, out_w, stride, pad):
    """Fold an im2col matrix back into image space (scatter-add inverse).

    Overlapping windows accumulate, which is exactly what backpropagation
    through im2col requires.
    """
    n_bt, n_ch, img_h, img_w = img_shape
    # Restore the 6-D patch layout: (batch, channel, fh, fw, oh, ow).
    patches = cols.reshape(n_ch, flt_h, flt_w, n_bt, out_h, out_w).transpose(3, 0, 1, 2, 4, 5)
    # The extra stride-1 rows/columns keep the strided slices below in bounds.
    canvas = np.zeros((n_bt, n_ch, img_h + 2*pad + stride - 1, img_w + 2*pad + stride - 1))
    for h in range(flt_h):
        for w in range(flt_w):
            canvas[:, :, h:h + stride*out_h:stride, w:w + stride*out_w:stride] += patches[:, :, h, w, :, :]
    # Crop the padding off before returning.
    return canvas[:, :, pad:img_h + pad, pad:img_w + pad]

cols = np.ones((4, 4))
img_shape = (1, 1, 3, 3)
images = col2im(cols, img_shape, 2, 2, 2, 2, 1, 0)
print(images)
卷积神经网络识别手写数字实践
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# -- Load the handwritten-digits dataset (8x8 images, 10 classes) --
digits_data = datasets.load_digits()
input_data = digits_data.data
correct = digits_data.target
n_data = len(correct)

# -- Standardize the input (zero mean, unit variance over the whole set) --
ave_input = np.average(input_data)
std_input = np.std(input_data)
input_data = (input_data - ave_input) / std_input

# -- One-hot encode the labels --
correct_data = np.zeros((n_data, 10))
for i in range(n_data):
    correct_data[i, correct[i]] = 1.0

# -- Train/test split: every 3rd sample goes to the test set --
index = np.arange(n_data)
index_train = index[index%3 != 0]
index_test = index[index%3 == 0]
input_train = input_data[index_train, :]      # training inputs
correct_train = correct_data[index_train, :]  # training labels
input_test = input_data[index_test, :]        # test inputs
correct_test = correct_data[index_test, :]    # test labels
n_train = input_train.shape[0]  # number of training samples
n_test = input_test.shape[0]    # number of test samples

# -- Hyper-parameters --
img_h = 8       # input image height
img_w = 8       # input image width
img_ch = 1      # input image channels
wb_width = 0.1  # spread of the initial weights and biases
eta = 0.01      # learning rate
epoch = 50
batch_size = 8
interval = 10   # progress-report interval, in epochs
n_sample = 200  # number of samples used for error estimation
# -- im2col --
def im2col(images, flt_h, flt_w, out_h, out_w, stride, pad):
    """Rearrange image patches into columns so convolution becomes a matmul.

    images: (batch, channels, height, width) array.
    Returns a (channels*flt_h*flt_w, batch*out_h*out_w) matrix whose columns
    are the flattened receptive fields.
    """
    n_bt, n_ch, img_h, img_w = images.shape
    # Zero-pad only the two spatial axes.
    img_pad = np.pad(images, [(0, 0), (0, 0), (pad, pad), (pad, pad)], "constant")
    cols = np.zeros((n_bt, n_ch, flt_h, flt_w, out_h, out_w))
    for h in range(flt_h):
        for w in range(flt_w):
            # One strided slice gathers this filter offset at every output position.
            cols[:, :, h, w, :, :] = img_pad[:, :, h:h + stride*out_h:stride,
                                             w:w + stride*out_w:stride]
    return cols.transpose(1, 2, 3, 0, 4, 5).reshape(n_ch*flt_h*flt_w,
                                                    n_bt*out_h*out_w)
# -- col2im --
def col2im(cols, img_shape, flt_h, flt_w, out_h, out_w, stride, pad):
    """Inverse of im2col: scatter-add columns back into image space.

    Overlapping windows accumulate, matching the duplication im2col
    performed on the forward pass.
    """
    n_bt, n_ch, img_h, img_w = img_shape
    # Restore the 6-D patch layout: (batch, channel, fh, fw, oh, ow).
    patches = cols.reshape(n_ch, flt_h, flt_w, n_bt, out_h, out_w).transpose(3, 0, 1, 2, 4, 5)
    # Extra stride-1 rows/columns keep the strided slices below in bounds.
    canvas = np.zeros((n_bt, n_ch, img_h + 2*pad + stride - 1, img_w + 2*pad + stride - 1))
    for h in range(flt_h):
        for w in range(flt_w):
            canvas[:, :, h:h + stride*out_h:stride, w:w + stride*out_w:stride] += patches[:, :, h, w, :, :]
    # Crop the padding back off.
    return canvas[:, :, pad:img_h + pad, pad:img_w + pad]
# -- Convolution layer --
class ConvLayer:
    # n_bt: batch size, x_ch: input channels, x_h: input height, x_w: input width
    # n_flt: number of filters, flt_h: filter height, flt_w: filter width
    # stride: stride, pad: padding
    # y_ch: output channels, y_h: output height, y_w: output width
    def __init__(self, x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad):
        # Keep all the shape parameters together
        self.params = (x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad)
        # Filters and biases start as small Gaussian noise
        self.w = wb_width * np.random.randn(n_flt, x_ch, flt_h, flt_w)
        self.b = wb_width * np.random.randn(1, n_flt)
        # Output image size (standard convolution output arithmetic)
        self.y_ch = n_flt  # one output channel per filter
        self.y_h = (x_h - flt_h + 2*pad) // stride + 1  # output height
        self.y_w = (x_w - flt_w + 2*pad) // stride + 1  # output width
        # AdaGrad accumulators; the 1e-8 avoids division by zero in update()
        self.h_w = np.zeros((n_flt, x_ch, flt_h, flt_w)) + 1e-8
        self.h_b = np.zeros((1, n_flt)) + 1e-8
    def forward(self, x):
        """Convolution as a matrix product, then bias and ReLU."""
        n_bt = x.shape[0]
        x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Unfold the input and flatten the filters so convolution becomes np.dot
        self.cols = im2col(x, flt_h, flt_w, y_h, y_w, stride, pad)
        self.w_col = self.w.reshape(n_flt, x_ch*flt_h*flt_w)
        # Matrix product + bias, then restore (batch, channel, height, width)
        u = np.dot(self.w_col, self.cols).T + self.b
        self.u = u.reshape(n_bt, y_h, y_w, y_ch).transpose(0, 3, 1, 2)
        self.y = np.where(self.u <= 0, 0, self.u)  # ReLU
    def backward(self, grad_y):
        """Compute filter/bias gradients and the gradient w.r.t. the input."""
        n_bt = grad_y.shape[0]
        x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # ReLU derivative gates the incoming gradient
        delta = grad_y * np.where(self.u <= 0, 0, 1)
        delta = delta.transpose(0,2,3,1).reshape(n_bt*y_h*y_w, y_ch)
        # Filter and bias gradients (uses the cols cached on the forward pass)
        grad_w = np.dot(self.cols, delta)
        self.grad_w = grad_w.T.reshape(n_flt, x_ch, flt_h, flt_w)
        self.grad_b = np.sum(delta, axis=0)
        # Input gradient: project back through the filters, then fold with col2im
        grad_cols = np.dot(delta, self.w_col)
        x_shape = (n_bt, x_ch, x_h, x_w)
        self.grad_x = col2im(grad_cols.T, x_shape, flt_h, flt_w, y_h, y_w, stride, pad)
    def update(self, eta):
        """AdaGrad parameter update."""
        self.h_w += self.grad_w * self.grad_w
        self.w -= eta / np.sqrt(self.h_w) * self.grad_w
        self.h_b += self.grad_b * self.grad_b
        self.b -= eta / np.sqrt(self.h_b) * self.grad_b
# -- Max-pooling layer --
class PoolingLayer:
    # n_bt: batch size, x_ch: input channels, x_h: input height, x_w: input width
    # pool: pooling window size, pad: padding
    # y_ch: output channels, y_h: output height, y_w: output width
    def __init__(self, x_ch, x_h, x_w, pool, pad):
        # Keep the shape parameters together
        self.params = (x_ch, x_h, x_w, pool, pad)
        # Output image size: channels unchanged; spatial dims divided by pool,
        # rounded up when the input is not an exact multiple
        self.y_ch = x_ch  # output channels
        self.y_h = x_h//pool if x_h%pool==0 else x_h//pool+1  # output height
        self.y_w = x_w//pool if x_w%pool==0 else x_w//pool+1  # output width
    def forward(self, x):
        """Take the maximum over each pool x pool region."""
        n_bt = x.shape[0]
        x_ch, x_h, x_w, pool, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Unfold so that each row holds one pooling window
        cols = im2col(x, pool, pool, y_h, y_w, pool, pad)
        cols = cols.T.reshape(n_bt*y_h*y_w*x_ch, pool*pool)
        # Max pooling
        y = np.max(cols, axis=1)
        self.y = y.reshape(n_bt, y_h, y_w, x_ch).transpose(0, 3, 1, 2)
        # Remember which element won; backward routes gradients through it
        self.max_index = np.argmax(cols, axis=1)
    def backward(self, grad_y):
        """Route each gradient back to the position that produced the max."""
        n_bt = grad_y.shape[0]
        x_ch, x_h, x_w, pool, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Move channels last to line up with the forward-pass layout
        grad_y = grad_y.transpose(0, 2, 3, 1)
        # Sparse matrix: the gradient goes only into each window's argmax slot
        grad_cols = np.zeros((pool*pool, grad_y.size))
        grad_cols[self.max_index.reshape(-1), np.arange(grad_y.size)] = grad_y.reshape(-1)
        grad_cols = grad_cols.reshape(pool, pool, n_bt, y_h, y_w, y_ch)
        grad_cols = grad_cols.transpose(5,0,1,2,3,4)
        grad_cols = grad_cols.reshape( y_ch*pool*pool, n_bt*y_h*y_w)
        # Fold the columns back into image space
        x_shape = (n_bt, x_ch, x_h, x_w)
        self.grad_x = col2im(grad_cols, x_shape, pool, pool, y_h, y_w, pool, pad)
# -- Ancestor class of the fully connected layers --
class BaseLayer:
    def __init__(self, n_upper, n):
        """Initialize weights/biases with Gaussian noise plus AdaGrad state."""
        self.w = wb_width * np.random.randn(n_upper, n)
        self.b = wb_width * np.random.randn(n)
        # AdaGrad accumulators; 1e-8 guards the division in update().
        self.h_w = np.zeros((n_upper, n)) + 1e-8
        self.h_b = np.zeros(n) + 1e-8
    def update(self, eta):
        """AdaGrad step: per-parameter rate shrinks with squared-gradient history."""
        self.h_w += self.grad_w ** 2
        self.w -= eta / np.sqrt(self.h_w) * self.grad_w
        self.h_b += self.grad_b ** 2
        self.b -= eta / np.sqrt(self.h_b) * self.grad_b
# -- Fully connected hidden layer --
class MiddleLayer(BaseLayer):
    def forward(self, x):
        # Affine transform followed by ReLU
        self.x = x
        self.u = np.dot(x, self.w) + self.b
        self.y = np.where(self.u <= 0, 0, self.u)
    def backward(self, grad_y):
        # ReLU derivative gates the incoming gradient
        delta = grad_y * np.where(self.u <= 0, 0, 1)
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)
# -- Fully connected output layer --
class OutputLayer(BaseLayer):
    def forward(self, x):
        # Affine transform followed by softmax (row-wise normalization)
        self.x = x
        u = np.dot(x, self.w) + self.b
        self.y = np.exp(u)/np.sum(np.exp(u), axis=1).reshape(-1, 1)
    def backward(self, t):
        # Softmax + cross-entropy: the delta is simply (prediction - target)
        delta = self.y - t
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)
# -- Build the network: conv -> pool -> fully connected -> softmax --
cl_1 = ConvLayer(img_ch, img_h, img_w, 10, 3, 3, 1, 1)
pl_1 = PoolingLayer(cl_1.y_ch, cl_1.y_h, cl_1.y_w, 2, 0)
n_fc_in = pl_1.y_ch * pl_1.y_h * pl_1.y_w  # flattened size fed to the FC part
ml_1 = MiddleLayer(n_fc_in, 100)
ol_1 = OutputLayer(100, 10)

# -- Forward propagation --
def forward_propagation(x):
    # x arrives flattened; restore the (batch, channel, height, width) layout
    n_bt = x.shape[0]
    images = x.reshape(n_bt, img_ch, img_h, img_w)
    cl_1.forward(images)
    pl_1.forward(cl_1.y)
    # Flatten the pooled feature maps for the fully connected layers
    fc_input = pl_1.y.reshape(n_bt, -1)
    ml_1.forward(fc_input)
    ol_1.forward(ml_1.y)

# -- Backpropagation (layers visited in reverse order) --
def backpropagation(t):
    n_bt = t.shape[0]
    ol_1.backward(t)
    ml_1.backward(ol_1.grad_x)
    # Un-flatten the gradient back into feature-map shape for the pooling layer
    grad_img = ml_1.grad_x.reshape(n_bt, pl_1.y_ch, pl_1.y_h, pl_1.y_w)
    pl_1.backward(grad_img)
    cl_1.backward(pl_1.grad_x)

# -- Update the weights and biases of every trainable layer --
def uppdate_wb():  # NOTE(review): name is a typo for update_wb; kept for existing callers
    cl_1.update(eta)
    ml_1.update(eta)
    ol_1.update(eta)

# -- Cross-entropy error --
def get_error(t, batch_size):
    # 1e-7 prevents log(0)
    return -np.sum(t * np.log(ol_1.y + 1e-7)) / batch_size

# -- Forward-propagate a random sample of the data --
def forward_sample(inp, correct, n_sample):
    index_rand = np.arange(len(correct))
    np.random.shuffle(index_rand)
    index_rand = index_rand[:n_sample]
    x = inp[index_rand, :]
    t = correct[index_rand, :]
    forward_propagation(x)
    return x, t
# -- Error-history records --
train_error_x = []
train_error_y = []
test_error_x = []
test_error_y = []

# -- Training loop --
n_batch = n_train // batch_size  # mini-batches per epoch
for i in range(epoch):
    # -- Estimate the current error on random samples --
    x, t = forward_sample(input_train, correct_train, n_sample)
    error_train = get_error(t, n_sample)
    x, t = forward_sample(input_test, correct_test, n_sample)
    error_test = get_error(t, n_sample)
    # -- Record the errors --
    train_error_x.append(i)
    train_error_y.append(error_train)
    test_error_x.append(i)
    test_error_y.append(error_test)
    # -- Progress report --
    if i%interval == 0:
        print("Epoch:" + str(i) + "/" + str(epoch),
              "Error_train:" + str(error_train),
              "Error_test:" + str(error_test))
    # -- Learning: SGD over shuffled mini-batches --
    index_rand = np.arange(n_train)
    np.random.shuffle(index_rand)
    for j in range(n_batch):
        mb_index = index_rand[j*batch_size : (j+1)*batch_size]
        x = input_train[mb_index, :]
        t = correct_train[mb_index, :]
        forward_propagation(x)
        backpropagation(t)
        uppdate_wb()

# -- Plot the recorded errors --
plt.plot(train_error_x, train_error_y, label="Train")
plt.plot(test_error_x, test_error_y, label="Test")
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Error")
plt.show()

# -- Measure accuracy on the full train and test sets --
x, t = forward_sample(input_train, correct_train, n_train)
count_train = np.sum(np.argmax(ol_1.y, axis=1) == np.argmax(t, axis=1))
x, t = forward_sample(input_test, correct_test, n_test)
count_test = np.sum(np.argmax(ol_1.y, axis=1) == np.argmax(t, axis=1))
print("Accuracy Train:", str(count_train/n_train*100) + "%",
      "Accuracy Test:", str(count_test/n_test*100) + "%")

# -- Inspect predictions for a few test images --
samples = input_test[:5]
forward_propagation(samples)
print(ol_1.y)
print(correct_test[:5])
添加dropout后:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# -- Load the handwritten-digits dataset (8x8 images, 10 classes) --
digits_data = datasets.load_digits()
input_data = digits_data.data
correct = digits_data.target
n_data = len(correct)

# -- Standardize the input (zero mean, unit variance over the whole set) --
ave_input = np.average(input_data)
std_input = np.std(input_data)
input_data = (input_data - ave_input) / std_input

# -- One-hot encode the labels --
correct_data = np.zeros((n_data, 10))
for i in range(n_data):
    correct_data[i, correct[i]] = 1.0

# -- Train/test split: every 3rd sample goes to the test set --
index = np.arange(n_data)
index_train = index[index%3 != 0]
index_test = index[index%3 == 0]
input_train = input_data[index_train, :]      # training inputs
correct_train = correct_data[index_train, :]  # training labels
input_test = input_data[index_test, :]        # test inputs
correct_test = correct_data[index_test, :]    # test labels
n_train = input_train.shape[0]  # number of training samples
n_test = input_test.shape[0]    # number of test samples

# -- Hyper-parameters --
img_h = 8       # input image height
img_w = 8       # input image width
img_ch = 1      # input image channels
wb_width = 0.1  # spread of the initial weights and biases
eta = 0.01      # learning rate
epoch = 50
batch_size = 8
interval = 10   # progress-report interval, in epochs
n_sample = 200  # number of samples used for error estimation
# -- im2col --
def im2col(images, flt_h, flt_w, out_h, out_w, stride, pad):
    """Rearrange image patches into columns for matrix-based convolution.

    images: (batch, channels, height, width) array.
    Returns a (channels*flt_h*flt_w, batch*out_h*out_w) matrix.
    """
    n_bt, n_ch, img_h, img_w = images.shape
    # Zero-pad only the two spatial axes
    img_pad = np.pad(images, [(0,0), (0,0), (pad, pad), (pad, pad)], "constant")
    cols = np.zeros((n_bt, n_ch, flt_h, flt_w, out_h, out_w))
    for h in range(flt_h):
        h_lim = h + stride*out_h
        for w in range(flt_w):
            w_lim = w + stride*out_w
            # One strided slice gathers this filter offset at every output position
            cols[:, :, h, w, :, :] = img_pad[:, :, h:h_lim:stride, w:w_lim:stride]
    cols = cols.transpose(1, 2, 3, 0, 4, 5).reshape(n_ch*flt_h*flt_w, n_bt*out_h*out_w)
    return cols
# -- col2im --
def col2im(cols, img_shape, flt_h, flt_w, out_h, out_w, stride, pad):
    """Fold an im2col matrix back into image space; overlaps accumulate."""
    n_bt, n_ch, img_h, img_w = img_shape
    # Restore the 6-D patch layout: (batch, channel, fh, fw, oh, ow)
    cols = cols.reshape(n_ch, flt_h, flt_w, n_bt, out_h, out_w, ).transpose(3, 0, 1, 2, 4, 5)
    # Extra stride-1 rows/columns keep the strided slices below in bounds
    images = np.zeros((n_bt, n_ch, img_h+2*pad+stride-1, img_w+2*pad+stride-1))
    for h in range(flt_h):
        h_lim = h + stride*out_h
        for w in range(flt_w):
            w_lim = w + stride*out_w
            # Scatter-add each filter offset back to its source pixels
            images[:, :, h:h_lim:stride, w:w_lim:stride] += cols[:, :, h, w, :, :]
    # Crop the padding off before returning
    return images[:, :, pad:img_h+pad, pad:img_w+pad]
# -- Convolution layer --
class ConvLayer:
    # n_bt: batch size, x_ch: input channels, x_h: input height, x_w: input width
    # n_flt: number of filters, flt_h: filter height, flt_w: filter width
    # stride: stride, pad: padding
    # y_ch: output channels, y_h: output height, y_w: output width
    def __init__(self, x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad):
        # Keep all the shape parameters together
        self.params = (x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad)
        # Filters and biases start as small Gaussian noise
        self.w = wb_width * np.random.randn(n_flt, x_ch, flt_h, flt_w)
        self.b = wb_width * np.random.randn(1, n_flt)
        # Output image size (standard convolution output arithmetic)
        self.y_ch = n_flt  # one output channel per filter
        self.y_h = (x_h - flt_h + 2*pad) // stride + 1  # output height
        self.y_w = (x_w - flt_w + 2*pad) // stride + 1  # output width
        # AdaGrad accumulators; the 1e-8 avoids division by zero in update()
        self.h_w = np.zeros((n_flt, x_ch, flt_h, flt_w)) + 1e-8
        self.h_b = np.zeros((1, n_flt)) + 1e-8
    def forward(self, x):
        """Convolution as a matrix product, then bias and ReLU."""
        n_bt = x.shape[0]
        x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Unfold the input and flatten the filters so convolution becomes np.dot
        self.cols = im2col(x, flt_h, flt_w, y_h, y_w, stride, pad)
        self.w_col = self.w.reshape(n_flt, x_ch*flt_h*flt_w)
        # Matrix product + bias, then restore (batch, channel, height, width)
        u = np.dot(self.w_col, self.cols).T + self.b
        self.u = u.reshape(n_bt, y_h, y_w, y_ch).transpose(0, 3, 1, 2)
        self.y = np.where(self.u <= 0, 0, self.u)  # ReLU
    def backward(self, grad_y):
        """Compute filter/bias gradients and the gradient w.r.t. the input."""
        n_bt = grad_y.shape[0]
        x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # ReLU derivative gates the incoming gradient
        delta = grad_y * np.where(self.u <= 0, 0, 1)
        delta = delta.transpose(0,2,3,1).reshape(n_bt*y_h*y_w, y_ch)
        # Filter and bias gradients (uses the cols cached on the forward pass)
        grad_w = np.dot(self.cols, delta)
        self.grad_w = grad_w.T.reshape(n_flt, x_ch, flt_h, flt_w)
        self.grad_b = np.sum(delta, axis=0)
        # Input gradient: project back through the filters, then fold with col2im
        grad_cols = np.dot(delta, self.w_col)
        x_shape = (n_bt, x_ch, x_h, x_w)
        self.grad_x = col2im(grad_cols.T, x_shape, flt_h, flt_w, y_h, y_w, stride, pad)
    def update(self, eta):
        """AdaGrad parameter update."""
        self.h_w += self.grad_w * self.grad_w
        self.w -= eta / np.sqrt(self.h_w) * self.grad_w
        self.h_b += self.grad_b * self.grad_b
        self.b -= eta / np.sqrt(self.h_b) * self.grad_b
# -- Max-pooling layer --
class PoolingLayer:
    # n_bt: batch size, x_ch: input channels, x_h: input height, x_w: input width
    # pool: pooling window size, pad: padding
    # y_ch: output channels, y_h: output height, y_w: output width
    def __init__(self, x_ch, x_h, x_w, pool, pad):
        # Keep the shape parameters together
        self.params = (x_ch, x_h, x_w, pool, pad)
        # Output image size: channels unchanged; spatial dims divided by pool,
        # rounded up when the input is not an exact multiple
        self.y_ch = x_ch  # output channels
        self.y_h = x_h//pool if x_h%pool==0 else x_h//pool+1  # output height
        self.y_w = x_w//pool if x_w%pool==0 else x_w//pool+1  # output width
    def forward(self, x):
        """Take the maximum over each pool x pool region."""
        n_bt = x.shape[0]
        x_ch, x_h, x_w, pool, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Unfold so that each row holds one pooling window
        cols = im2col(x, pool, pool, y_h, y_w, pool, pad)
        cols = cols.T.reshape(n_bt*y_h*y_w*x_ch, pool*pool)
        # Max pooling
        y = np.max(cols, axis=1)
        self.y = y.reshape(n_bt, y_h, y_w, x_ch).transpose(0, 3, 1, 2)
        # Remember which element won; backward routes gradients through it
        self.max_index = np.argmax(cols, axis=1)
    def backward(self, grad_y):
        """Route each gradient back to the position that produced the max."""
        n_bt = grad_y.shape[0]
        x_ch, x_h, x_w, pool, pad = self.params
        y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
        # Move channels last to line up with the forward-pass layout
        grad_y = grad_y.transpose(0, 2, 3, 1)
        # Sparse matrix: the gradient goes only into each window's argmax slot
        grad_cols = np.zeros((pool*pool, grad_y.size))
        grad_cols[self.max_index.reshape(-1), np.arange(grad_y.size)] = grad_y.reshape(-1)
        grad_cols = grad_cols.reshape(pool, pool, n_bt, y_h, y_w, y_ch)
        grad_cols = grad_cols.transpose(5,0,1,2,3,4)
        grad_cols = grad_cols.reshape( y_ch*pool*pool, n_bt*y_h*y_w)
        # Fold the columns back into image space
        x_shape = (n_bt, x_ch, x_h, x_w)
        self.grad_x = col2im(grad_cols, x_shape, pool, pool, y_h, y_w, pool, pad)
# -- Ancestor class of the fully connected layers --
class BaseLayer:
    def __init__(self, n_upper, n):
        # Weights and biases start as small Gaussian noise
        self.w = wb_width * np.random.randn(n_upper, n)
        self.b = wb_width * np.random.randn(n)
        # AdaGrad accumulators; 1e-8 guards the division in update()
        self.h_w = np.zeros(( n_upper, n)) + 1e-8
        self.h_b = np.zeros(n) + 1e-8
    def update(self, eta):
        """AdaGrad step: per-parameter rate shrinks with squared-gradient history."""
        self.h_w += self.grad_w * self.grad_w
        self.w -= eta / np.sqrt(self.h_w) * self.grad_w
        self.h_b += self.grad_b * self.grad_b
        self.b -= eta / np.sqrt(self.h_b) * self.grad_b
# -- Fully connected hidden layer --
class MiddleLayer(BaseLayer):
    def forward(self, x):
        # Affine transform followed by ReLU
        self.x = x
        self.u = np.dot(x, self.w) + self.b
        self.y = np.where(self.u <= 0, 0, self.u)
    def backward(self, grad_y):
        # ReLU derivative gates the incoming gradient
        delta = grad_y * np.where(self.u <= 0, 0, 1)
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)
# -- Fully connected output layer --
class OutputLayer(BaseLayer):
    def forward(self, x):
        # Affine transform followed by softmax (row-wise normalization)
        self.x = x
        u = np.dot(x, self.w) + self.b
        self.y = np.exp(u)/np.sum(np.exp(u), axis=1).reshape(-1, 1)
    def backward(self, t):
        # Softmax + cross-entropy: the delta is simply (prediction - target)
        delta = self.y - t
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)
# -- Dropout --
class Dropout:
    def __init__(self, dropout_ratio):
        # Probability of zeroing each unit during training.
        self.dropout_ratio = dropout_ratio
    def forward(self, x, is_train):
        if is_train:
            # Fresh binary mask every call: 1 keeps a unit, 0 drops it.
            mask = np.where(np.random.rand(*x.shape) > self.dropout_ratio, 1, 0)
            self.dropout = mask
            self.y = x * mask
        else:
            # At inference time, scale by the keep probability instead of masking.
            self.y = (1 - self.dropout_ratio) * x
    def backward(self, grad_y):
        # Gradients flow only through the units that were kept.
        self.grad_x = grad_y * self.dropout
# -- Build the network: conv -> conv -> pool -> FC(+dropout) x2 -> softmax --
cl_1 = ConvLayer(img_ch, img_h, img_w, 10, 3, 3, 1, 1)
cl_2 = ConvLayer(cl_1.y_ch, cl_1.y_h, cl_1.y_w, 10, 3, 3, 1, 1)
pl_1 = PoolingLayer(cl_2.y_ch, cl_2.y_h, cl_2.y_w, 2, 0)
n_fc_in = pl_1.y_ch * pl_1.y_h * pl_1.y_w  # flattened size fed to the FC part
ml_1 = MiddleLayer(n_fc_in, 200)
dr_1 = Dropout(0.5)
ml_2 = MiddleLayer(200, 200)
dr_2 = Dropout(0.5)
ol_1 = OutputLayer(200, 10)

# -- Forward propagation --
def forward_propagation(x, is_train):
    # x arrives flattened; restore the (batch, channel, height, width) layout
    n_bt = x.shape[0]
    images = x.reshape(n_bt, img_ch, img_h, img_w)
    cl_1.forward(images)
    cl_2.forward(cl_1.y)
    pl_1.forward(cl_2.y)
    # Flatten the pooled feature maps for the fully connected layers
    fc_input = pl_1.y.reshape(n_bt, -1)
    ml_1.forward(fc_input)
    dr_1.forward(ml_1.y, is_train)  # dropout active only while training
    ml_2.forward(dr_1.y)
    dr_2.forward(ml_2.y, is_train)
    ol_1.forward(dr_2.y)

# -- Backpropagation (layers visited in reverse order) --
def backpropagation(t):
    n_bt = t.shape[0]
    ol_1.backward(t)
    dr_2.backward(ol_1.grad_x)
    ml_2.backward(dr_2.grad_x)
    dr_1.backward(ml_2.grad_x)
    ml_1.backward(dr_1.grad_x)
    # Un-flatten the gradient back into feature-map shape for the pooling layer
    grad_img = ml_1.grad_x.reshape(n_bt, pl_1.y_ch, pl_1.y_h, pl_1.y_w)
    pl_1.backward(grad_img)
    cl_2.backward(pl_1.grad_x)
    cl_1.backward(cl_2.grad_x)

# -- Update every trainable layer (dropout has no parameters) --
def uppdate_wb():  # NOTE(review): name is a typo for update_wb; kept for existing callers
    cl_1.update(eta)
    cl_2.update(eta)
    ml_1.update(eta)
    ml_2.update(eta)
    ol_1.update(eta)

# -- Cross-entropy error --
def get_error(t, batch_size):
    # 1e-7 prevents log(0)
    return -np.sum(t * np.log(ol_1.y + 1e-7)) / batch_size

# -- Forward-propagate a random sample (inference mode: is_train=False) --
def forward_sample(inp, correct, n_sample):
    index_rand = np.arange(len(correct))
    np.random.shuffle(index_rand)
    index_rand = index_rand[:n_sample]
    x = inp[index_rand, :]
    t = correct[index_rand, :]
    forward_propagation(x, False)
    return x, t
# -- Error-history records --
train_error_x = []
train_error_y = []
test_error_x = []
test_error_y = []

# -- Training loop --
n_batch = n_train // batch_size  # mini-batches per epoch
for i in range(epoch):
    # -- Estimate the current error on random samples --
    x, t = forward_sample(input_train, correct_train, n_sample)
    error_train = get_error(t, n_sample)
    x, t = forward_sample(input_test, correct_test, n_sample)
    error_test = get_error(t, n_sample)
    # -- Record the errors --
    train_error_x.append(i)
    train_error_y.append(error_train)
    test_error_x.append(i)
    test_error_y.append(error_test)
    # -- Progress report --
    if i%interval == 0:
        print("Epoch:" + str(i) + "/" + str(epoch),
              "Error_train:" + str(error_train),
              "Error_test:" + str(error_test))
    # -- Learning: SGD over shuffled mini-batches (dropout enabled) --
    index_rand = np.arange(n_train)
    np.random.shuffle(index_rand)
    for j in range(n_batch):
        mb_index = index_rand[j*batch_size : (j+1)*batch_size]
        x = input_train[mb_index, :]
        t = correct_train[mb_index, :]
        forward_propagation(x, True)
        backpropagation(t)
        uppdate_wb()

# -- Plot the recorded errors --
plt.plot(train_error_x, train_error_y, label="Train")
plt.plot(test_error_x, test_error_y, label="Test")
plt.legend()
plt.xlabel("Epochs")
plt.ylabel("Error")
plt.show()

# -- Measure accuracy on the full train and test sets --
x, t = forward_sample(input_train, correct_train, n_train)
count_train = np.sum(np.argmax(ol_1.y, axis=1) == np.argmax(t, axis=1))
x, t = forward_sample(input_test, correct_test, n_test)
count_test = np.sum(np.argmax(ol_1.y, axis=1) == np.argmax(t, axis=1))
print("Accuracy Train:", str(count_train/n_train*100) + "%",
      "Accuracy Test:", str(count_test/n_test*100) + "%")
网络层次加深后,效果更好
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?