Python小练习:卷积层与反卷积层
作者:凯鲁嘎吉 - 博客园 http://www.cnblogs.com/kailugaji/
使用PyTorch中的nn.Conv2d与nn.ConvTranspose2d模块来创建卷积层与反卷积层(转置卷积层),构建卷积神经网络(CNN)编码器与解码器(仅含卷积层/反卷积层与激活函数)。
1. 卷积层代码
1.1 cnn_test.py
# -*- coding: utf-8 -*-
# Author: 凯鲁嘎吉 Coral Gajic
# https://www.cnblogs.com/kailugaji/
# Python exercise: building convolutional layers
import os
from warnings import simplefilter

import torch
import torch.nn as nn


def _conv_out_size(size, kernel, stride, pad, dilation):
    """Return the output length of one spatial axis after a Conv2d layer."""
    # Integer form of the Conv2d shape rule:
    # floor((size + 2*pad - dilation*(kernel-1) - 1) / stride) + 1
    return (size + 2 * pad - dilation * (kernel - 1) - 1) // stride + 1


def build_CNN(input_size):
    """Build a convolution-only CNN (Conv2d + activation pairs).

    Args:
        input_size: Indexable of four ints ``(N, C, H, W)``. Only ``C``, ``H``
            and ``W`` (indices 1-3) are read.

    Returns:
        A tuple ``(model, output_shape)`` where ``model`` is an
        ``nn.Sequential`` of five Conv2d/activation pairs and ``output_shape``
        is the per-sample output shape ``(channels, height, width)``.
    """
    last_h = input_size[2]
    last_w = input_size[3]
    # Each entry holds the positional arguments of nn.Conv2d:
    #   1. in_channels  - number of channels of the input image
    #   2. out_channels - number of channels produced by the convolution
    #   3. kernel_size  - size of the convolution kernel
    #   4. stride       - stride of the convolution (default 1)
    #   5. padding      - padding added to all four sides (default 0)
    #   6. dilation     - spacing between kernel elements (default 1)
    cnn_kernels = [[input_size[1], 256, 4],
                   [256, 128, 3, 2, 1, 1],
                   [128, 64, 3, 2, 1, 1],
                   [64, 32, 3, 2, 1, 1],
                   [32, 3, 5, 2, 2, 1]]
    # One activation per convolution layer (five layers after the input).
    act_func = [nn.LeakyReLU(), nn.Tanh(), nn.ReLU(), nn.ELU(), nn.CELU()]
    # in_c, out_c, kernel, stride, pad, dilation
    default = [None, None, None, 1, 0, 1]

    module_list = []
    for spec, activation in zip(cnn_kernels, act_func):
        # Fill in whichever trailing arguments the spec left out.
        spec = spec + default[len(spec):]
        _, _, kernel, stride, pad, dilation = spec
        last_h = _conv_out_size(last_h, kernel, stride, pad, dilation)
        last_w = _conv_out_size(last_w, kernel, stride, pad, dilation)
        module_list.append(nn.Conv2d(*spec))
        module_list.append(activation)
    output_shape = (cnn_kernels[-1][1], last_h, last_w)
    return nn.Sequential(*module_list), output_shape


def _run_image_demo(path="./img"):
    """Feed every image found in ``path`` through the CNN and plot the result."""
    # Image / plotting dependencies are only needed by this demo, so they are
    # imported here; build_CNN stays importable with torch alone.
    import imageio
    import matplotlib.pyplot as plt
    import torchvision.transforms as transforms
    from PIL import Image

    # Sorted so the subplot order is reproducible (os.listdir order is arbitrary).
    file_names = sorted(os.listdir(path))  # e.g. ['1.jpg', '2.jpg', '3.jpg']
    if not file_names:
        raise FileNotFoundError(f"no images found in {path!r}")
    len_dir = len(file_names)

    to_tensor = transforms.ToTensor()  # image -> tensor laid out as (C, H, W)
    to_pil = transforms.ToPILImage()
    plt.figure(figsize=(24, 6))  # canvas layout

    tensors = []
    for column, name in enumerate(file_names, start=1):
        image_pad = imageio.imread(os.path.join(path, name))
        image_pad = Image.fromarray(image_pad).resize((600, 300))  # resize the image
        tensors.append(to_tensor(image_pad))
        plt.subplot(2, len_dir, column)  # first row: the input images
        plt.axis('off')
        plt.imshow(image_pad)

    # Stack the per-image tensors directly into one (N, C, H, W) batch instead
    # of round-tripping through a list of numpy arrays.
    outs = torch.stack(tensors)
    model, output_shape = build_CNN(outs.shape)
    print('网络结构:\n', model)
    print('单个样本的输出维度:', output_shape)
    print('输入数据维度:', outs.shape)
    with torch.no_grad():  # inference only, no gradients needed
        y = model(outs)
    print('实际输出维度:', y.shape)

    for column, feature_map in enumerate(y, start=1):  # show the results
        plt.subplot(2, len_dir, len_dir + column)  # second row: the outputs
        plt.imshow(to_pil(feature_map))
        plt.axis('off')
    plt.savefig('CNN_fig.png', bbox_inches='tight', pad_inches=0.0, dpi=500)
    plt.show()


def _run_random_demo():
    """Feed a random batch through the CNN and print the shapes."""
    # 10 images, 3 channels, each image is 84 x 84.
    input_size = [10, 3, 84, 84]
    model, output_shape = build_CNN(input_size)
    x = torch.randn(input_size)
    print('输入数据维度:', x.shape)
    y = model(x)
    print('实际输出维度:', y.shape)


if __name__ == "__main__":
    simplefilter(action="ignore", category=UserWarning)
    # Image example:
    _run_image_demo("./img")
    print('-------------------------------------------------')
    # Random-data example:
    _run_random_demo()
1.2 结果
D:\ProgramData\Anaconda3\python.exe "D:/Python code/2023.3 exercise/Neural Network/cnn_test.py" 网络结构: Sequential( (0): Conv2d(3, 256, kernel_size=(4, 4), stride=(1, 1)) (1): LeakyReLU(negative_slope=0.01) (2): Conv2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (3): Tanh() (4): Conv2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (5): ReLU() (6): Conv2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (7): ELU(alpha=1.0) (8): Conv2d(32, 3, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2)) (9): CELU(alpha=1.0) ) 单个样本的输出维度: (3, 19, 38) 输入数据维度: torch.Size([5, 3, 300, 600]) 实际输出维度: torch.Size([5, 3, 19, 38]) ------------------------------------------------- 输入数据维度: torch.Size([10, 3, 84, 84]) 实际输出维度: torch.Size([10, 3, 6, 6]) Process finished with exit code 0
2. 反卷积层代码
2.1 cnn_trans_test.py
# -*- coding: utf-8 -*-
# Author: 凯鲁嘎吉 Coral Gajic
# https://www.cnblogs.com/kailugaji/
# Python exercise: building transposed-convolution (deconvolution) layers
import os
from warnings import simplefilter

import torch
import torch.nn as nn


def build_CNN_trans(input_size):
    """Build a decoder made only of ConvTranspose2d + activation pairs.

    Args:
        input_size: Indexable of four ints ``(N, C, H, W)``. Only ``C``, ``H``
            and ``W`` (indices 1-3) are read.

    Returns:
        A tuple ``(model, output_shape)`` where ``model`` is an
        ``nn.Sequential`` of five ConvTranspose2d/activation pairs and
        ``output_shape`` is the per-sample output shape
        ``(channels, height, width)``.
    """
    last_h = input_size[2]
    last_w = input_size[3]
    # Each entry holds the leading positional arguments of nn.ConvTranspose2d:
    # in_channels, out_channels, kernel_size, stride, padding, output_padding.
    cnn_trans_kernels = [[input_size[1], 256, 4],
                         [256, 128, 3, 2, 1, 1],
                         [128, 64, 3, 2, 1, 1],
                         [64, 32, 3, 2, 1, 1],
                         [32, 3, 5, 2, 2, 1]]
    # One activation per layer.
    act_func = [nn.LeakyReLU(), nn.Tanh(), nn.ReLU(), nn.ELU(), nn.CELU()]
    # in_c, out_c, kernel, stride, pad, outpad
    default = [None, None, None, 1, 0, 0]

    module_list = []
    for spec, activation in zip(cnn_trans_kernels, act_func):
        # Fill in whichever trailing arguments the spec left out.
        spec = spec + default[len(spec):]
        _, _, kernel, stride, pad, out_pad = spec
        # Transposed-convolution shape rule (dilation is left at 1 here):
        # (size - 1) * stride - 2 * pad + kernel + output_padding
        last_h = (last_h - 1) * stride - 2 * pad + kernel + out_pad
        last_w = (last_w - 1) * stride - 2 * pad + kernel + out_pad
        # Transposed convolution, not an ordinary convolution.
        module_list.append(nn.ConvTranspose2d(*spec))
        module_list.append(activation)
    output_shape = (cnn_trans_kernels[-1][1], last_h, last_w)
    return nn.Sequential(*module_list), output_shape


def _run_image_demo(path="./img"):
    """Feed every image found in ``path`` through the decoder and plot the result."""
    # Image / plotting dependencies are only needed by this demo, so they are
    # imported here; build_CNN_trans stays importable with torch alone.
    import imageio
    import matplotlib.pyplot as plt
    import torchvision.transforms as transforms
    from PIL import Image

    # Sorted so the subplot order is reproducible (os.listdir order is arbitrary).
    file_names = sorted(os.listdir(path))  # e.g. ['1.jpg', '2.jpg', '3.jpg']
    if not file_names:
        raise FileNotFoundError(f"no images found in {path!r}")
    len_dir = len(file_names)

    to_tensor = transforms.ToTensor()  # image -> tensor laid out as (C, H, W)
    to_pil = transforms.ToPILImage()
    plt.figure(figsize=(24, 6))  # canvas layout

    tensors = []
    for column, name in enumerate(file_names, start=1):
        image_pad = imageio.imread(os.path.join(path, name))
        image_pad = Image.fromarray(image_pad).resize((38, 19))  # resize the image
        tensors.append(to_tensor(image_pad))
        plt.subplot(2, len_dir, column)  # first row: the input images
        plt.axis('off')
        plt.imshow(image_pad)

    # Stack the per-image tensors directly into one (N, C, H, W) batch instead
    # of round-tripping through a list of numpy arrays.
    outs = torch.stack(tensors)
    model, output_shape = build_CNN_trans(outs.shape)
    print('网络结构:\n', model)
    print('单个样本的输出维度:', output_shape)
    print('输入数据维度:', outs.shape)
    with torch.no_grad():  # inference only, no gradients needed
        y = model(outs)
    print('实际输出维度:', y.shape)

    for column, feature_map in enumerate(y, start=1):  # show the results
        plt.subplot(2, len_dir, len_dir + column)  # second row: the outputs
        plt.imshow(to_pil(feature_map))
        plt.axis('off')
    plt.savefig('CNN_trans_fig.png', bbox_inches='tight', pad_inches=0.0, dpi=500)
    plt.show()


def _run_random_demo():
    """Feed a random batch through the decoder and print the shapes."""
    input_size = [10, 3, 6, 6]
    model, output_shape = build_CNN_trans(input_size)
    x = torch.randn(input_size)
    print('输入数据维度:', x.shape)
    y = model(x)
    print('实际输出维度:', y.shape)


if __name__ == "__main__":
    simplefilter(action="ignore", category=UserWarning)
    # Image example:
    _run_image_demo("./img")
    print('-------------------------------------------------')
    # Random-data example:
    _run_random_demo()
2.2 结果
D:\ProgramData\Anaconda3\python.exe "D:/Python code/2023.3 exercise/Neural Network/cnn_trans_test.py" 网络结构: Sequential( (0): ConvTranspose2d(3, 256, kernel_size=(4, 4), stride=(1, 1)) (1): LeakyReLU(negative_slope=0.01) (2): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) (3): Tanh() (4): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) (5): ReLU() (6): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)) (7): ELU(alpha=1.0) (8): ConvTranspose2d(32, 3, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), output_padding=(1, 1)) (9): CELU(alpha=1.0) ) 单个样本的输出维度: (3, 352, 656) 输入数据维度: torch.Size([5, 3, 19, 38]) 实际输出维度: torch.Size([5, 3, 352, 656]) ------------------------------------------------- 输入数据维度: torch.Size([10, 3, 6, 6]) 实际输出维度: torch.Size([10, 3, 144, 144]) Process finished with exit code 0
补充:卷积神经网络的调用:
# -*- coding: utf-8 -*-
# Author: 凯鲁嘎吉 Coral Gajic
# https://www.cnblogs.com/kailugaji/
# Python exercise: wrapping a stack of convolutional layers in an nn.Module
import torch
import torch.nn as nn
import numpy as np

# Kernel specs used when the caller does not supply any. The -1 placeholder
# for the first layer's in_channels is replaced by input_shape[0] in CNN.
_DEFAULT_CNN_KERNELS = ((-1, 32, 3, 2), (32, 32, 3, 1), (32, 32, 3, 1), (32, 32, 3, 1))


def build_cnn(
        input_shape,
        cnn_kernels
):
    """Build a Conv2d + ReLU stack and compute its per-sample output shape.

    Args:
        input_shape: Indexable ``(C, H, W)``; only ``H`` and ``W`` (indices 1
            and 2) are read here.
        cnn_kernels: Sequence of layer specs. Each spec lists the leading
            positional arguments of ``nn.Conv2d``
            (in_channels, out_channels, kernel_size[, stride[, padding]]).

    Returns:
        A tuple ``(model, output_shape)`` with the ``nn.Sequential`` model and
        the ``(channels, height, width)`` shape of one output sample.
    """
    module_list = []
    last_h = input_shape[1]
    last_w = input_shape[2]
    default = [None, None, None, 1, 0]  # in_c, out_c, kernel, stride, pad
    for spec in cnn_kernels:
        # Copy to a list so tuple specs work and the caller's spec is untouched.
        spec = list(spec)
        spec = spec + default[len(spec):]
        kernel, stride, pad = spec[2], spec[3], spec[4]
        # (h + 2*pad - k) / stride + 1, rounded down
        last_h = (last_h + 2 * pad - kernel) // stride + 1
        last_w = (last_w + 2 * pad - kernel) // stride + 1
        module_list.append(nn.Conv2d(*spec))
        # A fresh ReLU per layer, so any number of layers is supported.
        module_list.append(nn.ReLU())
    output_shape = (cnn_kernels[-1][1], last_h, last_w)
    return nn.Sequential(*module_list), output_shape


class CNN(nn.Module):
    """Convolutional encoder that scales its input by 1/255 before the conv stack."""

    def __init__(
            self,
            input_shape,
            output_size=None,
            cnn_kernels=None
    ):
        """Create the encoder.

        Args:
            input_shape: ``(C, H, W)`` of a single input sample.
            output_size: Optional expected flattened output size; when given
                it must equal the flattened size of the conv output.
            cnn_kernels: Optional layer specs as accepted by ``build_cnn``.
                The first spec's in_channels is overwritten with
                ``input_shape[0]``. Defaults to four 32-channel 3x3 layers.
        """
        super().__init__()
        if cnn_kernels is None:
            cnn_kernels = _DEFAULT_CNN_KERNELS
        # Work on a private copy: the in_channels patch below must not leak
        # into the caller's list or into other CNN instances.
        cnn_kernels = [list(spec) for spec in cnn_kernels]
        cnn_kernels[0][0] = input_shape[0]
        self.cnn_kernels = cnn_kernels
        self.num_layers = len(cnn_kernels)
        self.input_shape = input_shape
        self.module, self.latent_shape = build_cnn(
            input_shape,
            cnn_kernels
        )
        # Plain int rather than a numpy scalar.
        self.latent_size = int(np.prod(self.latent_shape))
        if output_size is not None:
            assert self.latent_size == output_size, (self.latent_size, output_size)
        else:
            output_size = self.latent_size
        self.output_size = output_size
        self.output_shape = (output_size,)

    def process(self, obs):
        """Run ``forward`` and flatten each sample's feature map to a vector."""
        h = self.forward(obs)
        return h.view(h.size(0), -1)

    def forward(self, obs):
        """Divide ``obs`` by 255 and apply the conv stack."""
        obs = obs / 255.0
        return self.module(obs)

    def process_feature_map(self, obs):
        """Return the un-flattened feature map (same as ``forward``)."""
        return self.forward(obs)

    def process_traj(self, obs):
        """Encode a 5-D batch of sequences, one vector per step.

        The first two axes ``(B, L)`` are merged, every step is encoded, and
        the result is reshaped to ``(B, L, output_size)``.
        """
        B, L, _, _, _ = obs.shape
        # reshape (not view) so non-contiguous inputs are accepted as well.
        obs = obs.reshape(B * L, *self.input_shape)
        h = self.forward(obs)
        return h.reshape(B, L, self.output_size)


def _demo():
    """Print the model structure and the output shape of every entry point."""
    # -------------------------------
    data = torch.rand([10, 3, 84, 84])
    # 10 samples, each of size (3, 84, 84)
    cnn = CNN([3, 84, 84])
    # Output shape is [32, 35, 35]
    # 32 * 35 * 35 = 39200
    print('CNN模型结构:\n', cnn.module)
    print('CNN输入尺寸:', cnn.input_shape)
    print('CNN输出尺寸:', cnn.latent_shape)
    data_process = cnn.process(data)
    print('process:', data_process.shape)
    # -------------------------------
    data_forward = cnn(data)  # calling the module dispatches to forward()
    print('forward:', data_forward.shape)
    # -------------------------------
    data_process_feature_map = cnn.process_feature_map(data)
    print('process_feature_map:', data_process_feature_map.shape)
    # ----------------------------------------------------------------
    data = torch.rand([10, 6, 3, 84, 84])
    data_process_traj = cnn.process_traj(data)
    print('process_traj:', data_process_traj.shape)


if __name__ == "__main__":
    _demo()
结果:
D:\ProgramData\Anaconda3\python.exe "D:/Python code/2023.3 exercise/Neural Network/CNN_class_test.py" CNN模型结构: Sequential( (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2)) (1): ReLU() (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)) (3): ReLU() (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)) (5): ReLU() (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)) (7): ReLU() ) CNN输入尺寸: [3, 84, 84] CNN输出尺寸: (32, 35, 35) process: torch.Size([10, 39200]) forward: torch.Size([10, 32, 35, 35]) process_feature_map: torch.Size([10, 32, 35, 35]) process_traj: torch.Size([10, 6, 39200]) Process finished with exit code 0
完成。