[Paddle学习笔记][06][图像分类-动态图]
说明:
本例程使用动态图实现LeNet、AlexNet、VGGNet、GoogLeNet和ResNet,并用它们完成iChallenge-PM病理性近视数据集的图像分类任务。
实验代码:
相关类库
import os import time import random import numpy as np import matplotlib.pyplot as plt %matplotlib inline from PIL import Image import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm
数据处理
# Locations of one normal sample and one pathologic-myopia (PM) sample.
train_set = './data/PALM-Training400/'
file1, file2 = 'N0012.jpg', 'P0095.jpg'

# Decode both samples into NumPy arrays and report their shapes.
image1 = np.array(Image.open(os.path.join(train_set, file1)))
image2 = np.array(Image.open(os.path.join(train_set, file2)))
print('image1 shape {}, image2 shape {}'.format(image1.shape, image2.shape))

# Draw the two samples side by side in a 1x2 grid.
plt.figure(figsize=(16, 8))
for position, title, pixels in ((121, 'Normal', image1), (122, 'PM', image2)):
    fig = plt.subplot(position)
    fig.set_title(title, fontsize=20)
    plt.imshow(pixels)
plt.show()
def load_image(image_path):
    """Load an image file as a normalized CHW float32 array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 array of shape (3, 224, 224) with values in [-1.0, 1.0].
    """
    # The context manager releases the file handle once the pixels are decoded.
    with Image.open(image_path) as image_file:
        # Force three channels so grayscale/RGBA files still yield [H, W, 3].
        image = image_file.convert('RGB')
        # LANCZOS is the filter formerly exposed as ANTIALIAS, a name that
        # newer Pillow releases no longer provide.
        image = image.resize((224, 224), Image.LANCZOS)
    # Reorder [H, W, C] -> [C, H, W].
    image = np.transpose(np.asarray(image), (2, 0, 1)).astype('float32')
    # Map pixel values from [0, 255] to [-1.0, 1.0].
    image = image / 255.0 * 2.0 - 1.0
    return image


def _label_from_name(name):
    """Derive the class label from the first letter of a training file name."""
    if name.startswith(('H', 'N')):
        return 0  # 'H' = high myopia, 'N' = normal vision -> label 0
    if name.startswith('P'):
        return 1  # 'P' = pathologic myopia -> label 1
    raise ValueError('Unexpected file name: {}'.format(name))


def _make_batch(batch_image, batch_label):
    """Stack per-sample images and labels into (float32, int64 column) arrays."""
    array_image = np.array(batch_image).astype('float32')
    array_label = np.array(batch_label).astype('int64').reshape(-1, 1)
    return array_image, array_label


def train_loader(train_set, batch_size=10):
    """Create a reader that yields shuffled training batches.

    Args:
        train_set: Directory holding the training images. The first letter of
            each file name encodes the label (H/N -> 0, P -> 1).
        batch_size: Number of samples per batch; the final batch may be smaller.

    Returns:
        A zero-argument generator function. Each call reshuffles the file list
        and yields (images, labels) tuples: images is float32 with shape
        (B, 3, 224, 224), labels is int64 with shape (B, 1).

    Raises:
        ValueError: Raised while iterating if a file name does not start with
            H, N or P.
    """
    image_name = os.listdir(train_set)

    def reader():
        # Reshuffle on every pass so each epoch sees a different order.
        random.shuffle(image_name)
        batch_image = []
        batch_label = []
        for name in image_name:
            # Validate the name before the (expensive) image decode.
            label = _label_from_name(name)
            image = load_image(os.path.join(train_set, name))
            batch_image.append(image)
            batch_label.append(label)
            if len(batch_image) == batch_size:
                yield _make_batch(batch_image, batch_label)
                batch_image = []
                batch_label = []
        # Emit whatever is left as a final, smaller batch.
        if batch_image:
            yield _make_batch(batch_image, batch_label)

    return reader


def valid_loader(valid_set, valid_csv, batch_size=10):
    """Create a reader that yields validation batches in file order.

    Args:
        valid_set: Directory holding the validation images.
        valid_csv: Comma-separated label file with one header row; column 1 is
            the image file name and column 2 is the integer label.
        batch_size: Number of samples per batch; the final batch may be smaller.

    Returns:
        A zero-argument generator function yielding (images, labels) tuples:
        images is float32 with shape (B, 3, 224, 224), labels is int64 with
        shape (B, 1).
    """
    # Read the label file eagerly and close it right away.
    with open(valid_csv) as csv_file:
        data_list = csv_file.readlines()

    def reader():
        batch_image = []
        batch_label = []
        # Skip the header row.
        for line in data_list[1:]:
            line = line.strip()
            if not line:
                continue  # tolerate blank/trailing lines in the label file
            fields = line.split(',')
            name = fields[1]
            label = int(fields[2])
            image = load_image(os.path.join(valid_set, name))
            batch_image.append(image)
            batch_label.append(label)
            if len(batch_image) == batch_size:
                yield _make_batch(batch_image, batch_label)
                batch_image = []
                batch_label = []
        # Emit whatever is left as a final, smaller batch.
        if batch_image:
            yield _make_batch(batch_image, batch_label)

    return reader
# Smoke-test the training reader: pull one batch and report its shapes.
train_set = './data/PALM-Training400/'
train_reader = train_loader(train_set, 10)
train_data = next(train_reader())
train_image_shape, train_label_shape = train_data[0].shape, train_data[1].shape
print('train_data: image shape {}, label shape:{}'.format(train_image_shape, train_label_shape))

# Smoke-test the validation reader the same way.
valid_set = './data/PALM-Validation400/'
valid_csv = './data/PM_Label_and_Fovea_Location.csv'
valid_reader = valid_loader(valid_set, valid_csv, 10)
valid_data = next(valid_reader())
valid_image_shape, valid_label_shape = valid_data[0].shape, valid_data[1].shape
print('valid_data: image shape {}, label shape:{}'.format(valid_image_shape, valid_label_shape))
模型设计
# LeNet model
class LeNet(fluid.dygraph.Layer):
    """LeNet-style CNN that outputs 2-class softmax probabilities.

    Shape comments assume an N*3*224*224 input; every spatial size follows
    out = (in + 2P - K) / S + 1.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = Conv2D(
            num_channels=3, num_filters=6, filter_size=5, act='sigmoid')    # -> N*6*220*220
        self.pool1 = Pool2D(
            pool_size=2, pool_stride=2, pool_type='max')                    # -> N*6*110*110
        self.conv2 = Conv2D(
            num_channels=6, num_filters=16, filter_size=5, act='sigmoid')   # -> N*16*106*106
        self.pool2 = Pool2D(
            pool_size=2, pool_stride=2, pool_type='max')                    # -> N*16*53*53
        self.conv3 = Conv2D(
            num_channels=16, num_filters=120, filter_size=4, act='sigmoid') # -> N*120*50*50
        # 300000 = 120 * 50 * 50, the flattened conv3 output.
        self.fc1 = Linear(input_dim=300000, output_dim=64, act='sigmoid')   # -> N*64
        self.fc2 = Linear(input_dim=64, output_dim=2, act='softmax')        # -> N*2

    def forward(self, image, label=None):
        """Run the network.

        Returns the class probabilities, or a (probabilities, accuracy) pair
        when ``label`` is given.
        """
        x = image
        for layer in (self.conv1, self.pool1, self.conv2, self.pool2, self.conv3):
            x = layer(x)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(x=x, shape=[x.shape[0], -1])
        infer = self.fc2(self.fc1(x))

        if label is None:
            return infer
        accuracy = fluid.layers.accuracy(input=infer, label=label)
        return infer, accuracy
# AlexNet model
class AlexNet(fluid.dygraph.Layer):
    """AlexNet-style CNN that outputs 2-class softmax probabilities.

    Shape comments assume an N*3*224*224 input; every spatial size follows
    out = (in + 2P - K) / S + 1.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = Conv2D(
            num_channels=3, num_filters=96, filter_size=11,
            stride=4, padding=5, act='relu')                # -> N*96*56*56
        self.pool1 = Pool2D(
            pool_size=2, pool_stride=2, pool_type='max')    # -> N*96*28*28
        self.conv2 = Conv2D(
            num_channels=96, num_filters=256, filter_size=5,
            stride=1, padding=2, act='relu')                # -> N*256*28*28
        self.pool2 = Pool2D(
            pool_size=2, pool_stride=2, pool_type='max')    # -> N*256*14*14
        self.conv3 = Conv2D(
            num_channels=256, num_filters=384, filter_size=3,
            stride=1, padding=1, act='relu')                # -> N*384*14*14
        self.conv4 = Conv2D(
            num_channels=384, num_filters=384, filter_size=3,
            stride=1, padding=1, act='relu')                # -> N*384*14*14
        self.conv5 = Conv2D(
            num_channels=384, num_filters=256, filter_size=3,
            stride=1, padding=1, act='relu')                # -> N*256*14*14
        self.pool5 = Pool2D(
            pool_size=2, pool_stride=2, pool_type='max')    # -> N*256*7*7
        # 12544 = 256 * 7 * 7, the flattened pool5 output.
        self.fc1 = Linear(input_dim=12544, output_dim=4096, act='relu')  # -> N*4096
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')   # -> N*4096
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=2, act='softmax')   # -> N*2

    def forward(self, image, label=None):
        """Run the network.

        Returns the class probabilities, or a (probabilities, accuracy) pair
        when ``label`` is given.
        """
        feature_layers = (
            self.conv1, self.pool1,
            self.conv2, self.pool2,
            self.conv3, self.conv4, self.conv5, self.pool5,
        )
        x = image
        for layer in feature_layers:
            x = layer(x)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(x=x, shape=[x.shape[0], -1])

        # Each hidden fully connected layer is followed by dropout.
        hidden_layers = ((self.fc1, self.drop_ratio1), (self.fc2, self.drop_ratio2))
        for fc, ratio in hidden_layers:
            x = fluid.layers.dropout(x=fc(x), dropout_prob=ratio)
        infer = self.fc3(x)

        if label is None:
            return infer
        accuracy = fluid.layers.accuracy(input=infer, label=label)
        return infer, accuracy
# VGGNet-16 model
class ConvBlock(fluid.dygraph.Layer):
    """A run of 3x3 ReLU convolutions followed by one 2x2 max pool."""

    def __init__(self, input_dim, output_dim, conv_num):
        super(ConvBlock, self).__init__()
        self.conv_list = []
        for conv_id in range(conv_num):
            # Only the first convolution sees the block's input channels.
            in_channels = input_dim if conv_id == 0 else output_dim
            conv_item = self.add_sublayer(
                'conv_' + str(conv_id),
                Conv2D(
                    num_channels=in_channels,
                    num_filters=output_dim,
                    filter_size=3,
                    stride=1,
                    padding=1,
                    act='relu'))
            self.conv_list.append(conv_item)
        self.pool = Pool2D(pool_size=2, pool_stride=2, pool_type='max')

    def forward(self, x):
        """Apply every convolution in order, then the pooling layer."""
        out = x
        for conv_item in self.conv_list:
            out = conv_item(out)
        return self.pool(out)


class VGGNet(fluid.dygraph.Layer):
    """VGG-16-style CNN that outputs 2-class softmax probabilities.

    Shape comments assume an N*3*224*224 input; every spatial size follows
    out = (in + 2P - K) / S + 1.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        # One entry per block: (input channels, output channels, conv count).
        block_arch = [
            (3, 64, 2),
            (64, 128, 2),
            (128, 256, 3),
            (256, 512, 3),
            (512, 512, 3),
        ]
        self.block_list = []  # convolution blocks, in execution order
        for block_id, (in_dim, out_dim, conv_num) in enumerate(block_arch):
            block_item = self.add_sublayer(
                'block_' + str(block_id),
                ConvBlock(input_dim=in_dim, output_dim=out_dim, conv_num=conv_num))
            self.block_list.append(block_item)
        # After the five blocks the feature map is N*512*7*7 (25088 values).
        self.fc1 = Linear(input_dim=25088, output_dim=4096, act='relu')  # -> N*4096
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')   # -> N*4096
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=2, act='softmax')   # -> N*2

    def forward(self, image, label=None):
        """Run the network.

        Returns the class probabilities, or a (probabilities, accuracy) pair
        when ``label`` is given.
        """
        features = image
        for block_item in self.block_list:
            features = block_item(features)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(features, [features.shape[0], -1])

        # Each hidden fully connected layer is followed by dropout.
        for fc, ratio in ((self.fc1, self.drop_ratio1), (self.fc2, self.drop_ratio2)):
            x = fluid.layers.dropout(x=fc(x), dropout_prob=ratio)
        infer = self.fc3(x)

        if label is None:
            return infer
        accuracy = fluid.layers.accuracy(input=infer, label=label)
        return infer, accuracy
# GoogLeNet model
class Inception(fluid.dygraph.Layer):
    """Inception block: four parallel branches concatenated along channels.

    Args:
        c0: Input channel count.
        c1: Output channels of the 1x1 branch.
        c2: (reduce, output) channels of the 1x1 -> 3x3 branch.
        c3: (reduce, output) channels of the 1x1 -> 5x5 branch.
        c4: Output channels of the pool -> 1x1 branch.
    """

    def __init__(self, c0, c1, c2, c3, c4):
        super(Inception, self).__init__()
        self.p1_1 = Conv2D(
            num_channels=c0, num_filters=c1, filter_size=1,
            stride=1, padding=0, act='relu')
        self.p2_1 = Conv2D(
            num_channels=c0, num_filters=c2[0], filter_size=1,
            stride=1, padding=0, act='relu')
        self.p2_2 = Conv2D(
            num_channels=c2[0], num_filters=c2[1], filter_size=3,
            stride=1, padding=1, act='relu')
        self.p3_1 = Conv2D(
            num_channels=c0, num_filters=c3[0], filter_size=1,
            stride=1, padding=0, act='relu')
        self.p3_2 = Conv2D(
            num_channels=c3[0], num_filters=c3[1], filter_size=5,
            stride=1, padding=2, act='relu')
        self.p4_1 = Pool2D(
            pool_size=3, pool_stride=1, pool_padding=1, pool_type='max')
        self.p4_2 = Conv2D(
            num_channels=c0, num_filters=c4, filter_size=1,
            stride=1, padding=0, act='relu')

    def forward(self, x):
        """Return the channel-wise concatenation of the four branches."""
        branches = [
            self.p1_1(x),             # branch 1: conv 1x1
            self.p2_2(self.p2_1(x)),  # branch 2: conv 1x1 + conv 3x3
            self.p3_2(self.p3_1(x)),  # branch 3: conv 1x1 + conv 5x5
            self.p4_2(self.p4_1(x)),  # branch 4: pool 3x3 + conv 1x1
        ]
        return fluid.layers.concat(branches, axis=1)


class GoogLeNet(fluid.dygraph.Layer):
    """GoogLeNet-style CNN that outputs 2-class softmax probabilities.

    Shape comments assume an N*3*224*224 input; every spatial size follows
    out = (in + 2P - K) / S + 1.
    """

    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.conv1 = Conv2D(
            num_channels=3, num_filters=64, filter_size=7,
            stride=1, padding=3, act='relu')                              # -> N*64*224*224
        self.pool1 = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')  # -> N*64*112*112
        self.conv2_1 = Conv2D(
            num_channels=64, num_filters=64, filter_size=1,
            stride=1, padding=0, act='relu')                              # -> N*64*112*112
        self.conv2_2 = Conv2D(
            num_channels=64, num_filters=192, filter_size=3,
            stride=1, padding=1, act='relu')                              # -> N*192*112*112
        self.pool2 = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')  # -> N*192*56*56
        self.block3_1 = Inception(
            c0=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32)              # -> N*256*56*56
        self.block3_2 = Inception(
            c0=256, c1=128, c2=(128, 192), c3=(32, 96), c4=64)            # -> N*480*56*56
        self.pool3 = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')  # -> N*480*28*28
        self.block4_1 = Inception(
            c0=480, c1=192, c2=(96, 208), c3=(16, 48), c4=64)             # -> N*512*28*28
        self.block4_2 = Inception(
            c0=512, c1=160, c2=(112, 224), c3=(24, 64), c4=64)            # -> N*512*28*28
        self.block4_3 = Inception(
            c0=512, c1=128, c2=(128, 256), c3=(24, 64), c4=64)            # -> N*512*28*28
        self.block4_4 = Inception(
            c0=512, c1=112, c2=(144, 288), c3=(32, 64), c4=64)            # -> N*528*28*28
        self.block4_5 = Inception(
            c0=528, c1=256, c2=(160, 320), c3=(32, 128), c4=128)          # -> N*832*28*28
        self.pool4 = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')  # -> N*832*14*14
        self.block5_1 = Inception(
            c0=832, c1=256, c2=(160, 320), c3=(32, 128), c4=128)          # -> N*832*14*14
        self.block5_2 = Inception(
            c0=832, c1=384, c2=(192, 384), c3=(48, 128), c4=128)          # -> N*1024*14*14
        self.pool5 = Pool2D(global_pooling=True, pool_type='avg')         # -> N*1024*1*1
        self.fc = Linear(input_dim=1024, output_dim=2, act='softmax')     # -> N*2

    def forward(self, image, label=None):
        """Run the network.

        Returns the class probabilities, or a (probabilities, accuracy) pair
        when ``label`` is given.
        """
        stages = (
            self.conv1, self.pool1,
            self.conv2_1, self.conv2_2, self.pool2,
            self.block3_1, self.block3_2, self.pool3,
            self.block4_1, self.block4_2, self.block4_3,
            self.block4_4, self.block4_5, self.pool4,
            self.block5_1, self.block5_2, self.pool5,
        )
        x = image
        for stage in stages:
            x = stage(x)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        infer = self.fc(x)

        if label is None:
            return infer
        accuracy = fluid.layers.accuracy(input=infer, label=label)
        return infer, accuracy
# ResNet building blocks
# Convolution + batch normalization
class ConvBN(fluid.dygraph.Layer):
    """Bias-free convolution followed by batch norm (activation on the norm)."""

    def __init__(self, num_channels, num_filters, filter_size, stride=1, act=None):
        super(ConvBN, self).__init__()
        # With stride=1, padding=(filter_size - 1) // 2 keeps the feature-map
        # size unchanged.
        same_padding = (filter_size - 1) // 2
        self.conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=same_padding,
            act=None,
            bias_attr=False)
        self.batch_norm = BatchNorm(num_channels=num_filters, act=act)

    def forward(self, x):
        """Apply the convolution, then batch normalization."""
        return self.batch_norm(self.conv(x))


# Bottleneck unit
class Bottleneck(fluid.dygraph.Layer):
    """1x1 -> 3x3 -> 1x1 residual unit whose output has num_filters * 4 channels.

    When ``shortcut`` is True the input is added unchanged; otherwise it is
    first projected by a 1x1 ConvBN to match the output channels and stride.
    The projection layer is always constructed, even when it is not used.
    """

    def __init__(self, num_channels, num_filters, stride=1, shortcut=True):
        super(Bottleneck, self).__init__()
        out_channels = num_filters * 4  # channel count produced by the unit
        self.conv1 = ConvBN(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv2 = ConvBN(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv3 = ConvBN(
            num_channels=num_filters,
            num_filters=out_channels,
            filter_size=1)
        self.short = ConvBN(
            num_channels=num_channels,
            num_filters=out_channels,
            filter_size=1,
            stride=stride)
        self.shortcut = shortcut  # whether to use the identity path

    def forward(self, x):
        """Return relu(residual branch + shortcut branch)."""
        residual = self.conv3(self.conv2(self.conv1(x)))
        # Identity when input and output dimensions match, projection otherwise.
        short = x if self.shortcut else self.short(x)
        return fluid.layers.elementwise_add(x=residual, y=short, act='relu')


# Group of bottleneck units
class BottleneckBlock(fluid.dygraph.Layer):
    """A stack of ``bottleneck_num`` Bottleneck units.

    Only the first unit takes ``num_channels`` inputs, applies ``stride`` and
    uses the projection shortcut; the rest take num_filters * 4 inputs, use
    stride 1 and the identity shortcut.
    """

    def __init__(self, num_channels, num_filters, stride, bottleneck_num):
        super(BottleneckBlock, self).__init__()
        self.bottleneck_list = []  # bottleneck units, in execution order
        for bottleneck_id in range(bottleneck_num):
            is_first = bottleneck_id == 0
            unit = Bottleneck(
                num_channels=num_channels if is_first else (num_filters * 4),
                num_filters=num_filters,
                stride=stride if is_first else 1,
                shortcut=not is_first)
            bottleneck_item = self.add_sublayer('bottleneck_' + str(bottleneck_id), unit)
            self.bottleneck_list.append(bottleneck_item)

    def forward(self, x):
        """Apply every bottleneck unit in order."""
        out = x
        for bottleneck_item in self.bottleneck_list:
            out = bottleneck_item(out)
        return out


# Residual network
class ResNet(fluid.dygraph.Layer):
    """ResNet-50-style CNN that outputs 2-class softmax probabilities.

    Shape comments assume an N*3*224*224 input; every spatial size follows
    out = (in + 2P - K) / S + 1.
    """

    def __init__(self):
        super(ResNet, self).__init__()
        self.conv1 = ConvBN(
            num_channels=3, num_filters=64, filter_size=7,
            stride=2, act='relu')                                         # -> N*64*112*112
        self.pool1 = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')  # -> N*64*56*56
        # Bottleneck counts per group: ResNet-50 [3,4,6,3],
        # ResNet-101 [3,4,23,3], ResNet-152 [3,8,36,3].
        # One entry per group: (input channels, filters, stride, unit count).
        block_arch = [
            (64, 64, 1, 3),
            (256, 128, 2, 4),
            (512, 256, 2, 6),
            (1024, 512, 2, 3),
        ]
        self.block_list = []  # bottleneck groups, in execution order
        for block_id, (in_channels, filters, stride, unit_count) in enumerate(block_arch):
            block_item = self.add_sublayer(
                'block_' + str(block_id),
                BottleneckBlock(
                    num_channels=in_channels,
                    num_filters=filters,
                    stride=stride,
                    bottleneck_num=unit_count))
            self.block_list.append(block_item)
        # After the four groups the feature map is N*2048*7*7.
        self.pool5 = Pool2D(global_pooling=True, pool_type='avg')      # -> N*2048*1*1
        self.fc = Linear(input_dim=2048, output_dim=2, act='softmax')  # -> N*2

    def forward(self, image, label=None):
        """Run the network.

        Returns the class probabilities, or a (probabilities, accuracy) pair
        when ``label`` is given.
        """
        x = self.pool1(self.conv1(image))
        for block_item in self.block_list:
            x = block_item(x)
        x = self.pool5(x)
        # Flatten everything except the batch dimension.
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        infer = self.fc(x)

        if label is None:
            return infer
        accuracy = fluid.layers.accuracy(input=infer, label=label)
        return infer, accuracy
训练配置
with fluid.dygraph.guard():
    # Data: build the training and validation readers (batch size 10).
    train_set = './data/PALM-Training400/'
    valid_set = './data/PALM-Validation400/'
    valid_csv = './data/PM_Label_and_Fovea_Location.csv'
    train_reader = train_loader(train_set, 10)
    valid_reader = valid_loader(valid_set, valid_csv, 10)

    # Model: uncomment exactly one of the alternatives to switch networks.
    # model = LeNet()
    # model = AlexNet()
    # model = VGGNet()
    # model = GoogLeNet()
    model = ResNet()

    # Optimizer: SGD with momentum over all model parameters.
    optimizer = fluid.optimizer.Momentum(
        learning_rate=0.001,
        momentum=0.9,
        parameter_list=model.parameters())
训练过程
epoch_num = 10                         # number of training epochs
total_time = 0                         # accumulated training time in seconds
model_path = './model/iChallenge-PM'   # where the trained weights are saved

# Train, validating after every epoch.
with fluid.dygraph.guard():
    for epoch_id in range(epoch_num):
        # ---- training pass ----
        begin_time = time.time()
        model.train()  # switch to training mode
        for batch_id, train_data in enumerate(train_reader()):
            image = fluid.dygraph.to_variable(train_data[0])
            label = fluid.dygraph.to_variable(train_data[1])

            # Forward pass and mean cross-entropy loss.
            infer = model(image)
            avg_loss = fluid.layers.mean(fluid.layers.cross_entropy(infer, label))

            # Backward pass, weight update, then reset the gradients.
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            model.clear_gradients()

            # Log every 10th batch.
            if batch_id % 10 == 0:
                print("train - epoch: {}, batch: {:2d}, loss: {[0]:.6f}".format(epoch_id, batch_id, avg_loss.numpy()))

        # total_time accumulates across epochs, so this prints the running total.
        total_time += time.time() - begin_time
        print("train - epoch: {}, total train time: {:.3f}s".format(epoch_id, total_time))

        # ---- validation pass ----
        accuracy_set = []  # per-batch accuracy
        avg_loss_set = []  # per-batch mean loss
        model.eval()  # switch to evaluation mode
        for valid_data in valid_reader():
            image = fluid.dygraph.to_variable(valid_data[0])
            label = fluid.dygraph.to_variable(valid_data[1])

            # Passing the label makes the model return the accuracy as well.
            infer, accuracy = model(image, label)
            avg_loss = fluid.layers.mean(fluid.layers.cross_entropy(infer, label))

            accuracy_set.append(accuracy.numpy())
            avg_loss_set.append(avg_loss.numpy())

        print('valid - epoch: {}, loss: {:.6f}, accuracy: {:.2%}'.format(epoch_id, np.mean(avg_loss_set), np.mean(accuracy_set)))

# Persist the trained weights.
with fluid.dygraph.guard():
    fluid.save_dygraph(model.state_dict(), model_path)
测试模型
model_path = './model/iChallenge-PM'                 # saved model weights
image_path = './data/PALM-Training400/P0007.jpg'     # image to classify

with fluid.dygraph.guard():
    # Rebuild the same architecture that was trained, then load its weights.
    # Uncomment exactly one of the alternatives to switch networks.
    # model = LeNet()
    # model = AlexNet()
    # model = VGGNet()
    # model = GoogLeNet()
    model = ResNet()
    model.eval()  # switch to evaluation mode
    model_dict, _ = fluid.load_dygraph(model_path)
    model.load_dict(model_dict)

    # Prepare the input: (3, 224, 224) -> (1, 3, 224, 224).
    image = load_image(image_path)
    image = np.expand_dims(image, axis=0)
    image = fluid.dygraph.to_variable(image)

    # Forward pass.
    infer = model(image)

    # Report the most probable class and show the original picture.
    label = ['Normal', 'PM']  # class names indexed by predicted label
    print('infer result: {}'.format(label[np.argmax(infer.numpy())]))
    image = Image.open(image_path)
    plt.imshow(image)
    plt.show()
参考资料:
https://www.paddlepaddle.org.cn/tutorials/projectdetail/601037
https://aistudio.baidu.com/aistudio/projectdetail/605808