OpenMMLab AI Training Camp: Lesson 5 Notes
Training a Detection Model on the Balloon Dataset
Training a new model usually involves three steps:
- Support a new dataset
- Modify the config file
- Train the model
MMDetection provides three ways to support a new dataset:
- Convert the dataset to COCO format
- Convert the dataset to the middle format
- Implement support for the new dataset directly
We will go through all three methods in this tutorial using the balloon dataset. It is a dataset with instance masks, with 61 images in the training set and 13 images in the validation set. Other approaches and advanced usage can be found in the docs.
Enter the mmdetection root directory
In [ ]:
import os
os.chdir('mmdetection')
Download and inspect the balloon dataset
In [ ]:
# Download and unzip the dataset
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
!unzip balloon_dataset.zip > /dev/null
In [ ]:
!tree balloon
In [ ]:
# Take a quick look at an image from the dataset
from PIL import Image
Image.open('balloon/val/14898532020_ba6199dd22_k.jpg')
Here we look at the JSON annotation of a single image to understand the balloon dataset's label format. The annotation contains the image file name, its size, and the polygon regions of the masks.
In [ ]:
# Inspect the annotation format of a single image
import json
with open('balloon/train/via_region_data.json') as f:
data = json.load(f)
anno1 = list(data.keys())[0]
print(anno1)
print(json.dumps(data[anno1], indent=2))
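As an optional sanity check on the annotations we just inspected, the snippet below counts the images and annotated regions in each split; with the sizes quoted above it should report 61 training and 13 validation images. This is a minimal sketch that assumes the VIA layout shown in the cell output (one entry per image with a regions field).
In [ ]:
# Count images and balloon regions per split (optional sanity check)
import json

for split in ['train', 'val']:
    with open(f'balloon/{split}/via_region_data.json') as f:
        split_data = json.load(f)
    n_regions = sum(len(v['regions']) for v in split_data.values())
    print(f'{split}: {len(split_data)} images, {n_regions} balloon regions')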
Download the pretrained Mask R-CNN model
In [ ]:
# Download the pretrained Mask R-CNN model into the checkpoints folder
!wget http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth -P checkpoints
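Optionally, you can peek inside the downloaded checkpoint with plain PyTorch. The sketch below only assumes that the file is a standard torch checkpoint containing a state_dict; the exact set of top-level keys can vary between releases.
In [ ]:
# Inspect the checkpoint contents (optional)
import torch

ckpt = torch.load(
    'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth',
    map_location='cpu')
print(list(ckpt.keys()))             # e.g. ['meta', 'state_dict'] (keys may vary by release)
print(list(ckpt['state_dict'])[:5])  # first few parameter names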
Method 1: Convert the dataset to COCO format
Here we need to understand the COCO dataset format and convert the balloon annotation files to COCO format.
The COCO dataset format
For instance segmentation, the most important parts of the COCO format are shown below.
A more detailed and complete description of the format can be found here.
{
    "images": [image],
    "annotations": [annotation],
    "categories": [category]
}

# image info
image = {
    "id": int,                         # image id
    "width": int,                      # image width
    "height": int,                     # image height
    "file_name": str,                  # image file name
}

# annotation info
annotation = {
    "id": int,                         # annotation id
    "image_id": int,                   # id of the image this annotation belongs to
    "category_id": int,                # category id
    "segmentation": RLE or [polygon],  # segmentation mask
    "area": float,                     # mask area
    "bbox": [x,y,width,height],        # bounding box in x, y, width, height order
    "iscrowd": 0 or 1,                 # whether the annotation marks a crowd region
}

# category info
categories = [{
    "id": int,                         # category id
    "name": str,                       # category name
    "supercategory": str,              # parent category name
}]
Convert the annotation format
Write the helper function convert_balloon_to_coco to perform the conversion.
In [ ]:
import os.path as osp
import mmcv
def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    data_infos = mmcv.load(ann_file)

    annotations = []
    images = []
    obj_count = 0
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))

        for _, obj in v['regions'].items():
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x']
            py = obj['all_points_y']
            # COCO polygons are flat lists [x1, y1, x2, y2, ...]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id': 0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)

convert_balloon_to_coco('balloon/train/via_region_data.json', 'balloon/train/coco.json', 'balloon/train')
convert_balloon_to_coco('balloon/val/via_region_data.json', 'balloon/val/coco.json', 'balloon/val')
In [ ]:
# import json
# with open('balloon/val/coco.json') as f:
# data = json.load(f)
# with open('coco.json', 'w') as f:
# json.dump(data, f, indent=4)
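Before moving on, it is worth loading the converted annotation file with the COCO API as a quick validity check (pycocotools is already a dependency of MMDetection); a minimal sketch:
In [ ]:
# Quick validity check of the converted COCO annotations
from pycocotools.coco import COCO

coco = COCO('balloon/val/coco.json')
print('images:', len(coco.imgs), 'annotations:', len(coco.anns))
print('categories:', coco.loadCats(coco.getCatIds()))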
Modify the config file and save it
In [ ]:
from mmcv import Config
from mmdet.apis import set_random_seed
# Load the base config
cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')
# Modify the dataset type and file paths
cfg.dataset_type = 'COCODataset'
cfg.data_root = 'balloon/'
cfg.classes = ('balloon',)
cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/coco.json'
cfg.data.train.img_prefix = 'train'
cfg.data.train.classes = cfg.classes
cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/coco.json'
cfg.data.val.img_prefix = 'val'
cfg.data.val.classes = cfg.classes
cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/coco.json'
cfg.data.test.img_prefix = 'val'
cfg.data.test.classes = cfg.classes
# Modify the number of classes in bbox_head and mask_head
cfg.model.roi_head.bbox_head.num_classes = 1
cfg.model.roi_head.mask_head.num_classes = 1
# Load the pretrained Mask R-CNN weights for fine-tuning
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Set the work directory for logs and intermediate files
cfg.work_dir = 'work_dirs/balloon'
# The original learning rate was set for 8-GPU training; divide it by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10
# # Set the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Set the checkpoint interval to reduce storage usage
# cfg.checkpoint_config.interval = 12
# Set the number of training epochs
cfg.runner.max_epochs = 1
# Fix the random seed so that results are reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
cfg.device = 'cuda'
# Print the full config
# print(f'Config:\n{cfg.pretty_text}')
# Save the config file
mmcv.mkdir_or_exist('work_dirs/balloon')
cfg.dump('work_dirs/balloon/cocoformat.py')
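Since the config has been dumped to disk, the same training run could also be launched from the command line with tools/train.py instead of the Python API used in the next cells; the command is left commented out here so the model is not trained twice.
In [ ]:
# Command-line alternative to the Python training API below
# !python tools/train.py work_dirs/balloon/cocoformat.py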
Train the new model
In [ ]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
# Build the dataset
datasets = [build_dataset(cfg.data.train)]
# Build the detector
model = build_detector(cfg.model)
# Attach the class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES
# Create the work directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
In [ ]:
# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)
Evaluate the model on the test set
After training, the model needs to be evaluated. Here we can simply run the test.py script provided by mmdet. More evaluation options can be found here.
In [ ]:
!python tools/test.py work_dirs/balloon/cocoformat.py work_dirs/balloon/latest.pth --eval bbox segm
Run inference on a new image
In [ ]:
from mmdet.apis import inference_detector, show_result_pyplot
img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')
model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)
In [ ]:
result[0][0].shape
In [ ]:
len(result[1][0])
In [ ]:
result[1][0][0].shape
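For a detector with a mask head, inference_detector returns a tuple (bbox_results, mask_results): result[0][c] is an (N, 5) array of [x1, y1, x2, y2, score] boxes for class c, and result[1][c] is the matching list of N binary masks, which is what the shapes above show. The sketch below keeps only confident balloon detections; the 0.5 score threshold is an arbitrary choice for illustration.
In [ ]:
# Filter detections of class 0 ('balloon') by confidence score
score_thr = 0.5                 # hypothetical threshold, tune as needed
bboxes = result[0][0]           # (N, 5): x1, y1, x2, y2, score
masks = result[1][0]            # list of N binary masks
keep = bboxes[:, -1] > score_thr
print(f'{int(keep.sum())} balloons detected above score {score_thr}')
print('mask areas (pixels):', [int(m.sum()) for m, k in zip(masks, keep) if k])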
Method 2: Convert the dataset to the middle format
Besides the COCO format, mmdet also supports a middle format for defining datasets.
The dataset middle format
[
{
'filename': 'a.jpg',
'width': 1280,
'height': 720,
'ann': {
'bboxes': <np.ndarray> (n, 4),
'labels': <np.ndarray> (n, ),
'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
'labels_ignore': <np.ndarray> (k, ) (optional field),
'masks': [poly]
}
},
...
]
We need to write a conversion function that produces this middle format.
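As a concrete illustration, one converted entry for a balloon image would look roughly like the sketch below; the file name and coordinates are made up.
In [ ]:
# Hypothetical middle-format entry for a single image with two balloons
import numpy as np

example_entry = {
    'filename': 'some_balloon_image.jpg',
    'width': 2048,
    'height': 1536,
    'ann': {
        'bboxes': np.array([[994., 619., 1445., 1080.],
                            [310., 120.,  600.,  410.]], dtype=np.float32),  # (n, 4) in x1, y1, x2, y2
        'labels': np.array([0, 0], dtype=np.int64),                          # (n, ), index of 'balloon'
    }
}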
Convert the annotation format
Write the helper function convert_balloon_to_middle to perform the conversion.
In [ ]:
import os.path as osp
import numpy as np
def convert_balloon_to_middle(ann_file, out_file, image_prefix):
    data_infos = mmcv.load(ann_file)

    middle_format = []
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]
        data_info = dict(filename=filename, width=width, height=height)

        gt_bboxes = []
        gt_labels = []
        gt_masks_ann = []
        for _, obj in v['regions'].items():
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x']
            py = obj['all_points_y']
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))
            bbox = [x_min, y_min, x_max, y_max]
            label = 0
            segmentation = [poly]

            gt_bboxes.append(bbox)
            gt_labels.append(label)
            gt_masks_ann.append(segmentation)

        data_anno = dict(
            bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
            labels=np.array(gt_labels, dtype=np.int64),  # np.int64 instead of the deprecated np.long
            # bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
            # masks=gt_masks_ann
        )
        data_info.update(ann=data_anno)
        middle_format.append(data_info)

    mmcv.dump(middle_format, out_file)

convert_balloon_to_middle('balloon/train/via_region_data.json', 'balloon/train/middle.pkl', 'balloon/train')
convert_balloon_to_middle('balloon/val/via_region_data.json', 'balloon/val/middle.pkl', 'balloon/val')
In [ ]:
# Inspect the converted annotation of a single image
import json
data = mmcv.load('balloon/train/middle.pkl')
print(data[3])
Modify the config and save it
Note that here the dataset type of each split must be changed to CustomDataset, the annotation files to middle.pkl, and the work directory and saved config paths to middleformat.
In [ ]:
from mmcv import Config
from mmdet.apis import set_random_seed
# Load the base config
# cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')
cfg = Config.fromfile('./configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')
# Modify the dataset type and file paths
cfg.dataset_type = 'CustomDataset'
cfg.data_root = 'balloon/'
cfg.classes = ('balloon',)
cfg.data.train.type = 'CustomDataset'
cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/middle.pkl'
cfg.data.train.img_prefix = 'train'
cfg.data.train.classes = cfg.classes
cfg.data.val.type = 'CustomDataset'
cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/middle.pkl'
cfg.data.val.img_prefix = 'val'
cfg.data.val.classes = cfg.classes
cfg.data.test.type = 'CustomDataset'
cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/middle.pkl'
cfg.data.test.img_prefix = 'val'
cfg.data.test.classes = cfg.classes
# Modify the number of classes in bbox_head (Faster R-CNN has no mask_head)
cfg.model.roi_head.bbox_head.num_classes = 1
# cfg.model.roi_head.mask_head.num_classes = 1
# Fine-tune from the pretrained Mask R-CNN checkpoint (its mask-head weights are simply ignored when loading into Faster R-CNN)
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Set the work directory for logs and intermediate files
cfg.work_dir = 'work_dirs/balloon/middleformat'
# The original learning rate was set for 8-GPU training; divide it by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10
# This is a custom dataset, so the evaluation metric has to be changed
cfg.evaluation.metric = 'mAP'
# # Set the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Set the checkpoint interval to reduce storage usage
# cfg.checkpoint_config.interval = 12
# Set the number of training epochs
cfg.runner.max_epochs = 1
# Fix the random seed so that results are reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
cfg.device = 'cuda'
# Print the full config
# print(f'Config:\n{cfg.pretty_text}')
# Save the config file
mmcv.mkdir_or_exist('work_dirs/balloon/middleformat')
cfg.dump('work_dirs/balloon/middleformat/middleformat.py')
Train the new model
Here we build the dataset and the detector from the config file, then run training.
In [ ]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
# Build the dataset
datasets = [build_dataset(cfg.data.train)]
# Build the detector
model = build_detector(cfg.model)
# Attach the class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES
# Create the work directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
In [ ]:
# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)
Evaluate the model on the test set
In [ ]:
!python tools/test.py work_dirs/balloon/middleformat/middleformat.py work_dirs/balloon/middleformat/latest.pth --eval mAP
Run inference on a new image
In [ ]:
from mmdet.apis import inference_detector, show_result_pyplot
img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')
model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)
Method 3: Implement support for the new dataset directly
Create a new file BalloonDataset.py under mmdet/datasets with the content below, and add from .BalloonDataset import BalloonDataset to mmdet/datasets/__init__.py, as sketched right after this paragraph.
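A sketch of that __init__.py edit is shown below; the exact contents of the existing __all__ list depend on your MMDetection version, so only the lines to add are spelled out.
In [ ]:
# mmdet/datasets/__init__.py  (only the additions are shown)
from .BalloonDataset import BalloonDataset

# ...and append the class name to the existing __all__ list, e.g.
# __all__ = [..., 'BalloonDataset']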
In [ ]:
# mmdet/datasets/BalloonDataset.py
import copy
import os.path as osp

import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset


@DATASETS.register_module()
class BalloonDataset(CustomDataset):

    CLASSES = ('balloon',)

    def load_annotations(self, ann_file):
        data_infos = mmcv.load(self.ann_file)

        middle_format = []
        for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
            filename = v['filename']
            img_path = osp.join(self.img_prefix, filename)
            height, width = mmcv.imread(img_path).shape[:2]
            data_info = dict(filename=filename, width=width, height=height)

            gt_bboxes = []
            gt_labels = []
            gt_masks_ann = []
            for _, obj in v['regions'].items():
                assert not obj['region_attributes']
                obj = obj['shape_attributes']
                px = obj['all_points_x']
                py = obj['all_points_y']
                poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
                poly = [p for x in poly for p in x]

                x_min, y_min, x_max, y_max = (
                    min(px), min(py), max(px), max(py))
                bbox = [x_min, y_min, x_max, y_max]
                label = 0
                segmentation = [poly]

                gt_bboxes.append(bbox)
                gt_labels.append(label)
                gt_masks_ann.append(segmentation)

            data_anno = dict(
                bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
                labels=np.array(gt_labels, dtype=np.int64),  # np.int64 instead of the deprecated np.long
                # bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
                # masks=gt_masks_ann
            )
            data_info.update(ann=data_anno)
            middle_format.append(data_info)

        return middle_format
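Once the class is registered (here it is registered simply by running the cell above), you can verify that the registry resolves it by name before building the config:
In [ ]:
# Check that BalloonDataset is registered in the DATASETS registry
from mmdet.datasets.builder import DATASETS
print(DATASETS.get('BalloonDataset'))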
Modify the config file and save it
Again, remember to change the dataset type, the annotation file names, and the work directory and saved config paths accordingly.
In [ ]:
from mmcv import Config
from mmdet.apis import set_random_seed
# Load the base config
# cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')
cfg = Config.fromfile('./configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')
# Modify the dataset type and file paths
cfg.dataset_type = 'BalloonDataset'
cfg.data_root = 'balloon/'
cfg.data.train.type = 'BalloonDataset'
cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/via_region_data.json'
cfg.data.train.img_prefix = 'train'
cfg.data.val.type = 'BalloonDataset'
cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/via_region_data.json'
cfg.data.val.img_prefix = 'val'
cfg.data.test.type = 'BalloonDataset'
cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/via_region_data.json'
cfg.data.test.img_prefix = 'val'
# Modify the number of classes in bbox_head (Faster R-CNN has no mask_head)
cfg.model.roi_head.bbox_head.num_classes = 1
# cfg.model.roi_head.mask_head.num_classes = 1
# Fine-tune from the pretrained Mask R-CNN checkpoint (its mask-head weights are simply ignored when loading into Faster R-CNN)
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Set the work directory for logs and intermediate files
cfg.work_dir = 'work_dirs/balloon/newdataset'
# The original learning rate was set for 8-GPU training; divide it by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10
# This is a custom dataset, so the evaluation metric has to be changed
cfg.evaluation.metric = 'mAP'
# # Set the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Set the checkpoint interval to reduce storage usage
# cfg.checkpoint_config.interval = 12
# Set the number of training epochs
cfg.runner.max_epochs = 1
# Fix the random seed so that results are reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
cfg.device = 'cuda'
# Print the full config
# print(f'Config:\n{cfg.pretty_text}')
# Save the config file
mmcv.mkdir_or_exist('work_dirs/balloon/newdataset')
cfg.dump('work_dirs/balloon/newdataset/newdataset.py')
Train the new model
In [ ]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
# Build the dataset
datasets = [build_dataset(cfg.data.train)]
# Build the detector
model = build_detector(cfg.model)
# Attach the class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES
# Create the work directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
In [ ]:
# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)
Evaluate the model on the test set
In [ ]:
!python tools/test.py work_dirs/balloon/newdataset/newdataset.py work_dirs/balloon/newdataset/latest.pth --eval mAP
Run inference on a new image
In [ ]:
from mmdet.apis import inference_detector, show_result_pyplot
img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')
model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)
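show_result_pyplot only displays the prediction inline; to also save the visualization to disk, the detector's show_result method can be called directly. A minimal sketch (the output path is just an example):
In [ ]:
# Save the rendered detections to an image file
model.show_result(img, result, score_thr=0.5,
                  out_file='work_dirs/balloon/newdataset/pred_vis.jpg')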