OpenMMLab AI Training Camp: Lecture 5 Notes


Training a Detection Model on the Balloon Dataset

Training a new model generally involves three steps:

  1. Support the new dataset
  2. Modify the config file
  3. Train the model

MMDetection provides three ways to support a new dataset:

  1. Convert the dataset to COCO format
  2. Convert the dataset to the middle (intermediate) format
  3. Implement a new dataset class directly

We will work through all three approaches on the balloon dataset, which comes with instance masks: the training set contains 61 images and the validation set contains 13. Other approaches and advanced usage are covered in the MMDetection docs.

Enter the mmdetection root directory

In [ ]:

import os
os.chdir('mmdetection')

Download and inspect the balloon dataset

In [ ]:

# Download and unpack the dataset
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
!unzip balloon_dataset.zip > /dev/null

In [ ]:

!tree balloon

In [ ]:

# Take a quick look at one image from the dataset
from PIL import Image
Image.open('balloon/val/14898532020_ba6199dd22_k.jpg')

Here we take a single image as an example to inspect the balloon dataset's JSON annotation format. Each entry contains the image file name, its size, and the polygon regions that describe the masks.

In [ ]:

# Inspect the annotation format of a single image
import json

with open('balloon/train/via_region_data.json') as f:
    data = json.load(f)
anno1 = list(data.keys())[0]
print(anno1)
print(json.dumps(data[anno1], indent=2))

Download a pretrained Mask R-CNN model

In [ ]:

# Download the pretrained Mask R-CNN checkpoint into the checkpoints folder
!wget http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth -P checkpoints

Method 1: Convert the dataset to COCO format

We first need to understand the COCO annotation format and then convert the balloon annotation files into it.

The COCO annotation format

For instance segmentation, the most relevant parts of the COCO format are shown below.

A more detailed and complete description of the format is available in the official COCO documentation.

{
    "images": [image],
    "annotations": [annotation],
    "categories": [category]
}

# image info
image = {
    "id": int, # image id
    "width": int, # image width
    "height": int, # image height
    "file_name": str, # image file name
}

# annotation info
annotation = {
    "id": int, # annotation id
    "image_id": int, # id of the image this annotation belongs to
    "category_id": int, # category id
    "segmentation": RLE or [polygon], # segmentation mask
    "area": float, # mask area
    "bbox": [x,y,width,height], # bounding box
    "iscrowd": 0 or 1, # whether the annotation describes a crowd region
}

# category info
categories = [{
    "id": int, # category id
    "name": str, # category name
    "supercategory": str, # parent category name
}]
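
To make the schema concrete, here is a minimal, purely illustrative sketch of a complete COCO-style annotation file with one image and one polygon annotation (the file name and coordinates are made up; only the field layout matters):

In [ ]:

import json

# A hypothetical one-image, one-annotation COCO file (values are illustrative only)
minimal_coco = {
    "images": [{"id": 0, "width": 1024, "height": 768, "file_name": "example.jpg"}],
    "annotations": [{
        "id": 0,
        "image_id": 0,
        "category_id": 0,
        # one polygon, flattened as x1, y1, x2, y2, ...
        "segmentation": [[10.0, 10.0, 200.0, 10.0, 200.0, 150.0, 10.0, 150.0]],
        "area": 190.0 * 140.0,
        "bbox": [10.0, 10.0, 190.0, 140.0],  # [x, y, width, height]
        "iscrowd": 0,
    }],
    "categories": [{"id": 0, "name": "balloon", "supercategory": "object"}],
}

print(json.dumps(minimal_coco, indent=2))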

Converting the annotation format

We write a helper function convert_balloon_to_coco to perform the conversion.

In [ ]:

import os.path as osp
import mmcv

def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    data_infos = mmcv.load(ann_file)

    annotations = []
    images = []
    obj_count = 0
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        images.append(dict(
            id=idx,
            file_name=filename,
            height=height,
            width=width))

        bboxes = []
        labels = []
        masks = []
        for _, obj in v['regions'].items():
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x']
            py = obj['all_points_y']
            # flatten the VIA point lists into a COCO polygon [x1, y1, x2, y2, ...]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            # axis-aligned bounding box of the polygon
            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))

            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],  # COCO bbox is [x, y, w, h]
                area=(x_max - x_min) * (y_max - y_min),  # approximated by the bbox area
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id':0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)
convert_balloon_to_coco('balloon/train/via_region_data.json', 'balloon/train/coco.json', 'balloon/train')
convert_balloon_to_coco('balloon/val/via_region_data.json', 'balloon/val/coco.json', 'balloon/val')

In [ ]:

# import json

# with open('balloon/val/coco.json') as f:
#     data = json.load(f)

# with open('coco.json', 'w') as f:
#     json.dump(data, f, indent=4)
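
As an optional sanity check, the generated file can be loaded with pycocotools (which MMDetection already depends on); the sketch below just confirms that the image, annotation, and category counts look reasonable:

In [ ]:

from pycocotools.coco import COCO

coco = COCO('balloon/val/coco.json')
print(len(coco.imgs), 'images,', len(coco.anns), 'annotations')
print(coco.loadCats(coco.getCatIds()))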

Modify the config and save it

In [ ]:

from mmcv import Config
from mmdet.apis import set_random_seed

# Load the base config
cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')

# Modify the dataset type and file paths
cfg.dataset_type = 'COCODataset'
cfg.data_root = 'balloon/'
cfg.classes = ('balloon',)

cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/coco.json'
cfg.data.train.img_prefix = 'train'
cfg.data.train.classes = cfg.classes

cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/coco.json'
cfg.data.val.img_prefix = 'val'
cfg.data.val.classes = cfg.classes

cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/coco.json'
cfg.data.test.img_prefix = 'val'
cfg.data.test.classes = cfg.classes

# Set the number of classes in bbox_head and mask_head
cfg.model.roi_head.bbox_head.num_classes = 1
cfg.model.roi_head.mask_head.num_classes = 1
# Fine-tune from the pretrained Mask R-CNN checkpoint downloaded above
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Working directory for logs and checkpoints
cfg.work_dir = 'work_dirs/balloon'

# The original learning rate was tuned for 8-GPU training; divide by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# # Increase the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Increase the checkpoint interval to save storage
# cfg.checkpoint_config.interval = 12

# Number of training epochs
cfg.runner.max_epochs = 1

# Fix the random seed to make results reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

cfg.device = 'cuda'

# Print the full config
# print(f'Config:\n{cfg.pretty_text}')

# Save the config
mmcv.mkdir_or_exist('work_dirs/balloon')
cfg.dump('work_dirs/balloon/cocoformat.py')

Train the new model

In [ ]:

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(cfg.model)
# Attach class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES

# Create the working directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

In [ ]:

# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)

Evaluate the model on the test set

After training, we evaluate the model. Here we simply run the test.py script provided by MMDetection. More evaluation options are described in the MMDetection docs.

In [ ]:

!python tools/test.py work_dirs/balloon/cocoformat.py work_dirs/balloon/latest.pth --eval bbox segm

Run inference on a new image

In [ ]:

from mmdet.apis import inference_detector, show_result_pyplot

img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')

model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)
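
For a Mask R-CNN style model in MMDetection 2.x, result is a tuple (bbox_results, segm_results): result[0] is a per-class list of (N, 5) arrays holding [x1, y1, x2, y2, score], and result[1] is a per-class list of the N corresponding masks, which is what the shape checks below confirm. As a small illustration (assuming the single 'balloon' class at index 0 and an arbitrary 0.5 score threshold):

In [ ]:

bboxes = result[0][0]        # (N, 5): x1, y1, x2, y2, score for the 'balloon' class
masks = result[1][0]         # list of N masks for the same class
keep = bboxes[:, -1] > 0.5   # keep detections scoring above 0.5
print(f'{int(keep.sum())} of {len(bboxes)} detections above 0.5 confidence')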

In [ ]:

result[0][0].shape

In [ ]:

len(result[1][0])

In [ ]:

result[1][0][0].shape

Method 2: Convert the dataset to the middle format

Besides the COCO format, MMDetection also supports a simple middle (intermediate) annotation format, shown below.

The middle annotation format

[
    {
        'filename': 'a.jpg',
        'width': 1280,
        'height': 720,
        'ann': {
            'bboxes': <np.ndarray> (n, 4),
            'labels': <np.ndarray> (n, ),
            'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
            'labels_ignore': <np.ndarray> (k, ) (optional field),
            'masks': [poly]
        }
    },
    ...
]
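
As a concrete, made-up illustration, a single middle-format entry with two boxes of the 'balloon' class could look like the sketch below; note that, unlike the COCO [x, y, w, h] convention, bboxes here are [x1, y1, x2, y2]:

In [ ]:

import numpy as np

# Hypothetical middle-format entry (coordinates are illustrative only)
example_entry = {
    'filename': 'example.jpg',
    'width': 1280,
    'height': 720,
    'ann': {
        'bboxes': np.array([[10., 20., 300., 400.],
                            [50., 60., 200., 300.]], dtype=np.float32),  # (n, 4), x1, y1, x2, y2
        'labels': np.array([0, 0], dtype=np.int64),                      # (n, ), class indices
    }
}
print(example_entry)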

We now write a conversion function that produces this middle format.

Converting the annotation format

We write a helper function convert_balloon_to_middle to perform the conversion. Note that the masks and ignore fields are left commented out here, so the result only supports bounding-box training.

In [ ]:

import os.path as osp
import numpy as np

def convert_balloon_to_middle(ann_file, out_file, image_prefix):
    data_infos = mmcv.load(ann_file)

    middle_format = []
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        height, width = mmcv.imread(img_path).shape[:2]

        data_info = dict(filename=filename, width=width, height=height)

        gt_bboxes = []
        gt_labels = []
        gt_masks_ann = []

        for _, obj in v['regions'].items():
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x']
            py = obj['all_points_y']
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))

            bbox = [x_min, y_min, x_max, y_max]  # the middle format expects x1, y1, x2, y2
            label = 0
            segmentation = [poly]
            gt_bboxes.append(bbox)
            gt_labels.append(label)
            gt_masks_ann.append(segmentation)

        data_anno = dict(
            bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
            labels=np.array(gt_labels, dtype=np.int64),  # np.long is deprecated in recent NumPy
            # bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
            # masks=gt_masks_ann
            )
     
        data_info.update(ann=data_anno)
        middle_format.append(data_info)

    mmcv.dump(middle_format, out_file)
convert_balloon_to_middle('balloon/train/via_region_data.json', 'balloon/train/middle.pkl', 'balloon/train')
convert_balloon_to_middle('balloon/val/via_region_data.json', 'balloon/val/middle.pkl', 'balloon/val')

In [ ]:

# Inspect one entry of the converted middle-format annotations
data = mmcv.load('balloon/train/middle.pkl')
print(data[3])

Modify the config and save it

Note: here the dataset type of every split must be changed to CustomDataset, the annotation files must point to middle.pkl, and the work directory and saved config name are changed to middleformat. Since the converted annotations above contain no masks, the base config is also switched from Mask R-CNN to Faster R-CNN.

In [ ]:

from mmcv import Config
from mmdet.apis import set_random_seed

# Load the base config
# cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')
cfg = Config.fromfile('./configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')

# Modify the dataset type and file paths
cfg.dataset_type = 'CustomDataset'
cfg.data_root = 'balloon/'
cfg.classes = ('balloon',)

cfg.data.train.type = 'CustomDataset'
cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/middle.pkl'
cfg.data.train.img_prefix = 'train'
cfg.data.train.classes = cfg.classes

cfg.data.val.type = 'CustomDataset'
cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/middle.pkl'
cfg.data.val.img_prefix = 'val'
cfg.data.val.classes = cfg.classes

cfg.data.test.type = 'CustomDataset'
cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/middle.pkl'
cfg.data.test.img_prefix = 'val'
cfg.data.test.classes = cfg.classes

# Set the number of classes in bbox_head (Faster R-CNN has no mask head)
cfg.model.roi_head.bbox_head.num_classes = 1
# cfg.model.roi_head.mask_head.num_classes = 1
# Initialize from the pretrained Mask R-CNN checkpoint; only the matching weights
# (backbone, neck, RPN, bbox head) are loaded into the Faster R-CNN model
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Working directory for logs and checkpoints
cfg.work_dir = 'work_dirs/balloon/middleformat'

# The original learning rate was tuned for 8-GPU training; divide by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# CustomDataset does not support COCO-style metrics, so evaluate with VOC-style mAP
cfg.evaluation.metric = 'mAP'
# # Increase the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Increase the checkpoint interval to save storage
# cfg.checkpoint_config.interval = 12

# Number of training epochs
cfg.runner.max_epochs = 1

# Fix the random seed to make results reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

cfg.device = 'cuda'

# Print the full config
# print(f'Config:\n{cfg.pretty_text}')

# Save the config
mmcv.mkdir_or_exist('work_dirs/balloon/middleformat')
cfg.dump('work_dirs/balloon/middleformat/middleformat.py')

Train the new model

Here we build the dataset and the detector from the config, then run training.

In [ ]:

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(cfg.model)
# Attach class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES

# Create the working directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

In [ ]:

# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)

Evaluate the model on the test set

In [ ]:

!python tools/test.py work_dirs/balloon/middleformat/middleformat.py work_dirs/balloon/middleformat/latest.pth --eval mAP

Run inference on a new image

In [ ]:

from mmdet.apis import inference_detector, show_result_pyplot

img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')

model.cfg = cfg
result = inference_detector(model, img)

show_result_pyplot(model, img, result)

Method 3: Implement a new dataset class directly

Create a new file BalloonDataset.py under mmdet/datasets and fill it with the content below.

Then, in mmdet/datasets/__init__.py, add from .BalloonDataset import BalloonDataset so the new dataset module is imported and registered.
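
A minimal sketch of that __init__.py change (appending the class name to the existing __all__ list is optional for registration, but keeps the module exports consistent):

# mmdet/datasets/__init__.py
# Add the import alongside the existing ones:
from .BalloonDataset import BalloonDataset

# and append 'BalloonDataset' to the existing __all__ list.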

In [ ]:

# mmdet/datasets/BalloonDataset.py

import copy
import os.path as osp

import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module()
class BalloonDataset(CustomDataset):

    CLASSES = ('balloon',)

    def load_annotations(self, ann_file):
        data_infos = mmcv.load(ann_file)

        middle_format = []
        for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
            filename = v['filename']
            img_path = osp.join(self.img_prefix, filename)
            height, width = mmcv.imread(img_path).shape[:2]

            data_info = dict(filename=filename, width=width, height=height)

            gt_bboxes = []
            gt_labels = []
            gt_masks_ann = []

            for _, obj in v['regions'].items():
                assert not obj['region_attributes']
                obj = obj['shape_attributes']
                px = obj['all_points_x']
                py = obj['all_points_y']
                poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
                poly = [p for x in poly for p in x]

                x_min, y_min, x_max, y_max = (
                    min(px), min(py), max(px), max(py))

                bbox = [x_min, y_min, x_max, y_max]  # x1, y1, x2, y2
                label = 0
                segmentation = [poly]
                gt_bboxes.append(bbox)
                gt_labels.append(label)
                gt_masks_ann.append(segmentation)

            data_anno = dict(
                bboxes=np.array(gt_bboxes, dtype=np.float32).reshape(-1, 4),
                labels=np.array(gt_labels, dtype=np.int64),  # np.long is deprecated in recent NumPy
                # bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
                # masks=gt_masks_ann
                )
        
            data_info.update(ann=data_anno)
            middle_format.append(data_info)

        return middle_format
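
Once the file is in place and the import has been added (and the Python process restarted so the new module is picked up), the registration can be verified through the registry, e.g.:

In [ ]:

from mmdet.datasets import DATASETS

# Should print the BalloonDataset class if registration succeeded
print(DATASETS.get('BalloonDataset'))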

Modify the config and save it

Again, remember to change the dataset type, the annotation file names, and the work directory and saved config path.

In [ ]:

from mmcv import Config
from mmdet.apis import set_random_seed

# Load the base config
# cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py')
cfg = Config.fromfile('./configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')

# Modify the dataset type and file paths
cfg.dataset_type = 'BalloonDataset'
cfg.data_root = 'balloon/'

cfg.data.train.type = 'BalloonDataset'
cfg.data.train.data_root = 'balloon/'
cfg.data.train.ann_file = 'train/via_region_data.json'
cfg.data.train.img_prefix = 'train'

cfg.data.val.type = 'BalloonDataset'
cfg.data.val.data_root = 'balloon/'
cfg.data.val.ann_file = 'val/via_region_data.json'
cfg.data.val.img_prefix = 'val'

cfg.data.test.type = 'BalloonDataset'
cfg.data.test.data_root = 'balloon/'
cfg.data.test.ann_file = 'val/via_region_data.json'
cfg.data.test.img_prefix = 'val'

# Set the number of classes in bbox_head (Faster R-CNN has no mask head)
cfg.model.roi_head.bbox_head.num_classes = 1
# cfg.model.roi_head.mask_head.num_classes = 1
# Initialize from the pretrained Mask R-CNN checkpoint; only the matching weights
# (backbone, neck, RPN, bbox head) are loaded into the Faster R-CNN model
cfg.load_from = 'checkpoints/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth'
# Working directory for logs and checkpoints
cfg.work_dir = 'work_dirs/balloon/newdataset'

# The original learning rate was tuned for 8-GPU training; divide by 8 for a single GPU
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# The custom dataset class evaluates with VOC-style mAP instead of COCO metrics
cfg.evaluation.metric = 'mAP'
# # Increase the evaluation interval to reduce running time
# cfg.evaluation.interval = 12
# # Increase the checkpoint interval to save storage
# cfg.checkpoint_config.interval = 12

# Number of training epochs
cfg.runner.max_epochs = 1

# Fix the random seed to make results reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

cfg.device = 'cuda'

# Print the full config
# print(f'Config:\n{cfg.pretty_text}')

# Save the config
mmcv.mkdir_or_exist('work_dirs/balloon/newdataset')
cfg.dump('work_dirs/balloon/newdataset/newdataset.py')

Train the new model

In [ ]:

from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(cfg.model)
# Attach class names to the model for nicer visualization
model.CLASSES = datasets[0].CLASSES

# Create the working directory
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

In [ ]:

# Train the model
train_detector(model, datasets, cfg, distributed=False, validate=True)

Evaluate the model on the test set

In [ ]:

!python tools/test.py work_dirs/balloon/newdataset/newdataset.py work_dirs/balloon/newdataset/latest.pth --eval mAP

Run inference on a new image

In [ ]:

from mmdet.apis import inference_detector, show_result_pyplot

img = mmcv.imread('balloon/val/14898532020_ba6199dd22_k.jpg')

model.cfg = cfg
result = inference_detector(model, img)

show_result_pyplot(model, img, result)