将彩色RGB分割标注图像数据集转换为COCO格式的JSON文件
https://www.freesion.com/article/95661187982/
将彩色RGB分割标注图像数据集转换为COCO格式的JSON文件
由于很多检测、分割网络都支持 COCO 格式的数据集,有时需要将自己的数据集转换为 COCO 格式的 JSON 文件。本文记录将彩色 RGB 分割标注图像转换为 COCO 格式文件的完整过程。
彩色 label:
转换为单个物体的黑白mask:
生成最终的 json 文件:
具体流程:
1. 安装pycococreator(先安装pycocotools, cython):
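# Windows
pip install git+https://github.com/philferriere/cocoapi.git#egg=pycocotools^&subdirectory=PythonAPI

# Linux(URL 中含有 &,需要加引号,否则 shell 会把命令放到后台执行)
pip install "git+https://github.com/waleedka/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI"

pip install cython

# GitHub 已停用 git:// 协议,这里改用 https://
pip install git+https://github.com/waspinator/coco.git@2.1.0

# 最后安装 pycococreator 本身
pip install git+https://github.com/waspinator/pycococreator.git@0.2.0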
2. 创建文件目录格式如下 :
shapes
│
├───labels
│   │   <image_id>.png
│   │   ...
│
└───train
    │
    ├───annotations
    │   │   <image_id>_<object_class_name>_<annotation_id>.png
    │   │   ...
    │
    └───<subset><year>
        │   <image_id>.png
        │   ...

e.g.

shapes
│
├───labels
│   │   5.png
│   │   ...
│
└───train
    │
    ├───annotations
    │   │   5_leaf_0.png    # 第5张图片的第0个叶片
    │   │   ...
    │
    └───shapes_train2017
        │   5.png           # 第5张图片
        │   ...
3. 将彩色RGB的标注图像转化为黑白图像,命名格式为 :
<image_id>_<object_class_name>_<annotation_id>.png # e.g 第5张图片的第0个叶片 ./shapes/train/annotations/5_leaf_0.png
RGB标注彩图转换为单个物体的黑白mask图像代码为:
import glob
import os

import cv2
import numpy as np


def _instance_colours(lbl):
    """Return the distinct non-black colours of ``lbl`` in first-seen order.

    ``lbl`` is an (h, w, channels) array. Colours are ordered by the
    position of their first pixel in a row-major scan, so the resulting
    instance numbering is stable for a given image.
    """
    pixels = lbl.reshape(-1, lbl.shape[-1])
    colours, first_seen = np.unique(pixels, axis=0, return_index=True)
    ordered = colours[np.argsort(first_seen)]
    # An all-zero colour is the background, not an object instance.
    return [colour for colour in ordered if colour.any()]


def rgb2masks(label_name, out_dir='./shapes/train/annotations/'):
    """Split one colour-coded label image into per-instance binary masks.

    Every distinct non-black colour in the label image is treated as one
    object instance. For each instance a white-on-black, 3-channel PNG is
    written to ``out_dir`` as ``<image_id>_leaf_<idx>.png``, where
    ``<image_id>`` is the label file name up to its first dot and ``idx``
    counts instances in order of first appearance (row-major scan).

    Args:
        label_name: Path of the colour label image.
        out_dir: Directory receiving the mask PNGs; created when missing.

    Raises:
        OSError: If the label image cannot be read or a mask cannot be
            written.
    """
    lbl_id = os.path.basename(label_name).split('.')[0]

    # Flag 1 asks OpenCV for a 3-channel colour image; imread returns None
    # instead of raising when the file is missing or unreadable.
    lbl = cv2.imread(label_name, 1)
    if lbl is None:
        raise OSError('cannot read label image: {}'.format(label_name))

    # cv2.imwrite fails silently (returns False) if the directory is absent.
    os.makedirs(out_dir, exist_ok=True)

    for idx, colour in enumerate(_instance_colours(lbl)):
        mask = (lbl == colour).all(-1)
        leaf = np.zeros_like(lbl)
        leaf[mask] = 255
        mask_name = os.path.join(out_dir, '{}_leaf_{}.png'.format(lbl_id, idx))
        if not cv2.imwrite(mask_name, leaf):
            raise OSError('cannot write mask image: {}'.format(mask_name))


if __name__ == '__main__':
    label_dir = './labels'
    for label_name in sorted(glob.glob(os.path.join(label_dir, '*.png'))):
        rgb2masks(label_name)
4. 利用pycococreator和得到的黑白masks生成coco json格式的数据集,代码如下:
import datetime
import fnmatch
import json
import os
import re

import numpy as np

ROOT_DIR = 'C:/Users/1/Desktop/333/tokyo/tokyo/'
IMAGE_DIR = os.path.join(ROOT_DIR, "img")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "anno")

INFO = {
    "description": "Leaf Dataset",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2017,
    "contributor": "Francis_Liu",
    "date_created": datetime.datetime.utcnow().isoformat(' ')
}

LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
]

# Add or change categories to match your own dataset.
CATEGORIES = [
    {
        'id': 1,
        'name': 'tokyo',
        'supercategory': 'tokyo',
    }
]


def filter_for_jpeg(root, files):
    """Return the paths of the image files among ``files``.

    Args:
        root: Directory containing ``files``.
        files: Bare file names, e.g. as yielded by ``os.walk``.

    Returns:
        ``os.path.join(root, name)`` for every name ending in ``.jpeg``,
        ``.jpg`` or ``.png`` (case-insensitive), in input order.
    """
    patterns = ('*.jpeg', '*.jpg', '*.png')
    return [
        os.path.join(root, name)
        for name in files
        if any(fnmatch.fnmatchcase(name.lower(), pattern)
               for pattern in patterns)
    ]


def filter_for_annotations(root, files, image_filename):
    """Return the PNG annotation files that belong to ``image_filename``.

    An annotation belongs to an image when its stem equals the image stem
    or starts with ``<image stem>_``. Requiring the underscore keeps image
    ``1`` from also claiming ``10_leaf_0.png``, and the image stem is
    escaped so regex metacharacters in file names are matched literally.

    Args:
        root: Directory containing ``files``.
        files: Bare file names, e.g. as yielded by ``os.walk``.
        image_filename: Path or name of the image being annotated.

    Returns:
        ``os.path.join(root, name)`` for every matching name, in input
        order.
    """
    image_stem = os.path.splitext(os.path.basename(image_filename))[0]
    belongs_to_image = re.compile(
        re.escape(image_stem) + r'(?:_.*)?', re.DOTALL)

    matches = []
    for name in files:
        stem, ext = os.path.splitext(os.path.basename(name))
        if ext.lower() == '.png' and belongs_to_image.fullmatch(stem):
            matches.append(os.path.join(root, name))
    return matches


def _category_id_for(annotation_filename):
    """Return the id of the category named in the annotation path, or None.

    The file name is checked first; the full path is only a fallback for
    datasets whose class name appears in a directory instead.
    """
    basename = os.path.basename(annotation_filename)
    for haystack in (basename, annotation_filename):
        for category in CATEGORIES:
            if category['name'] in haystack:
                return category['id']
    return None


def main():
    """Build a COCO JSON file from IMAGE_DIR and ANNOTATION_DIR.

    Images and annotations are visited in sorted order so the assigned ids
    are reproducible. The result is written to
    ``ROOT_DIR/instances_leaf_train2017.json``.
    """
    # Imported here so the filename helpers above stay importable on
    # machines without Pillow / pycococreator installed.
    from PIL import Image
    from pycococreatortools import pycococreatortools

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id = 1
    segmentation_id = 1

    # The annotation tree does not change while we run: list it once
    # instead of re-walking it for every image.
    annotation_dirs = [
        (dirpath, filenames)
        for dirpath, _, filenames in os.walk(ANNOTATION_DIR)
    ]

    for image_root, _, image_names in os.walk(IMAGE_DIR):
        for image_filename in sorted(filter_for_jpeg(image_root, image_names)):
            with Image.open(image_filename) as image:
                image_size = image.size
            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(image_filename), image_size)
            coco_output["images"].append(image_info)

            # A file name containing 'crowd' marks a crowd region (stored
            # as RLE); everything else becomes polygons, which is what
            # ``tolerance`` below applies to.
            is_crowd = 'crowd' in os.path.basename(image_filename)

            for annotation_root, annotation_names in annotation_dirs:
                annotation_files = sorted(filter_for_annotations(
                    annotation_root, annotation_names, image_filename))

                for annotation_filename in annotation_files:
                    print(annotation_filename)

                    class_id = _category_id_for(annotation_filename)
                    if class_id is None:
                        print('skipped (no category name in path): '
                              + annotation_filename)
                        continue

                    # 'id' must be a scalar: it is written out verbatim as
                    # the annotation's category_id.
                    category_info = {'id': class_id, 'is_crowd': is_crowd}

                    with Image.open(annotation_filename) as mask_image:
                        binary_mask = np.asarray(
                            mask_image.convert('1')).astype(np.uint8)

                    annotation_info = \
                        pycococreatortools.create_annotation_info(
                            segmentation_id, image_id, category_info,
                            binary_mask, image_size, tolerance=2)

                    if annotation_info is not None:
                        coco_output["annotations"].append(annotation_info)

                    segmentation_id = segmentation_id + 1

            image_id = image_id + 1

    output_path = os.path.join(ROOT_DIR, 'instances_leaf_train2017.json')
    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file)


if __name__ == "__main__":
    main()