labelme标注的大图用sahi切成小图和小json

1.去除labelme标注中只有1或2个点的轮廓
指有些标注错误的只有1个点或2个点的轮廓不能转化为polygon

点击查看代码
import cv2
import numpy as np
import json
import os
def remove_specific_labels(json_file):
    # 读取JSON文件
    with open(json_file, 'rb+') as f:
        data = json.load(f)
    if"shapes" in data:
        new_shapes=[]
        for shape in data['shapes']:
            points = np.array(shape['points'], np.int32)
            if(len(points)<3):
                print("单点polygon",points)
            else:
                new_shapes.append(shape)
        data["shapes"]=new_shapes
        # data_small["shapes"]=new_shapes_small
    with open(json_file,'w',encoding='utf-8')as f:
        json.dump(data,f,ensure_ascii=False,indent=4)


def traverse_jsons_folder(folder_path):
    image_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(".json"):
                print(file)
                remove_specific_labels(os.path.join(root,file))

if __name__ == '__main__':
    # remove_specific_labels(r'D:\pic\see\0906\src\add24.json')
    traverse_jsons_folder(r'D:\pic\see\0906\src02')

2.labelme格式数据集转coco格式数据集
sahi的输入和输出都是COCO格式数据集,需要先转化为COCO格式数据集,这里用的labelme官方的labelme2coco.py

点击查看代码
#!/usr/bin/env python

import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid

import imgviz
import numpy as np

import labelme

try:
    import pycocotools.mask
except ImportError:
    print("Please install pycocotools:\n\n    pip install pycocotools\n")
    sys.exit(1)


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"))
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()

    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[
            dict(
                url=None,
                id=0,
                name=None,
            )
        ],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            )
        )

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(img.shape[:2], points, shape_type)

            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            if shape_type == "circle":
                (x1, y1), (x2, y2) = points
                r = np.linalg.norm([x2 - x1, y2 - y1])
                # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
                # x: tolerance of the gap between the arc and the line segment
                n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
                i = np.arange(n_points_circle)
                x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
                y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
                points = np.stack((x, y), axis=1).flatten().tolist()
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

        if not args.noviz:
            viz = img
            if masks:
                labels, captions, masks = zip(
                    *[
                        (class_name_to_id[cnm], cnm, msk)
                        for (cnm, gid), msk in masks.items()
                        if cnm in class_name_to_id
                    ]
                )
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
            out_viz_file = osp.join(args.output_dir, "Visualization", base + ".jpg")
            imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)


if __name__ == "__main__":
    main()

这里用命令
python labelme2coco.py src02 coco02 --labels labels.txt
src02为装有labelme标注图片和json的文件夹内
coco02为不存在的文件夹会自动新建此文件夹
labels.txt 前两行固定第三行开始写自己的标签

点击查看代码
__ignore__
_background_
Primary_Particle

3 sahi切图
参考链接:https://github.com/obss/sahi/blob/main/docs/cli.md#coco-slice-command-usage

在这里本人用命令
sahi coco slice --image_dir ./ --dataset_json_path annotations.json
然后再当前目录下生成runs文件夹

4.coco数据集可视化

参考链接https://blog.51cto.com/u_16213710/10144288

点击查看代码
import os

from pycocotools.coco import COCO
from skimage import io
from matplotlib import pyplot as plt
import matplotlib
# https://blog.51cto.com/u_16213710/10144288
matplotlib.use('TkAgg')
# json_file = r'D:\pic\see\090501\coco\annotations.json'
# dataset_dir = r'D:\pic\see\090501\coco\\'
json_file = r'D:\pic\see\0906\coco02\runs\slice_coco\annotations_512_02.json'
dataset_dir = r'D:\pic\see\0906\coco02\runs\slice_coco\annotations_images_512_02\\'
coco = COCO(json_file)
catIds = coco.getCatIds(catNms=['Primary_Particle']) # catIds=1 表示人这一类
imgIds = coco.getImgIds(catIds=catIds ) # 图片id,许多值
for i in range(len(imgIds)):
    img = coco.loadImgs(imgIds[i])[0]
    I = io.imread(dataset_dir + img['file_name'])
    #plt.axis('off')
    plt.imshow(I) #绘制图像,显示交给plt.show()处理
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    coco.showAnns(anns)
    plt.show()
# plt.show() #显示图像

5.切完小图的coco数据集转成labelme格式数据集

点击查看代码
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io

"""
要将官方的coco数据集转成labelme,稍微有点不同,因为官方的coco的实际标签是排列到90,当将coco数据集转成labelme数据集时候,需要修改一下标签,插入空标签
分别要在(从1开始) 12 26 29 30 45 66 68 69 71 83 插入空labelme
"""

coco = ["Primary_Particle"]# 这里记得改成自己的标签

key_words = 'Primary_Particle' #这里改成自己的标签

label = dict()
for idx, item in enumerate(coco):
    label.update({idx: item})

labelme_json_img_path = r"D:\pic\see\0906\coco02\runs\slice_coco\cutted_generated"  # 保存数据集
coco_json_path = r'D:\pic\see\0906\coco02\runs\slice_coco\json'  # coco annotations
jpg_path = r"D:\pic\see\0906\coco02\runs\slice_coco\annotations_images_512_02"  # coco val images
coco_path=os.path.join(coco_json_path, "*.json")
# coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[2]
coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[0]
# print(coco_json)
file_json = io.open(coco_json, 'r', encoding='utf-8')
m_json_data = file_json.read()
m_data = json.loads(m_json_data)
# m_type=m_data['type']

for item in m_data['images']:
    flag = True
    m_images_file_name = item['file_name']
    (filename_path, m_filename) = os.path.split(m_images_file_name)
    (m_name, extension) = os.path.splitext(m_filename)
    m_image = cv2.imread(os.path.join(jpg_path, m_name + ".jpg"))
    m_images_height = item['height']
    m_images_width = item['width']
    m_images_id = item['id']
    data = {}
    data['version'] = "5.1.1"
    data['flags'] = {}
    data["shapes"] = []
    for annit in m_data['annotations']:
        m_image_id = annit['image_id']
        m_category_id = annit['category_id']

        if m_image_id == m_images_id and label[m_category_id - 1] == key_words:
            flag = True
            for segitem in annit['segmentation']:
                points = []
                for idx in range(0, len(segitem), 2):
                    x, y = segitem[idx], segitem[idx + 1]
                    if str(x).isalpha() or str(y).isalpha():
                        break
                    points.append([x, y])
                itemData = {'points': []}
                itemData["label"] = label[m_category_id - 1]
                itemData['points'].extend(points)
                itemData["group_id"] = None
                itemData["shape_type"] = "polygon"
                itemData["flag"] = {}
                data["shapes"].append(itemData)
    if flag:
        data['imagePath'] = m_filename
        data['imageData'] = None
        data['imageHeight'] = m_images_height
        data['imageWidth'] = m_images_width

        jsonName = ".".join([m_name, "json"])
        jpgName = ".".join([m_name, "jpg"])
        print(jsonName)
        jsonPath = os.path.join(labelme_json_img_path, jsonName)
        jpgPath = os.path.join(labelme_json_img_path, jpgName)
        with open(jsonPath, "w") as f:
            json.dump(data, f, indent=2)
        cv2.imwrite(jpgPath, m_image)

print('{} files'.format(len(os.listdir(labelme_json_img_path))))
print("加载入文件完成...")
posted @ 2024-09-07 15:45  阳光天气  阅读(7)  评论(0编辑  收藏  举报