Voc2Json--挑选voc中的类别生成json文件

 

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import argparse
import json ,shutil
import os,sys
import xml.etree.ElementTree as ET
 
 
parent = os.path.dirname(os.path.realpath(__file__))
gadent = os.path.dirname(parent)
sys.path.insert(0,gadent)
sys.path.append(gadent)
 
from utils.tool import listDir,draw_res
 
 
 
json_base_info = {'version': '4.5.6',
                'flags': {},
                'shapes': [],
                "imagePath": "0001250.jpg",
                "imageData": "null",
                "imageHeight": 720,
                "imageWidth": 1280
                }
 
 
def voc2Json(voc_base_dir,save_path_root,classes,prefix):
 
    ann_dir = os.path.abspath(os.path.join(voc_base_dir,"Annotations"))
    img_dir = os.path.abspath(os.path.join(voc_base_dir,"JPEGImages"))
 
    xml_list =[]
    listDir(ann_dir,xml_list,"xml")
 
    for xml_path in xml_list:
 
        print("xml path : {}".format(xml_path))
 
        basename_xml = os.path.basename(xml_path)
        basename_jpg = basename_xml.replace("xml","jpg")
        img_path = os.path.abspath(os.path.join(img_dir,basename_jpg))
        if not os.path.exists(img_path):
            continue
 
        tree = ET.parse(xml_path)
        root = tree.getroot()
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
 
        json_rects = []
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            xmin,ymin,xmax,ymax = float(xmlbox.find('xmin').text),float(xmlbox.find('ymin').text),float(xmlbox.find('xmax').text) ,float(xmlbox.find('ymax').text)
 
 
            rect_dict_person = {"label": "person",
                                "points": [[0, 0], [0, 0]],
                                "group_id": "null",
                                "shape_type": "rectangle",
                                "flags": {}
                                }
 
            min_x, min_y, max_x, max_y = int(xmin), int(ymin), int(xmax), int(ymax)
            rect_dict_person["points"][0][0] = max(min_x, 0)
            rect_dict_person["points"][0][1] = max(int(ymin), 0)
            rect_dict_person["points"][1][0] = min(max_x, width)
            rect_dict_person["points"][1][1] = min(max_y, height)
            rect_dict_person["group_id"] = None
 
            json_rects.append(rect_dict_person)
 
 
 
        if len(json_rects) > 0:
            # 第二步创建json文件
            jsondata = json.dumps(json_base_info, indent=4, separators=(',', ': '))
            new_basename_jpg = prefix + basename_jpg
 
            save_path = os.path.abspath(os.path.join(save_path_root, new_basename_jpg))
            json_data_path = save_path.replace('jpg', 'json')
 
            f = open(json_data_path, 'w')
            f.write(jsondata)
            f.close()
            # 修正其中的内容
            with open(json_data_path, "r", encoding='utf-8') as jsonFile:
                json_data = json.load(jsonFile)
 
            json_data['imagePath'] = new_basename_jpg
            json_data['imageData'] = None
            json_data['imageHeight'] = height
            json_data['imageWidth'] = width
 
 
            json_data['shapes'] = json_rects
            with open(json_data_path, "w") as jsonFile:
                json.dump(json_data, jsonFile, ensure_ascii=False, indent=4, separators=(',', ': '))
                jsonFile.close()
            shutil.copy(img_path, save_path)
 
 
 
 
class Voc2Json():
    def __init__(self,args):
        self.voc_base_dir = args.VOC_base_dir
        self.save_path = args.save_path
        self.classes =  args.classes
        self.prefix = args.prefix
 
    def make(self):
        voc2Json(self.voc_base_dir,self.save_path,self.classes,self.prefix)
 
 
 
parser = argparse.ArgumentParser(description='VOC Datasets Convert json dataset')
parser.add_argument('--VOC_base_dir',default=None,type=str,help='VOC数据集的基础路径')
parser.add_argument('--save_path',default=None,type=str,help="转换后的数据集保存路径")
parser.add_argument('--classes',default=['person'],type=list,help="需要从VOC提取的数据集的类别标签")
parser.add_argument('--prefix',default="voc2012_",type=str,help="新的数据集前缀")
 
 
 
if __name__ =='__main__':
    args =parser.parse_args()
    args.VOC_base_dir = "E:/datasets/public_datasets/VOC/VOCtrainval_11-May-2012/VOCdevkit/VOC2012"
    args.save_path = "E:/datasets/public_datasets/person_voc12_src_size_box"
 
    print("start !!!")
    vt = Voc2Json(args)
    vt.make()
    print("Done !!!")

  

posted @   水木清扬  阅读(35)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
历史上的今天:
2020-12-26 案例】S7-200SMART 实时时钟如何在MCGS触摸屏上显示并写入
2020-12-26 卡尔曼滤波:从入门到精通
2018-12-26 使用labelImg制作自己的数据集(VOC2007格式)用于Faster-RCNN训练
2018-12-26 从编程实现角度学习Faster R-CNN(附极简实现)
点击右上角即可分享
微信分享提示