Voc2Json--挑选voc中的类别生成json文件
"""Convert selected classes of a Pascal VOC dataset into labelme-style JSON files.

For every VOC annotation XML that has a matching JPEG and contains at least
one non-difficult object of a requested class, a JSON annotation file is
written to the output directory and the image is copied next to it.
"""
import argparse
import json
import os
import shutil
import sys
import xml.etree.ElementTree as ET

# Make the grandparent directory importable so that ``utils.tool`` resolves
# when this file is run directly as a script.
parent = os.path.dirname(os.path.realpath(__file__))
gadent = os.path.dirname(parent)
sys.path.insert(0, gadent)

from utils.tool import listDir, draw_res

# Template for one JSON annotation file. The per-image fields (shapes,
# imagePath, imageData, imageHeight, imageWidth) are overwritten for every
# image; the values here are only placeholders.
json_base_info = {
    'version': '4.5.6',
    'flags': {},
    'shapes': [],
    "imagePath": "0001250.jpg",
    "imageData": "null",
    "imageHeight": 720,
    "imageWidth": 1280
}


def _collect_shapes(root, classes, width, height):
    """Return the rectangle shapes for all wanted objects under *root*.

    Objects whose class is not in *classes*, or that are flagged as
    difficult, are skipped. Box corners are truncated to integers and clamped
    to the image bounds ``[0, width] x [0, height]``.
    """
    shapes = []
    for obj in root.iter('object'):
        # Some annotation files omit <difficult>; treat that as "not difficult".
        difficult = (obj.findtext('difficult') or '0').strip()
        cls = obj.findtext('name')
        if cls not in classes or int(difficult) == 1:
            continue

        xmlbox = obj.find('bndbox')
        min_x, min_y, max_x, max_y = (
            int(float(xmlbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        shapes.append({
            # Use the object's own class name, not a fixed "person" label.
            "label": cls,
            "points": [
                [max(min_x, 0), max(min_y, 0)],
                [min(max_x, width), min(max_y, height)],
            ],
            "group_id": None,
            "shape_type": "rectangle",
            "flags": {},
        })
    return shapes


def voc2Json(voc_base_dir, save_path_root, classes, prefix):
    """Export the requested classes of a VOC dataset as JSON + image pairs.

    Args:
        voc_base_dir: VOC root directory containing ``Annotations`` and
            ``JPEGImages``.
        save_path_root: Output directory; created if it does not exist.
        classes: Collection of class names to keep.
        prefix: String prepended to every output file name.

    Annotations without a matching JPEG, or without any kept object, produce
    no output.
    """
    ann_dir = os.path.abspath(os.path.join(voc_base_dir, "Annotations"))
    img_dir = os.path.abspath(os.path.join(voc_base_dir, "JPEGImages"))
    os.makedirs(save_path_root, exist_ok=True)

    xml_list = []
    # listDir is a project helper; it is called to fill xml_list in place.
    listDir(ann_dir, xml_list, "xml")

    for xml_path in xml_list:
        print("xml path : {}".format(xml_path))
        # Swap only the file extension; a plain str.replace would also rewrite
        # an "xml" that happens to occur inside the file stem.
        stem = os.path.splitext(os.path.basename(xml_path))[0]
        basename_jpg = stem + ".jpg"
        img_path = os.path.abspath(os.path.join(img_dir, basename_jpg))
        if not os.path.exists(img_path):
            continue

        root = ET.parse(xml_path).getroot()
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)

        json_rects = _collect_shapes(root, classes, width, height)
        if not json_rects:
            continue

        new_basename_jpg = prefix + basename_jpg
        save_path = os.path.abspath(os.path.join(save_path_root, new_basename_jpg))
        json_data_path = os.path.splitext(save_path)[0] + ".json"

        # Shallow copy keeps the template's key order; every per-image key is
        # reassigned below, so the shared template is never mutated.
        json_data = dict(json_base_info)
        json_data['shapes'] = json_rects
        json_data['imagePath'] = new_basename_jpg
        json_data['imageData'] = None
        json_data['imageHeight'] = height
        json_data['imageWidth'] = width

        # ensure_ascii=False can emit non-ASCII characters, so the encoding
        # must be explicit instead of relying on the platform default.
        with open(json_data_path, "w", encoding='utf-8') as json_file:
            json.dump(json_data, json_file, ensure_ascii=False, indent=4,
                      separators=(',', ': '))

        shutil.copy(img_path, save_path)


class Voc2Json():
    """Thin wrapper that runs :func:`voc2Json` from parsed CLI arguments."""

    def __init__(self, args):
        self.voc_base_dir = args.VOC_base_dir
        self.save_path = args.save_path
        self.classes = args.classes
        self.prefix = args.prefix

    def make(self):
        """Run the conversion with the stored settings."""
        voc2Json(self.voc_base_dir, self.save_path, self.classes, self.prefix)


parser = argparse.ArgumentParser(description='VOC Datasets Convert json dataset')
parser.add_argument('--VOC_base_dir', default=None, type=str, help='VOC数据集的基础路径')
parser.add_argument('--save_path', default=None, type=str, help="转换后的数据集保存路径")
# nargs='+' yields a list of names; type=list would split one string into characters.
parser.add_argument('--classes', default=['person'], nargs='+', type=str, help="需要从VOC提取的数据集的类别标签")
parser.add_argument('--prefix', default="voc2012_", type=str, help="新的数据集前缀")

if __name__ == '__main__':
    args = parser.parse_args()
    # Fall back to the author's local paths only when none was given on the
    # command line, so the CLI options actually take effect.
    if args.VOC_base_dir is None:
        args.VOC_base_dir = "E:/datasets/public_datasets/VOC/VOCtrainval_11-May-2012/VOCdevkit/VOC2012"
    if args.save_path is None:
        args.save_path = "E:/datasets/public_datasets/person_voc12_src_size_box"

    print("start !!!")
    vt = Voc2Json(args)
    vt.make()
    print("Done !!!")
分类:
工具箱--python
, 数据集制作工具
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
2020-12-26 【案例】S7-200SMART 实时时钟如何在MCGS触摸屏上显示并写入
2020-12-26 卡尔曼滤波:从入门到精通
2018-12-26 使用labelImg制作自己的数据集(VOC2007格式)用于Faster-RCNN训练
2018-12-26 从编程实现角度学习Faster R-CNN(附极简实现)