项目小记_将xml转为json

参考博客:https://blog.csdn.net/qq_41672428/article/details/107451834

 

xml文件格式如下所示:

<annotation verified="no">
  <folder>JPG</folder>
  <filename>driving_0</filename>
  <path>E:\object detective\Fatigue Driving\Driving_Dataset_Labelimg\JPG\driving_0.jpg</path>
  <source>
    <database>Unknown</database>
  </source>
  <size>
    <width>416</width>
    <height>416</height>
    <depth>3</depth>
  </size>
  <segmented>0</segmented>
  <object>
    <name>mouth_close</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <Difficult>0</Difficult>
    <bndbox>
      <xmin>203</xmin>
      <ymin>168</ymin>
      <xmax>246</xmax>
      <ymax>198</ymax>
    </bndbox>
  </object>
  <object>
    <name>sunglasses</name>
    <pose>Unspecified</pose>
    <truncated>0</truncated>
    <Difficult>0</Difficult>
    <bndbox>
      <xmin>188</xmin>
      <ymin>99</ymin>
      <xmax>275</xmax>
      <ymax>144</ymax>
    </bndbox>
  </object>
</annotation>

 

将xml格式的数据集转为json格式(需在百度飞桨平台上训练),主要获取文件名、标签和框坐标等信息。

主要代码如下:

"""
@author : Chenery

@Time : 2020/8/19

@Function : 将xml文件转换为json文件(在百度开发平台训练)

xml.etree.cElementTree 函数的用法


"""

import json
import os

try:
    import xml.etree.cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET
#解析Xml中标注框的label和bbox
def _parse_coord(text, tag):
    """Convert the text of a bndbox coordinate element to a number.

    Returns an ``int`` when the text is an integer literal, otherwise a
    ``float``.  Raises ``ValueError`` for empty or non-numeric text.
    """
    try:
        return int(text)
    except (TypeError, ValueError):
        pass
    try:
        return float(text)
    except (TypeError, ValueError):
        raise ValueError('invalid <%s> value in bndbox: %r' % (tag, text))


def get_bbox(xmlname):
    """Extract the labels and bounding boxes from one annotation XML file.

    Only ``<object>`` elements that are direct children of the root are
    read.  For each of them the first ``<name>`` gives the label and the
    first ``<bndbox>`` gives the box.

    Args:
        xmlname: Path of the XML file to parse.

    Returns:
        A tuple ``(label_name, sig_xml_box)`` of two index-aligned lists:
        ``label_name[i]`` is the class name of the i-th object and
        ``sig_xml_box[i]`` is ``[xmin, ymin, xmax, ymax]`` for it.  A
        coordinate element that is absent from ``<bndbox>`` defaults to 0.

    Raises:
        ValueError: If a coordinate element contains non-numeric text.
    """
    label_name = []
    sig_xml_box = []
    root = ET.parse(xmlname).getroot()
    for obj in root:  # direct children of the root only
        if obj.tag != 'object':
            continue
        name_node = obj.find('name')
        box_node = obj.find('bndbox')
        # Skip incomplete objects: appending only a label or only a box
        # would shift every later pair out of alignment.
        if name_node is None or box_node is None:
            continue
        bbox = []
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            coord_node = box_node.find(tag)
            if coord_node is None:
                bbox.append(0)
            else:
                # Parsed as a number, never eval()'d: the file content is
                # data and must not be executed.
                bbox.append(_parse_coord(coord_node.text, tag))
        label_name.append(name_node.text)
        sig_xml_box.append(bbox)
    return label_name, sig_xml_box
#获得转换Json文件
def get_json(xml_dir, json_dir=None):
    """Write one JSON annotation file for every ``.xml`` file in *xml_dir*.

    Each output file has the form
    ``{"labels": [{"name": ..., "x1": ..., "y1": ..., "x2": ..., "y2": ...}]}``
    and is named after the XML file with its extension replaced by
    ``.json``.

    Args:
        xml_dir: Directory containing the XML annotation files.
        json_dir: Directory that receives the JSON files.  When omitted, a
            module-level ``json_dir`` variable is used if one exists
            (the original behaviour); otherwise the files are written
            next to the XML files in *xml_dir*.
    """
    if json_dir is None:
        # Backward compatibility: the function used to read a global.
        json_dir = globals().get('json_dir')
    if json_dir is None:
        json_dir = xml_dir
    if not os.path.isdir(json_dir):
        os.makedirs(json_dir)

    for xml_name in os.listdir(xml_dir):
        # splitext keeps dots inside the stem, so "a.b.xml" becomes
        # "a.b.json" instead of colliding with other "a.*" files.
        stem, ext = os.path.splitext(xml_name)
        if ext.lower() != '.xml':
            continue  # ignore anything that is not an XML annotation
        xml_path = os.path.join(xml_dir, xml_name)
        json_path = os.path.join(json_dir, stem + '.json')
        label_name, sigxml_bbox = get_bbox(xml_path)
        annotations = [
            {
                'name': name,
                'x1': box[0],
                'y1': box[1],
                'x2': box[2],
                'y2': box[3],
            }
            for name, box in zip(label_name, sigxml_bbox)
        ]
        # The with-block closes the file so the JSON is flushed to disk.
        with open(json_path, 'w') as fp:
            json.dump({'labels': annotations}, fp, indent=4)

 

Easy DL 平台要求数据格式如下:

 

posted @ 2020-08-20 12:47  cfancy  阅读(387)  评论(0)  编辑  收藏  举报