xml转txt

(64条消息) 目标检测数据集格式转换( xml - txt )和( txt - xml ),以及图像标签的对应裁剪_XC___XC的博客-CSDN博客

 

具体实现如下。代码里有一些语句是多余的,但不影响结果。

# -*- coding: utf-8 -*-
#@Time : 2022/7/1 20:55
#@Author : huaobin
#@File : trn.py
#@Software: PyCharm


import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# Dataset split names; not referenced in the visible part of this script.
sets=['train', 'test']

# Only objects whose <name> is listed here are exported by convert_annotation.
classes = ["buildings"]  # put your own dataset's class names here


def convert(size, box, img, newclasses):
    """Build one output record for a bounding box.

    Args:
        size: ``(width, height)`` of the image. Accepted so existing callers
            keep working; it is not read, because the record holds the raw
            box coordinates rather than normalised ones.
        box: Sequence with at least four coordinates. They are copied into
            the record in the order given.
        img: Image file name, placed first in the record.
        newclasses: Class label, placed second in the record.

    Returns:
        The tuple ``(img, newclasses, box[0], box[1], box[2], box[3])``.
    """
    # The earlier version also derived a normalised centre/width/height from
    # ``size`` and then threw it away; that dead arithmetic raised
    # ZeroDivisionError for annotations whose width or height is 0.
    first, second, third, fourth = box[0], box[1], box[2], box[3]
    return (img, newclasses, first, second, third, fourth)

def convert_annotation(image_id):
    """Convert one annotation XML file into a text label file.

    Reads ``./labels/<image_id>.xml`` and writes ``./txtlabels/<image_id>.txt``.
    Each kept ``<object>`` becomes one space-separated line built from the
    tuple returned by ``convert``; that tuple is also printed.

    Objects are skipped when their ``<name>`` is not in the module-level
    ``classes`` list or when their ``<difficult>`` flag is 1. A missing or
    empty ``<difficult>`` tag is treated as "not difficult".

    Args:
        image_id: Base name of the annotation file, without extension.
    """
    # Parse by path: ElementTree opens the file in binary mode, honours the
    # encoding declared in the XML, and closes the handle when done.
    root = ET.parse('./labels/%s.xml'%(image_id)).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    img = image_id+'.jpg'

    # The output file is opened only after parsing succeeded, and the context
    # manager guarantees it is flushed and closed even if an object is malformed.
    with open('./txtlabels/%s.txt'%(image_id), 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            if difficult_node is None or difficult_node.text is None:
                difficult = 0
            else:
                difficult = int(difficult_node.text)
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            xmlbox = obj.find('bndbox')
            b = tuple(
                int(xmlbox.find(tag).text)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            # Write the class that was actually parsed rather than a fixed
            # label, so the output stays right when ``classes`` has several
            # entries.
            bb = convert((w,h), b, img, cls)
            print(bb)
            out_file.write(" ".join([str(a) for a in bb])+'\n')




def listdir(path, list_name):
    """Recursively collect the base names of all ``.xml`` files under *path*.

    Note that this function reuses the name of ``os.listdir``; the body
    therefore always calls the OS function through the ``os`` module.

    Args:
        path: Directory to scan; sub-directories are scanned as well.
        list_name: List that receives, in place, each file name with its
            directory part and its final ``.xml`` extension removed.
    """
    for entry in os.listdir(path):
        file_path = os.path.join(path, entry)
        if os.path.isdir(file_path):
            listdir(file_path, list_name)
            continue
        # splitext works with any path separator and only strips the last
        # extension, so names such as "img.001.xml" keep their inner dots.
        stem, extension = os.path.splitext(entry)
        if extension == '.xml':
            list_name.append(stem)


# Working directory at start-up; kept as a module-level name although nothing
# in the visible part of this script reads it.
wd = getcwd()

# convert_annotation writes into ./txtlabels, and open() does not create
# missing directories, so make sure the output folder exists first.
os.makedirs('./txtlabels', exist_ok=True)

list_name=[]
listdir('./labels',list_name)
image_ids=list_name   # base names of every .xml file found under ./labels
for image_id in image_ids:
    convert_annotation(image_id)

  

 

posted @ 2022-06-28 19:59  青竹之下  阅读(309)  评论(0)  编辑  收藏  举报