将 Pascal VOC 标注文件(XML)解析为 TXT。
首先,读取所有 XML 文件的完整路径,并写入 train.txt 文本文档中。
然后读取该 TXT 文档,逐行取出 XML 文件路径并解析;同时新建一个文件夹,用于保存解析得到的 TXT 文件。写入 TXT 时,只需保存类别名和坐标信息,字段之间用 Tab 分隔。
#!/usr/bin/env python
# coding:utf-8
"""Convert Pascal VOC XML annotations into tab-separated TXT label files.

Workflow:
  1. Collect every ``*.xml`` file in ``XML_DIR`` and record the paths,
     one per line, in ``LIST_FILE``.
  2. Read ``LIST_FILE`` back and parse each listed XML file.
  3. For every annotation that contains at least one object, write
     ``TXT_DIR/<image stem>.txt`` with one line per object:
     ``name<TAB>xmin<TAB>ymin<TAB>xmax<TAB>ymax``.
"""
import os
import glob
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import sys

# Directory holding the Pascal VOC XML annotation files.
XML_DIR = 'F:/snow leopard/000_IMAGE_FRAME/000_B_XML/'
# Text file that lists the full path of every XML file to convert.
LIST_FILE = 'train.txt'
# Directory that receives the generated TXT files.
TXT_DIR = 'F:/snow leopard/Data preprocessing/txt'

# Child tags of <bndbox>, in the order they are written to the TXT file.
_BOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def _text(node, tag):
    """Return the stripped text of child *tag* of *node*, or None if absent or empty."""
    value = node.findtext(tag)
    if value is None:
        return None
    value = value.strip()
    return value or None


def write_file_list(xml_dir, list_path):
    """Write the paths of all ``*.xml`` files in *xml_dir* to *list_path*.

    One path is written per line. Returns the sorted list of paths written.
    """
    xml_paths = sorted(glob.glob(os.path.join(xml_dir, '*.xml')))
    with open(list_path, 'w') as list_file:
        for xml_path in xml_paths:
            list_file.write(xml_path + '\n')
    return xml_paths


def read_file_list(list_path):
    """Return the non-blank lines of *list_path* without their line endings."""
    with open(list_path) as list_file:
        return [line.rstrip('\r\n') for line in list_file if line.strip()]


def parse_annotation(xml_path):
    """Parse one Pascal VOC XML file.

    Returns a ``(stem, sizes, boxes)`` tuple where *stem* is the image file
    name without its extension, *sizes* is a list of ``(width, height)``
    text pairs (one per ``<size>`` node) and *boxes* is a list of
    ``(name, xmin, ymin, xmax, ymax)`` text tuples, one per usable
    ``<object>`` node. Objects lacking a name or a complete bounding box
    are skipped with a warning on stderr.
    """
    root = ET.parse(xml_path).getroot()

    # Fall back to the XML file's own name when <filename> is missing, and
    # use splitext so extensions of any length (".jpeg", none at all) work.
    image_name = _text(root, 'filename') or os.path.basename(xml_path)
    stem = os.path.splitext(image_name)[0]

    sizes = [(_text(size, 'width'), _text(size, 'height'))
             for size in root.findall('size')]

    boxes = []
    for obj in root.findall('object'):
        name = _text(obj, 'name')
        bndbox = obj.find('bndbox')
        coords = []
        if bndbox is not None:
            coords = [_text(bndbox, tag) for tag in _BOX_TAGS]
        if name is None or len(coords) != len(_BOX_TAGS) or None in coords:
            sys.stderr.write(
                'warning: skipping incomplete object in %s\n' % xml_path)
            continue
        boxes.append((name,) + tuple(coords))
    return stem, sizes, boxes


def convert_annotation(xml_path, out_dir):
    """Convert one XML annotation into ``<out_dir>/<stem>.txt``.

    Each object becomes one tab-separated line. The file is rewritten from
    scratch so that running the conversion again does not duplicate
    records. Returns the path of the TXT file, or None when the annotation
    has no usable objects (no file is written in that case).
    """
    stem, sizes, boxes = parse_annotation(xml_path)

    print("*" * 10)
    print(stem)
    for width, height in sizes:
        print(width, height)

    # makedirs also creates missing parent directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    if not boxes:
        return None

    txt_path = os.path.join(out_dir, stem + '.txt')
    with open(txt_path, 'w') as txt_file:
        for box in boxes:
            print(box[0])
            print(*box[1:])
            # Terminate every record so objects do not run together.
            txt_file.write('\t'.join(box) + '\n')
    return txt_path


def main():
    """List all XML files into LIST_FILE, then convert each listed file."""
    write_file_list(XML_DIR, LIST_FILE)
    for xml_path in read_file_list(LIST_FILE):
        convert_annotation(xml_path, TXT_DIR)


if __name__ == '__main__':
    main()
参考: https://www.cnblogs.com/rainsoul/p/6283231.html