pascalVOC 标注文件,解析为TXT

首先,读取所有xml文件完整路径,写入train.txt 文本文档中,

然后逐行读取该TXT文档,依次解析每一行对应的xml文档;同时新建一个文件夹,用于保存解析得到的TXT。写入TXT时,只需要保存类别名和坐标信息即可,字段之间用Tab分隔

 

#!/usr/bin/env python
# coding:utf-8 
import os
import glob

try: 
  import xml.etree.cElementTree as ET 
except ImportError: 
  import xml.etree.ElementTree as ET 
import sys 

# Step 1: collect the full path of every XML annotation file and list them in
# train.txt, one path per line.
# The pattern is '*.xml' (with the dot) so that only files carrying the .xml
# extension are picked up, not every name that merely ends in "xml".
filename = glob.glob('F:/snow leopard/000_IMAGE_FRAME/000_B_XML/' + '*.xml')

# The context manager closes train.txt even if a write fails part-way through.
with open('train.txt', 'w') as fileObject:
  for ip in filename:
    fileObject.write(ip)
    fileObject.write('\n')


# Step 2: read train.txt (one XML annotation path per line), parse every XML
# file and write one TXT file per annotated image into dir_name.
# Each output line describes one object: name, xmin, ymin, xmax, ymax,
# separated by tabs.
dir_name = 'F:/snow leopard/Data preprocessing/txt'

with open("train.txt") as file_srx:  # lists every XML file to convert
  for line in file_srx:
    # Strip only the line terminator; slicing off the last character would
    # damage the path when the final line has no trailing newline.
    f = line.rstrip('\r\n')
    if not f:
      continue  # tolerate blank lines in the list

    tree = ET.parse(f)     # open the XML document
    root = tree.getroot()  # root node of the annotation
    print ("*"*10)

    # Base name of the output file: the <filename> entry without its
    # extension. splitext copes with extensions of any length (".jpeg", ...);
    # when the entry is missing or empty, fall back to the XML file's own name.
    name_node = root.find('filename')
    if name_node is not None and name_node.text:
      image_name = name_node.text
    else:
      image_name = os.path.basename(f)
    filename = os.path.splitext(image_name)[0]
    print (filename)

    # Create the output directory on demand, including missing parents.
    if not os.path.exists(dir_name):
      os.makedirs(dir_name)

    # Image size is only reported on the console, it is not written out.
    for size in root.findall('size'):
      width = size.find('width').text
      height = size.find('height').text
      print (width, height)

    # Gather one tab-separated line per <object> node.
    rows = []
    for obj in root.findall('object'):
      name = obj.find('name').text
      print (name)
      bndbox = obj.find('bndbox')
      xmin = bndbox.find('xmin').text
      ymin = bndbox.find('ymin').text
      xmax = bndbox.find('xmax').text
      ymax = bndbox.find('ymax').text
      print (xmin, ymin, xmax, ymax)
      # The trailing newline keeps every object on its own line.
      rows.append('\t'.join((name, xmin, ymin, xmax, ymax)) + '\n')

    # Annotations without any object produce no TXT file at all.
    if rows:
      # NOTE: append mode is kept from the original script, so running the
      # conversion twice duplicates the entries; clear dir_name before a
      # re-run.
      with open(dir_name + '/' + filename + ".txt", "a") as file_object_txt:
        file_object_txt.writelines(rows)

 

 

参考: https://www.cnblogs.com/rainsoul/p/6283231.html

posted @ 2019-03-20 20:55  静悟生慧  阅读(1007)  评论(1)  编辑  收藏  举报