从VOC数据集选择部分类别数据

 

 1 #!/usr/bin/env python
 2 # -*- encoding: utf-8 -*-
 3 """
 4 @Author  :   {FirstElfin}
 5 @License :   (C) Copyright 2013-2020, {DHWL}
 6 @Contact :   {2968793701@qq.com}
 7 @Software:   PyCharm
 8 @File    :   test.py
 9 @Time    :   11/22/19 11:55 AM
10 """
11 import os
12 import xml.etree.ElementTree as ET
13 import shutil
14 
# Source dataset (read only) and destination dataset (generated) locations.
ann_filepath = './VOC2007_22/Annotations/'
img_filepath = './VOC2007_22/JPEGImages/'
img_savepath = './VOC2007/JPEGImages/'
ann_savepath = './VOC2007/Annotations/'

# makedirs (not mkdir) so the missing './VOC2007' parent is created as well;
# exist_ok keeps reruns from failing when the folders are already there.
os.makedirs(img_savepath, exist_ok=True)
os.makedirs(ann_savepath, exist_ok=True)

# Object categories to keep; objects of any other category are dropped.
classes = ['bicycle', 'bus', 'car', 'motorbike', 'train']
26 
27 
def save_annotation(file, src_dir=None, dst_dir=None, keep=None):
    """Filter one VOC annotation file down to the selected classes.

    Parses the XML annotation, removes every top-level ``<object>`` whose
    ``<name>`` is not a wanted class, and writes the filtered tree to the
    destination directory only when at least one wanted object remains.

    Args:
        file: File name of the annotation inside the source directory.
        src_dir: Directory holding the source annotation. Defaults to the
            module-level ``ann_filepath``.
        dst_dir: Directory the filtered annotation is written to. Defaults
            to the module-level ``ann_savepath``.
        keep: Iterable of class names to keep. Defaults to the module-level
            ``classes``.

    Returns:
        True if the filtered annotation was written, False if the file
        contains no wanted object (nothing is written in that case).
    """
    if src_dir is None:
        src_dir = ann_filepath
    if dst_dir is None:
        dst_dir = ann_savepath
    if keep is None:
        keep = classes
    wanted = set(keep)

    tree = ET.parse(os.path.join(src_dir, file))
    root = tree.getroot()

    has_wanted = False
    # findall returns a list, so removing children while looping is safe.
    for obj in root.findall("object"):
        # findtext yields None when <name> is missing, so a malformed
        # object is simply dropped instead of raising AttributeError.
        if obj.findtext("name") in wanted:
            has_wanted = True
        else:
            root.remove(obj)

    if not has_wanted:
        return False
    tree.write(os.path.join(dst_dir, file))
    return True
44 
45 
def save_images(file, src_dir=None, dst_dir=None):
    """Copy the JPEG image that belongs to an annotation file.

    Args:
        file: Annotation file name; its extension is replaced by ``.jpg``
            to obtain the image file name.
        src_dir: Directory holding the source image. Defaults to the
            module-level ``img_filepath``.
        dst_dir: Directory the image is copied into. Defaults to the
            module-level ``img_savepath``.

    Returns:
        Always True once the copy has succeeded.

    Raises:
        OSError: Propagated from ``shutil.copy``, e.g. when the image file
            does not exist.
    """
    if src_dir is None:
        src_dir = img_filepath
    if dst_dir is None:
        dst_dir = img_savepath

    image_name = os.path.splitext(file)[0] + ".jpg"
    # os.path.join works whether or not src_dir ends with a separator.
    shutil.copy(os.path.join(src_dir, image_name), dst_dir)
    return True
50 
51 
if __name__ == '__main__':
    # os.listdir returns every directory entry; only annotation XML files
    # are processed so a stray file or sub-directory cannot make ET.parse
    # fail. Sorting makes the processing order reproducible across runs.
    for f in sorted(os.listdir(ann_filepath)):
        if not f.lower().endswith('.xml'):
            continue
        # Copy the image only when its annotation kept at least one object.
        if save_annotation(f):
            save_images(f)

  `./VOC2007_22` 是备份的原始数据集,`./VOC2007` 是脚本要生成的新数据集(即制作自己的数据:只保留所选类别的标注及其对应图片)。

  通过修改 `classes` 列表可以选择需要保留的类别;VOC 数据集的全部原始类别为:

# The full list of original VOC categories; pick any subset of these names
# for the `classes` list used by the script.
classes = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
           'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
           'dog', 'horse', 'motorbike', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor', 'person']

 

 

 

posted @ 2019-11-22 15:30  巴蜀秀才  阅读(2239)  评论(0)  编辑  收藏  举报