VOC数据集生成代码使用说明
# split.py
#
# Input: a directory of images (`path`) and a directory of label .txt files
# (`gt_path`, one file per image, same stem).  Each label line holds at least
# 8 comma-separated coordinate values: x,y for the four corners of one box.
# Output: resized images in `out_path` and rescaled labels in `label_path`.
import os
import numpy as np
import math

path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/images_train/'
gt_path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/result_train/'
out_path = 're_image'
label_path = 'label_tmp'


def compute_scale(height, width, short_side=600, long_side=1200):
    """Return the resize factor for an image of the given size.

    The shorter side is scaled to ``short_side`` unless that would push the
    longer side past ``long_side``; in that case the longer side is scaled to
    ``long_side`` instead.
    """
    size_min = min(height, width)
    size_max = max(height, width)
    scale = float(short_side) / float(size_min)
    if np.round(scale * size_max) > long_side:
        scale = float(long_side) / float(size_max)
    return scale


def rescale_quad(fields, img_size, re_size):
    """Map the four corners of one box onto the resized image.

    Args:
        fields: sequence whose first 8 items are x0, y0, ..., x3, y3 (numbers
            or numeric strings) in original-image coordinates.
        img_size: shape of the original image, (height, width, ...).
        re_size: shape of the resized image, (height, width, ...).

    Returns:
        A list of 8 ints ``[x, y, x, y, x, y, x, y]`` with the four points
        ordered by ascending x.

    Raises:
        ValueError: if one of the first 8 fields is not numeric.
        IndexError: if fewer than 8 fields are given.
    """
    xs = np.array([int(float(fields[2 * i]) / img_size[1] * re_size[1])
                   for i in range(4)])
    ys = np.array([int(float(fields[2 * i + 1]) / img_size[0] * re_size[0])
                   for i in range(4)])
    # A stable sort keeps the input order for points that share the same x,
    # so the output does not depend on the sort implementation.
    order = np.argsort(xs, kind='stable')
    coords = []
    for i in order:
        coords.append(int(xs[i]))
        coords.append(int(ys[i]))
    return coords


def convert_labels(lines, img_size, re_size, label='tianchi'):
    """Convert raw label lines into tab-separated label rows.

    Blank lines are skipped; lines with fewer than 8 comma-separated fields
    are reported and skipped.  Fields after the 8th are ignored.

    Returns:
        A list of strings ``"<label>\\tx\\ty\\t...\\n"``, one per valid line.
    """
    rows = []
    for line in lines:
        text = line.strip()
        if not text:
            continue
        fields = text.split(',')
        if len(fields) < 8:
            print('Skip malformed label line: ' + text)
            continue
        coords = rescale_quad(fields, img_size, re_size)
        rows.append(label + '\t' + '\t'.join(str(v) for v in coords) + '\n')
    return rows


def main():
    """Resize every .jpg/.png under ``path`` and rewrite its labels to match."""
    # Imported here so the pure helpers above stay usable without OpenCV and
    # imageio installed.
    import cv2 as cv
    import imageio

    for directory in (out_path, label_path):
        if not os.path.exists(directory):
            os.makedirs(directory)

    for name in sorted(os.listdir(path)):
        stem, ext = os.path.splitext(name)
        if ext.lower() not in ('.jpg', '.png'):
            continue
        gt_file = os.path.join(gt_path, stem + '.txt')
        img_path = os.path.join(path, name)
        print(img_path)

        img = cv.imread(img_path)
        if img is None:
            print('****************************')
            print('Image ' + img_path + ' may be a bad picture!')
            print('****************************')
            # The file is renamed to .gif (on disk) and retried with imageio.
            newname = os.path.join(path, stem + '.gif')
            os.rename(img_path, newname)
            img_path = newname
            print(img_path)
            print('Try read with imageio.')
            gif = imageio.mimread(img_path)
            if gif is None or len(gif) == 0:
                print('****************************')
                print("Image " + img_path + " can't be read!")
                print('****************************')
                # Nothing usable was decoded: move on to the next image.
                continue
            print('Read success!')
            img = cv.cvtColor(gif[0], cv.COLOR_RGB2BGR)

        img_size = img.shape
        im_scale = compute_scale(img_size[0], img_size[1])
        re_im = cv.resize(img, None, None, fx=im_scale, fy=im_scale,
                          interpolation=cv.INTER_LINEAR)
        re_size = re_im.shape
        cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im)

        with open(gt_file, 'r') as f:
            rows = convert_labels(f, img_size, re_size)
        if rows:
            # Written in one go with 'w' so that re-running the script does
            # not append duplicate labels to an existing file.
            with open(os.path.join(label_path, stem) + '.txt', 'w') as f:
                f.writelines(rows)


if __name__ == '__main__':
    main()
# ToVoc.py
#
# Run this script directly after split.py has finished: it reads the images
# in `re_image` and the labels in `label_tmp` and writes a VOC-style dataset.
from xml.dom.minidom import Document
import os
import glob
import shutil
import numpy as np


def generate_xml(name, lines, img_size, class_sets, doncateothers=True):
    """Build the VOC annotation document for one image.

    Args:
        name: image stem; ``name + '.jpg'`` is written as the filename.
        lines: label lines, whitespace-separated:
            ``<class> x0 y0 x1 y1 x2 y2 x3 y3``.
        img_size: image shape ``(height, width[, depth])``; depth is written
            as 1 when the shape has only two entries.
        class_sets: class names that may be emitted.
        doncateothers: kept for backward compatibility.  Lines whose class is
            not in ``class_sets`` (or is ``'dontcare'``) are skipped whatever
            its value, so it does not change the output.

    Returns:
        ``(doc, objs)``: the ``xml.dom.minidom.Document`` and a list of dicts
        with keys ``class``, ``box`` (8 floats), ``truncation``,
        ``difficult`` and ``occlusion``.

    Raises:
        IndexError: if a non-blank line has fewer than 9 fields.
        ValueError: if a coordinate field is not numeric.
    """
    doc = Document()

    def append_xml_node_attr(child, parent=None, text=None):
        ele = doc.createElement(child)
        if text is not None:
            ele.appendChild(doc.createTextNode(text))
        (doc if parent is None else parent).appendChild(ele)
        return ele

    img_name = name + '.jpg'

    # create header
    annotation = append_xml_node_attr('annotation')
    append_xml_node_attr('folder', parent=annotation, text='tianchi')
    append_xml_node_attr('filename', parent=annotation, text=img_name)
    source = append_xml_node_attr('source', parent=annotation)
    append_xml_node_attr('database', parent=source, text='coco_text_database')
    append_xml_node_attr('annotation', parent=source, text='tianchi')
    append_xml_node_attr('image', parent=source, text='tianchi')
    append_xml_node_attr('flickrid', parent=source, text='000000')
    owner = append_xml_node_attr('owner', parent=annotation)
    append_xml_node_attr('name', parent=owner, text='ms')
    size = append_xml_node_attr('size', annotation)
    append_xml_node_attr('width', size, str(img_size[1]))
    append_xml_node_attr('height', size, str(img_size[0]))
    depth = img_size[2] if len(img_size) > 2 else 1
    append_xml_node_attr('depth', size, str(depth))
    append_xml_node_attr('segmented', parent=annotation, text='0')

    # create objects
    objs = []
    point_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')
    for line in lines:
        fields = line.strip().lower().split()
        if not fields:
            continue  # blank line
        cls = fields[0]
        if cls not in class_sets or cls == 'dontcare':
            continue

        # Coordinates are shifted by one, as in the original script.
        x0, y0, x1, y1, x2, y2, x3, y3 = [
            int(float(fields[i]) + 1) for i in range(1, 9)
        ]
        occlusion = 0
        truncation = float(0)
        difficult = 1 if _is_hard(cls, truncation, occlusion, x1, y1, x2, y2) else 0
        truncted = 0 if truncation < 0.5 else 1

        obj = append_xml_node_attr('object', parent=annotation)
        append_xml_node_attr('name', parent=obj, text=cls)
        append_xml_node_attr('pose', parent=obj, text='none')
        append_xml_node_attr('truncated', parent=obj, text=str(truncted))
        append_xml_node_attr('difficult', parent=obj, text=str(int(difficult)))
        bb = append_xml_node_attr('bndbox', parent=obj)
        # Each corner gets its own tag; the tags must be unique so that a
        # reader can look every coordinate up by name.
        for tag, value in zip(point_tags, (x0, y0, x1, y1, x2, y2, x3, y3)):
            append_xml_node_attr(tag, parent=bb, text=str(value))

        objs.append({
            'class': cls,
            'box': np.asarray([x0, y0, x1, y1, x2, y2, x3, y3], dtype=float),
            'truncation': truncation,
            'difficult': difficult,
            'occlusion': occlusion,
        })

    return doc, objs


def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2):
    """Return True when an object should be flagged as difficult."""
    if y2 - y1 < 25 and occlusion >= 2:
        return True
    if occlusion >= 3:
        return True
    return truncation > 0.8


def build_voc_dirs(outdir):
    """Create the VOC directory tree under ``outdir``.

    Returns:
        ``(annotations_dir, jpeg_images_dir, image_sets_main_dir)``.
    """
    subdirs = (
        (),
        ('Annotations',),
        ('ImageSets',),
        ('ImageSets', 'Layout'),
        ('ImageSets', 'Main'),
        ('ImageSets', 'Segmentation'),
        ('JPEGImages',),
        ('SegmentationClass',),
        ('SegmentationObject',),
    )
    for parts in subdirs:
        directory = os.path.join(outdir, *parts)
        if not os.path.exists(directory):
            os.makedirs(directory)
    return (os.path.join(outdir, 'Annotations'),
            os.path.join(outdir, 'JPEGImages'),
            os.path.join(outdir, 'ImageSets', 'Main'))


def main():
    """Convert `re_image` + `label_tmp` into TEXTVOC/VOC2007."""
    # Imported here so generate_xml() and build_voc_dirs() stay importable
    # without OpenCV installed.
    import cv2

    _outdir = 'TEXTVOC/VOC2007'
    _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir)
    _doncateothers = bool(1)
    class_sets = ('tianchi', 'dontcare')

    for dset in ['train']:
        _labeldir = 'label_tmp'
        _imagedir = 're_image'
        allclasses = {}
        handles = []
        try:
            # One "<class>_<dset>.txt" per class, plus the overall list.
            fs = {}
            for cls in class_sets:
                fs[cls] = open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w')
                handles.append(fs[cls])
            ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w')
            handles.append(ftrain)

            for file in sorted(glob.glob(os.path.join(_labeldir, '*.txt'))):
                stem = os.path.splitext(os.path.basename(file))[0]
                with open(file, 'r') as f:
                    lines = f.readlines()
                img_file = os.path.join(_imagedir, stem + '.jpg')
                print(img_file)
                img = cv2.imread(img_file)
                if img is None:
                    print('Skip ' + img_file + ': image can not be read.')
                    continue
                doc, objs = generate_xml(stem, lines, img.shape,
                                         class_sets=class_sets,
                                         doncateothers=_doncateothers)
                cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img)
                xmlfile = os.path.join(_dest_label_dir, stem + '.xml')
                with open(xmlfile, 'w') as f:
                    f.write(doc.toprettyxml(indent=' '))
                ftrain.writelines(stem + '\n')

                for obj in objs:
                    cls = obj['class']
                    allclasses[cls] = allclasses.get(cls, 0) + 1

                cls_in_image = set(o['class'] for o in objs)
                for cls in class_sets:
                    flag = ' 1\n' if cls in cls_in_image else ' -1\n'
                    fs[cls].writelines(stem + flag)
        finally:
            # Close (and thereby flush) every list file before it is copied.
            for handle in handles:
                handle.close()

        print('~~~~~~~~~~~~~~~~~~~')
        print(allclasses)
        print('~~~~~~~~~~~~~~~~~~~')

    # val and trainval are plain copies of the train lists.
    shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'),
                    os.path.join(_dest_set_dir, 'val.txt'))
    shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'),
                    os.path.join(_dest_set_dir, 'trainval.txt'))
    for cls in class_sets:
        shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                        os.path.join(_dest_set_dir, cls + '_trainval.txt'))
        shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                        os.path.join(_dest_set_dir, cls + '_val.txt'))


if __name__ == '__main__':
    main()