Converting The Oxford-IIIT Pet Dataset to YOLO-format labels
This script splits the dataset into train/val/test and generates detection boxes from the Oxford-IIIT Pet Dataset trimaps. Images containing multiple objects, and images for which no bounding box could be recovered, were removed by hand beforehand. Each output label file stores the class name ("cat" or "dog"), the normalized body box derived from the trimap, and the normalized head box taken from the corresponding Pascal VOC XML annotation.
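The core idea is simple: in a trimap, the script treats pixel value 1 as the pet region, so the body box is just the extent of those pixels. A minimal sketch (assuming the dataset archives are unpacked into the working directory; Abyssinian_1 is used purely as an example file name):

import cv2
import numpy as np

# Read the trimap as a single-channel image; value 1 marks the pet pixels.
trimap = cv2.imread('annotations/trimaps/Abyssinian_1.png', 0)
ys, xs = np.where(trimap == 1)                            # coordinates of pet pixels
x1, y1, x2, y2 = xs.min(), ys.min(), xs.max(), ys.max()   # tight box: top-left, bottom-right
print(x1, y1, x2, y2)

The full script follows.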
import os
import os.path as osp
import shutil

import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


def xyxy2xywh(x, shape):
    # Convert a box from [x1, y1, x2, y2] to [x, y, w, h], where xy1 = top-left, xy2 = bottom-right
    y = list(np.copy(x))
    y[0] = (x[0] + x[2]) / 2  # x center
    y[1] = (x[1] + x[3]) / 2  # y center
    y[2] = x[2] - x[0]        # width
    y[3] = x[3] - x[1]        # height
    # Normalize to [0, 1] by image size (shape is h, w)
    y[0] = round(y[0] / shape[1], 4)
    y[1] = round(y[1] / shape[0], 4)
    y[2] = round(y[2] / shape[1], 4)
    y[3] = round(y[3] / shape[0], 4)
    return y


def GetAnnotBoxLoc(AnotPath):
    # Parse the Pascal VOC xml annotation and return the head bounding box
    tree = ET.ElementTree(file=AnotPath)
    root = tree.getroot()
    ObjectSet = root.findall('object')
    bbox_list = []
    for Object in ObjectSet:
        BndBox = Object.find('bndbox')
        x1 = int(BndBox.find('xmin').text)
        y1 = int(BndBox.find('ymin').text)
        x2 = int(BndBox.find('xmax').text)
        y2 = int(BndBox.find('ymax').text)
        bbox_list.append([x1, y1, x2, y2])
    assert len(bbox_list) == 1, 'number of bboxes is not 1'
    return bbox_list


def deal_data(data, mode):
    cat_number = 0
    dog_number = 0
    # Make sure the output directories exist
    os.makedirs(osp.join('../Pet/images', mode), exist_ok=True)
    os.makedirs(osp.join('../Pet/labels', mode), exist_ok=True)
    for img_name in np.array(data)[:, :3]:
        trimaps_img_path = osp.join('annotations/trimaps', img_name[0] + '.png')
        AnotPath = osp.join('annotations/xmls', img_name[0] + '.xml')
        in_img_path = osp.join('images', img_name[0] + '.jpg')
        if os.path.exists(trimaps_img_path) and os.path.exists(AnotPath) and os.path.exists(in_img_path):
            print(in_img_path)
            if img_name[2] == 1:  # cat
                cat_number += 1
                # cls = str(3)
                cls = 'cat'
            else:                 # dog
                dog_number += 1
                # cls = str(4)
                cls = 'dog'
            trimaps_img = cv2.imread(trimaps_img_path, 0)
            n = 1  # trimap value 1 marks the pet (foreground) pixels
            xy = np.where(trimaps_img == n)
            y = xy[0]
            x = xy[1]
            x1, y1, x2, y2 = x.min(), y.min(), x.max(), y.max()  # cat/dog body bbox: top-left, bottom-right
            # original_img = cv2.imread(in_img_path)
            # temp_image = cv2.rectangle(original_img, (x1, y1), (x2, y2), (0, 0, 255), 2)  # (top-left, bottom-right)
            shape = trimaps_img.shape  # h, w
            body_bbox = xyxy2xywh((x1, y1, x2, y2), shape)
            bbox_list = GetAnnotBoxLoc(AnotPath)
            with open(osp.join('../Pet/labels/' + mode, 'Oxford_IIIT_Pet_' + img_name[0] + '.txt'),
                      mode="w", encoding="utf-8") as f:
                f.write(cls + ' ')
                for head_bbox in bbox_list:
                    # temp_image = cv2.rectangle(temp_image, (head_bbox[0], head_bbox[1]), (head_bbox[2], head_bbox[3]), (0, 0, 255), 2)
                    head_bbox = xyxy2xywh(head_bbox, shape)
                    for bbx in body_bbox + head_bbox:
                        f.write(str(bbx) + ' ')
                    f.write('\n')
            # plot_img_path = osp.join('plot_body_images/', img_name[0] + '.jpg')
            # cv2.imwrite(plot_img_path, temp_image)
            out_img_path = osp.join('../Pet/images/' + mode, 'Oxford_IIIT_Pet_' + img_name[0] + '.jpg')
            shutil.copy(in_img_path, out_img_path)
    return cat_number, dog_number


def break_data(origin_dataset, rate=0.2):
    train_data, test_data = train_test_split(origin_dataset, test_size=rate)
    train_data, eval_data = train_test_split(train_data, test_size=rate)
    train_cat_number, train_dog_number = deal_data(train_data, mode='train')
    val_cat_number, val_dog_number = deal_data(eval_data, mode='val')
    test_cat_number, test_dog_number = deal_data(test_data, mode='test')
    print('train_data:', len(train_data))
    print('test_data:', len(test_data))
    print('eval_data:', len(eval_data))
    print('cat_number:', train_cat_number + val_cat_number + test_cat_number,
          'dog_number:', train_dog_number + val_dog_number + test_dog_number)
    print(train_cat_number + val_cat_number + test_cat_number
          + train_dog_number + val_dog_number + test_dog_number)


if __name__ == "__main__":
    # list.txt starts with a few '#' header lines describing the columns, so skip them
    data = pd.read_csv('annotations/list.txt', header=None, sep=' ', comment='#')
    break_data(origin_dataset=data, rate=0.1)
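To spot-check the conversion (the commented-out cv2.rectangle calls above served the same purpose), a small optional sketch like the one below can redraw one converted sample from its label file. The file name Oxford_IIIT_Pet_Abyssinian_1 is only a placeholder and depends on which images end up in the train split:

import cv2

img = cv2.imread('../Pet/images/train/Oxford_IIIT_Pet_Abyssinian_1.jpg')
h, w = img.shape[:2]

with open('../Pet/labels/train/Oxford_IIIT_Pet_Abyssinian_1.txt', encoding='utf-8') as f:
    cls, *vals = f.read().split()
vals = list(map(float, vals))  # body box (x, y, w, h) then head box (x, y, w, h), all normalized

for cx, cy, bw, bh in (vals[0:4], vals[4:8]):
    # Convert a normalized center-size box back to pixel corner coordinates and draw it
    x1 = int((cx - bw / 2) * w)
    y1 = int((cy - bh / 2) * h)
    x2 = int((cx + bw / 2) * w)
    y2 = int((cy + bh / 2) * h)
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

cv2.imwrite('check_' + cls + '.jpg', img)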