2.1 数据整理(yolov5,Voc,数据划分等)
数据整理(yolov5,Voc,数据划分等)
1 重新分布数据

"""Reorganize a flat dataset directory into images/ and xmllabels/ folders.

Input layout::

    init_dir
        01.jpg
        02.xml

Output layout::

    target_dir
        images/01.jpg
        xmllabels/02.xml

Files are only copied here; no label-format conversion is performed by this
script.
"""
import os
import shutil
import datetime

try:
    from PIL import Image
except ImportError:
    # Pillow is only used by the optional copy_pic helper; the main copy
    # workflow below relies on shutil alone.
    Image = None


def separate_data(init_dir, target_dir):
    """Copy the jpg and xml files found directly in init_dir into target_dir.

    Files ending in ".jpg" go to ``target_dir/images`` and files ending in
    ".xml" go to ``target_dir/xmllabels`` (extension match is
    case-insensitive). Every other entry is reported and skipped.

    Args:
        init_dir: Directory that holds the mixed jpg/xml files.
        target_dir: Destination root; created (with parents) when missing.
    """
    print("Start Time: ", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    jpg_list = []
    xml_list = []
    for file in os.listdir(init_dir):
        lowered = file.lower()
        # Match on the full extension, dot included, so that a name such as
        # "fakejpg" is not mistaken for an image.
        if lowered.endswith(".jpg"):
            jpg_list.append(file)
        elif lowered.endswith(".xml"):
            xml_list.append(file)
        else:
            print(f"{file} 不满足收集条件,已经被过滤")
    print(f"collect jpg : {len(jpg_list)}")
    print(f"collect xml : {len(xml_list)}")

    mk_dir(target_dir, "images")
    mk_dir(target_dir, "xmllabels")
    images_dir = os.path.join(target_dir, "images")
    labels_dir = os.path.join(target_dir, "xmllabels")
    for jpg in jpg_list:
        copy_file(init_dir, jpg, images_dir)
    for xml in xml_list:
        copy_file(init_dir, xml, labels_dir)
    print("End Time: ", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("整理 jpg,xml文件成功!!", "\n")


def mk_dir(target_dir, creat_dir):
    """Create ``target_dir/creat_dir`` together with any missing parents.

    Args:
        target_dir: Parent directory (created as well when absent).
        creat_dir: Name of the sub-directory to create inside target_dir.
    """
    new_path = os.path.join(target_dir, creat_dir)
    if not os.path.exists(new_path):
        # makedirs also builds missing ancestors of target_dir, which a
        # plain os.mkdir cannot do.
        os.makedirs(new_path, exist_ok=True)
        print("创建new文件夹成功: ", new_path)


def copy_pic(old_dir, filename, new_dir):
    """Re-save an image into new_dir via Pillow (currently unused).

    Images in "RGBA" or "P" mode are converted to "RGB" before saving.
    Failures are reported on stdout and do not raise.

    Args:
        old_dir: Directory containing the source image.
        filename: Image file name inside old_dir.
        new_dir: Directory that receives the re-saved image.
    """
    if Image is None:
        print("Error", filename, "fail:", "Pillow is not installed")
        return
    try:
        filepath = os.path.join(old_dir, filename)
        # The context manager closes the underlying file handle.
        with Image.open(filepath) as image:
            if image.mode in ("RGBA", "P"):
                converted = image.convert("RGB")
            else:
                converted = image
            converted.save(os.path.join(new_dir, filename))
    except Exception as e:  # best effort: report and continue
        print("Error", filename, "fail:", e)


def copy_file(old_dir, filename, new_dir):
    """Copy ``old_dir/filename`` into new_dir, reporting failures on stdout.

    Args:
        old_dir: Directory containing the source file.
        filename: Name of the file to copy.
        new_dir: Destination directory.
    """
    source_file = os.path.join(old_dir, filename)
    try:
        shutil.copy(source_file, new_dir)
    except OSError as e:  # shutil.Error derives from OSError
        print(f"copy {filename} fail:", e)


if __name__ == '__main__':
    init_dir = "/home/data/918"
    target_dir = "/project/train/src_repo/datasets/918"
    separate_data(init_dir, target_dir)
2 voc_to_yolo

"""Convert VOC XML label files into YOLO txt label files."""
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import shutil


def del_file(path):
    """Remove every file below path, keeping the directory tree itself.

    Args:
        path: Directory whose files are deleted recursively.
    """
    for entry in os.listdir(path):
        c_path = os.path.join(path, entry)
        if os.path.isdir(c_path):
            del_file(c_path)
        else:
            os.remove(c_path)


def del_dir(del_dir):
    """Delete the directory if it exists, then recreate it empty.

    Args:
        del_dir: Directory to reset.
    """
    if os.path.exists(del_dir):
        shutil.rmtree(del_dir)
        print("删除文件夹:", del_dir)
    # makedirs so that a missing parent does not abort the reset.
    os.makedirs(del_dir, exist_ok=True)
    print("新建voc2007空文件夹 Success!")


def convert(size, box):
    """Turn a pixel box into center/size values divided by the image size.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the bounding box.

    Returns:
        ``(x_center, y_center, width, height)`` where the x values are
        divided by the image width and the y values by the image height.
    """
    x_center = (box[0] + box[1]) / 2.0
    y_center = (box[2] + box[3]) / 2.0
    x = x_center / size[0]
    y = y_center / size[1]
    w = (box[1] - box[0]) / size[0]
    h = (box[3] - box[2]) / size[1]
    return (x, y, w, h)


def convert_annotation(xml_files_path, save_txt_files_path, classes):
    """Write one YOLO txt file per XML file found in xml_files_path.

    The output directory is created when missing and emptied of files
    before conversion. Objects whose name is not in classes are dropped;
    the class id written is the index of the name within classes.

    Args:
        xml_files_path: Directory containing the VOC XML files.
        save_txt_files_path: Directory that receives the txt files.
        classes: Sequence of class names to keep.
    """
    if not os.path.exists(save_txt_files_path):
        os.makedirs(save_txt_files_path, exist_ok=True)
        print("创建文件夹成功", save_txt_files_path)
    del_file(save_txt_files_path)  # start from an empty output directory

    # Only parse XML files; any other entry would make ET.parse fail.
    xml_files = [
        name for name in os.listdir(xml_files_path)
        if name.lower().endswith(".xml")
    ]
    for xml_name in xml_files:
        xml_file = os.path.join(xml_files_path, xml_name)
        # splitext keeps dots inside the stem ("img.v2.xml" -> "img.v2").
        stem = os.path.splitext(xml_name)[0]
        out_txt_path = os.path.join(save_txt_files_path, stem + '.txt')

        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        lines = []
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

        # The with-block guarantees the handle is flushed and closed.
        with open(out_txt_path, 'w') as out_txt_f:
            out_txt_f.writelines(lines)
    print("voc-->yolov success!", f"{len(xml_files)}", "\n")


if __name__ == "__main__":
    # 1. Classes to convert - adjust to your own class names.
    classes = ['head']
    # 2. Directory holding the VOC XML labels.
    xml_files1 = r'/project/train/src_repo/datasets/918/xmllabels'
    # 3. Directory that receives the YOLO txt labels.
    save_txt_files1 = r'/project/train/src_repo/datasets/918/labels'
    convert_annotation(xml_files1, save_txt_files1, classes)
3 collect_images_Proportion.py

#!\Users\Local\Programs\Python37
# -*- coding: utf-8 -*-
"""Collect image paths from several dataset folders into train.txt / val.txt.

For every folder below the base directory, the jpg paths found in its
``images`` sub-folder are split 4:1: the first 80% of the listing is
appended to train.txt and the rest to val.txt.
"""
import os
import random


def write_imgpathTotxt(imgpath_list, train_file, val_file):
    """Append the first 80% of the paths to train_file, the rest to val_file.

    The split follows the order of imgpath_list; no shuffling is applied.
    Both files are opened in append mode, so both exist afterwards even
    when one of the shares is empty.

    Args:
        imgpath_list: List of image paths to distribute.
        train_file: Path of the training list file.
        val_file: Path of the validation list file.
    """
    split_at = int(len(imgpath_list) * 0.8)
    train_items = imgpath_list[:split_at]
    val_items = imgpath_list[split_at:]
    # Open each file once instead of reopening val_file for every item.
    with open(file=train_file, mode="a", encoding="utf-8") as f1, \
            open(file=val_file, mode="a", encoding="utf-8") as f2:
        f1.writelines(item + "\n" for item in train_items)
        f2.writelines(item + "\n" for item in val_items)
    print(f"TrainDataset-{train_file} :+{len(train_items)}")
    print(f"ValDataset-{val_file} :+{len(val_items)}")
    print("###################################")


def del_file(path):
    """Remove every file below path, keeping the directory tree itself.

    Args:
        path: Directory whose files are deleted recursively.
    """
    for entry in os.listdir(path):
        c_path = os.path.join(path, entry)
        if os.path.isdir(c_path):
            del_file(c_path)
        else:
            os.remove(c_path)


def _count_lines(path):
    """Return the number of lines in path, or 0 when the file is missing."""
    if not os.path.exists(path):
        return 0
    with open(path, encoding="utf-8") as handle:
        return sum(1 for _ in handle)


def count_train_val_txt(train_file, val_file):
    """Print how many lines train_file and val_file contain.

    Args:
        train_file: Path of the training list file.
        val_file: Path of the validation list file.
    """
    print("\n#Over,Collect Successful !!")
    print(f"train_txt-total img: {_count_lines(train_file)}")
    print(f"val_txt-total img: {_count_lines(val_file)}")


def collect_filepath(base_dir, train_file, val_file, imagestxt_dir):
    """Gather jpg paths from ``base_dir/*/images`` into the two list files.

    imagestxt_dir is created when missing and emptied of files first.
    Entries of base_dir without an ``images`` directory are reported and
    skipped.

    Args:
        base_dir: Directory containing one sub-folder per dataset.
        train_file: Path of the training list file.
        val_file: Path of the validation list file.
        imagestxt_dir: Directory that is created and cleared before writing.
    """
    if not os.path.exists(imagestxt_dir):
        os.makedirs(imagestxt_dir, exist_ok=True)
    del_file(imagestxt_dir)

    img_file_dict = {}
    for dir_name in os.listdir(base_dir):
        target_dirpath = f"{base_dir}/{dir_name}/images"
        if not os.path.isdir(target_dirpath):
            # Stray files or folders without images/ must not abort the run.
            print(f"skip {dir_name}: no images directory")
            continue
        # Case-insensitive ".jpg" match so that ".JPG" files are included.
        img_file_dict[dir_name] = [
            os.path.join(target_dirpath, file)
            for file in os.listdir(target_dirpath)
            if file.lower().endswith(".jpg")
        ]
        print(f"start colllect 文件夹{dir_name}/images :{len(img_file_dict.get(dir_name))}")
        write_imgpathTotxt(img_file_dict.get(dir_name), train_file, val_file)
    count_train_val_txt(train_file, val_file)


if __name__ == '__main__':
    base_dir = "/project/train/src_repo/datasets"  # directory containing the datasets
    imagestxt_dir = "/project/train/src_repo/images_txt"
    train_file = "/project/train/src_repo/images_txt/train.txt"  # 4 parts
    val_file = "/project/train/src_repo/images_txt/val.txt"  # 1 part
    # start
    collect_filepath(base_dir, train_file, val_file, imagestxt_dir)
作者:华王
博客:https://www.cnblogs.com/huahuawang/
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人