yolov5训练前准备工作(采样、制作数据集、数据集增强、数据集预处理)
写在前面
训练数据可以有多种输入方式,本文提到了其中一种。使用的时候注意工作路径。
使用方法:
收集图片,或使用video_2_jpg.py对视频抽帧采样(可以用cam_video.py录制视频,用get_img.py拍摄照片)
批量重命名图片,最好都是数字
把图片放在全英文路径下,开始用LabelImg标注,生成xml文件
把标注图片和文件分别放在images、annotations两个文件夹
用data_agumentation.py进行数据增强,同时生成图片和xml
用png_to_jpg.py把可能有的png转化为jpg
把图片放在yolov5s-master文件夹的datasets/images文件夹下,把标注放在datasets/annotations下,用split_train_val.py划分训练集和验证集
用voc_label.py把datasets/annotations文件夹下的xml转换为datasets/labels文件夹下的txt,并把分割数据集输出的图片名称列表转换为图片路径列表,这些路径列表会直接输入到yolo;yolo应该是默认在图片目录的父目录下的labels文件夹中查找标注
修改模型yaml文件里的类别数量和名称
修改data的yaml文件,大致如下
path: datasets # dataset root dir
train: train.txt # train images (relative to 'path')
val: val.txt # val images (relative to 'path')
nc: 19 # number of classes
names: [自己的类别名]
代码
下面是相应文件的代码
get_img.py
# coding:utf-8
import cv2
import numpy as np
import time
import os

# Index passed to cv2.VideoCapture: 0 is normally the built-in camera and
# 1, 2, ... select additional cameras. This script opens camera 1.
cap = cv2.VideoCapture(1)

# Camera intrinsics and distortion coefficients used to undistort each frame.
cameraMatrix = np.matrix([[804.4703, -4.7160, 404.5110], [0, 799.1279, 351.8036], [0, 0, 1]])
distCoeffs = np.matrix([[-0.5834], [0.7615], [0.0026], [0.0107], [0]])
R = np.identity(3)

# cv2.imwrite only returns False when the target directory is missing, so
# make sure the output directory exists before the first snapshot.
os.makedirs("new_data", exist_ok=True)

# The undistortion maps depend only on the frame size, so they are built once
# and rebuilt only if the camera starts delivering a different resolution.
map1 = map2 = None
map_size = None

try:
    while True:
        # Grab one frame from the camera.
        success, img = cap.read()
        if not success or img is None:
            print("Failed to read a frame from the camera")
            break

        timestamp = int(round(time.time() * 1000))  # millisecond timestamp
        img_path = f"new_data/{timestamp}.jpg"

        # OpenCV size arguments are (width, height); img.shape is (rows, cols, ...).
        img_size = (img.shape[1], img.shape[0])
        if map_size != img_size:
            newCameraMatrix, _ = cv2.getOptimalNewCameraMatrix(
                cameraMatrix, distCoeffs, img_size, 1, img_size, 0)
            map1, map2 = cv2.initUndistortRectifyMap(
                cameraMatrix, distCoeffs, R, newCameraMatrix, img_size, cv2.CV_16SC2)
            map_size = img_size

        rectified_img = cv2.remap(img, map1, map2, cv2.INTER_LINEAR)

        # Live preview of the undistorted frame.
        cv2.imshow('----------please enter "s" to take a picture----------', rectified_img)

        # Poll the keyboard for 1 ms so the preview keeps refreshing.
        k = cv2.waitKey(1)
        if k == 27:
            # ESC (ASCII 27) ends the capture session.
            break
        elif k == ord("s"):
            # "s" saves the current undistorted frame and keeps running.
            if not cv2.imwrite(img_path, rectified_img):
                print("Failed to save " + img_path)
finally:
    # Always release the camera and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
cam_video.py
# -*- coding: UTF-8 -*-
import cv2
import os
import time
# Record the default camera (index 0) to a timestamped .mp4 in the current
# working directory until "q" is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
size = (int(width), int(height))
# Other possible codes include DIVX, XVID, MJPG, X264, WMV1, WMV2.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
path = os.getcwd()
os.makedirs(path, exist_ok=True)

# Build the output path with os.path.join so the separator is correct on
# every platform (the result is unchanged on Windows).
video_file = os.path.join(
    path, time.strftime(r"%Y-%m-%d_%H-%M-%S", time.localtime()) + '.mp4')
# VideoWriter that receives every captured frame at 24 fps.
out = cv2.VideoWriter(video_file, fourcc, 24.0, size)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("No frame")
            break
        cv2.imshow('frame', frame)
        out.write(frame)
        if cv2.waitKey(1) == ord('q'):  # press "q" to stop recording
            break
finally:
    # Release the devices even if the loop is interrupted by an exception,
    # otherwise the video file may be left unfinalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
video_2_jpg.py
# -*- coding: UTF-8 -*-
import cv2
import os
# Sample every `interval`-th frame of each .mp4 in `filepath` and store the
# frames as numbered JPEG files in one shared output folder.
filepath = r'./cam_video/'  # directory that holds the source videos
pathDir = os.listdir(filepath)
save_path = './cam_img_data'  # shared output folder for all videos
if not os.path.exists(save_path):
    os.makedirs(save_path)

cnt = 1        # running frame counter, shared across all videos
i = 4000       # running image number; the first saved file is 014001.jpg
interval = 5   # keep one frame out of every `interval`

for Dir in pathDir:
    video_path = filepath + Dir
    if video_path[-4:] != '.mp4':
        continue
    print(video_path)
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print('fail to open')
        video.release()
        continue
    print('open successfully')

    while True:
        rval, frame = video.read()
        # At end of stream read() returns (False, None); stop before the frame
        # is handed to cv2.flip, which would raise on None. Reading inside the
        # loop also means the first frame of the video is no longer discarded.
        if not rval or frame is None:
            break
        if cnt % interval == 0:
            i += 1
            # The camera was mounted upside down, so flip vertically.
            frame = cv2.flip(frame, 0)
            out_file = save_path + '/01{}.jpg'.format(i)
            # imwrite reports failure through its return value.
            if not cv2.imwrite(out_file, frame):
                print('fail to write ' + out_file)
        # The counter advances for every frame, whether or not it was saved.
        cnt += 1

    video.release()
    print('write successfully')
data_agumentation.py
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
import numpy as np
from PIL import Image
import shutil
import imgaug as ia
from imgaug import augmenters as iaa
#【注意】文件命名为数字,xml文件别带中文,用labelimg标注的时候图片路径放在没有中文的地方
ia.seed(42)
def read_xml_annotation(root, image_id):
    """Read all bounding boxes from an XML annotation file.

    Args:
        root: Directory that contains the annotation file.
        image_id: File name of the annotation, including its extension.

    Returns:
        A list with one ``[xmin, ymin, xmax, ymax]`` list of ints per
        ``<object>`` element, in document order. The list is empty when the
        annotation contains no ``<object>`` element.
    """
    # ET.parse opens and closes the file itself, so no handle is leaked.
    xml_root = ET.parse(os.path.join(root, image_id)).getroot()

    bndboxlist = []
    for obj in xml_root.findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        xmax = int(bndbox.find('xmax').text)
        ymin = int(bndbox.find('ymin').text)
        ymax = int(bndbox.find('ymax').text)
        bndboxlist.append([xmin, ymin, xmax, ymax])
    return bndboxlist
# (506.0000, 330.0000, 528.0000, 348.0000) -> (520.4747, 381.5080, 540.5596, 398.6603)
def change_xml_annotation(root, image_id, new_target):
    """Replace the first object's bounding box and save a zero-padded copy.

    Reads ``<root>/<image_id>.xml``, overwrites the ``bndbox`` coordinates of
    the first ``<object>`` element and writes the result to
    ``<root>/<image_id as 6 digits>.xml``.

    Args:
        root: Directory holding the annotation; the result is written there too.
        image_id: Numeric stem of the annotation file (int or numeric string).
        new_target: Sequence ``(xmin, ymin, xmax, ymax)`` with the new box.

    Raises:
        ValueError: If ``image_id`` is not numeric.
        AttributeError: If the annotation has no ``<object>``/``<bndbox>``.
    """
    new_xmin = new_target[0]
    new_ymin = new_target[1]
    new_xmax = new_target[2]
    new_ymax = new_target[3]

    # Here `root` is a directory; the XML root element is fetched from the tree.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    bndbox = tree.getroot().find('object').find('bndbox')
    bndbox.find('xmin').text = str(new_xmin)
    bndbox.find('ymin').text = str(new_ymin)
    bndbox.find('xmax').text = str(new_xmax)
    bndbox.find('ymax').text = str(new_ymax)

    # The previous expression applied "%06d" to a string built from the builtin
    # `id`, which always raised TypeError. The output name is now derived from
    # image_id, zero-padded like the names change_xml_list_annotation produces.
    tree.write(os.path.join(root, "%06d" % int(image_id) + '.xml'))
def change_xml_list_annotation(root, image_id, new_target, saveroot, id):
    """Write a copy of an annotation with new boxes under a new numeric id.

    Reads ``<root>/<image_id>.xml``, points its ``<filename>`` element (when
    present) at ``<id as 6 digits>.jpg``, replaces the ``bndbox`` coordinates
    of the n-th ``<object>`` with ``new_target[n]`` and writes the result to
    ``<saveroot>/<id as 6 digits>.xml``.

    Args:
        root: Directory that holds the source annotation.
        image_id: Stem of the source annotation file.
        new_target: One ``[xmin, ymin, xmax, ymax]`` entry per ``<object>``,
            in document order.
        saveroot: Directory that receives the new annotation.
        id: Numeric id (int or numeric string) used for the output names.

    Raises:
        IndexError: If ``new_target`` has fewer entries than there are objects.
    """
    # ET.parse manages the file handle itself, so nothing is left open.
    tree = ET.parse(os.path.join(root, str(image_id) + '.xml'))
    new_stem = "%06d" % int(id)

    # Keep the embedded image name in sync with the renamed image; tolerate
    # annotations that carry no <filename> element.
    elem = tree.find('filename')
    if elem is not None:
        elem.text = new_stem + '.jpg'

    for index, obj in enumerate(tree.getroot().findall('object')):
        bndbox = obj.find('bndbox')
        new_box = new_target[index]
        bndbox.find('xmin').text = str(new_box[0])
        bndbox.find('ymin').text = str(new_box[1])
        bndbox.find('xmax').text = str(new_box[2])
        bndbox.find('ymax').text = str(new_box[3])

    tree.write(os.path.join(saveroot, new_stem + '.xml'))
def mkdir(path):
    """Create the directory ``path`` (including parents) if it is missing.

    Surrounding whitespace and trailing ``/`` characters are removed from
    ``path`` before it is used.

    Returns:
        True when the directory was created, False when the path already
        existed.
    """
    target = path.strip().rstrip("/")

    # Guard clause: nothing to do when the path is already there.
    if os.path.exists(target):
        print(target + 'already exits ')
        return False

    os.makedirs(target)
    print(target + 'is successfully created!')
    return True
if __name__ == "__main__":
    # Augment every image in IMG_DIR together with its XML annotation.
    # Image names must be purely numeric: the stem is parsed with int() to
    # derive the names of the augmented copies.
    IMG_DIR = "new_data/image"
    XML_DIR = "new_data/annotation"
    AUG_XML_DIR = "argu_new_data/annotations"  # output folder for augmented XML files
    AUG_IMG_DIR = "argu_new_data/images"  # output folder for augmented images
    AUGLOOP = 7  # number of augmented copies generated per source image

    # Start every run from empty output folders.
    for out_dir in (AUG_XML_DIR, AUG_IMG_DIR):
        try:
            shutil.rmtree(out_dir)
        except FileNotFoundError:
            pass
        mkdir(out_dir)

    # Augmentation pipeline applied to each image and its boxes.
    seq = iaa.Sequential([
        iaa.Flipud(0.5),  # flip vertically with probability 0.5
        iaa.Fliplr(0.5),  # mirror horizontally with probability 0.5
        iaa.Multiply((1.2, 1.5)),  # brighten; does not affect bounding boxes
        iaa.GaussianBlur(sigma=(0, 2.0)),
        iaa.Affine(
            translate_px={"x": 15, "y": 15},
            scale=(0.8, 0.95),
            rotate=(-30, 30)
        )  # shift 15 px on both axes, scale to 80-95 %, rotate +-30 deg; affects boxes
    ])

    for root, sub_folders, files in os.walk(IMG_DIR):
        for name in files:
            # splitext copes with extensions that are not exactly 4 characters.
            stem, ext = os.path.splitext(name)
            xml_name = stem + '.xml'
            img_file = os.path.join(root, name)

            bndbox = read_xml_annotation(XML_DIR, xml_name)
            # The untouched original is part of the augmented data set too.
            shutil.copy(os.path.join(XML_DIR, xml_name), AUG_XML_DIR)
            shutil.copy(img_file, AUG_IMG_DIR)

            # The source image is the same for every epoch, so load it once.
            with Image.open(img_file) as pil_img:
                img = np.asarray(pil_img)
            img_h, img_w = img.shape[0], img.shape[1]

            for epoch in range(AUGLOOP):
                # Freeze the random parameters so the image and its boxes
                # receive exactly the same transformation.
                seq_det = seq.to_deterministic()

                # NOTE(review): ids of different source images can collide
                # once the image numbers span 1000 or more; the naming scheme
                # is kept unchanged here.
                new_id = len(files) + int(stem) + epoch * 1000

                # Augmented boxes as [[x1, y1, x2, y2], ...].
                new_bndbox_list = []
                for box in bndbox:
                    bbs = ia.BoundingBoxesOnImage([
                        ia.BoundingBox(x1=box[0], y1=box[1], x2=box[2], y2=box[3]),
                    ], shape=img.shape)
                    box_aug = seq_det.augment_bounding_boxes([bbs])[0].bounding_boxes[0]

                    # Clamp the transformed box to the image area.
                    n_x1 = int(max(1, min(img_w, box_aug.x1)))
                    n_y1 = int(max(1, min(img_h, box_aug.y1)))
                    n_x2 = int(max(1, min(img_w, box_aug.x2)))
                    n_y2 = int(max(1, min(img_h, box_aug.y2)))
                    # Keep a box that collapsed onto the low edge one pixel wide/high.
                    if n_x1 == 1 and n_x1 == n_x2:
                        n_x2 += 1
                    if n_y1 == 1 and n_y2 == n_y1:
                        n_y2 += 1
                    if n_x1 >= n_x2 or n_y1 >= n_y2:
                        print('error', name)
                    new_bndbox_list.append([n_x1, n_y1, n_x2, n_y2])

                # Save the augmented image. It is written directly: the former
                # zero-thickness draw call referenced `bbs`, which is undefined
                # (or stale) for an image without any box.
                image_aug = seq_det.augment_images([img])[0]
                Image.fromarray(image_aug).save(
                    os.path.join(AUG_IMG_DIR, "%06d" % new_id + ext))

                # Save the matching annotation and log the name actually written.
                change_xml_list_annotation(XML_DIR, stem, new_bndbox_list, AUG_XML_DIR, new_id)
                print("%06d" % new_id + '.xml')
png_to_jpg.py
from PIL import Image
import os
import shutil
if __name__ == '__main__':
    # Build ./jpg_images from ./images: PNG files are converted to JPEG,
    # JPEG files are copied with a normalized ".jpg" extension.
    path = './images'
    save_path = './jpg_images'
    os.makedirs(save_path, exist_ok=True)

    for name in os.listdir(path):
        filepath = os.path.join(path, name)
        # Sub-directories cannot be copied with shutil.copy; skip them.
        if not os.path.isfile(filepath):
            continue

        # splitext handles extensions of any length (e.g. ".jpeg"); the
        # comparison is case-insensitive so ".PNG" is converted as well.
        stem, ext = os.path.splitext(name)
        ext = ext.lower()
        save_filepath = os.path.join(save_path, stem + '.jpg')

        if ext == '.png':
            with Image.open(filepath) as img:
                # JPEG has no alpha channel, so convert to RGB first.
                img.convert('RGB').save(save_filepath, quality=95)
        elif ext in ('.jpg', '.jpeg'):
            shutil.copy(filepath, save_filepath)
        else:
            # Anything else is copied unchanged under its own name rather than
            # being relabelled ".jpg" without an actual conversion.
            print('not a png/jpg file, copied unchanged: ' + name)
            shutil.copy(filepath, os.path.join(save_path, name))
split_train_val.py
# coding:utf-8
import os
import random
import argparse
# Randomly split the annotated samples into trainval/train/val/test name lists.
parser = argparse.ArgumentParser()
# Directory holding the XML annotations; adjust to your own data set.
parser.add_argument('--xml_path', default='datasets/annotations', type=str, help='input xml label path')
# Directory that receives the split lists (ImageSets/Main of your data set).
parser.add_argument('--txt_path', default='datasets/ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 1.0  # share of all samples used for train + val (the rest is test)
train_percent = 0.9     # share of train + val used for training
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path

# Only XML files count as samples; stray files in the folder are ignored.
total_xml = sorted(f for f in os.listdir(xmlfilepath) if f.lower().endswith('.xml'))
os.makedirs(txtsavepath, exist_ok=True)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

# Sets give O(1) membership tests in the loop below.
trainval_ids = set(trainval)
train_ids = set(train)

# The with-statement closes (and flushes) all four lists even on error.
with open(txtsavepath + '/trainval.txt', 'w') as file_trainval, \
        open(txtsavepath + '/test.txt', 'w') as file_test, \
        open(txtsavepath + '/train.txt', 'w') as file_train, \
        open(txtsavepath + '/val.txt', 'w') as file_val:
    for i in list_index:
        # One sample name (file name without ".xml") per line.
        name = total_xml[i][:-4] + '\n'
        if i in trainval_ids:
            file_trainval.write(name)
            if i in train_ids:
                file_train.write(name)
            else:
                file_val.write(name)
        else:
            file_test.write(name)
voc_label.py
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
from os import getcwd
# Run this script from the yolov5-master directory. It converts the XML
# annotations to txt labels and works together with split_train_val.py, which
# produces the train/val split. All images are expected to be .jpg files.
# Avoid absolute paths here: they can introduce escape characters.
# Names of the split lists to process (one <name>.txt per entry).
sets = ['train', 'val', 'test']
# Class names; the position of a name in this list is its numeric class id.
classes = ['ad', 'ad1', 'ad2', 'bskl', 'dp', 'dp1', 'hn', 'hsfk', 'jdb', 'jdb1', 'lsfk', 'mf', 'mf1', 'qdpj', 'wlj', 'xb', 'xhpj', 'xhpj1', 'xhpj2']
# Disabled: resolve the working directory to build absolute image paths.
#abs_path = os.getcwd()
#print(abs_path)
def convert(size, box):
    """Convert a corner-style box to a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        Tuple ``(x, y, w, h)``: the box center shifted by -1 on both axes and
        the box width/height, with x-values divided by the image width and
        y-values divided by the image height.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])

    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]

    return center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h
def convert_annotation(image_id):
    """Convert one XML annotation into a txt label file.

    Reads ``datasets/annotations/<image_id>.xml`` and writes
    ``datasets/labels/<image_id>.txt`` with one line per object:
    ``<class index> <x> <y> <w> <h>``, where the class index is the position
    of the object's name in ``classes`` and the four numbers come from
    ``convert``. Objects whose name is not in ``classes`` are reported and
    skipped.

    Args:
        image_id: Stem shared by the annotation and the label file.
    """
    # Context managers close both files, so the label is always flushed.
    with open('datasets/annotations/%s.xml' % (image_id), encoding='UTF-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('datasets/labels/%s.txt' % (image_id), 'w') as out_file:
        for obj in root.iter('object'):
            # The "difficult" flag is not read from the XML; every object is kept.
            difficult = 0
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                print(image_id, " wrong class name:" + cls)
                continue
            cls_id = classes.index(cls)

            xmlbox = obj.find('bndbox')
            b1 = float(xmlbox.find('xmin').text)
            b2 = float(xmlbox.find('xmax').text)
            b3 = float(xmlbox.find('ymin').text)
            b4 = float(xmlbox.find('ymax').text)

            # Clamp boxes that reach outside the image, on both sides.
            if b1 < 0:
                b1 = 0
            if b3 < 0:
                b3 = 0
            if b2 > w:
                b2 = w
            if b4 > h:
                b4 = h

            bb = convert((w, h), (b1, b2, b3, b4))
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# For every split list: write the image paths consumed by YOLO and convert
# the matching XML annotations into txt labels.
wd = getcwd()  # not used below; kept so the module-level name stays available

# The label folder is needed once, not once per split.
os.makedirs('datasets/labels/', exist_ok=True)

for image_set in sets:
    # Sample names produced by split_train_val.py, one per line.
    with open('datasets/ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()

    with open('datasets/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # Paths are relative to the working directory (yolov5-master).
            list_file.write('datasets/images/%s.jpg\n' % (image_id))
            # Generate datasets/labels/<image_id>.txt. This call was commented
            # out, which left the labels folder empty.
            convert_annotation(image_id)