1 yolov5介绍

 

 yolov5介绍

 git 链接:https://github.com/ultralytics/yolov5

文档链接:https://docs.ultralytics.com/

bilibili视频:https://www.bilibili.com/video/BV1tf4y1t7ru?spm_id_from=333.1007.top_right_bar_window_custom_collection.content.click

 

#文章学习参考

1 https://zhuanlan.zhihu.com/p/172121380

2 yolov-数据集-资源等:https://blog.csdn.net/nan355655600/article/details/107852288

 

#Tools

1 标注工具:labelImg (pip install labelImg) 

 

 

 #Tutorials

1 detect.py

复制代码
if __name__ == '__main__':
    # Script entry point of detect.py: declare the command-line options,
    # parse them into the module-level ``opt`` and run detection.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') # model selection; released weights: https://github.com/ultralytics/yolov5/releases
    # parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam; path to a file (image or video) or an RTSP stream
    parser.add_argument('--source', type=str, default='data/video/niu.mp4', help='source')  # file/folder, 0 for webcam; the default here is a video file
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') # display cut-off: only detections with confidence >= 0.25 are shown
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') # at 0 only one of any intersecting boxes is kept; usually left unchanged
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    parser.add_argument('--view-img', action='store_true', help='display results')  # show results live; pass the flag at launch (python detect.py --view-img)
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') # save the labels
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') # save the confidences
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') # keep only the listed class(es)
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference') # augmented inference; original author's note: "prone to overfitting" -- NOTE(review): unverified claim
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name') # changes where results are stored
    parser.add_argument('--name', default='exp', help='save results to project/name') # name of the results folder, default "exp"
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    # Everything below runs with autograd disabled.
    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            # The loop target is the attribute itself, so each iteration
            # rebinds ``opt.weights`` before detect() is called.
            # NOTE(review): detect() presumably reads the module-level ``opt``;
            # its definition is not visible in this snippet.
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
View Code
复制代码

1.1 推理单个图片-返回json数据

 utils详见yolov5/utils

复制代码
import json
import time

import argparse
import os
import sys
from pathlib import Path
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
import subprocess

from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
from utils.augmentations import letterbox




####### Parameter settings

# Alternative weight paths kept for reference:
# weights = "/project/train/src_repo/yolov5s.pt"
# en_weights = "/project/train/src_repo/yolov5s.engine"
# weights = "/project/train/models/yolov5s.engine"
weights = "/project/train/models/last3-1.pt"  # checkpoint handed to export.py by init()
en_weights = "/project/train/models/last3-1.engine"  # engine file loaded by init()
imgsz = 864  # inference size used for export, warm-up and letterboxing
device = ''  # passed to select_device() in init()
stride = 32  # initial value; init() overwrites it with model.stride
names = ["head"]  # class names (not read by the code visible in this script)
conf_thres = 0.20  # confidence threshold passed to non_max_suppression()
iou_thres = 0.30  # IoU threshold passed to non_max_suppression()
prob_thres = 0.20  # process_image() drops detections whose confidence is below this

def init():
    """Export the ``.pt`` weights to an engine file, then load and warm up the engine.

    The export runs ``export.py`` in a subprocess; its outcome is only printed,
    and loading of ``en_weights`` is attempted regardless of the return code.

    Side effects:
        Sets the module globals ``device_obj``, ``stride`` and ``pt``, which
        ``process_image`` reads afterwards.

    Returns:
        The loaded ``DetectMultiBackend`` instance.
    """
    global device_obj, stride, pt

    # Convert the pt checkpoint into an engine model.
    export_cmd = f"sudo python3.8 /project/train/src_repo/export.py --half --weights {weights} --imgsz {imgsz}"
    completed = subprocess.run(
        export_cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        timeout=1200,
    )
    outcome = "success:" if completed.returncode == 0 else "error:"
    print(outcome, completed)

    device_obj = select_device(device)
    use_half = device_obj.type != 'cpu'  # half precision only supported on CUDA
    backend = DetectMultiBackend(en_weights, device=device_obj, dnn=False)
    stride, pt = backend.stride, backend.pt
    print(stride, pt, backend.jit, backend.engine)

    # Checks that the size is a multiple of the stride. The returned value is
    # deliberately not used: warm-up below runs at the configured ``imgsz``.
    check_img_size(imgsz, s=stride)
    backend.eval()  # inference mode (original note: disables BatchNorm updates and Dropout)
    backend.warmup(imgsz=(1, 3, imgsz, imgsz), half=use_half)  # [batch, channel, h, w] (NCHW)
    return backend
    
def process_image(handle=None, input_image=None, args=None, **kwargs):
    """Run detection on a single image and return the result as a JSON string.

    ``init()`` must have been called first: this function reads the module
    globals ``device_obj``, ``stride`` and ``pt`` that it sets.

    Args:
        handle: Model object returned by ``init()``; it is called directly on
            the preprocessed tensor.
        input_image: Image array, e.g. from ``cv2.imread`` (the conversion
            below assumes HWC / BGR layout -- TODO confirm for other callers).
        args: Unused; kept for interface compatibility.
        **kwargs: Unused.

    Returns:
        str: JSON with ``algorithm_data`` (``is_alert``, ``target_count`` and
        an always-empty ``target_info``) and ``model_data.objects``, one entry
        per kept detection with ``x``, ``y``, ``width``, ``height``,
        ``confidence`` and the fixed name ``"head"``.
    """
    # Resize/pad to the configured inference size.
    img = letterbox(input_image, imgsz, stride=stride, auto=pt)[0]

    # HWC to CHW, BGR to RGB, then move to the inference device as FP16.
    img = img.transpose((2, 0, 1))[::-1]
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device_obj)
    img = img.half()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if len(img.shape) == 3:
        img = img[None]  # add the batch dimension

    pred = handle(img, augment=False)
    pred = non_max_suppression(pred, conf_thres, iou_thres, None, agnostic=False)

    objects = []
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        # Rescale boxes from the letterboxed size back to the input image size.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], input_image.shape).round()
        for *xyxy, conf, _cls in det:
            if conf < prob_thres:
                continue
            xmin, ymin, xmax, ymax = xyxy
            objects.append({
                "x": int(xmin),
                "y": int(ymin),
                "width": int(xmax - xmin),
                "height": int(ymax - ymin),
                "confidence": float(conf),
                "name": "head",
            })

    count = len(objects)
    result = {
        "algorithm_data": {
            "is_alert": bool(count),
            "target_count": count,
            "target_info": [],
        },
        "model_data": {"objects": objects},
    }
    return json.dumps(result, indent=4)

if __name__ == '__main__':
    # Smoke test: run every sample image through the model, print each JSON
    # result and report the total elapsed time.
    import glob

    # Other image set used during development:
    #   "/project/train/src_repo/datasets/918/images/*.jpg"
    jpg_list = glob.glob("/project/train/src_repo/data/images/*.jpg")
    handle = init()
    start_time = time.time()
    for jpg in jpg_list:
        img = cv2.imread(jpg)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; skip it rather than crash inside process_image.
            print("skip unreadable image:", jpg)
            continue
        a = process_image(handle=handle, input_image=img)
        print(a)
    end_time = time.time()
    s = end_time - start_time
    print("spend _time:", s)
1 ji_tensorrt.py
复制代码
复制代码
import json
import time

import argparse
import os
import sys
from pathlib import Path
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
import subprocess

from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
from utils.augmentations import letterbox




####### Parameter settings
# Alternative weights kept for reference:
# weights = "/project/ev_sdk/src/models/yolov5s6.pt"
weights = "/project/train/src_repo/yolov5s.pt"  # checkpoint loaded by init()
imgsz = 960  # inference size passed to letterbox() as new_shape
device = '0'  # passed to select_device() in init()
stride = 32  # stride passed to letterbox() in process_image()
# stride = 64
names = ["head"]  # class names (not read by the code visible in this script)
conf_thres = 0.20  # confidence threshold passed to non_max_suppression()
iou_thres = 0.30  # IoU threshold passed to non_max_suppression()
prob_thres = 0.20  # process_image() drops detections whose confidence is below this

def init():
    """Load the PyTorch weights in half precision and return the model.

    Side effects:
        Sets the module globals ``device_obj`` and ``stride``. Publishing the
        model's own stride lets ``process_image`` letterbox with the stride of
        the network that was actually loaded instead of the hard-coded
        default. For a stride-32 model the value is unchanged.

    Returns:
        The loaded ``DetectMultiBackend`` instance, converted with ``half()``
        and switched to ``eval()``. No warm-up pass is run.
    """
    global device_obj, stride

    device_obj = select_device(device)
    model = DetectMultiBackend(weights, device=device_obj, dnn=False)
    stride = model.stride
    # Checks that the size is a multiple of the stride; the returned value is
    # not used, inference runs at the configured ``imgsz``.
    check_img_size(imgsz, s=stride)
    model.half()  # to FP16
    model.eval()  # inference mode (original note: disables BatchNorm updates and Dropout)
    return model
    
def process_image(handle=None, input_image=None, args=None, **kwargs):
    """Run detection on a single image and return the result as a JSON string.

    ``init()`` must have been called first: this function reads the module
    global ``device_obj`` that it sets.

    Args:
        handle: Model object returned by ``init()``; it is called directly on
            the preprocessed tensor.
        input_image: Image array, e.g. from ``cv2.imread`` (the conversion
            below assumes HWC / BGR layout -- TODO confirm for other callers).
        args: Unused; kept for interface compatibility.
        **kwargs: Unused.

    Returns:
        str: JSON with ``algorithm_data`` (``is_alert``, ``target_count`` and
        an always-empty ``target_info``) and ``model_data.objects``, one entry
        per kept detection with ``x``, ``y``, ``width``, ``height``,
        ``confidence`` and the fixed name ``"head"``.
    """
    # Resize/pad towards the configured inference size.
    img = letterbox(input_image, stride=stride, new_shape=imgsz, auto=True)[0]

    # HWC to CHW, BGR to RGB, then move to the inference device as FP16.
    img = img.transpose((2, 0, 1))[::-1]
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device_obj)
    img = img.half()
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if len(img.shape) == 3:
        img = img[None]  # add the batch dimension

    # With val=True the model returns a sequence; its first item goes to NMS.
    pred = handle(img, augment=False, val=True)[0]
    pred = non_max_suppression(pred, conf_thres, iou_thres, agnostic=False)

    objects = []
    for det in pred:  # detections per image
        if det is None or not len(det):
            continue
        # Rescale boxes from the letterboxed size back to the input image size.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], input_image.shape).round()
        for *xyxy, conf, _cls in det:
            if conf < prob_thres:
                continue
            xmin, ymin, xmax, ymax = xyxy
            objects.append({
                "x": int(xmin),
                "y": int(ymin),
                "width": int(xmax - xmin),
                "height": int(ymax - ymin),
                "confidence": float(conf),
                "name": "head",
            })

    count = len(objects)
    result = {
        "algorithm_data": {
            "is_alert": bool(count),
            "target_count": count,
            "target_info": [],
        },
        "model_data": {"objects": objects},
    }
    return json.dumps(result, indent=4)

if __name__ == '__main__':
    # Smoke test: run every sample image through the model, print each JSON
    # result and report the total elapsed time.
    import glob

    # Other image set used during development:
    #   "/project/train/src_repo/datasets/918/images/*.jpg"
    jpg_list = glob.glob("/project/train/src_repo/data/images/*.jpg")
    handle = init()
    start_time = time.time()
    for jpg in jpg_list:
        img = cv2.imread(jpg)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be read; skip it rather than crash inside process_image.
            print("skip unreadable image:", jpg)
            continue
        a = process_image(handle=handle, input_image=img)
        print(a)
    end_time = time.time()
    s = end_time - start_time
    print("spend _time:", s)
2 ji_pt.py
复制代码

 

 

 

2 train.py

复制代码
weights:权重文件路径,如果是''则从头训练参数,如果不为空则做迁移学习,权重文件的模型需与cfg参数中的模型对应

cfg:存储模型结构的配置文件

data:训练、验证数据配置文件

hyp:超参数配置文件,其中的参数意义下面会解释

epochs:指的就是训练过程中整个数据集将被迭代多少次

batch-size:每次梯度更新的批量数,太大会导致显存不足

img-size:训练图片的尺寸

rect:进行矩形训练

resume:恢复最近保存的模型开始训练

nosave:仅保存最终checkpoint

notest:仅测试最后的epoch

noautoanchor:不进行anchors的k-means聚类

evolve:进化超参数

bucket:gsutil bucket

cache-images:缓存图像以加快训练速度

image-weights:给图片加上权重进行训练

device:cuda device, i.e. 0 or 0,1,2,3 or cpu

multi-scale:多尺度训练,img-size +/- 50%

single-cls:单类别的训练集

adam:使用adam优化器

name:重命名results.txt to results_name.txt
参数解释
复制代码
复制代码
def parse_opt(known=False):
    """Build the train.py argument parser and parse the command line.

    Args:
        known: When true, unrecognised arguments are ignored
            (``parse_known_args``); otherwise they are an error
            (``parse_args``).

    Returns:
        argparse.Namespace: The parsed options.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('--weights', type=str, default='', help='initial weights path')  # initial network weights, e.g. yolov5s.pt / yolov5m.pt / yolov5l.pt / yolov5x.pt
    add('--cfg', type=str, default='models/yolov5s.yaml', help='model.yaml path')  # model architecture to use
    add('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')  # dataset configuration file
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=300)  # number of training epochs, default 300
    add('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')  # images read per batch
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')  # rectangular training
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')  # continue from a previously trained model
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable AutoAnchor')  # AutoAnchor is on unless this flag is given
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')  # hyperparameter evolution (a tuning method)
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    add('--image-weights', action='store_true', help='use weighted image selection for training')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')  # varies the image size
    add('--single-cls', action='store_true', help='train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    add('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')  # multi-GPU training
    # The worker default is 4 here; a disabled earlier variant of this line used 8.
    add('--workers', type=int, default=4, help='max dataloader workers (per RANK in DDP mode)')
    add('--project', default=ROOT / 'runs/train', help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--quad', action='store_true', help='quad dataloader')  # quad dataloader
    add('--linear-lr', action='store_true', help='linear LR')  # linear LR
    add('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')  # label smoothing (original note: guards against overfitting)
    add('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    add('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')

    # Weights & Biases arguments
    add('--entity', default=None, help='W&B: Entity')
    add('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    add('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    if known:
        namespace, _unrecognised = parser.parse_known_args()
        return namespace
    return parser.parse_args()
View Code
复制代码
-原始pic与label对应关系

dataset/images/im0.jpg # image
dataset/labels/im0.txt # label 

 

 

3 hyp超参数

复制代码
超参数配置文件./data/hyp.scratch.yaml参数解释:

lr0:学习率,可以理解为模型的学习速度

lrf:OneCycleLR学习率变化策略的最终学习率系数

momentum:动量,梯度下降法中一种常用的加速技术,加快收敛

weight_decay:权值衰减,防止过拟合。在损失函数中,weight decay是正则项(regularization)前的一个系数

warmup_epochs:预热学习轮数

warmup_momentum:预热学习初始动量

warmup_bias_lr:预热学习初始偏差学习率

giou:GIoU损失收益

cls:类别损失收益

cls_pw:类别交叉熵损失正类权重

obj:是否有物体损失收益

obj_pw:是否有物体交叉熵正类权重

iou_t:iou阈值

anchor_t:多尺度anchor阈值

fl_gamma:focal loss gamma系数

hsv_h:色调Hue,增强系数

hsv_s:饱和度Saturation,增强系数

hsv_v:明度Value,增强系数

degrees:图片旋转角度

translate:图片平移

scale:图片缩放

shear:图片剪切变换

perspective:透视变换

mosaic:mosaic数据增强

mixup:mixup数据增强
超参数解释
复制代码
复制代码
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain), i.e. zoom ratio
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 1.0  # image mosaic (probability)
mixup: 0.0  # image mixup (probability), i.e. image blending
copy_paste: 0.0  # segment copy-paste (probability)
View Code
复制代码

 

 

知识补充:

1 --half 的理解 (FP32,FP16)

2 detect.py

model =model.eval() #不启用 Batch Normalization 和 Dropout

model.eval()是保证BN层能够用全部训练数据的均值和方差,即测试过程中要保证BN层的均值和方差不变。对于Dropout,model.eval()是利用到了所有网络连接,即不进行随机舍弃神经元
训练完train样本后,生成的模型model要用来测试样本。在model(test)之前,需要加上model.eval(),否则的话,有输入数据,即使不训练,它也会改变权值。这是model中含有BN层和Dropout所带来的性质。
说明

dropout:

 

posted @   风hua  阅读(421)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
点击右上角即可分享
微信分享提示