YOLOv8 Source Code Analysis (24)

.\yolov8\examples\YOLOv8-CPP-Inference\inference.cpp

// Include the inference header "inference.h"
#include "inference.h"

// Constructor: takes the ONNX model path, the model input shape, the classes file path, and whether to run with CUDA
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
{
    // Store the arguments in the member variables
    modelPath = onnxModelPath;
    modelShape = modelInputShape;
    classesPath = classesTxtFile;
    cudaEnabled = runWithCuda;

    // Load the ONNX network
    loadOnnxNetwork();
    // loadClassesFromFile(); the classes are hard-coded in this example, so loading them from a file is not needed
}

// Run inference on an input image and return a vector of detections
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    // Copy the input image into the model input variable
    cv::Mat modelInput = input;
    // If letterBoxForSquare is set and the model shape is square, pad the input image to a square
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput);

    // Convert the input image to a blob
    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    // Set the network input
    net.setInput(blob);

    // Forward pass to get the network outputs
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // Read the dimensions of the output tensor
    int rows = outputs[0].size[1];
    int dimensions = outputs[0].size[2];

    // Flag indicating whether the output follows the YOLOv8 layout
    bool yolov8 = false;
    // If the dimensions indicate a YOLOv8 model
    if (dimensions > rows) // Check if shape[2] is larger than shape[1] (YOLOv8)
    {
        yolov8 = true;
        // Swap rows and dimensions
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        // Reshape and transpose the output tensor into the one-detection-per-row layout
        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    // Pointer to the output data
    float *data = (float *)outputs[0].data;

    // Compute the image scaling factors
    float x_factor = modelInput.cols / modelShape.width;
    float y_factor = modelInput.rows / modelShape.height;

    // Vectors for class IDs, confidences, and bounding boxes
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Iterate over every output row
    for (int i = 0; i < rows; ++i)
    {
        // YOLOv8 layout
        if (yolov8)
        {
            // Pointer to the class scores
            float *classes_scores = data + 4;

            // Wrap the class scores in an OpenCV Mat
            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);

            // Find the maximum class score and its class ID
            cv::Point class_id;
            double maxClassScore;
            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            // If the maximum class score exceeds the score threshold
            if (maxClassScore > modelScoreThreshold)
            {
                // Record the score and the class ID
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);

                // Extract the box center and size
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                // Convert to top-left corner, width, and height in image coordinates
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);

                // Store the box
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // YOLOv5 layout
        {
            // Objectness confidence
            float confidence = data[4];

            // If the confidence exceeds the confidence threshold
            if (confidence >= modelConfidenceThreshold)
            {
                // Pointer to the class scores
                float *classes_scores = data + 5;

                // Wrap the class scores in an OpenCV Mat
                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);

                // Find the maximum class score and its class ID
                cv::Point class_id;
                double max_class_score;
                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                // If the maximum class score exceeds the score threshold
                if (max_class_score > modelScoreThreshold)
                {
                    // Record the confidence and the class ID
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    // Extract the box center and size
                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    // Convert to top-left corner, width, and height in image coordinates
                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);
                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    // Store the box
                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        // Advance the data pointer to the next detection
        data += dimensions;
    }
    
    // Run non-maximum suppression on all detected boxes to get the final box indices
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // Vector of Detection structs for the final results
    std::vector<Detection> detections{};

    // Iterate over the box indices kept by NMS
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        // Index of the current box
        int idx = nms_result[i];

        // Fill a Detection object for this box
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        // Generate a random display color for this detection
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen), dis(gen), dis(gen));

        // Look up the class name
        result.className = classes[result.class_id];

        // Store the bounding box
        result.box = boxes[idx];

        // Append the detection to the results
        detections.push_back(result);
    }

    // Return the final detections
    return detections;
}

void Inference::loadClassesFromFile()
{
    // Open the classes file
    std::ifstream inputFile(classesPath);
    // If the file opened successfully
    if (inputFile.is_open())
    {
        // Buffer for each class name line
        std::string classLine;
        // Read the file line by line and append each class name to the list
        while (std::getline(inputFile, classLine))
            classes.push_back(classLine);
        // Close the file
        inputFile.close();
    }
}

void Inference::loadOnnxNetwork()
{
    // Read the network from the ONNX model path
    net = cv::dnn::readNetFromONNX(modelPath);
    // If CUDA acceleration is enabled
    if (cudaEnabled)
    {
        // Report that inference runs on CUDA
        std::cout << "\nRunning on CUDA" << std::endl;
        // Use the CUDA backend
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        // Target the CUDA device
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else
    {
        // Report that inference runs on the CPU
        std::cout << "\nRunning on CPU" << std::endl;
        // Use the OpenCV backend
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        // Target the CPU
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

cv::Mat Inference::formatToSquare(const cv::Mat &source)
{
    // Width and height of the source image
    int col = source.cols;
    int row = source.rows;
    // The larger of the two dimensions
    int _max = MAX(col, row);
    // Create a zero-filled square image of that size
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    // Copy the source image into the top-left corner
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    // Return the squared image
    return result;
}

.\yolov8\examples\YOLOv8-CPP-Inference\inference.h

#ifndef INFERENCE_H
#define INFERENCE_H

// Cpp native
#include <fstream>          // file stream operations
#include <vector>           // std::vector container
#include <string>           // std::string operations
#include <random>           // random number generation

// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>   // image processing
#include <opencv2/opencv.hpp>    // OpenCV core
#include <opencv2/dnn.hpp>       // OpenCV deep neural network module

// Struct representing a single detection result
struct Detection
{
    int class_id{0};            // class ID
    std::string className{};    // class name
    float confidence{0.0};      // confidence score
    cv::Scalar color{};         // box color
    cv::Rect box{};             // bounding box
};

// Inference class declaration
class Inference
{
public:
    // Constructor: model path, input shape, classes file path, and whether to run with CUDA
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const std::string &classesTxtFile = "", const bool &runWithCuda = true);
    
    // Run inference on an input image and return the detections
    std::vector<Detection> runInference(const cv::Mat &input);

private:
    // Load the class names from a file
    void loadClassesFromFile();
    
    // Load the ONNX model
    void loadOnnxNetwork();
    
    // Pad an image to a square
    cv::Mat formatToSquare(const cv::Mat &source);

    std::string modelPath{};            // model file path
    std::string classesPath{};          // classes file path
    bool cudaEnabled{};                 // whether CUDA acceleration is enabled

    std::vector<std::string> classes{   // default class list
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Size2f modelShape{};            // model input shape

    float modelConfidenceThreshold {0.25};   // confidence threshold
    float modelScoreThreshold      {0.45};   // score threshold
    float modelNMSThreshold        {0.50};   // NMS threshold

    bool letterBoxForSquare = true;     // whether to use the letterbox approach to square the image

    cv::dnn::Net net;                   // OpenCV DNN network object
};

#endif // INFERENCE_H

.\yolov8\examples\YOLOv8-CPP-Inference\main.cpp

// Required headers
#include <iostream>
#include <vector>
#include <getopt.h>

// OpenCV
#include <opencv2/opencv.hpp>

// The inference class header
#include "inference.h"

// Use the std and cv namespaces for brevity
using namespace std;
using namespace cv;

// Program entry point
int main(int argc, char **argv)
{
    // Base path of the Ultralytics project
    std::string projectBasePath = "/home/user/ultralytics"; // Set your ultralytics base path

    // Whether to run inference on the GPU
    bool runOnGPU = true;

    //
    // Choose the ONNX model file to use:
    //
    // "yolov8s.onnx" or "yolov5s.onnx"
    //
    // to run inference with yolov8/yolov5
    //

    // Note that in this example the classes are hard-coded and 'classes.txt' is a placeholder.
    // Create the inference object with the ONNX model path, the input size, and the classes file name
    Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU);

    // Image files to process
    std::vector<std::string> imageNames;
    imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg");
    imageNames.push_back(projectBasePath + "/ultralytics/assets/zidane.jpg");

    // Iterate over the images
    for (int i = 0; i < imageNames.size(); ++i)
    {
        // Read the image file
        cv::Mat frame = cv::imread(imageNames[i]);

        // Inference starts here...
        // Run inference and collect the detections
        std::vector<Detection> output = inf.runInference(frame);

        // Number of detected objects
        int detections = output.size();
        std::cout << "Number of detections:" << detections << std::endl;

        // Iterate over the detections
        for (int i = 0; i < detections; ++i)
        {
            Detection detection = output[i];

            // Bounding box and color
            cv::Rect box = detection.box;
            cv::Scalar color = detection.color;

            // Draw the bounding box on the image
            cv::rectangle(frame, box, color, 2);

            // Draw the label above the box
            std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
            cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
            cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

            cv::rectangle(frame, textBox, color, cv::FILLED);
            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
        }
        // Inference ends here...

        // For preview purposes only: scale the image down and show it
        float scale = 0.8;
        cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
        cv::imshow("Inference", frame);

        cv::waitKey(-1); // Wait for a key press before continuing
    }
}

YOLOv8/YOLOv5 Inference C++

This example demonstrates how to perform inference using YOLOv8 and YOLOv5 models in C++ with OpenCV's DNN API.

Usage

git clone ultralytics
cd ultralytics
pip install .
cd examples/YOLOv8-CPP-Inference

# Add a yolov8_.onnx and/or yolov5_.onnx model to the ultralytics folder.
# Edit main.cpp to change projectBasePath to match your user.

# Note that by default the CMake file will try to import the CUDA library to be used with OpenCV's DNN (cuDNN) GPU inference.
# If your OpenCV build does not use CUDA/cuDNN you can remove that import call and run the example on CPU.

mkdir build
cd build
cmake ..
make
./Yolov8CPPInference

Exporting YOLOv8 and YOLOv5 Models

To export YOLOv8 models:

yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12

To export YOLOv5 models:

python3 export.py --weights yolov5s.pt --img 480 640 --include onnx --opset 12

yolov8s.onnx: (example detection output image)

yolov5s.onnx: (example detection output image)

This repository utilizes OpenCV's DNN API to run ONNX exported models of YOLOv5 and YOLOv8. In theory, it should work for YOLOv6 and YOLOv7 as well, but they have not been tested. Note that the example networks are exported with rectangular (640x480) resolutions, but any exported resolution will work. You may want to use the letterbox approach for square images, depending on your use case.
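For reference, the top-left zero-padding used by Inference::formatToSquare can be sketched in a few lines of Python/NumPy (a rough equivalent, not part of the example sources; the image path is a placeholder):

import cv2
import numpy as np

def format_to_square(img: np.ndarray) -> np.ndarray:
    # Pad with zeros on the right/bottom so the image becomes square,
    # keeping the original pixels in the top-left corner (as formatToSquare does).
    h, w = img.shape[:2]
    size = max(h, w)
    out = np.zeros((size, size, 3), dtype=np.uint8)
    out[:h, :w] = img
    return out

square = format_to_square(cv2.imread("bus.jpg"))  # placeholder image path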

The main branch version uses Qt as a GUI wrapper. The primary focus here is the Inference class file, which demonstrates how to transpose YOLOv8 models to work as YOLOv5 models.
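The shape handling that runInference performs can be reproduced with NumPy to see why the transpose is needed (a minimal sketch on a dummy tensor; the shapes are the usual 80-class COCO layouts):

import numpy as np

v8_out = np.random.rand(1, 84, 8400).astype(np.float32)  # YOLOv8: [cx, cy, w, h, 80 class scores] per column
rows = np.squeeze(v8_out).T                              # transpose to (8400, 84): one detection per row
cx, cy, w, h = rows[0, :4]
class_scores = rows[0, 4:]
print(rows.shape, class_scores.argmax())                 # (8400, 84) and the best class index

YOLOv5 exports already come out as (1, 25200, 85) with an objectness column, which is why the C++ code branches on whether shape[2] is larger than shape[1].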

# YOLOv8 LibTorch Inference C++

This example demonstrates how to perform inference using YOLOv8 models in C++ with LibTorch API.

Dependencies

Dependency     Version
OpenCV         >=4.0.0
C++ Standard   >=17
Cmake          >=3.18
Libtorch       >=1.12.1

Usage

git clone ultralytics
cd ultralytics
pip install .
cd examples/YOLOv8-LibTorch-CPP-Inference

mkdir build
cd build
cmake ..
make
./yolov8_libtorch_inference

Exporting YOLOv8

To export YOLOv8 models:

yolo export model=yolov8s.pt imgsz=640 format=torchscript
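The same export can also be driven from Python with the ultralytics API (a short sketch; it produces a yolov8s.torchscript file):

from ultralytics import YOLO

model = YOLO("yolov8s.pt")
model.export(format="torchscript", imgsz=640)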

.\yolov8\examples\YOLOv8-ONNXRuntime\main.py

# Import the required libraries and modules
import argparse  # command-line argument parsing

import cv2  # OpenCV, for image handling
import numpy as np  # NumPy, for numerical operations
import onnxruntime as ort  # ONNX Runtime, for loading and running ONNX models
import torch  # PyTorch, used here to check for CUDA availability

# Import helper functions and modules from ultralytics
from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_requirements, check_yaml


class YOLOv8:
    """YOLOv8 object detection model class for handling inference and visualization."""

    def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
        """
        Initializes an instance of the YOLOv8 class.

        Args:
            onnx_model: Path to the ONNX model.
            input_image: Path to the input image.
            confidence_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
        """
        self.onnx_model = onnx_model  # path to the ONNX model
        self.input_image = input_image  # path to the input image
        self.confidence_thres = confidence_thres  # confidence threshold
        self.iou_thres = iou_thres  # IoU threshold

        # Load the class names from the COCO dataset yaml file
        self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]

        # Generate a color palette for the classes
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws bounding boxes and labels on the input image based on the detected objects.

        Args:
            img: The input image to draw detections on.
            box: The detected bounding box.
            score: The corresponding detection confidence.
            class_id: The class ID of the detected object.

        Returns:
            None
        """

        # Extract the coordinates of the bounding box
        x1, y1, w, h = box

        # Retrieve the color for this class ID
        color = self.color_palette[class_id]

        # Draw the bounding box on the image
        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

        # Create the label text with the class name and score
        label = f"{self.classes[class_id]}: {score:.2f}"

        # Calculate the dimensions of the label text
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Calculate the position of the label text
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Draw a filled rectangle as the background of the label text
        cv2.rectangle(
            img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
        )

        # Draw the label text on the image
        cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    def preprocess(self):
        """Preprocesses the input image before inference and returns the prepared input tensor."""

        # Read the input image with OpenCV
        self.img = cv2.imread(self.input_image)

        # Record the height and width of the input image
        self.img_height, self.img_width = self.img.shape[:2]

        # Convert the color space from BGR to RGB
        img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)

        # Resize the image to match the model input shape
        img = cv2.resize(img, (self.input_width, self.input_height))

        # Normalize the pixel values to [0, 1] by dividing by 255.0
        image_data = np.array(img) / 255.0

        # Transpose the image array so that the channel dimension comes first
        image_data = np.transpose(image_data, (2, 0, 1))  # channel first

        # Add a batch dimension to match the expected input shape
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)

        # Return the preprocessed image data
        return image_data
    def postprocess(self, input_image, output):
        """Extracts boxes, scores, and class IDs from the raw model output and draws them on the input image."""

        # Transpose and squeeze the model output to match the expected shape
        outputs = np.transpose(np.squeeze(output[0]))

        # Number of rows in the output array
        rows = outputs.shape[0]

        # Lists for the detection results: bounding boxes, confidence scores, and class IDs
        boxes = []
        scores = []
        class_ids = []

        # Scaling factors for the bounding box coordinates
        x_factor = self.img_width / self.input_width
        y_factor = self.img_height / self.input_height

        # Iterate over every row of the output array
        for i in range(rows):
            # Extract the class scores from the current row
            classes_scores = outputs[i][4:]

            # Find the maximum class score
            max_score = np.amax(classes_scores)

            # If the maximum score exceeds the confidence threshold
            if max_score >= self.confidence_thres:
                # Class ID with the highest score
                class_id = np.argmax(classes_scores)

                # Extract the bounding box coordinates from the current row
                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]

                # Compute the scaled bounding box coordinates
                left = int((x - w / 2) * x_factor)
                top = int((y - h / 2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)

                # Append the class ID, score, and box to the result lists
                class_ids.append(class_id)
                scores.append(max_score)
                boxes.append([left, top, width, height])

        # Apply non-maximum suppression to filter out overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)

        # Iterate over the indices kept after NMS
        for i in indices:
            # Get the box, score, and class ID for this index
            box = boxes[i]
            score = scores[i]
            class_id = class_ids[i]

            # Draw the detection on the input image
            self.draw_detections(input_image, box, score, class_id)

        # Return the modified input image
        return input_image
    def main(self):
        """
        Performs inference using an ONNX model and returns the output image with drawn detections.

        Returns:
            output_img: The output image with drawn detections.
        """
        # Create an ONNX Runtime inference session with the CUDAExecutionProvider and CPUExecutionProvider
        session = ort.InferenceSession(self.onnx_model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])

        # Get the model input details
        model_inputs = session.get_inputs()

        # Store the input shape for later use
        input_shape = model_inputs[0].shape
        self.input_width = input_shape[2]
        self.input_height = input_shape[3]

        # Preprocess the image data
        img_data = self.preprocess()

        # Run inference with the preprocessed image data
        outputs = session.run(None, {model_inputs[0].name: img_data})

        # Postprocess the outputs and return the annotated image
        return self.postprocess(self.img, outputs)


if __name__ == "__main__":
    # Create an argument parser for the command-line arguments
    parser = argparse.ArgumentParser()
    # Path to the ONNX model, default 'yolov8n.onnx'
    parser.add_argument("--model", type=str, default="yolov8n.onnx", help="Input your ONNX model.")
    # Path to the input image, default ASSETS / 'bus.jpg'
    parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
    # Confidence threshold, default 0.5
    parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
    # NMS IoU threshold, default 0.5
    parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
    # Parse the command-line arguments
    args = parser.parse_args()

    # Check the environment requirements and pick the appropriate backend (CPU or GPU)
    check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")

    # Create an instance of the YOLOv8 class with the specified arguments
    detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)

    # Perform object detection and get the output image
    output_image = detection.main()

    # Display the output image in a window
    cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
    cv2.imshow("Output", output_image)

    # Wait for a key press to exit
    cv2.waitKey(0)

YOLOv8 - ONNX Runtime

This project implements YOLOv8 using ONNX Runtime.

Installation

To run this project, you need to install the required dependencies. The following instructions will guide you through the installation process.

Installing Required Dependencies

You can install the required dependencies by running the following command:

pip install -r requirements.txt

Installing onnxruntime-gpu

If you have an NVIDIA GPU and want to leverage GPU acceleration, you can install the onnxruntime-gpu package using the following command:

pip install onnxruntime-gpu

Note: Make sure you have the appropriate GPU drivers installed on your system.

Installing onnxruntime (CPU version)

If you don't have an NVIDIA GPU or prefer to use the CPU version of onnxruntime, you can install the onnxruntime package using the following command:

pip install onnxruntime

Usage

After successfully installing the required packages, you can run the YOLOv8 implementation using the following command:

python main.py --model yolov8n.onnx --img image.jpg --conf-thres 0.5 --iou-thres 0.5

Make sure to replace yolov8n.onnx with the path to your YOLOv8 ONNX model file, image.jpg with the path to your input image, and adjust the confidence threshold (conf-thres) and IoU threshold (iou-thres) values as needed.
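The same class can also be driven programmatically instead of through argparse (a sketch that assumes main.py is importable from the working directory; the file names are placeholders):

import cv2
from main import YOLOv8

detector = YOLOv8("yolov8n.onnx", "image.jpg", confidence_thres=0.5, iou_thres=0.5)
annotated = detector.main()   # preprocess -> ONNX Runtime session.run -> postprocess
cv2.imwrite("result.jpg", annotated)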

.\yolov8\examples\YOLOv8-ONNXRuntime-CPP\inference.cpp

#include "inference.h"
#include <regex>

// 定义预处理时的性能基准
#define benchmark
// 定义比较两个数中较小的宏
#define min(a,b)            (((a) < (b)) ? (a) : (b))

// YOLO_V8 类的默认构造函数
YOLO_V8::YOLO_V8() {

}

// YOLO_V8 类的析构函数,释放 session 资源
YOLO_V8::~YOLO_V8() {
    delete session;
}

#ifdef USE_CUDA
namespace Ort
{
    // 将 half 类型映射为 ONNX 中的 FLOAT16 数据类型
    template<>
    struct TypeToTensorType<half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
}
#endif

// 从图像生成 Blob 数据,用于模型输入
template<typename T>
char* BlobFromImage(cv::Mat& iImg, T& iBlob) {
    int channels = iImg.channels();
    int imgHeight = iImg.rows;
    int imgWidth = iImg.cols;

    // 遍历图像像素并生成 Blob 数据
    for (int c = 0; c < channels; c++)
    {
        for (int h = 0; h < imgHeight; h++)
        {
            for (int w = 0; w < imgWidth; w++)
            {
                // 归一化像素值到 [0, 1] 并存入 Blob 中
                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
                    (iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
            }
        }
    }
    return RET_OK;  // 返回处理成功标志
}

// Image pre-processing for the YOLO_V8 class
char* YOLO_V8::PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg)
{
    if (iImg.channels() == 3)
    {
        // Three-channel image: convert the color space from BGR to RGB
        oImg = iImg.clone();
        cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
    }
    else
    {
        // Otherwise treat it as grayscale and convert it to a three-channel RGB image
        cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB);
    }

    // Choose the pre-processing depending on the model type
    switch (modelType)
    {
    case YOLO_DETECT_V8:
    case YOLO_POSE:
    case YOLO_DETECT_V8_HALF:
    case YOLO_POSE_V8_HALF: // LetterBox
    {
        if (iImg.cols >= iImg.rows)
        {
            // Scale by width: keep the target width and scale the height proportionally
            resizeScales = iImg.cols / (float)iImgSize.at(0);
            cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales)));
        }
        else
        {
            // Scale by height: keep the target height and scale the width proportionally
            resizeScales = iImg.rows / (float)iImgSize.at(0);
            cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1)));
        }
        // Create a zero-filled image of the target size and copy the resized image into its top-left corner
        cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3);
        oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows)));
        oImg = tempImg; // the padded image becomes the output image
        break;
    }
    case YOLO_CLS: // CenterCrop
    {
        int h = iImg.rows;
        int w = iImg.cols;
        int m = min(h, w);
        int top = (h - m) / 2;
        int left = (w - m) / 2;
        // Center-crop the image and resize it to the target size
        cv::resize(oImg(cv::Rect(left, top, m, m)), oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
        break;
    }
    }
    return RET_OK; // success
}

// Create and initialize the model session
char* YOLO_V8::CreateSession(DL_INIT_PARAM& iParams) {
    char* Ret = RET_OK;
    // Check whether the model path contains Chinese characters
    std::regex pattern("[\u4e00-\u9fa5]");
    bool result = std::regex_search(iParams.modelPath, pattern);
    if (result)
    {
        // If it does, return an error message
        Ret = "[YOLO_V8]:Your model path is error.Change your model path without chinese characters.";
        std::cout << Ret << std::endl;
        return Ret;
    }
    try
    {
        // Copy rectConfidenceThreshold from iParams
        rectConfidenceThreshold = iParams.rectConfidenceThreshold;
        // Copy iouThreshold from iParams
        iouThreshold = iParams.iouThreshold;
        // Copy imgSize from iParams
        imgSize = iParams.imgSize;
        // Copy modelType from iParams
        modelType = iParams.modelType;
        // Create the Ort environment with warning-level logging, named "Yolo"
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
        // Create the Ort session options
        Ort::SessionOptions sessionOption;
        // If CUDA acceleration is enabled in iParams
        if (iParams.cudaEnable)
        {
            // Remember that CUDA is enabled
            cudaEnable = iParams.cudaEnable;
            // Create OrtCUDAProviderOptions for device 0
            OrtCUDAProviderOptions cudaOption;
            cudaOption.device_id = 0;
            // Append the CUDA execution provider to the session options
            sessionOption.AppendExecutionProvider_CUDA(cudaOption);
        }
        // Enable all graph optimizations
        sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        // Set the number of intra-op threads
        sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads);
        // Set the log severity level
        sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel);
#ifdef _WIN32
        // Compute the buffer size needed to convert the UTF-8 path to wide characters
        int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), nullptr, 0);
        // Allocate memory for the wide-character path
        wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
        // Convert the UTF-8 path to wide characters
        MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast<int>(iParams.modelPath.length()), wide_cstr, ModelPathSize);
        // Null-terminate the wide-character path
        wide_cstr[ModelPathSize] = L'\0';
        // Use the wide-character path as the model path
        const wchar_t* modelPath = wide_cstr;
#else
        // On non-Windows platforms use the UTF-8 model path directly
        const char* modelPath = iParams.modelPath.c_str();
#endif // _WIN32

        // Create the Ort::Session and load the model
        session = new Ort::Session(env, modelPath, sessionOption);
        Ort::AllocatorWithDefaultOptions allocator;
        // Number of model input nodes
        size_t inputNodesNum = session->GetInputCount();
        // Iterate over the input nodes and store each name as a C string
        for (size_t i = 0; i < inputNodesNum; i++)
        {
            Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
            char* temp_buf = new char[50];
            std::strcpy(temp_buf, input_node_name.get());
            inputNodeNames.push_back(temp_buf);
        }
        // Number of model output nodes
        size_t OutputNodesNum = session->GetOutputCount();
        // Iterate over the output nodes and store each name as a C string
        for (size_t i = 0; i < OutputNodesNum; i++)
        {
            Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
            char* temp_buf = new char[10];
            std::strcpy(temp_buf, output_node_name.get());
            outputNodeNames.push_back(temp_buf);
        }
        // Initialize the Ort::RunOptions with nullptr
        options = Ort::RunOptions{ nullptr };
        // Warm up the session
        WarmUpSession();
        // Return success
        return RET_OK;
    }
    catch (const std::exception& e)
    {
        // Catch the exception and build the combined error message
        const char* str1 = "[YOLO_V8]:";
        const char* str2 = e.what();
        std::string result = std::string(str1) + std::string(str2);
        // Allocate memory and copy the combined error message
        char* merged = new char[result.length() + 1];
        std::strcpy(merged, result.c_str());
        // Print the error message
        std::cout << merged << std::endl;
        // Free the memory
        delete[] merged;
        // Return an error message
        return "[YOLO_V8]:Create session failed.";
    }

}


char* YOLO_V8::RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult) {
#ifdef benchmark
    clock_t starttime_1 = clock();
#endif // benchmark

    // Initialize the return value to success
    char* Ret = RET_OK;
    cv::Mat processedImg;
    // Pre-process the input image
    PreProcess(iImg, imgSize, processedImg);
    // Run different paths depending on the model type
    if (modelType < 4)
    {
        // Allocate a float blob for the image data
        float* blob = new float[processedImg.total() * 3];
        // Convert the image to a blob
        BlobFromImage(processedImg, blob);
        // Dimensions of the input node
        std::vector<int64_t> inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) };
        // Process the tensor
        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
    }
    else
    {
#ifdef USE_CUDA
        // With CUDA enabled, allocate a half-precision blob for the image data
        half* blob = new half[processedImg.total() * 3];
        // Convert the image to a blob
        BlobFromImage(processedImg, blob);
        // Dimensions of the input node
        std::vector<int64_t> inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) };
        // Process the tensor
        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
#endif
    }

    // Return the result
    return Ret;
}


template<typename N>
char* YOLO_V8::TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
    std::vector<DL_RESULT>& oResult) {
    // Create the input tensor with Ort::Value::CreateTensor, typed on N with its pointer removed.
    // Ort::MemoryInfo::CreateCpu builds memory info for CPU-allocated memory
    // (OrtDeviceAllocator as the allocator, OrtMemTypeCPU as the memory type).
    // blob is the pointer holding the data used to initialize the tensor,
    // 3 * imgSize.at(0) * imgSize.at(1) is the total number of elements,
    // and inputNodeDims.data()/inputNodeDims.size() describe the tensor shape.
    Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
        inputNodeDims.data(), inputNodeDims.size());
#ifdef benchmark
    // Second timestamp for the benchmark
    clock_t starttime_2 = clock();
#endif // benchmark
    // Run the inference session and get the output tensor
    auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
        outputNodeNames.size());
#ifdef benchmark
    // Third timestamp for the benchmark
    clock_t starttime_3 = clock();
#endif // benchmark

    // Type information of the output tensor
    Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
    auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
    // Shape of the output node
    std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
    // Mutable data pointer of the output tensor
    auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
    // Free the blob allocated earlier
    delete[] blob;
    // Handle the output depending on the model type
    switch (modelType)
    {
    case YOLO_DETECT_V8:
    case YOLO_DETECT_V8_HALF:
    {
        // Sizes of the second and third output dimensions, used below
        int strideNum = outputNodeDims[1];//8400
        int signalResultNum = outputNodeDims[2];//84
        // Containers for the detection results
        std::vector<int> class_ids;
        std::vector<float> confidences;
        std::vector<cv::Rect> boxes;
        cv::Mat rawData;
        if (modelType == YOLO_DETECT_V8)
        {
            // FP32 model: wrap the raw output data directly
            rawData = cv::Mat(strideNum, signalResultNum, CV_32F, output);
        }
        else
        {
            // FP16 model: wrap the raw output data as FP16 and convert it to FP32
            rawData = cv::Mat(strideNum, signalResultNum, CV_16F, output);
            rawData.convertTo(rawData, CV_32F);
        }
        //Note:
        //ultralytics adds a transpose to the yolov8 model output so that it has the same shape as yolov8/v5/v7
        //https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
        //rowData = rowData.t();

        // Data pointer into the raw output matrix
        float* data = (float*)rawData.data;

        // Iterate over every row of the raw output matrix
        for (int i = 0; i < strideNum; ++i)
        {
            float* classesScores = data + 4;
            // Wrap the class scores in an OpenCV matrix
            cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
            cv::Point class_id;
            double maxClassScore;
            // Find the maximum score and its index
            cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
            // If the maximum score exceeds the box confidence threshold, record the detection
            if (maxClassScore > rectConfidenceThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);
                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                // Convert to top-left corner, width, and height in the original image
                int left = int((x - 0.5 * w) * resizeScales);
                int top = int((y - 0.5 * h) * resizeScales);
                int width = int(w * resizeScales);
                int height = int(h * resizeScales);

                // Append the bounding box
                boxes.push_back(cv::Rect(left, top, width, height));
            }
            // Advance to the next row
            data += signalResultNum;
        }
        // Run non-maximum suppression to remove overlapping boxes
        std::vector<int> nmsResult;
        cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);
        // Convert the kept detections into DL_RESULT entries and append them to the output
        for (int i = 0; i < nmsResult.size(); ++i)
        {
            int idx = nmsResult[i];
            DL_RESULT result;
            result.classId = class_ids[idx];
            result.confidence = confidences[idx];
            result.box = boxes[idx];
            oResult.push_back(result);
        }
#ifdef benchmark
        // With the benchmark macro defined, collect timing statistics
        clock_t starttime_4 = clock();
        // Pre-processing time
        double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
        // Inference time
        double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
        // Post-processing time
        double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
        // Print the timings, labelled according to whether CUDA is enabled
        if (cudaEnable)
        {
            std::cout << "[YOLO_V8(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
        }
        else
        {
            std::cout << "[YOLO_V8(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl;
        }
#endif // benchmark

        break;
    }
    case YOLO_CLS:
    case YOLO_CLS_HALF:
    {
        cv::Mat rawData;
        if (modelType == YOLO_CLS) {
            // Wrap the output as a single-channel CV_32F matrix
            rawData = cv::Mat(1, this->classes.size(), CV_32F, output);
        } else {
            // Wrap the output as a single-channel CV_16F matrix and convert it to CV_32F
            rawData = cv::Mat(1, this->classes.size(), CV_16F, output);
            rawData.convertTo(rawData, CV_32F);
        }
        float *data = (float *) rawData.data;

        DL_RESULT result;
        for (int i = 0; i < this->classes.size(); i++)
        {
            // Store the class ID and confidence in the result
            result.classId = i;
            result.confidence = data[i];
            // Append the result to the output list
            oResult.push_back(result);
        }
        break;
    }
    default:
        // Unsupported model type
        std::cout << "[YOLO_V8]: " << "Not support model type." << std::endl;
    }
    // Return OK
    return RET_OK;

}


char* YOLO_V8::WarmUpSession() {
    // Record the start time
    clock_t starttime_1 = clock();
    // Create a blank image of the model input size
    cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
    cv::Mat processedImg;
    // Pre-process the input image
    PreProcess(iImg, imgSize, processedImg);
    if (modelType < 4)
    {
        // Model types below 4 run in FP32
        float* blob = new float[iImg.total() * 3];
        // Build the blob from the processed image
        BlobFromImage(processedImg, blob);
        // Dimensions of the input node
        std::vector<int64_t> YOLO_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) };
        // Create the input tensor
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
            YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
        // Run the inference session and get the output tensors
        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
            outputNodeNames.size());
        delete[] blob;
        // Measure the time from pre-processing to completion
        clock_t starttime_4 = clock();
        double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
        // If CUDA is enabled, report the warm-up time
        if (cudaEnable)
        {
            std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
        }
    }
    else
    {
#ifdef USE_CUDA
        // Allocate storage for the half-precision image data
        half* blob = new half[iImg.total() * 3];
        // Convert the processed image into the half-precision blob
        BlobFromImage(processedImg, blob);
        // Dimensions of the YOLO model input node
        std::vector<int64_t> YOLO_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) };
        // Create the input tensor from the half-precision data and the shape information
        Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
        // Run the inference session with the input tensor and get the output tensors
        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
        // Free the half-precision blob
        delete[] blob;
        // Measure the warm-up time in milliseconds
        clock_t starttime_4 = clock();
        double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
        // If CUDA is enabled, report the warm-up time
        if (cudaEnable)
        {
            std::cout << "[YOLO_V8(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
        }
#endif
    }
    // Return success
    return RET_OK;
}

.\yolov8\examples\YOLOv8-ONNXRuntime-CPP\inference.h

#pragma once
// #pragma once: make sure the header is compiled only once, avoiding duplicate definitions

#define    RET_OK nullptr
// RET_OK macro: a null pointer constant used as the return value for successful operations

#ifdef _WIN32
#include <Windows.h>
#include <direct.h>
#include <io.h>
#endif
// #ifdef _WIN32: include the Windows-specific system headers only on Windows

#include <string>
#include <vector>
#include <cstdio>
#include <opencv2/opencv.hpp>
#include "onnxruntime_cxx_api.h"

#ifdef USE_CUDA
#include <cuda_fp16.h>
#endif
// Common includes: the C++ standard library, OpenCV, and the ONNX Runtime C++ API,
// plus the CUDA FP16 header when USE_CUDA is defined

enum MODEL_TYPE
{
    //FLOAT32 MODEL
    YOLO_DETECT_V8 = 1,
    YOLO_POSE = 2,
    YOLO_CLS = 3,

    //FLOAT16 MODEL
    YOLO_DETECT_V8_HALF = 4,
    YOLO_POSE_V8_HALF = 5,
    YOLO_CLS_HALF = 6
};
// MODEL_TYPE enum: the supported model types, covering FLOAT32 and FLOAT16 YOLO models

typedef struct _DL_INIT_PARAM
{
    std::string modelPath;
    MODEL_TYPE modelType = YOLO_DETECT_V8;
    std::vector<int> imgSize = { 640, 640 };
    float rectConfidenceThreshold = 0.6;
    float iouThreshold = 0.5;
    int    keyPointsNum = 2;//Note:kpt number for pose
    bool cudaEnable = false;
    int logSeverityLevel = 3;
    int intraOpNumThreads = 1;
} DL_INIT_PARAM;
// _DL_INIT_PARAM struct: everything needed to initialize the model, including the model path,
// type, image size, and thresholds

typedef struct _DL_RESULT
{
    int classId;
    float confidence;
    cv::Rect box;
    std::vector<cv::Point2f> keyPoints;
} DL_RESULT;
// _DL_RESULT struct: one model result, including the class ID, confidence, bounding box, and keypoints

class YOLO_V8
{
public:
    YOLO_V8();
    // Constructor: initializes a YOLO_V8 instance

    ~YOLO_V8();
    // Destructor: releases the YOLO_V8 instance

public:
    char* CreateSession(DL_INIT_PARAM& iParams);
    // Creates the model session

    char* RunSession(cv::Mat& iImg, std::vector<DL_RESULT>& oResult);
    // Runs the model session and returns the results

    char* WarmUpSession();
    // Warms up the model session

    template<typename N>
    char* TensorProcess(clock_t& starttime_1, cv::Mat& iImg, N& blob, std::vector<int64_t>& inputNodeDims,
        std::vector<DL_RESULT>& oResult);
    // Template method: turns the input blob into a tensor, runs it, and decodes the results

    char* PreProcess(cv::Mat& iImg, std::vector<int> iImgSize, cv::Mat& oImg);
    // Pre-processes the input image, resizing it to the requested size

    std::vector<std::string> classes{};
    // Class names for the model outputs

private:
    Ort::Env env;
    Ort::Session* session;
    bool cudaEnable;
    Ort::RunOptions options;
    std::vector<const char*> inputNodeNames;
    std::vector<const char*> outputNodeNames;
    // Private members: the ONNX Runtime environment, session, CUDA flag, run options,
    // and the input/output node names

    MODEL_TYPE modelType;
    std::vector<int> imgSize;
    float rectConfidenceThreshold;
    float iouThreshold;
    float resizeScales;//letterbox scale
    // Private members: model type, image size, confidence threshold, IoU threshold, letterbox scale
};

.\yolov8\examples\YOLOv8-ONNXRuntime-CPP\main.cpp

// Required headers
#include <iostream>
#include <iomanip>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <random>
#include "inference.h"

void Detector(YOLO_V8*& p) {
    // Current working directory
    std::filesystem::path current_path = std::filesystem::current_path();
    // Image directory: the "images" subdirectory of the current path
    std::filesystem::path imgs_path = current_path / "images";
    
    // Iterate over every file in the image directory
    for (auto& i : std::filesystem::directory_iterator(imgs_path))
    {
        // Only handle files with a .jpg, .png, or .jpeg extension
        if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg")
        {
            // Full path of the image file
            std::string img_path = i.path().string();
            // Read the image into an OpenCV Mat
            cv::Mat img = cv::imread(img_path);
            
            // Vector for the inference results
            std::vector<DL_RESULT> res;
            // Run inference with the YOLO_V8 model
            p->RunSession(img, res);

            // Iterate over each detection
            for (auto& re : res)
            {
                // Generate a random color
                cv::RNG rng(cv::getTickCount());
                cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));

                // Draw the bounding box on the image
                cv::rectangle(img, re.box, color, 3);

                // Format the confidence and build the label
                float confidence = floor(100 * re.confidence) / 100;
                std::cout << std::fixed << std::setprecision(2);
                std::string label = p->classes[re.classId] + " " +
                    std::to_string(confidence).substr(0, std::to_string(confidence).size() - 4);

                // Draw a filled rectangle above the box as the label background
                cv::rectangle(
                    img,
                    cv::Point(re.box.x, re.box.y - 25),
                    cv::Point(re.box.x + label.length() * 15, re.box.y),
                    color,
                    cv::FILLED
                );

                // Draw the label text on top of the filled rectangle
                cv::putText(
                    img,
                    label,
                    cv::Point(re.box.x, re.box.y - 5),
                    cv::FONT_HERSHEY_SIMPLEX,
                    0.75,
                    cv::Scalar(0, 0, 0),
                    2
                );
            }

            // Show the result and wait for a key press
            std::cout << "Press any key to exit" << std::endl;
            cv::imshow("Result of Detection", img);
            cv::waitKey(0);
            cv::destroyAllWindows();
        }
    }
}

void Classifier(YOLO_V8*& p)
{
    // Current working directory
    std::filesystem::path current_path = std::filesystem::current_path();
    // Use the current path as the image directory
    std::filesystem::path imgs_path = current_path;// / "images"
    
    // Seed a random number generator for the label colors
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(0, 255);
    
    // Iterate over every file in the image directory
    for (auto& i : std::filesystem::directory_iterator(imgs_path))
    {
        // Only handle files with a ".jpg" or ".png" extension
        if (i.path().extension() == ".jpg" || i.path().extension() == ".png")
        {
            // Path of the image file
            std::string img_path = i.path().string();
            // Read the image with OpenCV
            cv::Mat img = cv::imread(img_path);
            // Vector for the classification results
            std::vector<DL_RESULT> res;
            // Run the model's RunSession method on the image and collect the results
            char* ret = p->RunSession(img, res);
    
            // Initial Y coordinate for the text output
            float positionY = 50;
            // Iterate over each returned result
            for (int i = 0; i < res.size(); i++)
            {
                // Generate random color components
                int r = dis(gen);
                int g = dis(gen);
                int b = dis(gen);
                // Draw the class index on the image
                cv::putText(img, std::to_string(i) + ":", cv::Point(10, positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2);
                // Draw the confidence of this class on the image
                cv::putText(img, std::to_string(res.at(i).confidence), cv::Point(70, positionY), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(b, g, r), 2);
                // Move the Y coordinate down for the next line of text
                positionY += 50;
            }
    
            // Show the image with the classification results
            cv::imshow("TEST_CLS", img);
            // Wait for a key press
            cv::waitKey(0);
            // Close all open image windows
            cv::destroyAllWindows();
            // Optionally save the processed image to a given path (commented out)
            //cv::imwrite("E:\\output\\" + std::to_string(k) + ".png", img);
        }
    }
}

// Read coco.yaml, extract the class names, and store them in p->classes
int ReadCocoYaml(YOLO_V8*& p) {
    // Open the YAML file
    std::ifstream file("coco.yaml");
    if (!file.is_open())
    {
        std::cerr << "Failed to open file" << std::endl;
        return 1;
    }

    // Read the file line by line
    std::string line;
    std::vector<std::string> lines;
    while (std::getline(file, line))
    {
        lines.push_back(line);
    }

    // Find the start and end of the names section
    std::size_t start = 0;
    std::size_t end = 0;
    for (std::size_t i = 0; i < lines.size(); i++)
    {
        if (lines[i].find("names:") != std::string::npos)
        {
            start = i + 1;
        }
        else if (start > 0 && lines[i].find(':') == std::string::npos)
        {
            end = i;
            break;
        }
    }

    // Extract the class names
    std::vector<std::string> names;
    for (std::size_t i = start; i < end; i++)
    {
        std::stringstream ss(lines[i]);
        std::string name;
        std::getline(ss, name, ':'); // extract the part before the separator (the number)
        std::getline(ss, name); // extract the part after the separator (the name string)
        names.push_back(name);
    }

    // Store the class names in p->classes
    p->classes = names;
    return 0;
}


// Detection test
void DetectTest()
{
    // Create a YOLO_V8 object
    YOLO_V8* yoloDetector = new YOLO_V8;
    // Read the class names from coco.yaml
    ReadCocoYaml(yoloDetector);
    // Set up the detection parameters
    DL_INIT_PARAM params;
    params.rectConfidenceThreshold = 0.1;
    params.iouThreshold = 0.5;
    params.modelPath = "yolov8n.onnx";
    params.imgSize = { 640, 640 };
#ifdef USE_CUDA
    params.cudaEnable = true;

    // GPU inference in FP32
    params.modelType = YOLO_DETECT_V8;
    // GPU inference in FP16 (note: requires an fp16 onnx model)
    // params.modelType = YOLO_DETECT_V8_HALF;

#else
    // CPU inference
    params.modelType = YOLO_DETECT_V8;
    params.cudaEnable = false;

#endif
    // Create the inference session
    yoloDetector->CreateSession(params);
    // Run detection
    Detector(yoloDetector);
}


// Classification test
void ClsTest()
{
    // Create a YOLO_V8 object
    YOLO_V8* yoloDetector = new YOLO_V8;
    // Model path
    std::string model_path = "cls.onnx";
    // Read the class names from coco.yaml
    ReadCocoYaml(yoloDetector);
    // Set up the classifier parameters
    DL_INIT_PARAM params{ model_path, YOLO_CLS, {224, 224} };
    // Create the classifier session
    yoloDetector->CreateSession(params);
    // Run classification
    Classifier(yoloDetector);
}


// Entry point
int main()
{
    // Run the classification test
    ClsTest();
}

YOLOv8 OnnxRuntime C++

C++ Onnx-runtime

This example demonstrates how to perform inference using YOLOv8 in C++ with ONNX Runtime and OpenCV's API.

Benefits ✨

  • Friendly for deployment in the industrial sector.
  • Faster than OpenCV's DNN inference on both CPU and GPU.
  • Supports FP32 and FP16 CUDA acceleration.

Note ☕

  1. Benefiting from Ultralytics' latest release, a Transpose op is added to the YOLOv8 model, which gives v8 the same output shape as v5. Therefore, you can run inference with YOLOv5/v7/v8 via this project (see the shape-check sketch below).
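A quick way to see which layout your export produces is to print the output shape with onnxruntime (a Python sketch; the model path is a placeholder). The detection head of a YOLOv8 export is typically (1, 84, 8400) before any added Transpose and (1, 8400, 84) after it, which is the layout the TensorProcess code above iterates over:

import onnxruntime as ort

session = ort.InferenceSession("yolov8n.onnx", providers=["CPUExecutionProvider"])
print(session.get_outputs()[0].shape)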

Exporting YOLOv8 Models 📦

To export YOLOv8 models, use the following Python script:

from ultralytics import YOLO

# Load a YOLOv8 model
model = YOLO("yolov8n.pt")

# Export the model
model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)

Alternatively, you can use the following command for exporting the model in the terminal

yolo export model=yolov8n.pt opset=12 simplify=True dynamic=False format=onnx imgsz=640,640

Exporting YOLOv8 FP16 Models 📦

import onnx
from onnxconverter_common import float16

model = onnx.load(R"YOUR_ONNX_PATH")
model_fp16 = float16.convert_float_to_float16(model)
onnx.save(model_fp16, R"YOUR_FP16_ONNX_PATH")

Download COCO.yaml file 📂

In order to run the example, you also need to download coco.yaml. You can download the file manually from here
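For comparison with the ReadCocoYaml helper in main.cpp, the same names lookup can be done in Python with PyYAML (a sketch; it assumes coco.yaml sits in the working directory):

import yaml

with open("coco.yaml") as f:
    names = yaml.safe_load(f)["names"]  # e.g. {0: 'person', 1: 'bicycle', ...}
print(len(names), names[0])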

Dependencies ⚙️

Dependency                         Version
Onnxruntime (linux, windows, macos) >=1.14.1
OpenCV                             >=4.0.0
C++ Standard                       >=17
Cmake                              >=3.5
Cuda (Optional)                    >=11.4 <12.0
cuDNN (Cuda required)              =8

Note: The dependency on C++17 is due to the usage of the C++17 filesystem feature.

Note (2): Due to ONNX Runtime, we need to use CUDA 11 and cuDNN 8. Keep in mind that this requirement might change in the future.

Build 🛠️

  1. Clone the repository to your local machine.

  2. Navigate to the root directory of the repository.

  3. Create a build directory and navigate to it:

    mkdir build && cd build
    
  4. Run CMake to generate the build files:

    cmake ..
    
  5. Build the project:

    make
    
  6. The built executable should now be located in the build directory.

Usage 🚀

//change your param as you like
//Pay attention to your device and the onnx model type(fp32 or fp16)
DL_INIT_PARAM params;
params.rectConfidenceThreshold = 0.1;
params.iouThreshold = 0.5;
params.modelPath = "yolov8n.onnx";
params.imgSize = { 640, 640 };
params.cudaEnable = true;
params.modelType = YOLO_DETECT_V8;
yoloDetector->CreateSession(params);
Detector(yoloDetector);

YOLOv8-ONNXRuntime-Rust for All the Key YOLO Tasks

This repository provides a Rust demo for performing YOLOv8 tasks like Classification, Segmentation, Detection, Pose Detection and OBB using ONNXRuntime.

Recently Updated

  • Add YOLOv8-OBB demo
  • Update ONNXRuntime to 1.17.x

Newly updated YOLOv8 example code is located in this repository (https://github.com/jamjamjon/usls/tree/main/examples/yolo)

Features

  • Support Classification, Segmentation, Detection, Pose (Keypoints) Detection, and OBB tasks.
  • Support FP16 & FP32 ONNX models.
  • Support CPU, CUDA and TensorRT execution providers to accelerate computation.
  • Support dynamic input shapes (batch, width, height).

Installation

1. Install Rust

Please follow the Rust official installation. (https://www.rust-lang.org/tools/install)

2. Install ONNXRuntime

This repository uses the ort crate, which is an ONNXRuntime wrapper for Rust. (https://docs.rs/ort/latest/ort/)

You can follow the instructions in the ort docs or simply do this:

On Ubuntu, you can do it like this:

vim ~/.bashrc

# Add the path of the ONNXRuntime lib
export LD_LIBRARY_PATH=/home/qweasd/Documents/onnxruntime-linux-x64-gpu-1.16.3/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

source ~/.bashrc

3. [Optional] Install CUDA & CuDNN & TensorRT

  • CUDA execution provider requires CUDA v11.6+.
  • TensorRT execution provider requires CUDA v11.4+ and TensorRT v8.4+.

Get Started

1. Export the YOLOv8 ONNX Models

pip install -U ultralytics

# export onnx model with dynamic shapes
yolo export model=yolov8m.pt format=onnx  simplify dynamic
yolo export model=yolov8m-cls.pt format=onnx  simplify dynamic
yolo export model=yolov8m-pose.pt format=onnx  simplify dynamic
yolo export model=yolov8m-seg.pt format=onnx  simplify dynamic


# export onnx model with constant shapes
yolo export model=yolov8m.pt format=onnx  simplify
yolo export model=yolov8m-cls.pt format=onnx  simplify
yolo export model=yolov8m-pose.pt format=onnx  simplify
yolo export model=yolov8m-seg.pt format=onnx  simplify

2. Run Inference

It will perform inference with the ONNX model on the source image.

cargo run --release -- --model <MODEL> --source <SOURCE>

Set --cuda to use CUDA execution provider to speed up inference.

cargo run --release -- --cuda --model <MODEL> --source <SOURCE>

Set --trt to use TensorRT execution provider, and you can set --fp16 at the same time to use TensorRT FP16 engine.

cargo run --release -- --trt --fp16 --model <MODEL> --source <SOURCE>

Set --device_id to select which device to run on. If you have only one GPU and set device_id to 1, the program will not panic; ort will automatically fall back to the CPU EP.

cargo run --release -- --cuda --device_id 0 --model <MODEL> --source <SOURCE>

Set --batch to do multi-batch-size inference.

If you're using --trt, you can also set --batch-min and --batch-max to explicitly specify the min/max/opt batch for dynamic batch input. (https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#explicit-shape-range-for-dynamic-shape-input) (Note that the ONNX model should be exported with dynamic shapes.)

cargo run --release -- --cuda --batch 2 --model <MODEL> --source <SOURCE>

Set --height and --width to do dynamic image size inference. (Note that the ONNX model should be exported with dynamic shapes.)

cargo run --release -- --cuda --width 480 --height 640 --model <MODEL> --source <SOURCE>

Set --profile to check the time consumed in each stage. (Note that the model usually needs 1~3 dry runs to warm up. Make sure to run enough times to evaluate the result.)

cargo run --release -- --trt --fp16 --profile --model <MODEL> --source <SOURCE>

Results: (yolov8m.onnx, batch=1, 3 times, trt, fp16, RTX 3060Ti)

==> 0
[Model Preprocess]: 12.75788ms
[ORT H2D]: 237.118µs
[ORT Inference]: 507.895469ms
[ORT D2H]: 191.655µs
[Model Inference]: 508.34589ms
[Model Postprocess]: 1.061122ms
==> 1
[Model Preprocess]: 13.658655ms
[ORT H2D]: 209.975µs
[ORT Inference]: 5.12372ms
[ORT D2H]: 182.389µs
[Model Inference]: 5.530022ms
[Model Postprocess]: 1.04851ms
==> 2
[Model Preprocess]: 12.475332ms
[ORT H2D]: 246.127µs
[ORT Inference]: 5.048432ms
[ORT D2H]: 187.117µs
[Model Inference]: 5.493119ms
[Model Postprocess]: 1.040906ms

And also:

--conf: confidence threshold [default: 0.3]

--iou: iou threshold in NMS [default: 0.45]

--kconf: confidence threshold of keypoint [default: 0.55]

--plot: plot inference result with random RGB color and save

You can check out all CLI arguments by running:

git clone https://github.com/ultralytics/ultralytics
cd ultralytics/examples/YOLOv8-ONNXRuntime-Rust
cargo run --release -- --help

Examples

Ultralytics YOLO Tasks

Classification

Running a dynamic-shape ONNX model on the CPU with image size --height 224 --width 224, saving the plotted image in the runs directory.

cargo run --release -- --model ../assets/weights/yolov8m-cls-dyn.onnx --source ../assets/images/dog.jpg --height 224 --width 224 --plot --profile

You will see a result like:

Summary:
> Task: Classify (Ultralytics 8.0.217)
> EP: Cpu
> Dtype: Float32
> Batch: 1 (Dynamic), Height: 224 (Dynamic), Width: 224 (Dynamic)
> nc: 1000 nk: 0, nm: 0, conf: 0.3, kconf: 0.55, iou: 0.45

[Model Preprocess]: 16.363477ms
[ORT H2D]: 50.722µs
[ORT Inference]: 16.295808ms
[ORT D2H]: 8.37µs
[Model Inference]: 16.367046ms
[Model Postprocess]: 3.527µs
[
    YOLOResult {
        Probs(top5): Some([(208, 0.6950566), (209, 0.13823675), (178, 0.04849795), (215, 0.019029364), (212, 0.016506357)]),
        Bboxes: None,
        Keypoints: None,
        Masks: None,
    },
]

Object Detection

Using CUDA EP and dynamic image size --height 640 --width 480

cargo run --release -- --cuda --model ../assets/weights/yolov8m-dynamic.onnx --source ../assets/images/bus.jpg --plot --height 640 --width 480

Pose Detection

Using the TensorRT EP

cargo run --release -- --trt --model ../assets/weights/yolov8m-pose.onnx --source ../assets/images/bus.jpg --plot

Instance Segmentation

Using the TensorRT EP with an FP16 model --fp16

cargo run --release -- --trt --fp16 --model ../assets/weights/yolov8m-seg.onnx --source ../assets/images/0172.jpg --plot

.\yolov8\examples\YOLOv8-OpenCV-int8-tflite-Python\main.py

# Import argparse for handling command-line arguments
import argparse

# Import OpenCV for image processing
import cv2

# Import NumPy for array and matrix operations
import numpy as np

# Import the TensorFlow Lite interpreter, used to run TFLite models on mobile and embedded devices
from tflite_runtime import interpreter as tflite

# Import ASSETS and yaml_load from the Ultralytics utils module
from ultralytics.utils import ASSETS, yaml_load

# Import check_yaml from Ultralytics, used to check and resolve YAML files
from ultralytics.utils.checks import check_yaml

# Global variables for the image width and height expected by the trained model
img_width = 640
img_height = 640

class LetterBox:
    """Resizes and reshapes images while maintaining aspect ratio by adding padding, suitable for YOLO models."""

    def __init__(
        self, new_shape=(img_width, img_height), auto=False, scaleFill=False, scaleup=True, center=True, stride=32
    ):
        """
        初始化 LetterBox 对象,配置图像缩放和重塑参数,以保持图像长宽比,并添加填充。

        参数:
        - new_shape: 新图像的目标尺寸 (宽度, 高度)
        - auto: 是否自动处理
        - scaleFill: 是否填充以适应目标尺寸
        - scaleup: 是否放大图像
        - center: 图像放置位置是否居中
        - stride: 网格步长
        """
        self.new_shape = new_shape  # 新图像的目标尺寸
        self.auto = auto  # 是否自动处理
        self.scaleFill = scaleFill  # 是否填充以适应目标尺寸
        self.scaleup = scaleup  # 是否放大图像
        self.stride = stride  # 网格步长
        self.center = center  # 图像放置位置是否居中,默认为居中
    def __call__(self, labels=None, image=None):
        """Return updated labels and image with added border."""
        
        # If labels is None, initialize it as an empty dict
        if labels is None:
            labels = {}

        # If image is None, take the image from the "img" key in labels
        img = labels.get("img") if image is None else image

        # Get the height and width of the image
        shape = img.shape[:2]  # current shape [height, width]

        # Get the target shape; if it is an integer, convert it to a tuple (new_shape, new_shape)
        new_shape = labels.pop("rect_shape", self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Compute the scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

        # If scaling up is not allowed, cap r at 1.0 so the image can only be scaled down
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute the padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

        # If self.auto is set, pad to the minimum rectangle (a multiple of stride)
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding

        # If self.scaleFill is set, stretch the image to fill the target size
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        # If self.center is set, split the padding evenly between the two sides
        if self.center:
            dw /= 2  # divide padding into 2 sides
            dh /= 2

        # If the current image size differs from the new unpadded size, resize it
        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # Compute the top/bottom and left/right padding
        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))

        # Add the border to the image with cv2.copyMakeBorder()
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )  # add border

        # If labels contains a "ratio_pad" key, update it with the padding ratio and offset
        if labels.get("ratio_pad"):
            labels["ratio_pad"] = (labels["ratio_pad"], (left, top))  # for evaluation

        # If labels is not empty, update the labels via _update_labels() and return them
        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels["img"] = img
            labels["resized_shape"] = new_shape
            return labels
        else:
            return img

    def _update_labels(self, labels, ratio, padw, padh):
        """Update labels."""

        # Convert instance bounding boxes to (x1, y1, x2, y2) format
        labels["instances"].convert_bbox(format="xyxy")

        # Denormalize the bounding boxes using the original image width and height
        labels["instances"].denormalize(*labels["img"].shape[:2][::-1])

        # Scale the bounding boxes by the resize ratio
        labels["instances"].scale(*ratio)

        # Add the padding offsets to the bounding boxes
        labels["instances"].add_padding(padw, padh)

        # Return the updated labels
        return labels
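
# Illustrative usage of LetterBox (not part of the original script; the image path is hypothetical):
#   sample = cv2.imread("bus.jpg")                        # any BGR image
#   boxed = LetterBox(new_shape=(640, 640))(image=sample)
#   print(sample.shape, "->", boxed.shape)                # e.g. (1080, 810, 3) -> (640, 640, 3)
# The result keeps the aspect ratio and pads the borders with gray (114, 114, 114).
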
class Yolov8TFLite:
    """Class for performing object detection using YOLOv8 model converted to TensorFlow Lite format."""

    def __init__(self, tflite_model, input_image, confidence_thres, iou_thres):
        """
        Initializes an instance of the Yolov8TFLite class.

        Args:
            tflite_model: Path to the TFLite model.
            input_image: Path to the input image.
            confidence_thres: Confidence threshold for filtering detections.
            iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
        """

        self.tflite_model = tflite_model  # path to the TFLite model
        self.input_image = input_image    # path to the input image
        self.confidence_thres = confidence_thres  # confidence threshold used to filter detections
        self.iou_thres = iou_thres        # IoU threshold used for non-maximum suppression

        # Load the list of class names from the COCO dataset
        self.classes = yaml_load(check_yaml("coco8.yaml"))["names"]

        # Generate a color palette for the classes
        self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def draw_detections(self, img, box, score, class_id):
        """
        Draws bounding boxes and labels on the input image based on the detected objects.

        Args:
            img: The input image to draw detections on.
            box: Detected bounding box.
            score: Corresponding detection score.
            class_id: Class ID for the detected object.

        Returns:
            None
        """

        # Extract the coordinates of the bounding box
        x1, y1, w, h = box

        # Get the color corresponding to the class
        color = self.color_palette[class_id]

        # Draw the bounding box on the image
        cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)

        # Create the label text containing the class name and confidence score
        label = f"{self.classes[class_id]}: {score:.2f}"

        # Compute the size of the label text
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

        # Compute the position of the label text
        label_x = x1
        label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

        # Draw a filled rectangle as the background of the label text
        cv2.rectangle(
            img,
            (int(label_x), int(label_y - label_height)),
            (int(label_x + label_width), int(label_y + label_height)),
            color,
            cv2.FILLED,
        )

        # Draw the label text on the image
        cv2.putText(img, label, (int(label_x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    def preprocess(self):
        """
        Preprocesses the input image before performing inference.

        Returns:
            image_data: Preprocessed image data ready for inference.
        """

        # Read the input image using OpenCV
        self.img = cv2.imread(self.input_image)

        # Get the height and width of the input image
        self.img_height, self.img_width = self.img.shape[:2]

        # Initialize a LetterBox object for resizing the image
        letterbox = LetterBox(new_shape=[self.img_width, self.img_height], auto=False, stride=32)
        
        # Resize the input image using the LetterBox object
        image = letterbox(image=self.img)
        image = [image]  # Convert image to list
        image = np.stack(image)  # Stack images along a new axis
        image = image[..., ::-1].transpose((0, 3, 1, 2))  # Rearrange dimensions for model compatibility
        img = np.ascontiguousarray(image)  # Return a contiguous array in memory

        # Convert image data to float32 and normalize
        image = img.astype(np.float32)
        return image / 255  # Return normalized image data for inference

    def postprocess(self, input_image, output):
        """
        Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.

        Args:
            input_image (numpy.ndarray): The input image.
            output (numpy.ndarray): The output of the model.

        Returns:
            numpy.ndarray: The input image with detections drawn on it.
        """

        boxes = []
        scores = []
        class_ids = []

        # Process each prediction in the model's output
        for pred in output:
            pred = np.transpose(pred)
            for box in pred:
                x, y, w, h = box[:4]
                x1 = x - w / 2
                y1 = y - h / 2
                boxes.append([x1, y1, w, h])  # Store box coordinates
                idx = np.argmax(box[4:])
                scores.append(box[idx + 4])  # Store score
                class_ids.append(idx)  # Store class ID

        # Perform non-maximum suppression to filter out overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)

        # Draw bounding boxes on the input image based on selected indices
        for i in indices:
            box = boxes[i]
            gain = min(self.img_width / self.img_width, self.img_height / self.img_height)
            pad = (
                round((self.img_width - self.img_width * gain) / 2 - 0.1),
                round((self.img_height - self.img_height * gain) / 2 - 0.1),
            )
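            # Note (added for clarity): the numerator and denominator used for gain above are the
            # same values, so gain is always 1.0 and pad evaluates to (0, 0); the remapping below
            # therefore leaves the box coordinates unchanged.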
            box[0] = (box[0] - pad[0]) / gain
            box[1] = (box[1] - pad[1]) / gain
            box[2] = box[2] / gain
            box[3] = box[3] / gain
            score = scores[i]
            class_id = class_ids[i]
            
            if score > 0.25:
                print(box, score, class_id)
                # Draw detections on the input image
                self.draw_detections(input_image, box, score, class_id)

        return input_image
    def main(self):
        """
        Performs inference using a TFLite model and returns the output image with drawn detections.

        Returns:
            output_img: The output image with drawn detections.
        """

        # Create an interpreter for the TFLite model
        interpreter = tflite.Interpreter(model_path=self.tflite_model)
        self.model = interpreter  # keep the interpreter on the instance
        interpreter.allocate_tensors()  # allocate tensor memory

        # Get the model's input and output details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        # Store the input shape for later use
        input_shape = input_details[0]["shape"]
        self.input_width = input_shape[1]  # input width
        self.input_height = input_shape[2]  # input height

        # Preprocess the image data
        img_data = self.preprocess()
        img_data = img_data  # no-op, kept as in the original source

        # Transpose the image data to NHWC channel order, as expected by the model
        img_data = img_data.transpose((0, 2, 3, 1))

        # Get the input tensor's quantization parameters and convert the image data to int8
        scale, zero_point = input_details[0]["quantization"]
        img_data_int8 = (img_data / scale + zero_point).astype(np.int8)
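        # Quantization note (added for clarity): TFLite int8 tensors use an affine mapping
        # real_value = scale * (q - zero_point), so a float input is quantized as
        # q = real_value / scale + zero_point, which is what the line above computes.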
        interpreter.set_tensor(input_details[0]["index"], img_data_int8)

        # Run inference
        interpreter.invoke()

        # Get the output tensor from the interpreter
        output = interpreter.get_tensor(output_details[0]["index"])

        # Dequantize the output tensor back to real values
        scale, zero_point = output_details[0]["quantization"]
        output = (output.astype(np.float32) - zero_point) * scale

        # Scale the box coordinates by the model input size (the global img_width / img_height)
        output[:, [0, 2]] *= img_width
        output[:, [1, 3]] *= img_height

        print(output)  # print the raw output (for debugging)

        # Post-process the output and return the image with detections drawn on it
        return self.postprocess(self.img, output)
if __name__ == "__main__":
    # Create an argument parser to handle command-line arguments
    parser = argparse.ArgumentParser()
    # Model file path, defaults to "yolov8n_full_integer_quant.tflite"
    parser.add_argument(
        "--model", type=str, default="yolov8n_full_integer_quant.tflite", help="Input your TFLite model."
    )
    # Input image path, defaults to "bus.jpg" in the ASSETS directory
    parser.add_argument("--img", type=str, default=str(ASSETS / "bus.jpg"), help="Path to input image.")
    # Confidence threshold, defaults to 0.5
    parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
    # NMS IoU threshold, defaults to 0.5
    parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
    # Parse the command-line arguments into args
    args = parser.parse_args()

    # Instantiate the Yolov8TFLite class with the given arguments
    detection = Yolov8TFLite(args.model, args.img, args.conf_thres, args.iou_thres)

    # Run object detection and get the output image
    output_image = detection.main()

    # Display the output image in a window
    cv2.imshow("Output", output_image)

    # Wait for a key press to exit
    cv2.waitKey(0)
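
A typical way to run this script from the example directory (the model and image paths here are illustrative; the argparse defaults above are used when flags are omitted):

python main.py --model yolov8n_full_integer_quant.tflite --img bus.jpg --conf-thres 0.5 --iou-thres 0.5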