yolov8-物体检测代码示例
import os.path import cv2 import requests import numpy as np from ultralytics.utils import yaml_load from ultralytics.utils.checks import check_yaml class ImageDetect: """图片检测""" def __init__(self): self.MODEL_BASE_DIR = os.path.dirname(__file__) self.CLASSES = yaml_load(check_yaml("coco128.yaml"))["names"] self.colors = np.random.uniform(0, 255, size=(len(self.CLASSES), 3)) self.model_path = os.path.join(self.MODEL_BASE_DIR, "model/yolov8n.onnx") self.onnx_model = None def load_model(self): """加载模型""" # Load the ONNX model self.onnx_model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(self.model_path) def draw_bounding_box(self, img, class_id, confidence, x, y, x_plus_w, y_plus_h): """ Draws bounding boxes on the input image based on the provided arguments. Args: img (numpy.ndarray): The input image to draw the bounding box on. class_id (int): Class ID of the detected object. confidence (float): Confidence score of the detected object. x (int): X-coordinate of the top-left corner of the bounding box. y (int): Y-coordinate of the top-left corner of the bounding box. x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box. y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box. """ label = f"{self.CLASSES[class_id]} ({confidence:.2f})" color = self.colors[class_id] cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) def parse_image(self, image: str, show: bool = False): """ parse_image function to load ONNX model, perform inference, draw bounding boxes, and display the output image. Args: image (str): 图片路径 show (bool): 是否展示识别后的结果 Returns: list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc. """ if not self.onnx_model: self.load_model() if not image.startswith("http"): # 读取本地图片 original_image: np.ndarray = cv2.imread(image) else: # 读取网络图片 response = requests.get(image) image_array = np.frombuffer(response.content, dtype=np.uint8) original_image: np.ndarray = cv2.imdecode(image_array, cv2.IMREAD_COLOR) [height, width, _] = original_image.shape # Prepare a square image for inference length = max((height, width)) image = np.zeros((length, length, 3), np.uint8) image[0:height, 0:width] = original_image # Calculate scale factor scale = length / 640 # Preprocess the image and prepare blob for model blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True) self.onnx_model.setInput(blob) # Perform inference outputs = self.onnx_model.forward() # Prepare output array outputs = np.array([cv2.transpose(outputs[0])]) rows = outputs.shape[1] boxes = [] scores = [] class_ids = [] # Iterate through output to collect bounding boxes, confidence scores, and class IDs for i in range(rows): classes_scores = outputs[0][i][4:] (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores) if maxScore >= 0.25: box = [ outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]), outputs[0][i][2], outputs[0][i][3]] boxes.append(box) scores.append(maxScore) class_ids.append(maxClassIndex) # Apply NMS (Non-maximum suppression) result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5) detections = [] # Iterate through NMS results to draw bounding boxes and labels for i in range(len(result_boxes)): index = result_boxes[i] box = boxes[index] detection = { "class_id": class_ids[index], # 分类id "class_name": self.CLASSES[class_ids[index]], # 分类名称 "confidence": scores[index], # 置信度 "box": box, "scale": scale} detections.append(detection) # print(detection) if show: self.draw_bounding_box( original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale), round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale) ) if show: # Display the image with bounding boxes cv2.imshow('image', original_image) cv2.waitKey(0) cv2.destroyAllWindows() return detections detect = ImageDetect() if __name__ == '__main__': for _ in range(1): detect.parse_image(image="image/bus.jpg")
服务器占用资源小,默认数据集不太好用。建议自己针对场景进行训练。
推荐一个占用资源搭,比较好用的:https://github.com/xinyu1205/recognize-anything
本文来自博客园,作者:一石数字欠我15w!!!,转载请注明原文链接:https://www.cnblogs.com/52-qq/p/18291206