openvino_yoloV3

import sys, os, cv2, time
import numpy as np, math
from argparse import ArgumentParser
from openvino.inference_engine import IECore

m_input_size = 416                  # model input size: 416*416

yolo_scale_13 = 13                  # 13*13 output grid
yolo_scale_26 = 26                  # 26*26 output grid
yolo_scale_52 = 52                  # 52*52 output grid

classes = 2                         # number of classes
coords = 4                          # box coordinates (center x, center y, width, height)
num = 3                             # detection boxes per grid cell
anchors = [10,13,16,30,33,23,30,61,62,45,59,119,116,90,156,198,373,326]     # anchor priors (w,h pairs)
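# The nine (w, h) anchor pairs are grouped by output scale, matching the
# anchor_offset logic in ParseYOLOV3Output below:
#   52*52 grid -> (10,13) (16,30) (33,23)
#   26*26 grid -> (30,61) (62,45) (59,119)
#   13*13 grid -> (116,90) (156,198) (373,326)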

LABELS = ("operator1", "operator2") # label list

label_text_color = (255, 255, 255)
label_background_color = (125, 175, 75)
box_color = (255, 128, 0)
box_thickness = 1

def build_argparser():
    parser = ArgumentParser()
    parser.add_argument("-d", "--device", help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. \
                                                Sample will look for a suitable plugin for device specified (CPU by default)", default="CPU", type=str)
    return parser

# Compute the index into the flattened output blob
def EntryIndex(side, lcoords, lclasses, location, entry):
    n = int(location / (side * side))
    loc = location % (side * side)
    return int(n * side * side * (lcoords + lclasses + 1) + entry * side * side + loc)
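# Worked example: side=13, coords=4, classes=2, anchor n=1, cell i=5, entry=coords
# (the objectness slot) -> 1*13*13*(4+2+1) + 4*13*13 + 5 = 1183 + 676 + 5 = 1864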


class DetectionObject():
    xmin = 0
    ymin = 0
    xmax = 0
    ymax = 0
    class_id = 0
    confidence = 0.0
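    # The x, y, h, w passed to __init__ are in model-input (resized-image) pixels;
    # h_scale and w_scale are the original/resized ratios used to map the box back
    # to the original frame.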

    def __init__(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        self.xmin = int((x - w / 2) * w_scale)
        self.ymin = int((y - h / 2) * h_scale)
        self.xmax = int(self.xmin + w * w_scale)
        self.ymax = int(self.ymin + h * h_scale)
        self.class_id = class_id
        self.confidence = confidence

# Intersection over Union (IoU) of two boxes
def IntersectionOverUnion(box_1, box_2):
    width_of_overlap_area = min(box_1.xmax, box_2.xmax) - max(box_1.xmin, box_2.xmin)
    height_of_overlap_area = min(box_1.ymax, box_2.ymax) - max(box_1.ymin, box_2.ymin)
    area_of_overlap = 0.0
    if (width_of_overlap_area < 0.0 or height_of_overlap_area < 0.0):
        area_of_overlap = 0.0
    else:
        area_of_overlap = width_of_overlap_area * height_of_overlap_area
    box_1_area = (box_1.ymax - box_1.ymin)  * (box_1.xmax - box_1.xmin)
    box_2_area = (box_2.ymax - box_2.ymin)  * (box_2.xmax - box_2.xmin)
    area_of_union = box_1_area + box_2_area - area_of_overlap
    retval = 0.0
    if area_of_union <= 0.0:
        retval = 0.0
    else:
        retval = (area_of_overlap / area_of_union)
    return retval
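# Quick sanity check with hypothetical boxes (not part of the pipeline):
#   a = (xmin=0, ymin=0, xmax=10, ymax=10), b = (xmin=5, ymin=5, xmax=15, ymax=15)
#   overlap = 5*5 = 25, union = 100 + 100 - 25 = 175, IoU = 25/175 ~= 0.143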


def ParseYOLOV3Output(blob, resized_im_h, resized_im_w, original_im_h, original_im_w, threshold, objects):
    # Example YOLOv3 output layers (80-class COCO model):
    # conv2d_58/BiasAdd (1, 13, 13, 255)  anchors (116,90, 156,198, 373,326)
    # conv2d_66/BiasAdd (1, 26, 26, 255)  anchors (30,61, 62,45, 59,119)
    # conv2d_74/BiasAdd (1, 52, 52, 255)  anchors (10,13, 16,30, 33,23)
    # 255 = N*(5+80) with N = 3 anchors per cell; each box is (x, y, w, h, box_score, class_no_1, ..., class_no_80)
    # print(blob.shape)                                     # (B, N*(5+classes), H, W)
    out_blob_h = blob.shape[2]                              # grid height (number of rows)
    out_blob_w = blob.shape[3]                              # grid width (number of columns)
    side = out_blob_h
    anchor_offset = 0

    if len(anchors) == 18:   ## YoloV3                      # infer the YOLO variant from the anchor count
        if side == yolo_scale_13:
            anchor_offset = 2 * 6
        elif side == yolo_scale_26:
            anchor_offset = 2 * 3
        elif side == yolo_scale_52:
            anchor_offset = 2 * 0

    elif len(anchors) == 12: ## tiny-YoloV3
        if side == yolo_scale_13:
            anchor_offset = 2 * 3
        elif side == yolo_scale_26:
            anchor_offset = 2 * 0

    else:                    ## ???
        if side == yolo_scale_13:
            anchor_offset = 2 * 6
        elif side == yolo_scale_26:
            anchor_offset = 2 * 3
        elif side == yolo_scale_52:
            anchor_offset = 2 * 0

    side_square = side * side
    output_blob = blob.flatten()                    # flatten (e.g. 1*3*(5+2)*13*13 values)
    for i in range(side_square):
        row = int(i / side)                         # cell row index (y)
        col = int(i % side)                         # cell column index (x)
        for n in range(num):                        # 3 anchor boxes per cell
            obj_index = EntryIndex(side, coords, classes, n * side * side + i, coords)      # index of the objectness score (box_score)
            box_index = EntryIndex(side, coords, classes, n * side * side + i, 0)           # index of the first box coordinate (x)
            scale = output_blob[obj_index]                                                  # objectness score
            if (scale < threshold):
                continue
            # box center (x, y)
            x = (col + output_blob[box_index + 0 * side_square]) / side * resized_im_w      # absolute center in resized-image pixels
            y = (row + output_blob[box_index + 1 * side_square]) / side * resized_im_h
            # box width and height
            height = math.exp(output_blob[box_index + 3 * side_square]) * anchors[anchor_offset + 2 * n + 1]    # absolute size from the anchor prior
            width = math.exp(output_blob[box_index + 2 * side_square]) * anchors[anchor_offset + 2 * n]
            for j in range(classes):
                class_index = EntryIndex(side, coords, classes, n * side_square + i, coords + 1 + j)            # index of class j's score
                prob = scale * output_blob[class_index]                                                         # class confidence = objectness * class score
                if prob < threshold:
                    continue
                # store the box with its class id and confidence
                obj = DetectionObject(x, y, height, width, j, prob, (original_im_h / resized_im_h), (original_im_w / resized_im_w))
                objects.append(obj)
    return objects
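# Box decode used above: x = (col + tx) / side * resized_w, y = (row + ty) / side * resized_h,
# w = anchor_w * exp(tw), h = anchor_h * exp(th), prob(class_j) = box_score * class_score_j.
# This assumes the IR's RegionYolo layers have already applied the logistic activation to
# tx, ty, the box score and the class scores, which is why no sigmoid appears here.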


def main_IE_infer():
    camera_width = 320
    camera_height = 240
    fps = ""
    framepos = 0
    frame_count = 0
    vidfps = 0
    skip_frame = 0
    elapsedTime = 0
    new_w = int(camera_width * m_input_size/camera_width)      # simplifies to m_input_size: the frame is
    new_h = int(camera_height * m_input_size/camera_height)    # stretched to 416*416 (no letterboxing)
    args = build_argparser().parse_args()
    model_xml = "/home/bhc/darknet-master/backup1/16/16.xml" #<--- CPU
    #model_xml = "lrmodels/YoloV3/FP16/frozen_yolo_v3.xml" #<--- MYRIAD
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # cap = cv2.VideoCapture(0)
    # cap.set(cv2.CAP_PROP_FPS, 30)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    cap = cv2.VideoCapture("/home/bhc/BHC/Q3/1_Astemo_DL/video/D16_operator/D16_20211025135235.mp4")
    camera_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))                   # video frame width
    camera_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))                 # video frame height
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))                    # total number of frames
    vidfps = int(cap.get(cv2.CAP_PROP_FPS))                                 # video FPS
    print("videosFrameCount =", str(frame_count))
    print("videosFPS =", str(vidfps))

    time.sleep(1)
    ie = IECore()
    net = ie.read_network(model=model_xml, weights=model_bin)              # read the IR model
    input_blob = next(iter(net.input_info))                                # model input blob name
    exec_net = ie.load_network(network=net, device_name=args.device)       # load the model onto the device from -d (CPU by default)

    while cap.isOpened():                                                   # loop while the capture is open
        t1 = time.time()

        ## Uncomment only when playing video files
        #cap.set(cv2.CAP_PROP_POS_FRAMES, framepos)

        ret, image = cap.read()                                             # read a frame
        if not ret:
            break
        resized_image = cv2.resize(image, (new_w, new_h), interpolation = cv2.INTER_CUBIC)  # resize to the model input size
        canvas = np.full((m_input_size, m_input_size, 3), 128)
        canvas[(m_input_size-new_h)//2:(m_input_size-new_h)//2 + new_h,(m_input_size-new_w)//2:(m_input_size-new_w)//2 + new_w,  :] = resized_image
        prepimg = canvas
        prepimg = prepimg[np.newaxis, :, :, :]                              # add batch axis
        prepimg = prepimg.transpose((0, 3, 1, 2))                           # NHWC to NCHW
        outputs = exec_net.infer(inputs={input_blob: prepimg})              # inference on NCHW input (1, 3, 416, 416), BGR channel order from OpenCV
        objects = []
        for output in outputs.values():
            objects = ParseYOLOV3Output(output, new_h, new_w, camera_height, camera_width, 0.7, objects) # parse each output layer into detections

        # Filtering overlapping boxes
        objlen = len(objects)
        for i in range(objlen):
            if (objects[i].confidence == 0.0):
                continue
            for j in range(i + 1, objlen):
                if (IntersectionOverUnion(objects[i], objects[j]) >= 0.4):      # if IoU >= 0.4, zero the later box's confidence
                    objects[j].confidence = 0

        
        # Drawing boxes
        for obj in objects:
            if obj.confidence < 0.2:
                continue
            label = obj.class_id
            confidence = obj.confidence
            if confidence > 0.2:
                label_text = LABELS[label] + " (" + "{:.1f}".format(confidence * 100) + "%)"
                cv2.rectangle(image, (obj.xmin, obj.ymin), (obj.xmax, obj.ymax), box_color, box_thickness)
                cv2.putText(image, label_text, (obj.xmin, obj.ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, label_text_color, 1)

        cv2.putText(image, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.imshow("Result", image)

        if cv2.waitKey(1)&0xFF == ord('q'):
            break
        elapsedTime = time.time() - t1
        fps = "(Playback) {:.1f} FPS".format(1/elapsedTime)

        ## frame skip, video file only
        #skip_frame = int((vidfps - int(1/elapsedTime)) / int(1/elapsedTime))
        #framepos += skip_frame

    cap.release()
    cv2.destroyAllWindows()
    del net
    del exec_net

if __name__ == '__main__':
    sys.exit(main_IE_infer() or 0)
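# Example invocation (assuming this script is saved as openvino_yolov3.py and the
# hard-coded IR and video paths above exist):
#   python3 openvino_yolov3.py -d CPU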

 
