Python 画边界框(bounding box)

边界框的坐标方向:图像坐标原点在左上角,x 轴向右、y 轴向下;边界框由左上角 (x1, y1) 和右下角 (x2, y2) 两个点确定。

Python OpenCV 画边界框程序:[程序摘自《python OpenCV画 bounding box并标明数据类》]

import os

import cv2
import numpy as np

# Demo: draw one labelled bounding box on an image, save the result and show it.
class_name = "car"
box_left_top = np.array([45, 147])        # (x, y) of the bbox top-left corner
box_right_bottom = np.array([640, 355])   # (x, y) of the bbox bottom-right corner

line_color = (0, 255, 0)
line_thickness = 2
line_type = cv2.LINE_4                    # same value (4) the original hard-coded
font_face = cv2.FONT_HERSHEY_PLAIN
font_scale = 1.0
font_thickness = 1

image_path = './car_img.jpg'
img = cv2.imread(image_path)  # np.array with shape (h, w, c); None if the read failed
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, so fail loudly here
    # instead of crashing on `img.shape` with an unrelated AttributeError.
    raise FileNotFoundError('Cannot read image: {}'.format(image_path))
print(img.shape)

# Pass plain Python ints to the drawing calls rather than numpy scalars.
top_left = tuple(int(v) for v in box_left_top)
bottom_right = tuple(int(v) for v in box_right_bottom)
cv2.rectangle(img, top_left, bottom_right, line_color, line_thickness, line_type)  # bbox outline

# getTextSize(text, fontFace, fontScale, thickness) -> ((width, height), baseline)
(text_w, text_h), _ = cv2.getTextSize(class_name, font_face, font_scale, font_thickness)
label_right_bottom = (top_left[0] + text_w, top_left[1] + text_h)
cv2.rectangle(img, top_left, label_right_bottom, line_color, cv2.FILLED)  # filled label background

# putText takes the bottom-left corner of the text, so shift down from the box corner.
# The bbox corner itself is left untouched (the original overwrote box_left_top here).
text_origin = (top_left[0], int(top_left[1] + text_h / 2 + 4))
text_color = tuple(int(255 - c) for c in line_color)  # complementary colour for contrast
cv2.putText(img, class_name, text_origin, font_face, font_scale, text_color, font_thickness)

cv2.imwrite('./car_img_with_box.jpg', img)
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

图片展示:

 

 

对JHMDB数据集中视频的检测框和真实框的可视化:[看官请忽略,仅当做笔记用]

def visualize_box_in_img_JHMDB(detected_boxes, gt_videos, video_name, detections_path, cls_names, base_path='/home/wp/dataset/JHMDB/Frames'):
    """Draw ground-truth and detected boxes on every frame of one video and save them.

    For each row of the video's ground-truth tube, the GT box is drawn in
    (255, 0, 0) and every detection of that frame in (0, 0, 255), labelled with
    the class whose score is highest. Annotated frames are written to
    ``<dirname(detections_path)>/visulaize_box_in_img/<video_key>/<frame_index>.jpg``.

    Args:
        detected_boxes: ``{img_name: {cls_ind: array[[x1, y1, x2, y2, cls_score], ...]}}``.
            ``cls_ind`` runs from 1 to ``len(cls_names)``; row ``j`` is read from
            every class array for the same detection, so the arrays are expected
            to be row-aligned. ``img_name`` is the last three ``/``-separated
            components of the frame path.
        gt_videos: ``{v_name: {'tubes': [[frame_index, x1, y1, x2, y2]], 'gt_classes': vlabel}}``,
            keyed by ``video_name`` with ``/`` replaced by ``_``; ``vlabel`` is 1-based.
        video_name: e.g. ``brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0``.
        detections_path: Path whose parent directory receives the output folder.
        cls_names: Sequence of class names, indexed by ``cls_ind - 1``.
        base_path: Root directory containing the extracted frames.

    Raises:
        AssertionError: If the number of frames differs from the number of tube rows.
        IOError: If a frame referenced by the tube could not be read as an image.
    """
    frames_dir = os.path.join(base_path, video_name)
    frames = {}
    frame_paths = {}
    # Frame indices are 1-based and follow the sorted file-name order.
    for index, name in enumerate(sorted(os.listdir(frames_dir)), start=1):
        path = os.path.join(frames_dir, name)
        frames[index] = cv2.imread(path)   # np.array with shape (h, w, c), or None on failure
        frame_paths[index] = '/'.join(path.split('/')[-3:])

    video_key = video_name.replace('/', '_')
    gt_label = cls_names[gt_videos[video_key]['gt_classes'] - 1]
    tube = np.squeeze(gt_videos[video_key]['tubes'], axis=0)
    assert len(frames) == tube.shape[0], 'Error: not each frame has a GT box!'

    # The output directory does not depend on the frame, so build it once.
    save_dir = os.path.join(os.path.dirname(detections_path), 'visulaize_box_in_img', video_key)
    os.makedirs(save_dir, exist_ok=True)

    class_indices = range(1, len(cls_names) + 1)
    for row in tube:
        frame_index = int(row[0])
        frame = frames[frame_index]
        if frame is None:
            raise IOError('Cannot read frame: {}'.format(frame_paths[frame_index]))
        draw_box_in_img(frame, row[1:], gt_label, (255, 0, 0))  # blue

        detections = detected_boxes[frame_paths[frame_index]]
        det_boxes = detections[1][:, :4]
        for j in range(det_boxes.shape[0]):
            # Label each detection with the class that scores it highest
            # (first class wins on ties).
            scores = [detections[k][j, -1] for k in class_indices]
            best = scores.index(max(scores))
            draw_box_in_img(frame, det_boxes[j], cls_names[best], (0, 0, 255))  # red

        cv2.imwrite(os.path.join(save_dir, '{:>05d}.jpg'.format(frame_index)), frame)

 
def draw_box_in_img(img, box, class_name, line_color):
    """Draw a labelled bounding box on ``img`` in place and return ``img``.

    Args:
        img: Image array accepted by the cv2 drawing functions; modified in place.
        box: ``[x1, y1, x2, y2]`` or ``np.array([x1, y1, x2, y2])`` giving the
            top-left and bottom-right corners. Values are truncated to int.
        class_name: Label text, drawn on a filled tag at the top-left corner.
        line_color: 3-channel colour for the outline and the tag; the text uses
            the complementary colour (``255 - channel``).

    Returns:
        The same ``img`` object that was passed in.
    """
    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1.0
    font_thickness = 1
    line_thickness = 2

    # Plain Python ints: box may hold numpy scalars or floats.
    left, top = int(box[0]), int(box[1])
    right, bottom = int(box[2]), int(box[3])
    cv2.rectangle(img, (left, top), (right, bottom), line_color, line_thickness, cv2.LINE_4)  # bbox outline

    # getTextSize(text, fontFace, fontScale, thickness) -> ((width, height), baseline)
    (text_w, text_h), _ = cv2.getTextSize(class_name, font_face, font_scale, font_thickness)
    cv2.rectangle(img, (left, top), (left + text_w, top + text_h), line_color, cv2.FILLED)  # label background

    # putText takes the bottom-left corner of the text, so shift down from the box corner.
    text_origin = (left, int(top + text_h / 2 + 4))
    text_color = tuple(int(255 - c) for c in line_color)
    cv2.putText(img, class_name, text_origin, font_face, font_scale, text_color, font_thickness)

    return img

  

 

posted @ 2023-10-21 12:40  Picassooo  阅读(342)  评论(0编辑  收藏  举报