深度学习之目标检测:非极大值抑制源码解析(nms)
目标检测:nms源码解析
原理:选定一个IOU阈值(例如0.3),然后将所有窗口(bounding box,本例中共3个)按照得分由高到低排序。选中得分最高的窗口,遍历计算剩余的2个窗口与该窗口的IOU;如果IOU大于阈值0.3,则将该窗口删除(保留得分高的窗口)。再从剩余的窗口中选出得分最高的,重复该过程,直到所有窗口都被处理过。
import random  # no longer used by the demo below; kept as a file-level import

import numpy as np


def nms(dets, thresh):
    """Run greedy non-maximum suppression (NMS) on scored boxes.

    Boxes are visited from highest to lowest score. Each visited box is
    kept, and every remaining box whose IoU with it is strictly greater
    than ``thresh`` is discarded.

    Args:
        dets: Array-like of shape (N, 5+); the first five columns are read
            as ``x1, y1, x2, y2, score``. Coordinates are treated as
            inclusive pixel indices, so a box's width is ``x2 - x1 + 1``.
        thresh: IoU threshold. Boxes with ``IoU <= thresh`` relative to the
            currently selected box survive to the next round.

    Returns:
        List of row indices (Python ints) into ``dets`` for the kept boxes,
        in descending score order. Empty input yields an empty list.

    Side effects:
        Prints the detections and the score ordering, as the original
        tutorial code did.
    """
    dets = np.asarray(dets, dtype=np.float64)
    print(dets)
    if dets.size == 0:
        # Nothing to suppress; also covers 1-D empty arrays such as
        # np.array([]), which cannot be column-indexed.
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    score = dets[:, 4]

    # With x1=3 and x2=5 the box covers pixels 3, 4 and 5, i.e. 5 - 3 + 1 = 3
    # pixels rather than 5 - 3 = 2, hence the "+ 1" on every extent.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)  # area of every box
    order = score.argsort()[::-1]  # box indices sorted by descending score
    print("order", order)

    keep = []
    while order.size > 0:
        i = order[0]  # index of the highest-scoring remaining box
        rest = order[1:]
        keep.append(int(i))

        # Corners of the intersection between box i and each remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp to zero so non-overlapping boxes get an empty intersection.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h  # intersection area (not the area of the other boxes)
        iou = inter / (areas[i] + areas[rest] - inter)

        # A small IoU means little overlap with box i, so the box probably
        # belongs to a different object and must stay in the candidate set.
        order = rest[iou <= thresh]
    return keep


def main():
    """Demo: run NMS on three hard-coded boxes and draw the survivors."""
    # Imported here so that `nms` can be imported without OpenCV installed.
    import cv2

    # Raw string: the backslashes are path separators, not escape sequences.
    image_path = r"E:\jupyterProject\Pytorch\cs75.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + image_path)

    dets = np.array(
        [
            [500, 250, 1200, 700, 0.8],
            [400, 200, 1000, 500, 0.5],
            [800, 150, 1300, 600, 0.6],
        ],
        dtype=np.float64,  # np.float was removed in NumPy 1.24
    )

    # Threshold set to 0.3 (noted by the original author as the default in
    # the Faster R-CNN configuration file).
    rtn_box = nms(dets, 0.3)
    cls_dets = dets[rtn_box, :]
    print("nms box:", cls_dets)

    img_cp1 = img.copy()
    for box in cls_dets.tolist():
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        score = box[-1]
        cv2.rectangle(img_cp1, (x1, y1), (x2, y2), (0, 255, 255), 2)
        cv2.putText(
            img_cp1,
            "CAR:" + str(score),
            (x1, y1),
            cv2.FONT_HERSHEY_DUPLEX,  # same value (2) as the original literal
            1,
            (255, 0, 255),
        )

    cv2.imshow("nms_img", img_cp1)
    cv2.imwrite("newcs75.jpg", img_cp1)
    cv2.waitKey(0)
    cv2.destroyAllWindows()  # close the display window once a key is pressed


if __name__ == "__main__":
    main()
IOU的阈值:threshold= 0.7
IOU的阈值:threshold= 0.3