【CV源码与项目实现】darknet yolov3中anchor box的理解
前言
训练定制数据集之后,测试发现bbox框的位置有时候不准确,当然与定制数据集的大小有很大关系,另外,是否也和模型参数配置有关呢?!
重点
Kmeans原理及其改进算法kmeans++算法原理的理解,以及应用;
anchorbox的理解
1. 修改主干部分的模型参数,还能使用预训练权重吗?
首先可以把anchor理解为:多尺度滑动窗口。
传统的检测过程是:
step1. 生成图像金字塔,因为待检测的物体的scale是变化的。
step2. 用滑动窗口在图像金字塔的每一层上滑动,生成很多候选区域。
step3. 用HOG等特征提取方法和SVM等分类器,对上面产生的候选区域中的图像信息进行分类。
step4. NMS非极大值抑制得到最后的结果。
由于CNN具有强大的特征提取能力,可以替代第三步;但第一、第二步独立于CNN之外,需要大量循环,速度因此受到限制。要想定位得更准,就需要更多scale和ratio不同的窗口,而这又会增加耗时。窗口滑动的过程本质上就是遍历像素的过程,因此可以直接为每个像素分配若干不同尺度和比例的矩形窗口,它们的中心都是其所属的像素点。至于尺度和比例的取值,我们可以根据标注数据中bbox的宽高通过k-means聚类得到。每个像素上分配的这几个不同尺度和比例的矩形窗口就是Anchor。一般模型的anchor数量非常多,因此可以根据这些anchor与给定(标注)矩形框的IOU是否满足条件,来决定它是否是所要的框。

# -*- coding=utf-8 -*-
import glob
import os
import sys
import xml.etree.ElementTree as ET

import numpy as np

from kmeans import kmeans, avg_iou

# Root folder of the dataset (must contain a 'labels' sub-folder).
ROOT_PATH = './tfl_dataset/'
# Number of clusters, i.e. the number of anchor shapes to estimate.
CLUSTERS = 6
# Input image size of the model; width and height are taken to be the same.
SIZE = 416


def load_dataset(path):
    """Load the box widths and heights from YOLO-format label files.

    Every file in ``<path>/labels`` is read line by line. The last two
    whitespace-separated fields of a line are taken as the box width and
    height. Blank or too-short lines and boxes with a zero width or height
    are skipped.

    :param path: dataset root folder containing a ``labels`` sub-folder
    :return: numpy array of shape (r, 2) holding ``[width, height]`` rows;
        shape (0, 2) when no usable box was found
    """
    jpegimages = os.path.join(path, 'JPEGImages')
    if not os.path.exists(jpegimages):
        # Warning only: the images are never read by this function, and the
        # original script deliberately left the abort for this case disabled.
        print('no JPEGImages folders, program abort')

    labels_txt = os.path.join(path, 'labels')
    if not os.path.exists(labels_txt):
        print('no labels folders, program abort')
        # This is an error path, so report a non-zero exit status.
        sys.exit(1)

    label_file = os.listdir(labels_txt)
    print('label count: {}'.format(len(label_file)))

    dataset = []
    for label in label_file:
        with open(os.path.join(labels_txt, label), 'r') as f:
            for line in f:
                # split() without an argument tolerates repeated spaces, tabs
                # and trailing whitespace; split(' ') left a non-numeric last
                # field for blank lines or trailing spaces and crashed float().
                fields = line.split()
                if len(fields) < 2:
                    continue
                roi_with = float(fields[-2])
                roi_height = float(fields[-1])
                if roi_with == 0 or roi_height == 0:
                    continue
                dataset.append([roi_with, roi_height])

    # reshape keeps the result two-dimensional even when the list is empty,
    # so callers can always index columns or concatenate along axis 0.
    return np.array(dataset, dtype=float).reshape(-1, 2)


data = load_dataset(ROOT_PATH)
out = kmeans(data, k=CLUSTERS)
print(out)
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}-{}".format(out[:, 0] * SIZE, out[:, 1] * SIZE))

ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))
kmeans.py

import numpy as np


def iou(box, clusters):
    """Calculate the Intersection over Union (IoU) between a box and k clusters.

    The box and the clusters are compared by width and height only, i.e. as
    if they shared the same origin.

    :param box: tuple or array ``(width, height)``
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) with one IoU value per cluster
    :raises ValueError: if the overlap with any cluster has zero width or height
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou_ = intersection / (box_area + cluster_area - intersection)
    return iou_


def avg_iou(boxes, clusters):
    """Calculate the average best IoU between boxes and k clusters.

    For every box the highest IoU over all clusters is taken, and these
    maxima are averaged.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])


def translate_boxes(boxes):
    """Translate all the boxes to the origin.

    Columns 2 and 3 are replaced by ``|col2 - col0|`` and ``|col3 - col1|``
    and the first two columns are dropped. The input is not modified.

    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    new_boxes[:, 2] = np.abs(new_boxes[:, 2] - new_boxes[:, 0])
    new_boxes[:, 3] = np.abs(new_boxes[:, 3] - new_boxes[:, 1])
    return np.delete(new_boxes, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median):
    """Calculate k-means clustering with the Intersection over Union (IoU) metric.

    The distance between a box and a cluster is ``1 - IoU``.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function called as ``dist(members, axis=0)`` to compute the
        new centre of a cluster from its member boxes
    :return: numpy array of shape (k, 2)
    """
    # Work in floating point so integer input does not truncate the centres.
    boxes = np.asarray(boxes, dtype=float)
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # No previous assignment yet. Starting from an all-zero assignment made
    # the loop stop before the first update whenever every box happened to be
    # nearest to cluster 0 on the first pass.
    last_clusters = None

    # Kept from the original: reseeds NumPy's global generator on every call.
    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if last_clusters is not None and (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # An empty cluster keeps its previous centre; reducing an empty
            # selection would turn the centre into NaN.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters
kmeans_yolo.py 包含kmeans++算法

# coding=utf-8
# k-means ++ for YOLOv2 anchors
# Obtain the anchor sizes needed by YOLOv2 with the k-means ++ algorithm.
import os
import sys

import numpy as np


class Box():
    """Bounding box described by its centre (x, y), width w and height h."""

    def __init__(self, x, y, w, h):
        self.x = x
        self.y = y
        self.w = w
        self.h = h


def overlap(x1, len1, x2, len2):
    """Return the overlap length of two boxes along one axis.

    :param x1: centre coordinate of box 1 on this axis
    :param len1: length of box 1 on this axis
    :param x2: centre coordinate of box 2 on this axis
    :param len2: length of box 2 on this axis
    :return: overlap length on this axis (negative when the boxes do not overlap)
    """
    len1_half = len1 / 2
    len2_half = len2 / 2

    left = max(x1 - len1_half, x2 - len2_half)
    right = min(x1 + len1_half, x2 + len2_half)

    return right - left


def box_intersection(a, b):
    """Return the intersection area of Box ``a`` and Box ``b`` (0 if disjoint)."""
    w = overlap(a.x, a.w, b.x, b.w)
    h = overlap(a.y, a.h, b.y, b.h)
    if w < 0 or h < 0:
        return 0

    area = w * h
    return area


def box_union(a, b):
    """Return the union area of Box ``a`` and Box ``b``."""
    i = box_intersection(a, b)
    u = a.w * a.h + b.w * b.h - i
    return u


def box_iou(a, b):
    """Return the IoU of Box ``a`` and Box ``b``.

    Two boxes whose union has no area give 0.0 instead of a
    ZeroDivisionError.
    """
    union = box_union(a, b)
    if union == 0:
        return 0.0
    return box_intersection(a, b) / union


def init_centroids(boxes, n_anchors):
    """Initialise the centroids with k-means ++.

    The first centroid is a random box. Every further centroid is drawn with
    a probability proportional to its ``1 - IoU`` distance to the nearest
    centroid chosen so far, which reduces the influence of the random start.

    :param boxes: list of Box objects of all bounding boxes
    :param n_anchors: the k of k-means
    :return: list with exactly ``n_anchors`` initial centroids
    """
    centroids = []
    boxes_num = len(boxes)

    centroid_index = np.random.choice(boxes_num, 1)
    centroids.append(boxes[centroid_index[0]])

    for _ in range(n_anchors - 1):
        sum_distance = 0
        distance_list = []

        for box in boxes:
            min_distance = 1
            for centroid in centroids:
                distance = 1 - box_iou(box, centroid)
                if distance < min_distance:
                    min_distance = distance
            sum_distance += min_distance
            distance_list.append(min_distance)

        distance_thresh = sum_distance * np.random.random()

        chosen = None
        cur_sum = 0
        for i in range(boxes_num):
            cur_sum += distance_list[i]
            if cur_sum > distance_thresh:
                chosen = boxes[i]
                break

        if chosen is None:
            # All distances are 0 (e.g. every box is identical), so the
            # threshold is never exceeded. Fall back to a random box so the
            # caller still receives n_anchors centroids.
            chosen = boxes[np.random.choice(boxes_num)]
        centroids.append(chosen)

    return centroids


def do_kmeans(n_anchors, boxes, centroids):
    """Run one k-means step and compute the new centroids.

    :param n_anchors: the k of k-means
    :param boxes: list of Box objects of all bounding boxes
    :param centroids: current cluster centres
    :return: tuple ``(new_centroids, groups, loss)`` where ``new_centroids``
        are the recomputed centres, ``groups`` holds the boxes of each of the
        ``n_anchors`` clusters, and ``loss`` is the sum of the distances from
        every box to its nearest centroid
    """
    loss = 0
    groups = []
    new_centroids = []
    for _ in range(n_anchors):
        groups.append([])
        new_centroids.append(Box(0, 0, 0, 0))

    for box in boxes:
        min_distance = 1
        group_index = 0
        for centroid_index, centroid in enumerate(centroids):
            distance = 1 - box_iou(box, centroid)
            if distance < min_distance:
                min_distance = distance
                group_index = centroid_index
        groups[group_index].append(box)
        loss += min_distance
        new_centroids[group_index].w += box.w
        new_centroids[group_index].h += box.h

    for i in range(n_anchors):
        members = len(groups[i])
        if members:
            new_centroids[i].w /= members
            new_centroids[i].h /= members
        elif i < len(centroids):
            # Empty cluster: keep the previous centre instead of dividing by
            # zero.
            new_centroids[i].w = centroids[i].w
            new_centroids[i].h = centroids[i].h

    return new_centroids, groups, loss


def compute_centroids(label_path, n_anchors, loss_convergence, grid_size, iterations_num, plus):
    """Compute ``n_anchors`` centroids for the bounding boxes of a dataset.

    Every file in ``<label_path>/labels`` is read line by line; fields 3 and
    4 (zero-based) of a whitespace-separated line are taken as the box width
    and height. Lines with fewer than five fields and boxes with a zero
    width or height are skipped.

    :param label_path: dataset root folder containing a ``labels`` sub-folder
    :param n_anchors: number of anchors
    :param loss_convergence: smallest change of the loss that still counts as progress
    :param grid_size: kept for interface compatibility; not used by the computation
    :param iterations_num: maximum number of iterations
    :param plus: truthy to initialise the centroids with k-means ++
    :return: tuple ``(out, bbox)`` of numpy arrays: the ``[w, h]`` of the
        centroids, shape (n_anchors, 2), and the ``[w, h]`` of all loaded boxes
    :raises ValueError: if no usable box is found
    """
    labels_txt = os.path.join(label_path, 'labels')
    if not os.path.exists(labels_txt):
        print('no labels folders, program abort')
        # This is an error path, so report a non-zero exit status.
        sys.exit(1)

    boxes = []
    for label_file in os.listdir(labels_txt):
        with open(os.path.join(labels_txt, label_file)) as f:
            for line in f:
                temp = line.split()
                if len(temp) < 5:
                    continue
                w = float(temp[3])
                h = float(temp[4])
                # Zero-size boxes would later make iou() raise
                # "Box has no area" inside avg_iou().
                if w == 0 or h == 0:
                    continue
                boxes.append(Box(0, 0, w, h))

    if not boxes:
        raise ValueError('no bounding boxes found in {}'.format(labels_txt))

    if plus:
        centroids = init_centroids(boxes, n_anchors)
    else:
        # Draw distinct boxes whenever there are enough of them; duplicated
        # start centroids leave clusters empty.
        centroid_indices = np.random.choice(
            len(boxes), n_anchors, replace=len(boxes) < n_anchors)
        centroids = [boxes[centroid_index] for centroid_index in centroid_indices]

    # iterate k-means
    centroids, groups, old_loss = do_kmeans(n_anchors, boxes, centroids)
    iterations = 1
    while True:
        centroids, groups, loss = do_kmeans(n_anchors, boxes, centroids)
        iterations = iterations + 1
        if abs(old_loss - loss) < loss_convergence or iterations > iterations_num:
            break
        old_loss = loss

    out = [[centroid.w, centroid.h] for centroid in centroids]
    bbox = [[box.w, box.h] for box in boxes]

    return np.array(out), np.array(bbox)


def avg_iou(boxes, clusters):
    """Calculate the average best IoU between boxes and k clusters.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])


def iou(box, clusters):
    """Calculate the Intersection over Union (IoU) between a box and k clusters.

    :param box: tuple or array ``(width, height)``
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) with one IoU value per cluster
    :raises ValueError: if the overlap with any cluster has zero width or height
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou_ = intersection / (box_area + cluster_area - intersection)
    return iou_


if __name__ == "__main__":
    path = os.path.dirname(os.path.realpath(__file__))
    n_anchors = 6
    loss_convergence = 1e-6
    grid_size = 416
    iterations_num = 1000000
    plus = 1
    SIZE = 416

    out, data = compute_centroids(path, n_anchors, loss_convergence, grid_size, iterations_num, plus)
    print(out)
    print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
    print("Boxes SIZE(w,h):\n {}-{}".format(out[:, 0] * SIZE, out[:, 1] * SIZE))
    print("Boxes (w=1280, h=720):\n {}-{}".format(out[:, 0] * 1280, out[:, 1] * 720))

    ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
    print("Ratios:\n {}".format(sorted(ratios)))
d(box,centroid)=1−IOU(box,centroid)
在计算anchor boxes时,我们将所有boxes中心点的x、y坐标都置为0,这样所有的boxes都处在相同的位置上,方便我们通过上面的新距离公式计算boxes之间的相似度。换句话说,相当于把中心点(或者左上角)固定在同一个位置,只通过bbox的w和h计算两个box的IOU;距离随IOU增大而减小(d = 1 − IOU),即IOU越大,说明kmeans中的距离越小。
# Folder that contains this script; both label sets are looked up below it.
path = os.path.dirname(os.path.realpath(__file__))
print('path: ', path)

path1 = os.path.join(path, 'yuv2png')
path2 = os.path.join(path, 'yuv2png0215')

data1 = load_dataset(path1)
data2 = load_dataset(path2)

# Stack the two box arrays row-wise into a single dataset.
data = np.concatenate((data1, data2), axis=0)

print('data1 size: ', data1.shape)
print('data2 size: ', data2.shape)
print('data size: ', data.shape)

# Cluster the merged boxes into CLUSTERS anchor shapes.
out = kmeans(data, k=CLUSTERS)
参考
2. YOLOv3中Anchor理解;
完
心正意诚,做自己该做的事情,做自己喜欢做的事情,安静做一枚有思想的技术媛。
版权声明,转载请注明出处:https://www.cnblogs.com/happyamyhope/
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术
· .NET周刊【3月第1期 2025-03-02】
2021-07-08 【python基础】labelme工具的解析
2021-07-08 【pytorch基础-error】RuntimeError: stack expects each tensor to be equal size