目标检测——yolov4数据前处理

1. 数据增强

基于图像的深度学习算法，通常需要数据增强，比较常规的就是图片翻转，图片旋转，图像裁剪

在目标检测中，对图片进行变换时还会涉及到框的变化，尤其是将图像resize成相同大小时，需要对框进行相应的缩放和平移

1.1读取图像&读取框(目标位置)

# Open the image whose path is the first field of the annotation line.
image = Image.open(line[0])

# Parse every remaining field as a comma-separated list of ints, one row per
# box. The original note gives the resulting shape as (n, 5); the five values
# are presumably x_min, y_min, x_max, y_max, class_id -- TODO confirm against
# the annotation file format.
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

1.2图像resize&框的缩放

# Resize the image to (nw, nh) with bicubic interpolation (scaling by a chosen ratio).
image = image.resize((nw, nh), Image.BICUBIC)
# Paste the resized image at offset (dx, dy) onto a grey (128, 128, 128) canvas
# of size (w, h) -- (416, 416) in the original note -- so that every sample
# ends up with the same size.
new_image = Image.new('RGB', (w, h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image

# Apply the same transform to the boxes: columns 0 and 2 (x coordinates) are
# scaled by nw / iw and shifted by dx; columns 1 and 3 (y coordinates) are
# scaled by nh / ih and shifted by dy.
# Presumably (iw, ih) is the size of the image before resizing -- TODO confirm.
# NOTE(review): if box has an integer dtype, the float results are truncated
# on assignment.
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy

1.3图像翻转&框的翻转

# Horizontal flip, applied to the image and to its boxes together.
if flip:
    image = image.transpose(Image.FLIP_LEFT_RIGHT)
    # Mirror the x coordinates about the canvas width w. The columns are
    # swapped on the right-hand side so that column 0 is still the smaller x:
    #   new x_min = w - old x_max,  new x_max = w - old x_min
    box[:, [0, 2]] = w - box[:, [2, 0]]

# Box correction after the geometric transforms: clamp the boxes to the
# (w, h) canvas, drop the ones that became degenerate, and store the rest.

# Negative x_min / y_min values are clamped to 0.
box[:, 0:2][box[:, 0:2] < 0] = 0
# x_max is clamped to w and y_max to h.
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
# Width and height of each box after clamping.
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
# Keep only boxes that are more than one pixel wide and more than one pixel high.
box = box[np.logical_and(box_w > 1, box_h > 1)]
# Keep at most max_boxes boxes, then copy them into the leading rows of box_data.
# Presumably box_data is a pre-allocated zero array of shape (max_boxes, 5),
# so unused rows stay zero -- TODO confirm.
if len(box) > max_boxes: box = box[:max_boxes]
box_data[:len(box)] = box

1.5 图像色域变换

# Colour jitter in HSV space.
# The image is converted to float32 RGB in [0, 1] and then to HSV. For
# 32-bit float input OpenCV returns H in degrees [0, 360] and S, V in [0, 1].
x = cv2.cvtColor(np.array(image, np.float32)/255, cv2.COLOR_RGB2HSV)

# Shift the hue. hue is expressed as a fraction of the full colour circle,
# hence the factor 360 to convert it to degrees.
x[..., 0] += hue*360
# Wrap the shifted hue back into [0, 360]. The period must be 360 (degrees),
# not 1, and only negative values need to be wrapped upwards.
x[..., 0][x[..., 0] > 360] -= 360
x[..., 0][x[..., 0] < 0] += 360
# Scale saturation and value by their jitter factors.
x[..., 1] *= sat
x[..., 2] *= val

# Final clamps: hue to at most 360 (only reachable if |hue| > 1),
# saturation/value to at most 1, and every channel to at least 0.
x[x[:,:,0] > 360, 0] = 360
x[:, :, 1:][x[:, :, 1:]>1] = 1
x[x<0] = 0

# Back to RGB; the result is a float32 array.
image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)

2. 真实框编码

真实框需要转化成与网络模型的输出维度一致的矩阵，这样才能计算损失

以一个batch_size 为例，网络模型的输出为[(b, 13, 13, 3*(num_classes+5)), (b, 26, 26, 3*(num_classes+5)), (b, 52, 52, 3*(num_classes+5))]

所以需要将真实框编码为[(b, 13, 13, 3, (num_classes+5)), (b, 26, 26, 3, (num_classes+5)), (b, 52, 52, 3, (num_classes+5))] 的形式：

# One zero-initialised float32 target array per output layer, with shape
# (m, grid_shape[l][0], grid_shape[l][1], len(anchor_mask[l]), 5 + num_classes).
# Presumably m is the batch size and the last axis holds 4 box values,
# 1 confidence value and a one-hot class vector -- TODO confirm.
y_true = [np.zeros((m, grid_shape[l][0], grid_shape[l][1], len(anchor_mask[l]), 5+num_classes), dtype='float32') for l in range(num_layers)]

以（13，13）的特征层为例，每个grid cell有三个anchor框，所以维度中存在一个3；哪个anchor框与真实框的iou最大，就在真实框中心点所落入的grid cell处，将该anchor对应的channel标记为存在真实框

# For every image in the batch, match each valid ground-truth box to the
# anchor with the highest IoU and write the box into the y_true cell that
# contains the box centre.
for b in range(m):
    # Widths/heights of the valid boxes of image b.
    wh = boxes_wh[b, valid_mask[b]]
    if len(wh) == 0:
        continue

    # (n, 1, 2): n boxes in this image, last axis is (w, h). The extra axis
    # lets the boxes broadcast against the anchors.
    wh = np.expand_dims(wh, -2)

    # Treat every box as centred on the origin so that only its size matters.
    box_maxes = wh / 2.
    box_mins = -box_maxes

    # Element-wise (broadcast) intersection of every box with every anchor.
    # Shape annotations below follow the original notes and assume 9 anchors:
    # (n, 9, 2)
    intersect_mins = np.maximum(box_mins, anchor_mins)
    intersect_maxes = np.minimum(box_maxes, anchor_maxes)
    intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
    # (n, 9)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    # (n, 1)
    box_area = wh[..., 0] * wh[..., 1]
    # (1, 9)
    anchor_area = anchors[..., 0] * anchors[..., 1]
    # (n, 9)
    iou = intersect_area / (box_area + anchor_area - intersect_area)
    # (n,): index of the best-matching anchor for every box
    best_anchor = np.argmax(iou, axis=-1)

    # NOTE(review): n indexes the *filtered* boxes but is used to index
    # true_boxes[b] directly; this assumes the valid boxes come first in
    # true_boxes[b] -- confirm against the caller.
    for n, index in enumerate(best_anchor):
        for l in range(num_layers):
            if index in anchor_mask[l]:
                # Column index: x coordinate scaled by the grid width.
                # Presumably true_boxes holds coordinates normalised to
                # [0, 1] -- TODO confirm.
                i = np.floor(true_boxes[b, n, 0] * grid_shape[l][1]).astype('int32')
                # Row index: y coordinate scaled by the grid height. y_true[l]
                # has grid_shape[l][0] rows, so the height entry must be used
                # here (it only coincides with the width for square grids).
                j = np.floor(true_boxes[b, n, 1] * grid_shape[l][0]).astype('int32')
                # Position of the anchor inside this layer's anchor list.
                k = anchor_mask[l].index(index)
                # Class id of the box.
                c = true_boxes[b, n, 4].astype('int32')

                # Box coordinates
                y_true[l][b, j, i, k, 0:4] = true_boxes[b, n, 0:4]
                # Confidence / objectness
                y_true[l][b, j, i, k, 4] = 1
                # One-hot class
                y_true[l][b, j, i, k, 5 + c] = 1

3. 生成迭代器

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
    """Yield training batches forever.

    Each batch is built by drawing ``batch_size`` augmented samples from
    ``annotation_lines``. The list is shuffled in place every time the read
    position is at the start. With ``mosaic`` enabled, samples alternate
    between a mosaic built from the next four annotation lines (when more
    than four lines remain after the current position) and a regular
    single-image sample. The read position advances by one after every
    sample, whichever kind it was.

    Yields:
        A tuple ``([image_batch, *y_true], zeros)`` where ``y_true`` is the
        output of ``preprocess_true_boxes`` and ``zeros`` is a length
        ``batch_size`` array of zeros (presumably a dummy target because the
        loss is computed inside the model -- TODO confirm).
    """
    total = len(annotation_lines)
    cursor = 0
    mosaic_turn = True

    while True:
        images, boxes = [], []

        for _ in range(batch_size):
            # Reshuffle whenever a new pass over the annotations starts.
            if cursor == 0:
                np.random.shuffle(annotation_lines)

            if mosaic and mosaic_turn and (cursor + 4) < total:
                sample = get_random_data_with_Mosaic(annotation_lines[cursor:cursor + 4], input_shape)
            else:
                sample = get_random_data(annotation_lines[cursor], input_shape)
            image, box = sample

            cursor = (cursor + 1) % total
            # The mosaic/plain alternation only ticks while mosaic is enabled.
            if mosaic:
                mosaic_turn = not mosaic_turn

            images.append(image)
            boxes.append(box)

        image_batch = np.array(images)
        box_batch = np.array(boxes)
        y_true = preprocess_true_boxes(box_batch, input_shape, anchors, num_classes)

        yield [image_batch, *y_true], np.zeros(batch_size)

在yolov4中用了一个特殊的图像增强trick——mosaic：

将四张图片拼接成一张，这样可以产生大量样本，通过拼接，使图像的正样本数量增多，可以减小样本不平衡造成的影响

posted @ 2020-12-08 23:10 learningcaiji 阅读(360) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

learningcaiji

目标检测——yoolov4数据前处理

公告