目标检测中图像和检测框bbox同时进行变换

检测和分类不同,检测过程中,图片处理以后,bbox往往也需要同步变换。

[1] 知乎:目标检测-数据增强(Data Augmentation) (推荐,但要注意代码中的个别错误)

[2] CSDN: 目标检测中的数据增强,包括bbox的变换  (写的还可以)

[3] 知乎:一种目标检测任务中图像-标注对增强方法 (介绍了图像数据增强库albumentations)

 

下面的代码来自上面的参考资料1,github链接:Data-Augmentation/data_augmentation.py at main · REN-HT/Data-Augmentation · GitHub

import torch
from PIL import Image
from PIL import ImageDraw
from torchvision import transforms
import numpy as np

class DataAugmentation(object):
    def __init__(self):
        super(DataAugmentation,self).__init__()

    def resize(self,img,boxes,size):
        #---------------------------------------------------------
        # 类型为 img=Image.open(path),boxes:Tensor,size:int
        # 功能为:将图像长和宽缩放到指定值size,并且相应调整boxes
        #---------------------------------------------------------
        w,h=img.size
        sw=size/w
        sh=size/h
        return img.resize((size, size), Image.BILINEAR), boxes*torch.Tensor([sw, sh, sw, sh])

    def resize_(self, img, boxes, size):
        #-----------------------------------------------------------
        # 类型为 img=Image.open(path),boxes:Tensor,size:int
        # 功能为:将图像短边缩放到指定值size,保持原有比例不变,并且相应调整boxes
        #-----------------------------------------------------------
        w,h=img.size
        min_size=min(w,h)
        sw=sh=size/min_size
        ow=int(sw*w+0.5)
        oh=int(sh*h+0.5)
        return img.resize((ow, oh), Image.BILINEAR), boxes*torch.Tensor([sw,sh,sw,sh])

    def random_flip_horizon(self, img, boxes):
        #-------------------------------------
        # 随机水平翻转
        #-------------------------------------
        if np.random.random() > 0.5:
            transform=transforms.RandomHorizontalFlip()
            img=transform(img)
            w = img.width
            xmin=w-boxes[:,2]
            xmax=w-boxes[:,0]
            boxes[:,0]=xmin
            boxes[:,2]=xmax
        return img, boxes

    def random_flip_vertical(self, img, boxes):
        #-------------------------------------
        # 随机垂直翻转
        #-------------------------------------
        if np.random.random()>0.5:
            transform=transforms.RandomVerticalFlip()
            img=transform(img)
            h=img.height
            ymin=h-boxes[:, 3]
            ymax=h-boxes[:, 1]
            boxes[:, 1]=ymin
            boxes[:, 3]=ymax
        return img, boxes

    def center_crop(self, img, boxes):
        #-------------------------------------
        # 中心裁剪
        #-------------------------------------
        w,h=img.size
        size=min(w,h)
        transform=transforms.CenterCrop(size)
        img=transform(img)
        sw,sh=size/w, size/h
        boxes=boxes*torch.Tensor([sw, sh, sw, sh])
        boxes=boxes.clamp(min=0, max=size)
        return img, boxes

    #------------------------------------------------------
    # 以下img皆为Tensor类型
    #------------------------------------------------------

    def random_bright(self, img, u=32):
        #-------------------------------------
        # 随机亮度变换
        #-------------------------------------
        if np.random.random()>0.5:
            alpha=np.random.uniform(-u, u)/255
            img+=alpha
            img=img.clamp(min=0.0, max=1.0)
        return img

    def random_contrast(self, img, lower=0.5, upper=1.5):
        #-------------------------------------
        # 随机增强对比度
        #-------------------------------------
        if np.random.random()>0.5:
            alpha=np.random.uniform(lower, upper)
            img*=alpha
            img=img.clamp(min=0, max=1.0)
        return img

    def random_saturation(self, img,lower=0.5, upper=1.5):
        #-----------------------------------------------
        # 随机饱和度变换,针对彩色三通道图像,中间通道乘以一个值
        #-----------------------------------------------
        if np.random.random()>0.5:
            alpha=np.random.uniform(lower, upper)
            img[1]=img[1]*alpha
            img[1]=img[1].clamp(min=0,max=1.0)
        return img

    def add_gasuss_noise(self, img, mean=0, std=0.1):
        noise=torch.normal(mean,std,img.shape)
        img+=noise
        img=img.clamp(min=0, max=1.0)
        return img

    def add_salt_noise(self, img):
        noise=torch.rand(img.shape)
        alpha=np.random.random()
        img[noise[:,:,:]>alpha]=1.0
        return img

    def add_pepper_noise(self, img):
        noise=torch.rand(img.shape)
        alpha=np.random.random()
        img[noise[:,:,:]>alpha]=0
        return img

    def mixup(self, img1, img2, box1, box2,alpha=2):
        weight=np.random.beta(alpha, alpha)
        print(weight)
        miximg=weight*img1+(1-weight)*img2
        if weight > 0.5:
            return miximg, box1
        return miximg, box2


    def draw_img(self, img, boxes):
        draw=ImageDraw.Draw(img)
        for box in boxes:
            draw.rectangle(list(box), outline='yellow', width=2)
        img.show()


D=DataAugmentation()
img1=Image.open('C:/AllProgram/testimage/superResolution/bird_GT.bmp')
img2=Image.open('C:/AllProgram/testimage/superResolution/lenna.bmp')

box1=torch.Tensor([[92,20,261,190]])
box2=torch.Tensor([[23,18,97,123],
                    [45,65,194,225]])

to_tensor=transforms.ToTensor()
to_image=transforms.ToPILImage()

img1,box1=D.resize(img1,box1,256)
img2,box2=D.resize(img2,box2,256)
img1=to_tensor(img1)
img2=to_tensor(img2)
miximg,box=D.mixup(img1,img2,box1,box2)

miximg=to_image(miximg)
D.draw_img(miximg,box)

  

 

posted @ 2023-05-15 11:52  Picassooo  阅读(181)  评论(0编辑  收藏  举报