ruijiege

  博客园 :: 首页 :: 博问 :: 闪存 :: 新随笔 :: 联系 :: 订阅 订阅 :: 管理 ::
import random

import cv2
import numpy as np

import albumentations as A
from albumentations.core.transforms_interface import DualTransform
from albumentations.pytorch import ToTensorV2, ToTensor
View Code
# Random affine transform augmentation
class RandomAffine(DualTransform):
    """Random affine augmentation (rotation + scaling + translation).

    Applied jointly to the image and its bounding boxes.  Because this
    inherits from ``DualTransform``, incoming bboxes arrive normalized to
    [0, 1] (divided by the input image size) and the returned bboxes are
    renormalized by the output size (``width``, ``height``).
    """

    def __init__(self, 
             width,                      # output image width (int)
             height,                     # output image height (int)
             angle_limit=(-45, +45),     # rotation range in degrees (float or tuple); a float v means (-v, +v)
             scale_limit=(0.8, 1.2),     # scaling range (float or tuple); a float v means (1-v, 1+v)
             offset_limit=(0.4, 0.6),    # translation range of the crop center, as a fraction of image size; a float v means (0.5-v, 0.5+v)
             rotate_angle_threshold=30,  # when |angle| exceeds this threshold, boxes[:, 4] is set True
             min_size=10,                # minimum box size (float or tuple); a float v means (v, v); smaller boxes are dropped
             always_apply=False, 
             p=1.0):
        super().__init__(always_apply=always_apply, p=p)
        self.width = width
        self.height = height
        self.rotate_angle_threshold = rotate_angle_threshold
        self.scale_limit = self.to_tuple(scale_limit, 1)
        self.offset_limit = self.to_tuple(offset_limit, 0.5)
        self.angle_limit = self.to_tuple(angle_limit, 0)
        self.min_size = self.to_tuple(min_size, None)

    def apply(self, img, M, **params):
        # Warp the image with the sampled affine matrix into the output size.
        return cv2.warpAffine(img, M, (self.width, self.height))

    def get_params_dependent_on_targets(self, params):
        """Sample the affine parameters; needs the input image for its size."""
        image = params["image"]
        height, width = image.shape[:2]

        # Sample rotation angle, scale, and the rotation center (the center
        # is a random position expressed as a fraction of the input size).
        angle = random.uniform(*self.angle_limit)
        scale = random.uniform(*self.scale_limit)
        cx = random.uniform(*self.offset_limit) * width
        cy = random.uniform(*self.offset_limit) * height

        # Rotate/scale around (cx, cy), then translate so that (cx, cy)
        # lands on the center of the output canvas.
        M = cv2.getRotationMatrix2D((cx, cy), angle, scale)
        M[0, 2] -= cx - self.width * 0.5
        M[1, 2] -= cy - self.height * 0.5
        return {"M": M, "scale": scale, "angle": angle, "image_width": width, "image_height": height}

    def apply_to_bboxes(self, bboxes, M, scale, angle, image_width, image_height, **params):
        """Project normalized boxes through M and drop too-small boxes.

        Each incoming bbox is [x, y, r, b, *tail] with coordinates in
        [0, 1] (DualTransform convention).  The output keeps the same
        layout, renormalized by the output size.  tail[0] is set True when
        |angle| exceeds ``rotate_angle_threshold``.
        """
        if len(bboxes) == 0:
            return []

        # Denormalize: the boxes form an n x 4 array, so broadcast a 1 x 4
        # row of [width, height, width, height] across it.
        np_image_size = np.array([[image_width, image_height, image_width, image_height]])

        # Each output box is a (4,) vector, so the output normalizer is a
        # plain (4,) array.
        np_output_size = np.array([self.width, self.height, self.width, self.height])

        # Split off extra per-box columns (labels/flags), convert boxes to
        # pixel coordinates, and compute each box center (n x 2).
        tail = np.array([item[4:] for item in bboxes])
        npbboxes = np.array([item[:4] for item in bboxes]) * np_image_size
        np_bboxes_center = np.array([[(x + r) * 0.5, (y + b) * 0.5] for x, y, r, b in npbboxes])

        # Build homogeneous coordinates: n x 2 -> 2 x n -> 3 x n (third row
        # all ones), then project the centers through the affine matrix.
        np_bboxes_center_t_2row = np_bboxes_center.T
        one_row = np.ones((1, np_bboxes_center_t_2row.shape[-1]))
        np_bboxes_center_coordinate = np.vstack([np_bboxes_center_t_2row, one_row])
        project = M @ np_bboxes_center_coordinate
        list_project = project.T.tolist()

        # Rebuild each box around its projected center with its size scaled.
        # half_scale folds the 0.5 of "half width/height" into scale so the
        # expression below is cx -/+ size * scale * 0.5.
        half_scale = scale * 0.5
        result = np.array([[
            cx - (r - x + 1) * half_scale, 
            cy - (b - y + 1) * half_scale, 
            cx + (r - x + 1) * half_scale, 
            cy + (b - y + 1) * half_scale
        ] for (x, y, r, b), (cx, cy) in zip(npbboxes, list_project)])

        # Clip boxes to the output canvas (in-place via the column views),
        # then compute the clipped widths/heights.
        x, y, r, b = result[:, 0], result[:, 1], result[:, 2], result[:, 3]
        x[...] = x.clip(min=0, max=self.width-1)
        y[...] = y.clip(min=0, max=self.height-1)
        r[...] = r.clip(min=0, max=self.width-1)
        b[...] = b.clip(min=0, max=self.height-1)
        w = (r - x + 1).clip(min=0)
        h = (b - y + 1).clip(min=0)

        # Keep only boxes that are at least min_size in both dimensions.
        w_threshold, h_threshold = self.min_size
        cond = (w >= w_threshold) & (h >= h_threshold)

        # tail[:, 0] flags "rotated beyond threshold".  Guard on tail.size
        # rather than len(tail): boxes with no tail columns produce an
        # (n, 0) array whose len() is n, and indexing column 0 of it raises.
        if abs(angle) > self.rotate_angle_threshold and tail.size > 0:
            tail[cond, 0] = True

        # Renormalize surviving boxes to [0, 1] and reattach their tails.
        output = [list(coord / np_output_size) + list(tail_item) for coord, tail_item in zip(result[cond], tail[cond])]
        return output

    @property
    def targets(self):
        # Route whole-list bbox processing through apply_to_bboxes.
        return {
            "image": self.apply,
            "bboxes": self.apply_to_bboxes
        }

    def to_tuple(self, value, add_value=1):
        """Normalize a scalar limit to a (low, high) tuple.

        Tuples pass through unchanged.  A scalar v becomes
        (add_value - v, add_value + v), or (v, v) when add_value is None.
        """
        if isinstance(value, tuple):
            return value
        elif add_value is not None:
            return (add_value-value, add_value+value)
        else:
            return (value, value)

    @property
    def targets_as_params(self):
        # Parameter sampling depends on the input image (for its size).
        return ["image", "bboxes"]

    def get_transform_init_args_names(self):
        # min_size was previously missing here, which broke
        # serialization/replay round-trips of this transform.
        return "height", "width", "scale_limit", "angle_limit", "offset_limit", "rotate_angle_threshold", "min_size"
View Code
# Augmentation pipeline: the custom affine transform first, then a
# horizontal flip, then one randomly chosen transform from each OneOf
# group below.  bbox_params uses "pascal_voc", i.e. absolute
# [x_min, y_min, x_max, y_max] boxes.
transform = A.Compose([
    RandomAffine(800, 800),
    A.HorizontalFlip(),
    # One of: hue/saturation/value shift, contrast change, JPEG artifacts.
    A.OneOf([
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20, val_shift_limit=27),
        A.RandomContrast(limit=0.8),
        A.JpegCompression(quality_lower=5, quality_upper=100),
    ]),
    # One of: sensor (ISO) noise, additive Gaussian noise, sharpening.
    A.OneOf([
        A.ISONoise(),
        A.IAAAdditiveGaussianNoise(),
        A.IAASharpen(),
    ]),
    # One of: cutout holes, rain, fog, perspective warp.
    A.OneOf([
        A.Cutout(num_holes=32, max_h_size=24, max_w_size=24, p=0.5),
        A.RandomRain(p=0.2),
        A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.2),
        A.IAAPerspective(p=0.5)
    ]),
    # One of: several blur variants, grayscale conversion, gamma shift.
    A.OneOf([
        A.Blur(blur_limit=9),
        A.MotionBlur(p=1, blur_limit=7),
        A.GaussianBlur(blur_limit=21),
        A.GlassBlur(),
        A.ToGray(),
        A.RandomGamma(gamma_limit=(0, 120), p=0.5),
    ]),
    #ToTensorV2(), normalizes directly
    #ToTensor()    can also apply normalize
], bbox_params=A.BboxParams("pascal_voc"))
# Demo: run the augmentation pipeline on one image and draw the boxes.
cv_image = cv2.imread("cv_image.jpg")
if cv_image is None:
    # cv2.imread returns None (no exception) when the file cannot be read.
    raise FileNotFoundError("cv_image.jpg could not be read")

# When the bboxes have more than 4 columns, label_fields need not be provided.
# NOTE(review): `bboxes` (list of [x, y, r, b]) must be defined earlier;
# it is not visible in this snippet — confirm against the full script.
h, w = cv_image.shape[:2]  # image.shape is (height, width, channels)
trans_out = transform(image=cv_image, bboxes=[[x, y, r, b, False] for x, y, r, b in bboxes])
trans_out_image = trans_out["image"]
for x, y, r, b, rot in trans_out["bboxes"]:
    # Red when the box was rotated beyond the threshold, green otherwise.
    color = (0, 0, 255) if rot else (0, 255, 0)
    cv2.rectangle(trans_out_image, (int(x), int(y)), (int(r), int(b)), color, 2)
show(trans_out_image)
View Code

 

posted on 2022-11-03 08:07  哦哟这个怎么搞  阅读(18)  评论(0编辑  收藏  举报