cv2.imread("图片地址", 模式参数):模式参数包括 cv2.IMREAD_COLOR(1,默认值)、cv2.IMREAD_GRAYSCALE(0)和 cv2.IMREAD_UNCHANGED(-1)。
# Read the image with flag 1; the printed shape has three entries
# (rows, columns, channels).
img_path = os.path.join(current_dir, image_dir)
img = cv2.imread(img_path, 1)
print(img.shape)
>>>(500, 375, 3)
# Read the same file with flag 0; the printed shape has only two entries
# (rows, columns), i.e. there is no channel axis.
img_path = os.path.join(current_dir, image_dir)
img = cv2.imread(img_path, 0)
print(img.shape)
>>>(500, 375)
class Normalize(object):
    """Scale an image and standardise it with a mean and a standard deviation.

    Args:
        mean_val: Mean value(s) subtracted from the scaled image.
        std_val: Standard deviation value(s) the centred image is divided by.
        val_scale: Pass 1 (the default) when ``mean_val`` and ``std_val`` are
            expressed in the (0, 1) range; pixels are then multiplied by
            1/255 first. Pass any other value when they are expressed in the
            (0, 255) range; pixels are then left unscaled.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        if val_scale == 1:
            pixel_factor = 1 / 255.0
        else:
            pixel_factor = 1
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = pixel_factor

    def __call__(self, image, label=None):
        """Return ``(normalised_image, label)``; ``label`` is passed through as is."""
        # Work in float32 so the result can later be turned into a tensor
        # and fed to the network.
        scaled = image.astype(np.float32) * self.val_scale
        # Standardise: subtract the mean, then multiply by the reciprocal std.
        centred = scaled - self.mean
        inverse_std = 1 / self.std
        return centred * inverse_std, label
class Resize(object):
    """Resize an image, and optionally its label, to a fixed size with OpenCV.

    Args:
        size: Either an int, expanded to ``(size, size)``, or a tuple that is
            handed to ``cv2.resize`` as ``dsize`` unchanged.

    Raises:
        AssertionError: If ``size`` is neither exactly an int nor a tuple.
    """

    def __init__(self, size):
        assert type(size) in [int, tuple], "CHECK SIZE TYPE!"
        self.size = (size, size) if isinstance(size, int) else size

    def __call__(self, image, label=None):
        """Return ``(resized_image, resized_label)``; a ``None`` label stays ``None``."""
        # The image is resized with bilinear interpolation.
        resized_image = cv2.resize(image, dsize=self.size, interpolation=cv2.INTER_LINEAR)
        if label is None:
            return resized_image, label
        # The label is resized with nearest-neighbour interpolation.
        resized_label = cv2.resize(label, dsize=self.size, interpolation=cv2.INTER_NEAREST)
        return resized_image, resized_label
# Resize the loaded image to 600x600. The transform returns an
# (image, label) tuple, so element 0 is the resized image.
resize = Resize(600)
r_img = resize(img)
r_img[0].shape
>>>(600,600,3)
1.4 随机裁剪(crop)
class RandomCrop(object):
    """Crop a randomly positioned window of fixed size from an image and label.

    Args:
        size: Either an int, expanded to ``(size, size)``, or a
            ``(crop_height, crop_width)`` tuple.

    Raises:
        AssertionError: If ``size`` is neither exactly an int nor a tuple.
    """

    def __init__(self, size):
        assert type(size) in [int, tuple], "CHECK SIZE TYPE!"
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size

    def __call__(self, image, label=None):
        """Return ``(cropped_image, cropped_label)``.

        The same window is applied to ``image`` and ``label`` so they stay
        aligned. When the requested window is larger than the image in either
        dimension, a message is printed and both inputs are returned unchanged.

        Args:
            image: Array whose first two axes are (rows, columns); any
                trailing axes (e.g. channels) are kept whole.
            label: Optional array cropped with the same row/column window.
        """
        h, w = image.shape[:2]
        crop_h, crop_w = self.size[0], self.size[1]

        # Check the size up front instead of relying on a blanket ``except``:
        # the old catch-all also swallowed the IndexError raised for 2-D
        # (grayscale) images, so those were silently never cropped.
        if crop_h > h or crop_w > w:
            print('CROP OUT OF IMAGE, RETURN ORIGIN IMAGE!')
            return image, label

        h_start = np.random.randint(0, h - crop_h + 1)
        w_start = np.random.randint(0, w - crop_w + 1)
        h_end, w_end = h_start + crop_h, w_start + crop_w

        # Slice only the two spatial axes so both (H, W) and (H, W, C) work.
        image = image[h_start:h_end, w_start:w_end]
        if label is not None:
            label = label[h_start:h_end, w_start:w_end]
        return image, label
1.5 水平或竖直翻转
class RandomHoriFlip():
    """Flip an image (and its label) horizontally with a given probability.

    Args:
        prob: Probability of flipping; a uniform draw from ``[0, 1)`` below
            this value triggers the flip.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, label=None):
        """Return ``(image, label)``, both flipped together or both untouched.

        Args:
            image: Array whose second axis is the column axis, e.g. (H, W)
                or (H, W, C).
            label: Optional array flipped along the same column axis.
        """
        if np.random.rand() < self.prob:
            # Reverse the column axis only. Leaving trailing axes implicit
            # lets 2-D grayscale images work as well as 3-D colour images.
            # (A vertical flip would reverse axis 0 instead.)
            image = image[:, ::-1]
            if label is not None:
                label = label[:, ::-1]
        return image, label
1.6 旋转图像
class RandomRotate():
    """Rotate an image (and its label) about the image centre.

    NOTE: despite the name, no random draw happens here; every call rotates
    by the same fixed ``degree`` given at construction.

    Args:
        degree: Rotation amount in degrees. It is negated before being passed
            to ``cv2.getRotationMatrix2D``.
    """

    def __init__(self, degree):
        self.degree = degree

    def __call__(self, image, label=None):
        """Return ``(rotated_image, rotated_label)`` with the input width/height.

        Args:
            image: Array whose first two axes are (rows, columns).
            label: Optional array warped with the same matrix.
        """
        # Only the spatial dimensions are needed, so grayscale (H, W) input
        # works as well as (H, W, C).
        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, -self.degree, 1.)
        image = cv2.warpAffine(image, M, (w, h))
        if label is not None:
            # Nearest-neighbour keeps label values intact; the default
            # bilinear interpolation would blend neighbouring class ids.
            label = cv2.warpAffine(label, M, (w, h), flags=cv2.INTER_NEAREST)
        return image, label
# Rotate the loaded image with a 45-degree setting. The transform returns an
# (image, label) tuple, so element 0 is the rotated image.
rr = RandomRotate(45)
rr_img = rr(img)
# Show the rotated image in a named window, wait for a key press, then
# close all OpenCV windows.
cv2.namedWindow("imageShowTest")
cv2.imshow("imageShowTest", rr_img[0])
cv2.waitKey(0)
cv2.destroyAllWindows()
class Compose():
    """Chain several ``(image, label) -> (image, label)`` transforms.

    Args:
        transforms: Iterable of callables applied in order; each receives the
            image and label produced by the previous one.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Run every transform in sequence and return the final ``(image, label)``."""
        sample = (image, label)
        for transform in self.transforms:
            current_image, current_label = transform(*sample)
            sample = (current_image, current_label)
        return sample
# Build a two-step pipeline: resize to 600, then rotate with a 45-degree setting.
compose = Compose([Resize(600), RandomRotate(45)])
# Calling the pipeline returns an (image, label) tuple; no label is passed here.
c_img = compose(img)
class TrainAugumentation():
    """Training-time pipeline: resize, then rotate, then normalise.

    Args:
        image_size: Target size forwarded to ``Resize``.
        rotation_degree: Rotation setting forwarded to ``RandomRotate``.
        mean_val: Mean forwarded to ``Normalize``.
        std_val: Standard deviation forwarded to ``Normalize``.
    """

    def __init__(self, image_size, rotation_degree, mean_val, std_val):
        # Keep the raw settings on the instance.
        self.image_size = image_size
        self.rotation_degree = rotation_degree
        self.mean_val = mean_val
        self.std_val = std_val

        # The steps run in exactly this order.
        resize_step = Resize(image_size)
        rotate_step = RandomRotate(rotation_degree)
        normalize_step = Normalize(mean_val, std_val)
        self.transforms = Compose([resize_step, rotate_step, normalize_step])

    def __call__(self, image, label=None):
        """Shorthand for :meth:`augment`; ``label`` is optional here."""
        return self.augment(image, label)

    def augment(self, image, label):
        """Apply the composed transforms and return ``(image, label)``."""
        augmented_image, augmented_label = self.transforms(image, label)
        return augmented_image, augmented_label
Compose() #封装各种图像变换
注意 ToTensor() 是将图片像素值转化成[0-1],然后转化为Tensor
def __getitem__(self, index):
    """Load one sample: the RGB image, its integer label and its camera id.

    Args:
        index: Position into ``self.names``, ``self.labels`` and ``self.cams``.
            (The original note says the index advances automatically;
            presumably it is supplied by the data loader - confirm against
            the caller.)

    Returns:
        tuple: ``(img, target, camid)`` where ``img`` is the RGB image after
        ``self.train_data_transform`` (when one is set), ``target`` is
        ``int(self.labels[index])`` and ``camid`` is ``self.cams[index]``.
    """
    image_path = os.path.join(self.data_dir, self.names[index])
    # Close the underlying file as soon as the pixels are converted, instead
    # of leaving the open handle to the garbage collector.
    # NOTE(review): assumes ``Image`` is PIL's Image module - confirm.
    with Image.open(image_path) as raw_image:
        img = raw_image.convert('RGB')
    target = int(self.labels[index])
    camid = self.cams[index]
    if self.train_data_transform is not None:
        img = self.train_data_transform(img)
    return img, target, camid
TensorFlow 2 在 tensorflow.keras.preprocessing.image 中封装了各种图像处理的 API,在模型训练时都会使用图片生成器 ImageDataGenerator。
keras.preprocessing.image.ImageDataGenerator(featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, zca_epsilon=1e-06, rotation_range=0, width_shift_range=0.0, height_shift_range=0.0, brightness_range=None, shear_range=0.0, zoom_range=0.0, channel_shift_range=0.0, fill_mode='nearest', cval=0.0, horizontal_flip=False, vertical_flip=False, rescale=None, preprocessing_function=None, data_format=None, validation_split=0.0, dtype=None)
featurewise_center: 将输入数据的均值设置为0,逐特征(按通道)进行
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training-time generator configured with random rotation, width/height
# shifts, shear, zoom and horizontal flips; fill_mode is set to 'nearest'.
train_datagen = ImageDataGenerator(rotation_range=45, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.2, zoom_range=0.25, horizontal_flip=True, fill_mode='nearest')
# Train on batches produced by train_datagen.flow() and validate on batches
# from test_datagen.flow(). The step counts are the integer division of
# train_idx.sum() / valid_idx.sum() by the batch size.
hist = model.fit(train_datagen.flow(Xtr, ytr, batch_size=BATCH_SIZE),steps_per_epoch=train_idx.sum()//BATCH_SIZE, epochs=EPOCHS, validation_data=test_datagen.flow(Xv, yv, batch_size=BATCH_SIZE), validation_steps=valid_idx.sum()//BATCH_SIZE, verbose=2)
在调用 flow() 方法时,会生成一个生成器,按 batch_size 的大小来输出图片数据,输出时会自动调用 random_transform 和 standardize 方法进行数据变换。
batch_x = np.zeros(tuple([current_batch_size] + list(self.x.shape)[1:]), dtype=K.floatx()) for i, j in enumerate(index_array): x = self.x[j] x = self.image_data_generator.random_transform(x.astype(K.floatx())) x = self.image_data_generator.standardize(x) batch_x[i] = x
def random_transform(self, x, seed=None):
    """Randomly augment a single image tensor.

    Random rotation, shift, shear and zoom parameters are drawn (each only
    when the corresponding setting is enabled), combined into one 3x3 matrix
    and applied in a single resampling pass. An optional channel shift and
    optional horizontal/vertical flips follow.

    # Arguments
        x: 3D tensor, single image.
        seed: random seed. When not None it is passed to ``np.random.seed``
            before any parameter is drawn.

    # Returns
        A randomly transformed version of the input (same shape).
    """
    # x is a single image, so it doesn't have image number at index 0
    img_row_axis = self.row_axis - 1  # 0 per the original note - presumably channels-last; confirm
    img_col_axis = self.col_axis - 1  # 1 per the original note - confirm
    img_channel_axis = self.channel_axis - 1  # 2 per the original note - confirm
    if seed is not None:
        np.random.seed(seed)
    # use composition of homographies
    # to generate final transform that needs to be applied
    # NOTE: the order of the random draws below determines the result for a
    # given seed, so the statements must not be reordered.
    if self.rotation_range:
        # rotation_range is in degrees; theta is converted to radians.
        theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
    else:
        theta = 0
    if self.height_shift_range:
        # The shift is a fraction of the image size along the row axis.
        tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_axis]
    else:
        tx = 0
    if self.width_shift_range:
        # The shift is a fraction of the image size along the column axis.
        ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_axis]
    else:
        ty = 0
    if self.shear_range:
        # The drawn value is fed straight into sin/cos below, i.e. it is
        # treated as radians.
        shear = np.random.uniform(-self.shear_range, self.shear_range)
    else:
        shear = 0
    if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
        zx, zy = 1, 1
    else:
        # Two independent zoom factors, one per spatial axis.
        zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2)
    transform_matrix = None
    if theta != 0:
        # Build the rotation matrix.
        rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])
        transform_matrix = rotation_matrix
    if tx != 0 or ty != 0:
        # Build the translation matrix and compose it with what came before.
        shift_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
        transform_matrix = shift_matrix if transform_matrix is None else np.dot(transform_matrix, shift_matrix)
    if shear != 0:
        # Build the shear matrix and compose it with what came before.
        shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]])
        transform_matrix = shear_matrix if transform_matrix is None else np.dot(transform_matrix, shear_matrix)
    if zx != 1 or zy != 1:
        # Build the zoom (scaling) matrix and compose it with what came before.
        zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]])
        transform_matrix = zoom_matrix if transform_matrix is None else np.dot(transform_matrix, zoom_matrix)
    if transform_matrix is not None:
        h, w = x.shape[img_row_axis], x.shape[img_col_axis]
        # Re-express the transform relative to the image centre.
        transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
        # Apply the combined transform to perform the augmentation.
        x = apply_transform(x, transform_matrix, img_channel_axis, fill_mode=self.fill_mode, cval=self.cval)
    # Channel shift.
    if self.channel_shift_range != 0:
        x = random_channel_shift(x, self.channel_shift_range, img_channel_axis)
    # Horizontal flip, taken with probability 0.5 when enabled.
    if self.horizontal_flip:
        if np.random.random() < 0.5:
            x = flip_axis(x, img_col_axis)
    # Vertical flip, taken with probability 0.5 when enabled.
    if self.vertical_flip:
        if np.random.random() < 0.5:
            x = flip_axis(x, img_row_axis)
    return x
def transform_matrix_offset_center(matrix, x, y):
    """Re-centre a 3x3 homogeneous transform on the point ``(x/2 + 0.5, y/2 + 0.5)``.

    The result is ``offset . matrix . reset``: coordinates are first moved so
    the centre point sits at the origin, ``matrix`` is applied, and the
    coordinates are moved back.

    # Arguments
        matrix: 3x3 transform matrix.
        x: Size along the first coordinate axis.
        y: Size along the second coordinate axis.

    # Returns
        The 3x3 transform expressed relative to the centre point.
    """
    o_x = float(x) / 2 + 0.5
    o_y = float(y) / 2 + 0.5
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    return np.dot(np.dot(offset_matrix, matrix), reset_matrix)


def apply_transform(x, transform_matrix, channel_axis=0, fill_mode='nearest', cval=0.):
    """Apply the image transformation specified by a matrix.

    Every channel is resampled independently with the same 2D affine
    transform, using order-0 (nearest-neighbour) interpolation.

    # Arguments
        x: 3D array; one axis is the channel axis, the other two are spatial.
        transform_matrix: 3x3 matrix. Its upper-left 2x2 part is used as the
            affine matrix and the first two entries of its last column as
            the offset.
        channel_axis: Index of the channel axis in ``x`` (may be negative).
        fill_mode: Passed as ``mode`` to ``affine_transform``; controls how
            points outside the input are filled.
        cval: Fill value passed to ``affine_transform``.

    # Returns
        The transformed version of the input.
    """
    # Bring the channels to the front so they can be iterated one by one.
    x = np.moveaxis(x, channel_axis, 0)
    final_affine_matrix = transform_matrix[:2, :2]
    final_offset = transform_matrix[:2, 2]
    # ndi is scipy.ndimage. Call affine_transform from the top-level
    # namespace: the ``ndi.interpolation`` sub-namespace is deprecated.
    channel_images = [
        ndi.affine_transform(
            x_channel,
            final_affine_matrix,
            final_offset,
            order=0,
            mode=fill_mode,
            cval=cval)
        for x_channel in x
    ]
    x = np.stack(channel_images, axis=0)
    # Put the channel axis back where it was.
    return np.moveaxis(x, 0, channel_axis)