Image-handling pitfalls in PyTorch

 

For a while I was reading image data with the following commands:

img = cv2.imread(os.path.join(path, file, pic))
img = Image.fromarray(img.astype(np.uint8))

Then one day it suddenly dawned on me: why go through all that trouble when I could simply use:

img = Image.open(os.path.join(path, file, pic))

and be done with it.

To use torchvision transforms in PyTorch, the data needs to be read in as a PIL image, which is exactly what Image.open returns; that is also why the two-line version above had to convert the cv2 array with Image.fromarray.
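That said, if you stick with the cv2 route (say, because some OpenCV preprocessing is involved), the missing piece in the two-line snippet above is a BGR-to-RGB conversion before wrapping the array as a PIL image. A minimal sketch, with 1.jpg as a placeholder path:

import cv2
from PIL import Image

bgr = cv2.imread('1.jpg')                    # H x W x 3 uint8 array in BGR order (None if the path is wrong)
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)   # reorder the channels to RGB
img = Image.fromarray(rgb)                   # matches Image.open('1.jpg').convert('RGB') up to minor decoder differences

The resulting PIL image then goes through torchvision transforms the same way as one returned by Image.open.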

 

A bit of background knowledge (see the quick check after the list):

  • Images read with scipy.misc.imread come back in RGB order;
  • Images read with cv2.imread come back in BGR order;
  • Images read with PIL.Image.open come back in RGB order.
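A rough way to confirm the cv2-vs-PIL difference yourself (a minimal sketch; 1.jpg is again a placeholder path):

import cv2
import numpy as np
from PIL import Image

bgr = cv2.imread('1.jpg')                            # BGR, uint8, H x W x 3
rgb = np.array(Image.open('1.jpg').convert('RGB'))   # RGB, uint8, H x W x 3

# Reversing the cv2 channel axis should line the two up
# (occasional off-by-one pixel values are possible if the JPEG decoders differ).
print(np.array_equal(bgr[..., ::-1], rgb))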

 

That is how a problem crept into my wavelet-transform code:

#coding:utf-8
import torch.nn as nn
import torch

def dwt_init(x):
    # 2D Haar DWT: split the input into even/odd rows and columns, form the
    # LL/HL/LH/HH sub-bands (height and width are halved), and stack the four
    # sub-bands along the channel dimension.
    x01 = x[:, :, 0::2, :] / 2
    x02 = x[:, :, 1::2, :] / 2
    x1 = x01[:, :, :, 0::2]
    x2 = x02[:, :, :, 0::2]
    x3 = x01[:, :, :, 1::2]
    x4 = x02[:, :, :, 1::2]
    x_LL = x1 + x2 + x3 + x4
    x_HL = -x1 - x2 + x3 + x4
    x_LH = -x1 + x2 - x3 + x4
    x_HH = x1 - x2 - x3 + x4

    return torch.cat((x_LL, x_HL, x_LH, x_HH), 1)


# inverse 2D discrete wavelet transform, implemented with the Haar wavelet
def iwt_init(x):
    r = 2
    in_batch, in_channel, in_height, in_width = x.size()
    #print([in_batch, in_channel, in_height, in_width])
    out_batch, out_channel, out_height, out_width = in_batch, int(
        in_channel / (r**2)), r * in_height, r * in_width
    print(out_batch, out_channel, out_height, out_width)
    x1 = x[:, 0:out_channel, :, :] / 2
    x2 = x[:, out_channel:out_channel * 2, :, :] / 2
    x3 = x[:, out_channel * 2:out_channel * 3, :, :] / 2
    x4 = x[:, out_channel * 3:out_channel * 4, :, :] / 2
    print(x1.shape)
    print(x2.shape)
    print(x3.shape)
    print(x4.shape)
    # h = torch.zeros([out_batch, out_channel, out_height, out_width]).float().cuda()
    h = torch.zeros([out_batch, out_channel, out_height, out_width]).float()

    h[:, :, 0::2, 0::2] = x1 - x2 - x3 + x4
    h[:, :, 1::2, 0::2] = x1 - x2 + x3 - x4
    h[:, :, 0::2, 1::2] = x1 + x2 - x3 - x4
    h[:, :, 1::2, 1::2] = x1 + x2 + x3 + x4

    return h


# 2D discrete wavelet transform (DWT)
class DWT(nn.Module):
    def __init__(self):
        super(DWT, self).__init__()
        self.requires_grad = False  # pure signal processing, not a learned convolution, so no gradient is needed

    def forward(self, x):
        return dwt_init(x)


# inverse 2D discrete wavelet transform (IWT)
class IWT(nn.Module):
    def __init__(self):
        super(IWT, self).__init__()
        self.requires_grad = False

    def forward(self, x):
        return iwt_init(x)

if __name__ == '__main__':
    import os, cv2, torchvision
    from PIL import Image
    import numpy as np
    from torchvision import transforms as trans
    # img = cv2.imread('./1.jpg')
    # print(img.shape)
    # img = Image.fromarray(img.astype(np.uint8))
    img = Image.open('./1.jpg')
    transform = trans.Compose([
        trans.ToTensor()
    ])
    img = transform(img).unsqueeze(0)
    dwt = DWT()
    change_img_tensor = dwt(img)
    print(change_img_tensor.shape)

    for i in range(change_img_tensor.size(1)//3):
        print(i*3,i*3+3)
        torchvision.utils.save_image(change_img_tensor[:, i*3:i*3+3, :, :], os.path.join('./', 'change_{}.jpg'.format(i)))
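One way to sanity-check the transform pair, assuming the DWT and IWT classes defined above (and torch already imported at the top of the script): the Haar IWT should reconstruct the DWT input up to floating-point error. A minimal sketch on random data:

x = torch.rand(1, 3, 8, 8)                 # any tensor with even height and width
dwt, iwt = DWT(), IWT()
rec = iwt(dwt(x))                          # iwt_init also prints the intermediate shapes
print(torch.allclose(rec, x, atol=1e-6))   # expected: True, the Haar pair is an exact inverse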

The data obtained using only the first reading method:

 

 

The data obtained with the second method:

This is because the first method reads the image in BGR order, whereas what we actually want is the RGB order shown below; images are normally stored and displayed as RGB.

 

 

Why did deep learning commonly describe images with BGR channel order rather than RGB?

Because Caffe, a representative of the earliest popular frameworks, was built on OpenCV, and OpenCV's default channel order is BGR. This is one of OpenCV's classic beginner traps: BGR is a historical leftover, kept for compatibility with certain early hardware. For your own training you can perfectly well use RGB, and newer libraries have largely eliminated the BGR-vs-RGB question, since it is just a matter of reordering the channels. But if you want to reuse some older pretrained models, you have to respect their BGR convention, which can be handled with an external conversion, as sketched below.
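For instance, if a legacy model expects BGR input while the rest of the pipeline works in RGB, the swap is a one-liner. A rough sketch, assuming an N x C x H x W image tensor:

import torch

rgb = torch.rand(1, 3, 224, 224)   # RGB image batch in N x C x H x W layout
bgr = torch.flip(rgb, dims=[1])    # reverse the channel dimension: RGB -> BGR
# for an H x W x 3 numpy array, the equivalent swap is img[..., ::-1]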



For example, here is a dataset I processed a while ago:

from pathlib import Path
from config import get_config
import argparse

from PIL import Image
from tqdm import tqdm
import mxnet as mx
import cv2
import numpy as np

def load_mx_rec(rec_path):
    save_path = rec_path/'imgs'
    if not save_path.exists():
        save_path.mkdir()
    imgrec = mx.recordio.MXIndexedRecordIO(str(rec_path/'train.idx'), str(rec_path/'train.rec'), 'r')
    img_info = imgrec.read_idx(0)
    header,_ = mx.recordio.unpack(img_info)
    max_idx = int(header.label[0])
    for idx in tqdm(range(1,max_idx)):
        img_info = imgrec.read_idx(idx)
        header, img = mx.recordio.unpack_img(img_info)  # img is decoded with cv2 and comes back in BGR order
        label = int(header.label)
        #add
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        img = Image.fromarray(img)
        label_path = save_path/str(label)
        if not label_path.exists():
            label_path.mkdir()
        img.save(label_path/'{}.jpg'.format(idx), quality=95)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='for face verification')
    parser.add_argument("-r", "--rec_path", help="mxnet record file path",default='faces_emore', type=str)
    args = parser.parse_args()
    conf = get_config()
    rec_path = conf.data_path/args.rec_path
    load_mx_rec(rec_path)  # the training data lives in train.idx and train.rec

If I had not added this line:

img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

then the colors of the resulting images look very odd:

 

After adding this conversion, the resulting images look normal:

 

Earlier I had mistakenly written it as:

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

but it turned out to give exactly the same result. That is actually expected: converting between BGR and RGB just reverses the order of the three channels, so cv2.COLOR_BGR2RGB and cv2.COLOR_RGB2BGR perform the identical operation.
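A quick way to convince yourself of that, as a small sketch on random data:

import cv2
import numpy as np

img = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
a = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
b = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
print(np.array_equal(a, b))               # True: both simply reverse the channel order
print(np.array_equal(a, img[..., ::-1]))  # True: same as flipping the last axis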

 

posted @ 2020-03-19 15:47 慢行厚积