CPU指令集——bayer抽取r、g、b三通道

需求：在高帧率场景下，相机输出的一般是bayer格式数据。图像处理时，通常会先插值成rgb，再拆分为单通道。如果可以直接从bayer数据中抽出r、g、b，效率将大大提升。

注意：抽取后r、g、b三个通道的宽、高各为原图的一半（像素数为原图的1/4），没有做插值（插值只会让数据量变大，并没有引入有效信息）；每个2x2单元中的两个g取均值合成一个g。

效果：CPU指令集优化后，速度是传统算法的8倍左右。

应用举例：

#include<opencv.hpp>
#include <Windows.h>
int main()
{
    cv::Mat img_bayerRG = cv::imread("1.bmp", 0);    //单通道图像读取（1.bmp是bayerRG格式存储的单通道图像）
    const uint8_t *bayer = img_bayerRG.data;        //指向bayerRG数据
    int height = img_bayerRG.rows;
    int width = img_bayerRG.cols;
    uint8_t *r = new uint8_t[width*height / 4];    //抽完后尺寸为原来的1/2
    uint8_t *g = new uint8_t[width*height / 4];    //g做特殊处理，2个g的均值合成1个g
    uint8_t *b = new uint8_t[width*height / 4];

    LARGE_INTEGER nEndTime, nBeginTime, nFreq;
    double time;
    QueryPerformanceFrequency(&nFreq);
    QueryPerformanceCounter(&nBeginTime);//获取开始时刻计数值

    for (int i = 0; i < 100; i++)
    {
        if (0)
        {
            bayer2rgb_CPU(bayer, width, height, BayerFormat::bayerRG, r, g, b);
        }
        else
        {
            const uint8_t *src[2];
            size_t index = 0;

            for (int i = 0; i < height; i += 2)
            {
                //每两行中的，第一行和第二行
                src[0] = bayer + width*i;
                src[1] = src[0] + width;

                for (int j = 0; j < width; j += 2)
                {
                    *(r + index) = src[0][j];
                    *(b + index) = src[1][j + 1];
                    *(g + index) = (src[0][j + 1] + src[1][j]) / 2;
                    index++;
                }
            }
        }
    }

    QueryPerformanceCounter(&nEndTime);//获取开始时刻计数值
    time = (double)(nEndTime.QuadPart - nBeginTime.QuadPart) * 1000 / (double)nFreq.QuadPart;//ms（开始-停止）/频率即为秒数，精确到小数点后6位
    printf("100次bayer2rgb耗时（ms）：    %f \n\n", time);

    cv::Mat R = cv::Mat(height / 2, width / 2, CV_8U, r);
    cv::Mat B = cv::Mat(height / 2, width / 2, CV_8U, b);
    cv::Mat G = cv::Mat(height / 2, width / 2, CV_8U, g);

    cv::waitKey(100000);

    delete[] r;
    delete[] g;
    delete[] b;

    return 0;
}

函数封装：

#include <intrin.h>

// Layout of one 2x2 Bayer cell, named after the two pixels of its first row
// (the second row holds the remaining green and the remaining colour).
// Kept as an unscoped enum so the values convert implicitly to the
// int nBayerFormat parameter of bayer2rgb_CPU.
enum BayerFormat
{
    bayerRG = 0,    // R G / G B
    bayerGR = 1,    // G R / B G
    bayerBG = 2,    // B G / G R
    bayerGB = 3     // G B / R G
};

// Splits an 8-bit Bayer mosaic into half-resolution R, G and B planes with AVX2.
//
// Every 2x2 Bayer cell produces one pixel per output plane: R and B are copied
// unchanged, and the two green samples of the cell are merged with
// _mm_avg_epu8, i.e. the rounded-up average (a + b + 1) >> 1. No interpolation
// is performed.
//
// Parameters:
//   pBayer       - input mosaic, nWidth * nHeight bytes, rows tightly packed
//                  (row stride == nWidth). No alignment is required.
//   nWidth       - image width in pixels; must be a multiple of 32, because one
//                  256-bit load consumes 32 pixels of a row.
//   nHeight      - image height in pixels; must be even.
//   nBayerFormat - one of the BayerFormat values; any other value writes nothing.
//   pR, pG, pB   - output planes, each (nWidth / 2) * (nHeight / 2) bytes.
//
// Precondition (not checked): (nWidth % 32 == 0) && (nHeight % 2 == 0).
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("avx2")))    // lets GCC/Clang build the AVX2 intrinsics without a global -mavx2
#endif
void bayer2rgb_CPU(const unsigned char* pBayer, int nWidth,int nHeight,int nBayerFormat, unsigned char* pR, unsigned char* pG, unsigned char* pB)
{
    // Fewer than 32 columns or 2 rows means there is no complete block to process.
    if (nWidth < 32 || nHeight < 2)
    {
        return;
    }

    // Per 128-bit lane: bytes at even offsets go to the low 8 bytes, bytes at odd offsets to the high 8 bytes.
    const __m256i shuffle_oe = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);

    // size_t arithmetic keeps the byte offsets from overflowing int on large frames.
    const size_t srcStride = (size_t)nWidth;
    const size_t rowPairs = (size_t)nHeight / 2;
    const size_t groupsPerRow = (size_t)nWidth / 32;    // 32 input pixels -> 16 output pixels -> one __m128i

    __m128i* const dstR = reinterpret_cast<__m128i*>(pR);
    __m128i* const dstG = reinterpret_cast<__m128i*>(pG);
    __m128i* const dstB = reinterpret_cast<__m128i*>(pB);

    for (size_t row2 = 0; row2 < rowPairs; row2++)
    {
        const unsigned char* const srcRow1 = pBayer + srcStride * (row2 * 2);
        const unsigned char* const srcRow2 = srcRow1 + srcStride;
        const size_t dstRow = groupsPerRow * row2;      // output row start, in __m128i units

        for (size_t col32 = 0; col32 < groupsPerRow; col32++)
        {
            // Unaligned loads: the caller's buffer is not guaranteed to be 32-byte aligned.
            const __m256i line1 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(srcRow1) + col32);
            const __m256i line2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(srcRow2) + col32);

            // The byte shuffle works on each 16-byte half separately, giving
            // [even(lo) | odd(lo) | even(hi) | odd(hi)] as four 64-bit quarters.
            const __m256i line1_128oe = _mm256_shuffle_epi8(line1, shuffle_oe);
            const __m256i line2_128oe = _mm256_shuffle_epi8(line2, shuffle_oe);
            // Reorder the quarters to 0,2,1,3: all 16 even-offset bytes in the low half, all 16 odd-offset bytes in the high half.
            const __m256i line1_oe = _mm256_permute4x64_epi64(line1_128oe, 0b11011000);
            const __m256i line2_oe = _mm256_permute4x64_epi64(line2_128oe, 0b11011000);

            const __m128i line11 = _mm256_extracti128_si256(line1_oe, 0);    // row 1, even columns
            const __m128i line12 = _mm256_extracti128_si256(line1_oe, 1);    // row 1, odd columns
            const __m128i line21 = _mm256_extracti128_si256(line2_oe, 0);    // row 2, even columns
            const __m128i line22 = _mm256_extracti128_si256(line2_oe, 1);    // row 2, odd columns

            const size_t dst = dstRow + col32;
            switch (nBayerFormat)
            {
            case bayerRG:
                _mm_storeu_si128(dstR + dst, line11);
                _mm_storeu_si128(dstB + dst, line22);
                _mm_storeu_si128(dstG + dst, _mm_avg_epu8(line12, line21));    // average of the two greens
                break;
            case bayerGR:
                _mm_storeu_si128(dstR + dst, line12);
                _mm_storeu_si128(dstB + dst, line21);
                _mm_storeu_si128(dstG + dst, _mm_avg_epu8(line11, line22));    // average of the two greens
                break;
            case bayerBG:
                _mm_storeu_si128(dstR + dst, line22);
                _mm_storeu_si128(dstB + dst, line11);
                _mm_storeu_si128(dstG + dst, _mm_avg_epu8(line12, line21));    // average of the two greens
                break;
            case bayerGB:
                _mm_storeu_si128(dstR + dst, line21);
                _mm_storeu_si128(dstB + dst, line12);
                _mm_storeu_si128(dstG + dst, _mm_avg_epu8(line11, line22));    // average of the two greens
                break;
            default:
                // Unknown format: nothing is written.
                break;
            }
        }
    }
}

posted @ 2024-06-18 14:18 夕西行阅读(61) 评论(0) 编辑收藏举报

刷新页面返回顶部

夕西行

当我走过这些日子时，我从未在意过。然而，当我如今提起笔，要把这些往事写下来时，才发现‘人生如戏，戏如人生’。

CPU指令集——bayer抽取r、g、b三通道

公告