1. 这是一个将图像通道从 BGRA 转换为 RGB 的示例程序,提供三种实现方式:普通(标量)写法、OpenMP SIMD 编译指导语句、NEON intrinsic 函数。
2. 源码
#include <arm_neon.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#include <cstddef>
#include <cstdint>
#include <ctime>
#include <iostream>
#include <vector>
| using namespace std; |
| |
/// Converts a tightly packed BGRA image to a tightly packed RGB image
/// (plain scalar reference implementation).
///
/// @param src  Source pixels, 4 bytes per pixel in B, G, R, A order;
///             w * h pixels are read.
/// @param dst  Destination pixels, 3 bytes per pixel in R, G, B order;
///             w * h pixels are written.
/// @param w    Image width in pixels.
/// @param h    Image height in pixels.
///
/// The alpha byte of every pixel is dropped. Nothing is read or written when
/// w or h is not positive.
void bgra2rgb(const uint8_t *src, uint8_t *dst, int w, int h)
{
    if (w <= 0 || h <= 0)
    {
        return;
    }

    // Rows are stored back to back (row stride == w), so the image is walked
    // as one flat run of pixels. The count and byte offsets are computed in
    // std::size_t so that w * h * 4 cannot overflow int on large images.
    const std::size_t pixel_count =
        static_cast<std::size_t>(w) * static_cast<std::size_t>(h);

    for (std::size_t p = 0; p < pixel_count; ++p)
    {
        const uint8_t *in = src + p * 4;
        uint8_t *out = dst + p * 3;
        out[0] = in[2]; // R
        out[1] = in[1]; // G
        out[2] = in[0]; // B
    }
}
| |
| |
| |
/// Converts a tightly packed BGRA image to a tightly packed RGB image, with
/// the per-row loop annotated with `#pragma omp simd` so that an
/// OpenMP-enabled compiler is asked to vectorize it.
///
/// @param src  Source pixels, 4 bytes per pixel in B, G, R, A order;
///             w * h pixels are read. Taken as pointer-to-const to match the
///             other conversion routines; existing callers passing a
///             non-const pointer still compile.
/// @param dst  Destination pixels, 3 bytes per pixel in R, G, B order;
///             w * h pixels are written.
/// @param w    Image width in pixels.
/// @param h    Image height in pixels.
///
/// The alpha byte of every pixel is dropped. Nothing is read or written when
/// w or h is not positive. The simd directive declares the loop iterations
/// safe to execute in SIMD lanes, so src and dst should not overlap.
/// Without OpenMP support the pragma is ignored and the loop runs as
/// ordinary scalar code.
void bgra2rgb_omp(const uint8_t *src, uint8_t *dst, int w, int h)
{
    if (w <= 0 || h <= 0)
    {
        return;
    }

    // Offsets are computed in std::size_t so that (row * w + col) * 4 cannot
    // overflow int on large images.
    const std::size_t width = static_cast<std::size_t>(w);
    const std::size_t height = static_cast<std::size_t>(h);

    for (std::size_t row = 0; row < height; ++row)
    {
        const uint8_t *in = src + row * width * 4;
        uint8_t *out = dst + row * width * 3;

#pragma omp simd
        for (std::size_t col = 0; col < width; ++col)
        {
            out[col * 3] = in[col * 4 + 2];     // R
            out[col * 3 + 1] = in[col * 4 + 1]; // G
            out[col * 3 + 2] = in[col * 4];     // B
        }
    }
}
| |
/// Converts a tightly packed BGRA image to a tightly packed RGB image using
/// ARM NEON de-interleaving loads and interleaving stores.
///
/// @param src  Source pixels, 4 bytes per pixel in B, G, R, A order;
///             w * h pixels are read.
/// @param dst  Destination pixels, 3 bytes per pixel in R, G, B order;
///             w * h pixels are written.
/// @param w    Image width in pixels (any positive value, not only
///             multiples of 16).
/// @param h    Image height in pixels.
///
/// The alpha byte of every pixel is dropped. Nothing is read or written when
/// w or h is not positive.
///
/// Rows are stored back to back, so the image is processed as one flat run
/// of pixels: full groups of 16 pixels go through NEON, and the remaining
/// (at most 15) pixels are converted one by one. This keeps every access
/// inside the w * h pixel range even when the pixel count is not a multiple
/// of 16. When the compiler does not report NEON support, only the scalar
/// loop is compiled.
void bgra2rgb_neon(const uint8_t *src, uint8_t *dst, int w, int h)
{
    if (w <= 0 || h <= 0)
    {
        return;
    }

    // Computed in std::size_t so that w * h * 4 cannot overflow int.
    const std::size_t pixel_count =
        static_cast<std::size_t>(w) * static_cast<std::size_t>(h);
    std::size_t p = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    for (; p + 16 <= pixel_count; p += 16)
    {
        // vld4q_u8 loads 64 bytes and splits them into four 16-lane
        // registers: val[0] = B, val[1] = G, val[2] = R, val[3] = A.
        const uint8x16x4_t bgra = vld4q_u8(src + p * 4);

        uint8x16x3_t rgb;
        rgb.val[0] = bgra.val[2]; // R
        rgb.val[1] = bgra.val[1]; // G
        rgb.val[2] = bgra.val[0]; // B

        // vst3q_u8 re-interleaves the three registers into 48 output bytes.
        vst3q_u8(dst + p * 3, rgb);
    }
#endif

    // Scalar tail (or the whole image when NEON is unavailable).
    for (; p < pixel_count; ++p)
    {
        const uint8_t *in = src + p * 4;
        uint8_t *out = dst + p * 3;
        out[0] = in[2]; // R
        out[1] = in[1]; // G
        out[2] = in[0]; // B
    }
}
| |
| int main(int argc,char** argv) |
| { |
| if(argc!=2) |
| { |
| printf("should parameter 0:original 1:omp simd 2:neon simd."); |
| return 0; |
| } |
| int mode = atoi(argv[1]); |
| int nloop = 100; |
| const int w = 480; |
| const int h = 640; |
| uint8_t bgra_mat[w * h * 4]; |
| uint8_t rgb_mat[w * h * 3]; |
| srand(100); |
| for (int i = 0; i < w * h * 4; i++) |
| { |
| bgra_mat[i] = rand() % 256; |
| } |
| |
| clock_t t = clock(); |
| switch (mode) |
| { |
| case 0: |
| for (int iloop = 0; iloop < nloop; iloop++) |
| bgra2rgb(bgra_mat, rgb_mat, w, h); |
| break; |
| case 1: |
| omp_set_num_threads(4); |
| for (int iloop = 0; iloop < nloop; iloop++) |
| bgra2rgb_omp(bgra_mat, rgb_mat, w, h); |
| break; |
| case 2: |
| for (int iloop = 0; iloop < nloop; iloop++) |
| bgra2rgb_neon(bgra_mat, rgb_mat, w, h); |
| break; |
| default: |
| break; |
| } |
| |
| t = clock() - t; |
| |
| cout << "bgra[4-6] data:" << (int)bgra_mat[4] << "," << (int)bgra_mat[5] << "," << (int)bgra_mat[6] << endl; |
| cout << "rgb[3-5] data:" << (int)rgb_mat[3] << "," << (int)rgb_mat[4] << "," << (int)rgb_mat[5] << endl; |
| cout << "cost time(clock):" << t / nloop << endl; |
| } |
3. 编译命令
| g++ brga2rgb.cpp -o brga2rgb -fopenmp -O1 |
4. 数据分析
| 源码未向量化耗时:490 时间单位 |
| openmp-simd 耗时:250 时间单位 |
| neon intrinsic 函数:210 时间单位 |
分析:使用手动向量化neon intrinsic 函数的效率最高。
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 没有源码,如何修改代码逻辑?
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
· NetPad:一个.NET开源、跨平台的C#编辑器
· PowerShell开发游戏 · 打蜜蜂