1. 目标:使用 NEON intrinsic 函数,将一张 512×512 的四通道(RGBA)PNG 图像顺时针旋转 90 度。
思路:将图像按 4×4 像素分块,对每个块先做转置,再做水平镜像(块内每行像素逆序),然后写入目标图像中对应的位置。图像读写使用 stb_image 库(stb_image.h / stb_image_write.h)。
2. 代码
| #include <stdio.h> |
| #include <arm_neon.h> |
| |
| #include <stdlib.h> |
| #define STB_IMAGE_IMPLEMENTATION |
| #include "./stb/stb_image.h" |
| #define STB_IMAGE_WRITE_IMPLEMENTATION |
| #include "./stb/stb_image_write.h" |
| |
| |
| |
| int main() |
| { |
| |
| int w,h,c; |
| #ifdef DEBUG |
| w=h=8;c=4; |
| uint8_t* src = (uint8_t*)calloc(w*h*c,1); |
| for(int i=0;i<h;i++) |
| { |
| for(int j=0;j<h*c;j++) |
| src[i*h*c+j] = j; |
| } |
| for(int i=0;i<h;i++) |
| { |
| for(int j=0;j<w*c;j+=4) |
| printf("%u%u%u%u ",*(dst+i*h*c+j),*(dst+i*h*c+j+1),*(dst+i*h*c+j+2),*(dst+i*h*c+j+3)); |
| printf("\n"); |
| } |
| printf("======\n"); |
| #else |
| uint8_t *src = stbi_load("./pic.png",&w,&h,&c,0); |
| if(!src) |
| { |
| printf("load img failed.\n"); |
| return 0; |
| } |
| else |
| printf("int w %d h %d c %d\n",w,h,c); |
| |
| #endif |
| |
| uint8_t *dst = (uint8_t*)calloc(w*h*c,sizeof(uint8_t)); |
| int blockSize = 4; |
| |
| for(int i=0;i<h;i+=blockSize) |
| { |
| for(int j=0;j<w;j+=blockSize) |
| { |
| uint32x4x4_t block = {0}; |
| uint32x4x2_t blockTemp = {0}; |
| |
| |
| for(int m=0;m<blockSize;m++) |
| block.val[m] = vreinterpretq_u32_u8(vld1q_u8(src+((i+m)*w+j)*c)); |
| |
| blockTemp = vtrnq_u32(block.val[0],block.val[1]); |
| block.val[0] = blockTemp.val[0]; |
| block.val[1] = blockTemp.val[1]; |
| blockTemp = vtrnq_u32(block.val[2],block.val[3]); |
| block.val[2] = blockTemp.val[0]; |
| block.val[3] = blockTemp.val[1]; |
| |
| blockTemp.val[0] = vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(block.val[0]),vreinterpretq_u64_u32(block.val[2]))); |
| blockTemp.val[1] = vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(block.val[0]),vreinterpretq_u64_u32(block.val[2]))); |
| block.val[0] = blockTemp.val[0]; |
| block.val[2] = blockTemp.val[1]; |
| |
| blockTemp.val[0] = vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(block.val[1]),vreinterpretq_u64_u32(block.val[3]))); |
| blockTemp.val[1] = vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(block.val[1]),vreinterpretq_u64_u32(block.val[3]))); |
| block.val[1] = blockTemp.val[0]; |
| block.val[3] = blockTemp.val[1]; |
| |
| for(int m=0;m<blockSize;m++) |
| { |
| block.val[m] = vrev64q_u32(block.val[m]); |
| block.val[m] = vcombine_u32(vget_high_u32(block.val[m]),vget_low_u32(block.val[m])); |
| |
| vst1q_u8(dst+((j+m)*h+(h-i-blockSize))*c,vreinterpretq_u8_u32(block.val[m])); |
| } |
| } |
| } |
| #ifdef DEBUG |
| for(int i=0;i<w;i++) |
| { |
| for(int j=0;j<h*c;j+=4) |
| printf("%u%u%u%u ",*(dst+i*h*c+j),*(dst+i*h*c+j+1),*(dst+i*h*c+j+2),*(dst+i*h*c+j+3)); |
| printf("\n"); |
| } |
| free(src); |
| #else |
| stbi_write_png("pic1.png",h,w,c,dst,h*c); |
| stbi_image_free(src); |
| #endif |
| free(dst); |
| return 0; |
| |
| } |
3. 测试结果
原图

旋转后图像

【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 没有源码,如何修改代码逻辑?
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
· NetPad:一个.NET开源、跨平台的C#编辑器
· PowerShell开发游戏 · 打蜜蜂