MCODEC - 感恩的心,感谢生命中的每一个人

(本站所有文章都是原创,转载请注明出处)
  首页  :: 联系 :: 订阅  :: 管理

tm1500 fdct

Posted on 2009-10-23 19:17  mcodec  阅读(335)  评论(0)  编辑  收藏  举报

 

应用于 Philips Trimedia 1500 系列 8x8 快速离散余弦变换

 

/*
 * Coefficient words passed as the second operand of IFIR16 in horiz_dct()
 * and vertical_dct().
 *
 * NOTE(review): IFIR16 is not defined in this file; each word presumably
 * packs two signed 16-bit fixed-point DCT coefficients (upper and lower
 * halfword) - confirm against the TriMedia custom-op documentation.
 */

/* C0..C7: paired with tmp145 / tmp176 to form outputs o7, o3, o5 and o1. */
#define C0   0xA73B4B42
#define C1   0x11A8CDB7
#define C2   0xCDB7A73B
#define C3   0x4B42EE58
#define C4   0x4B4211A8
#define C5   0x3249A73B
#define C6   0x11A83249
#define C7   0x58C54B42

/* C8, C9: applied to tmp201 (o0, o4); C10, C11: applied to tmp232 (o2, o6). */
#define C8   0x40004000
#define C9   0x4000C000
#define C10  0x539E22A3
#define C11  0x22A3AC62

/*
 * Rounding helpers applied to a transform result before it is handed to
 * PACK16MSB / PACK16_MSB.
 *
 * HROUND(x): twice x plus 0x8000 (used by packltor / packrtol).
 * VROUND(x): x plus 0x8000       (used by vertical_flush).
 *
 * HROUND multiplies by two instead of writing (x) + (x), so the argument is
 * evaluated exactly once and an argument with side effects stays well
 * defined.
 *
 * NOTE(review): 0x8000 is presumably the round-to-nearest bias for a result
 * whose upper 16 bits are kept by the pack operation - confirm.
 */
#define HROUND(x) ((2 * (x)) + 0x8000)
#define VROUND(x) ((x) + 0x8000)

/*
 * Small wrappers around operations that are not defined in this file.
 *
 * PACK16_MSB(first, second): PACK16MSB with its two operands exchanged.
 * READ_NORM(word):           the word passed through ROLI with a count of 16.
 * READ_REV(word):            the word unchanged.
 *
 * NOTE(review): ROLI(16, w) is presumably a rotate-left by 16 bits, i.e. it
 * swaps the two halfwords of w - confirm against the TriMedia custom-op
 * documentation.
 */
#define PACK16_MSB(first, second)  PACK16MSB(second, first)
#define READ_NORM(word)            ROLI(16, (word))
#define READ_REV(word)             (word)

/*
 * horiz_dct(tab, o0..o7) - one 8-point transform pass over four packed words.
 *
 * tab     expression indexable as tab[0]..tab[3]. Words 0 and 2 are loaded
 *         through READ_NORM, words 1 and 3 through READ_REV.
 * o0..o7  lvalues that receive the eight results.
 *
 * The macro assigns the caller's variables tmp0..tmp3, tmp101, tmp132,
 * tmp176, tmp145, tmp201 and tmp232; all of them must be in scope.
 *
 * Steps: dual add/subtract of (tmp0, tmp3) and (tmp1, tmp2), a second
 * add/subtract of the two sums, then IFIR16 against C8..C11 for o0/o4/o2/o6
 * and sums of two IFIR16 terms against C0..C7 for o7/o3/o5/o1.
 *
 * NOTE(review): DSPIDUALADD, DSPIDUALSUB and IFIR16 are not defined in this
 * file; presumably TriMedia custom ops (dual 16-bit add / subtract and a dual
 * 16-bit multiply-sum) - confirm against the TriMedia documentation.
 *
 * The body is wrapped in do { } while (0) so that an invocation is a single
 * statement and stays correct under an unbraced if/else. Invocations must be
 * terminated with a semicolon.
 */
#define horiz_dct(tab, o0, o1, o2, o3, o4, o5, o6, o7)      \
 do {                                                        \
  tmp0 = READ_NORM((tab)[0]);                                \
  tmp1 = READ_REV ((tab)[1]);                                \
  tmp2 = READ_NORM((tab)[2]);                                \
  tmp3 = READ_REV ((tab)[3]);                                \
                                                             \
  tmp101 = DSPIDUALADD(tmp0, tmp3);                          \
  tmp132 = DSPIDUALADD(tmp1, tmp2);                          \
  tmp176 = DSPIDUALSUB(tmp0, tmp3);                          \
  tmp145 = DSPIDUALSUB(tmp1, tmp2);                          \
                                                             \
  tmp201 = DSPIDUALADD(tmp101, tmp132);                      \
  tmp232 = DSPIDUALSUB(tmp101, tmp132);                      \
                                                             \
  (o0) = IFIR16(tmp201, C8);                                 \
  (o4) = IFIR16(tmp201, C9);                                 \
  (o2) = IFIR16(tmp232, C10);                                \
  (o6) = IFIR16(tmp232, C11);                                \
                                                             \
  (o7) = IFIR16(tmp145, C0) + IFIR16(tmp176, C1);            \
  (o3) = IFIR16(tmp145, C2) + IFIR16(tmp176, C3);            \
  (o5) = IFIR16(tmp145, C4) + IFIR16(tmp176, C5);            \
  (o1) = IFIR16(tmp145, C6) + IFIR16(tmp176, C7);            \
 } while (0)

/*
 * packltor(o0..o7) - combine the results of two transform passes into eight
 * packed words, s-value first.
 *
 * For k = 0..7: oK = PACK16MSB(HROUND(sK), HROUND(tK)).
 *
 * s0..s7 and t0..t7 are read by name from the invoking scope; o0..o7 are
 * lvalues that receive the packed words.
 *
 * NOTE(review): PACK16MSB is not defined in this file; presumably it joins
 * the upper halfwords of its two operands into one word - confirm.
 *
 * Wrapped in do { } while (0) so that an invocation is a single statement;
 * terminate it with a semicolon.
 */
#define packltor(o0, o1, o2, o3, o4, o5, o6, o7)      \
 do {                                                  \
  (o0) = PACK16MSB(HROUND(s0), HROUND(t0));            \
  (o1) = PACK16MSB(HROUND(s1), HROUND(t1));            \
  (o2) = PACK16MSB(HROUND(s2), HROUND(t2));            \
  (o3) = PACK16MSB(HROUND(s3), HROUND(t3));            \
  (o4) = PACK16MSB(HROUND(s4), HROUND(t4));            \
  (o5) = PACK16MSB(HROUND(s5), HROUND(t5));            \
  (o6) = PACK16MSB(HROUND(s6), HROUND(t6));            \
  (o7) = PACK16MSB(HROUND(s7), HROUND(t7));            \
 } while (0)

/*
 * packrtol(o0..o7) - combine the results of two transform passes into eight
 * packed words, t-value first (operand order opposite to packltor).
 *
 * For k = 0..7: oK = PACK16MSB(HROUND(tK), HROUND(sK)).
 *
 * s0..s7 and t0..t7 are read by name from the invoking scope; o0..o7 are
 * lvalues that receive the packed words.
 *
 * NOTE(review): PACK16MSB is not defined in this file; presumably it joins
 * the upper halfwords of its two operands into one word - confirm.
 *
 * Wrapped in do { } while (0) so that an invocation is a single statement;
 * terminate it with a semicolon.
 */
#define packrtol(o0, o1, o2, o3, o4, o5, o6, o7)      \
 do {                                                  \
  (o0) = PACK16MSB(HROUND(t0), HROUND(s0));            \
  (o1) = PACK16MSB(HROUND(t1), HROUND(s1));            \
  (o2) = PACK16MSB(HROUND(t2), HROUND(s2));            \
  (o3) = PACK16MSB(HROUND(t3), HROUND(s3));            \
  (o4) = PACK16MSB(HROUND(t4), HROUND(s4));            \
  (o5) = PACK16MSB(HROUND(t5), HROUND(s5));            \
  (o6) = PACK16MSB(HROUND(t6), HROUND(s6));            \
  (o7) = PACK16MSB(HROUND(t7), HROUND(s7));            \
 } while (0)

/*
 * vertical_dct(in0, in1, in2, in3, o0..o7) - the same 8-point transform pass
 * as horiz_dct, but taking its four packed input words as arguments instead
 * of loading them from memory.
 *
 * in0..in3  packed input words (used as values only).
 * o0..o7    lvalues that receive the eight results.
 *
 * The macro assigns the caller's variables tmp101, tmp132, tmp176, tmp145,
 * tmp201 and tmp232; all of them must be in scope. The inputs are macro
 * parameters, so no variables named tmp0..tmp3 are touched.
 *
 * NOTE(review): DSPIDUALADD, DSPIDUALSUB and IFIR16 are not defined in this
 * file; presumably TriMedia custom ops (dual 16-bit add / subtract and a dual
 * 16-bit multiply-sum) - confirm against the TriMedia documentation.
 *
 * Wrapped in do { } while (0) so that an invocation is a single statement;
 * terminate it with a semicolon.
 */
#define vertical_dct(in0, in1, in2, in3, o0, o1, o2, o3, o4, o5, o6, o7)  \
 do {                                                        \
  tmp101 = DSPIDUALADD((in0), (in3));                        \
  tmp132 = DSPIDUALADD((in1), (in2));                        \
  tmp176 = DSPIDUALSUB((in0), (in3));                        \
  tmp145 = DSPIDUALSUB((in1), (in2));                        \
                                                             \
  tmp201 = DSPIDUALADD(tmp101, tmp132);                      \
  tmp232 = DSPIDUALSUB(tmp101, tmp132);                      \
                                                             \
  (o0) = IFIR16(tmp201, C8);                                 \
  (o4) = IFIR16(tmp201, C9);                                 \
  (o2) = IFIR16(tmp232, C10);                                \
  (o6) = IFIR16(tmp232, C11);                                \
                                                             \
  (o7) = IFIR16(tmp145, C0) + IFIR16(tmp176, C1);            \
  (o3) = IFIR16(tmp145, C2) + IFIR16(tmp176, C3);            \
  (o5) = IFIR16(tmp145, C4) + IFIR16(tmp176, C5);            \
  (o1) = IFIR16(tmp145, C6) + IFIR16(tmp176, C7);            \
 } while (0)

/*
 * vertical_flush(res) - round, pack and store the results of two
 * vertical_dct passes.
 *
 * For k = 0..7: res[4 * k] = PACK16_MSB(VROUND(sK), VROUND(tK)).
 *
 * res     expression indexable up to res[28]; eight words are written, one
 *         every fourth element.
 * s0..s7 and t0..t7 are read by name from the invoking scope.
 *
 * Wrapped in do { } while (0) so that an invocation is a single statement;
 * terminate it with a semicolon.
 */
#define vertical_flush(res)                                 \
 do {                                                       \
  (res)[0]  = PACK16_MSB(VROUND(s0), VROUND(t0));           \
  (res)[4]  = PACK16_MSB(VROUND(s1), VROUND(t1));           \
  (res)[8]  = PACK16_MSB(VROUND(s2), VROUND(t2));           \
  (res)[12] = PACK16_MSB(VROUND(s3), VROUND(t3));           \
  (res)[16] = PACK16_MSB(VROUND(s4), VROUND(t4));           \
  (res)[20] = PACK16_MSB(VROUND(s5), VROUND(t5));           \
  (res)[24] = PACK16_MSB(VROUND(s6), VROUND(t6));           \
  (res)[28] = PACK16_MSB(VROUND(s7), VROUND(t7));           \
 } while (0)

/*
 * fdct - in-place forward DCT over the coefficient block at `block`.
 *
 * block  buffer reinterpreted as an array of long; words tab[0]..tab[31]
 *        are read and then overwritten with the result.
 *
 * Stage 1 runs horiz_dct on eight groups of four words (tab[0], tab[4], ...,
 * tab[28]), two groups at a time, and packs each pair of result sets into
 * eight words with packltor or packrtol (alternating).
 * Stage 2 runs vertical_dct on those packed words, two passes at a time, and
 * vertical_flush stores each pair of result sets back at tab[j], tab[j + 4],
 * ..., tab[j + 28] for j = 0..3. Every load happens before the first store,
 * so operating in place is safe.
 *
 * NOTE(review): presumes long is 32 bits wide (two shorts per word, one
 * 8-sample row per four words) and that block is aligned for long access -
 * confirm for the target compiler.
 */
void fdct(short * restrict block)
{
#pragma TCS_no_caller_save

 /* Scratch words assigned by name inside horiz_dct / vertical_dct. */
 int tmp0, tmp1, tmp2, tmp3;
 int tmp101, tmp132, tmp176, tmp145;
 int tmp201, tmp232;

 /* Result sets of the current pair of passes; the pack and flush macros
    read them by name. */
 int s0, s1, s2, s3, s4, s5, s6, s7;
 int t0, t1, t2, t3, t4, t5, t6, t7;

 /* hK_rAB: horiz_dct output oK of rows A and B, in the operand order the
    pack macro hands them to PACK16MSB. */
 int h0_r01, h1_r01, h2_r01, h3_r01, h4_r01, h5_r01, h6_r01, h7_r01;
 int h0_r32, h1_r32, h2_r32, h3_r32, h4_r32, h5_r32, h6_r32, h7_r32;
 int h0_r45, h1_r45, h2_r45, h3_r45, h4_r45, h5_r45, h6_r45, h7_r45;
 int h0_r76, h1_r76, h2_r76, h3_r76, h4_r76, h5_r76, h6_r76, h7_r76;

 long *restrict tab = (long *)block;

 /* Stage 1, rows 0 and 1. */
 horiz_dct(tab, s0, s1, s2, s3, s4, s5, s6, s7);
 horiz_dct(tab + 4, t0, t1, t2, t3, t4, t5, t6, t7);
 packltor(h0_r01, h1_r01, h2_r01, h3_r01, h4_r01, h5_r01, h6_r01, h7_r01);

 /* Stage 1, rows 2 and 3 (packed in the opposite operand order). */
 horiz_dct(tab + 8, s0, s1, s2, s3, s4, s5, s6, s7);
 horiz_dct(tab + 12, t0, t1, t2, t3, t4, t5, t6, t7);
 packrtol(h0_r32, h1_r32, h2_r32, h3_r32, h4_r32, h5_r32, h6_r32, h7_r32);

 /* Stage 1, rows 4 and 5. */
 horiz_dct(tab + 16, s0, s1, s2, s3, s4, s5, s6, s7);
 horiz_dct(tab + 20, t0, t1, t2, t3, t4, t5, t6, t7);
 packltor(h0_r45, h1_r45, h2_r45, h3_r45, h4_r45, h5_r45, h6_r45, h7_r45);

 /* Stage 1, rows 6 and 7 (packed in the opposite operand order). */
 horiz_dct(tab + 24, s0, s1, s2, s3, s4, s5, s6, s7);
 horiz_dct(tab + 28, t0, t1, t2, t3, t4, t5, t6, t7);
 packrtol(h0_r76, h1_r76, h2_r76, h3_r76, h4_r76, h5_r76, h6_r76, h7_r76);

 /* Stage 2, horizontal outputs 0 and 1 -> word 0 of every row. */
 vertical_dct(h0_r01, h0_r32, h0_r45, h0_r76, s0, s1, s2, s3, s4, s5, s6, s7);
 vertical_dct(h1_r01, h1_r32, h1_r45, h1_r76, t0, t1, t2, t3, t4, t5, t6, t7);
 vertical_flush(tab);

 /* Stage 2, horizontal outputs 2 and 3 -> word 1 of every row. */
 vertical_dct(h2_r01, h2_r32, h2_r45, h2_r76, s0, s1, s2, s3, s4, s5, s6, s7);
 vertical_dct(h3_r01, h3_r32, h3_r45, h3_r76, t0, t1, t2, t3, t4, t5, t6, t7);
 vertical_flush(tab + 1);

 /* Stage 2, horizontal outputs 4 and 5 -> word 2 of every row. */
 vertical_dct(h4_r01, h4_r32, h4_r45, h4_r76, s0, s1, s2, s3, s4, s5, s6, s7);
 vertical_dct(h5_r01, h5_r32, h5_r45, h5_r76, t0, t1, t2, t3, t4, t5, t6, t7);
 vertical_flush(tab + 2);

 /* Stage 2, horizontal outputs 6 and 7 -> word 3 of every row. */
 vertical_dct(h6_r01, h6_r32, h6_r45, h6_r76, s0, s1, s2, s3, s4, s5, s6, s7);
 vertical_dct(h7_r01, h7_r32, h7_r45, h7_r76, t0, t1, t2, t3, t4, t5, t6, t7);
 vertical_flush(tab + 3);
}