MCODEC - 感恩的心,感谢生命中的每一个人

(本站所有文章都是原创,转载请注明出处)
  首页  :: 联系 :: 订阅 订阅  :: 管理

tm1500 idct

Posted on 2009-10-23 19:18  mcodec  阅读(379)  评论(0)  编辑  收藏  举报

 

应用于 Philips TriMedia 1500 系列 SoC 芯片上的 8x8 快速离散余弦反变换 (IDCT)


/*
 * Coefficient words passed as the second operand of IFIR16 for the
 * r0/r2 (z0/z5) terms of both passes.  Each word is listed with its two
 * 16-bit halves read as signed values.  Numerically, 12540, 30274 and
 * 23170 match 32768*sin(pi/8), 32768*cos(pi/8) and 32768*cos(pi/4)
 * rounded to integers (presumably Q15 fixed point -- confirm).
 */
#define MASK1 0x89BE30FC    /* -30274,  12540 */
#define MASK2 0x30FC7642    /*  12540,  30274 */
#define MASK3 0x5A825A82    /*  23170,  23170 */
#define MASK4 0xA57E5A82    /* -23170,  23170 */

/*
 * Coefficient words passed to IFIR16 for the r1/r3 (z2/z3) terms of both
 * passes.  Each word is listed with its two 16-bit halves read as signed
 * values.  Numerically, 6393, 18205, 27246 and 32138 match
 * 32768*sin(pi/16), 32768*sin(3*pi/16), 32768*cos(3*pi/16) and
 * 32768*cos(pi/16) rounded to integers (presumably Q15 -- confirm).
 */
#define D0 0x6A6E18F9       /*  27246,   6393 */
#define D1 0x8276B8E3       /* -32138, -18205 */
#define D2 0x18F9471D       /*   6393,  18205 */
#define D3 0x6A6E8276       /*  27246, -32138 */
#define D4 0x82766A6E       /* -32138,  27246 */
#define D5 0xB8E3E707       /* -18205,  -6393 */
#define D6 0x471D7D8A       /*  18205,  32138 */
#define D7 0x18F96A6E       /*   6393,  27246 */

/*
 * Second table of packed 16-bit coefficient pairs.  None of C0..C11 is
 * referenced in the visible source.  C8, C9, C10 and C11 carry the same
 * values as MASK3, MASK4, MASK2 and MASK1 respectively.
 * NOTE(review): possibly left over from another variant of the kernel --
 * confirm before removing.
 */
#define C0  0x6A6E8276      /*  27246, -32138 */
#define C1  0xB8E318F9      /* -18205,   6393 */
#define C2  0x8276B8E3      /* -32138, -18205 */
#define C3  0xE7076A6E      /*  -6393,  27246 */
#define C4  0x18F96A6E      /*   6393,  27246 */
#define C5  0x8276471D      /* -32138,  18205 */
#define C6  0x471D18F9      /*  18205,   6393 */
#define C7  0x6A6E7D8A      /*  27246,  32138 */
#define C8  0x5A825A82      /*  23170,  23170 */
#define C9  0xA57E5A82      /* -23170,  23170 */
#define C10 0x30FC7642      /*  12540,  30274 */
#define C11 0x89BE30FC      /* -30274,  12540 */

/*
 * Fixed-point scaling configuration for the two IDCT passes.
 *
 * PASS1_BITS and DWNSCL are sums of other macros.  They are wrapped in
 * parentheses so that they keep their value when used inside a larger
 * expression (for example `DWNSCL * 2` or `x >> PASS1_BITS + 1`).
 */
#define SCALED_COEFFS     1
#define EIGHT_BIT_SAMPLES 0
#define PASS1_BITS        (1 + (EIGHT_BIT_SAMPLES))
#define CONST_BITS2       14

/* Bias added to the tmp20/tmp21 (and temp20/temp21) terms in horiz_idct. */
#define TMP_20_21_H_BIAS  0x8000

/*
 * NOTE(review): this tests LITTLE_ENDIAN, while the PACK16_* selection in
 * this file tests __LITTLE_ENDIAN__, and LITTLE_ENDIAN is not defined
 * anywhere in the visible source.  The only visible use is as the `comp`
 * argument of horiz_idct, which that macro never expands.  Left unchanged;
 * confirm the intended endianness symbol before relying on it.
 */
#define H_ROUNDING  (32 << (16*!LITTLE_ENDIAN))

/* Left shift applied to every input coefficient in horiz_idct. */
#define UPSCL 3

/*
 * Right shift applied to the packed results of the second pass.
 * Evaluates to 3 with the settings above (14 + 1 + 1 + 3 - 16).
 */
#define DWNSCL  (CONST_BITS2 + PASS1_BITS + SCALED_COEFFS + 3 - 16)


/*
 * Endian-aware wrappers around the PACK16MSB / PACK16LSB operations.
 * When __LITTLE_ENDIAN__ is defined the two operands are swapped before
 * being handed to the underlying operation; otherwise they are forwarded
 * in the order given.
 */
#ifndef __LITTLE_ENDIAN__
#define PACK16_MSB(first, second)   PACK16MSB(first, second)
#define PACK16_LSB(first, second)   PACK16LSB(first, second)
#else
#define PACK16_MSB(first, second)   PACK16MSB(second, first)
#define PACK16_LSB(first, second)   PACK16LSB(second, first)
#endif

/*
 * Declarations of two operations using the toolchain's `custom_op`
 * keyword (a compiler extension, not ISO C).  Both take a long and an
 * unsigned long and return a long.
 * NOTE(review): presumably `dualasr` shifts the two 16-bit halves of `a`
 * right arithmetically by `b`, and `dualiclipi` clips each half to a range
 * derived from `b` -- confirm against the TriMedia compiler documentation.
 */
custom_op long   dualasr(long a, unsigned long b);
custom_op long   dualiclipi(long a, unsigned long b);
/* Lower-case alias for PACK16_MSB, used by combinePred. */
#define pack16_msb(a, b) PACK16_MSB(a, b)


/*
 * combinePred(v0, v1, v2, v3, out_a, out_b)
 *
 * Packs (v0, v1) into one word and (v2, v3) into another with pack16_msb,
 * applies dualasr by DWNSCL to each word, then dualiclipi with `clip`,
 * and assigns the results to out_a and out_b.
 *
 * Expands to two complete statements, each ending in its own `;`, so a
 * call site needs no trailing semicolon.  `clip` is not a parameter: it
 * must be a variable visible where the macro is expanded.
 */
#define combinePred(v0, v1, v2, v3, out_a, out_b)                         \
    out_a = dualiclipi(dualasr(pack16_msb(v0, v1), DWNSCL), clip);        \
    out_b = dualiclipi(dualasr(pack16_msb(v2, v3), DWNSCL), clip);

/*
 * horiz_idct(data, offset, r0..r7, comp)
 *
 * First 1-D pass.  Reads sixteen elements of `data`:
 *   group A: data[offset + 8*k]     for k = 0..7
 *   group B: data[offset + 4 + 8*k] for k = 0..7
 * Each element is shifted left by UPSCL, and element k is packed together
 * with element k+4 of the same group using PACK16_LSB.  Both groups go
 * through the same IFIR16 / butterfly sequence (group A in the tmp*
 * variables, group B in the temp* variables).  Each output rN is then
 * PACK16_MSB(group A result N, group B result N).
 *
 * Parameters:
 *   data    indexable source of coefficients (read only here)
 *   offset  index of the first element of group A
 *   r0..r7  lvalues receiving the eight packed results
 *   comp    accepted for call-site compatibility but never expanded in
 *           the body, so whatever is passed has no effect
 *
 * The macro assigns to z0, z2, z3, z5, zz0, zz2, zz3, zz5, tmp0..tmp3,
 * tmp10..tmp13, tmp20..tmp23, temp0..temp3, temp10..temp13 and
 * temp20..temp23, all of which must be declared by the caller.
 *
 * `data` and `offset` are parenthesized wherever they are used so that
 * expression arguments (e.g. `p + 8`, `i | 1`) index as intended.
 *
 * NOTE(review): `x << UPSCL` on a negative coefficient is undefined in
 * ISO C; this relies on the target compiler's shift behaviour -- confirm.
 */
#define horiz_idct(data, offset, r0, r1, r2, r3, r4, r5, r6, r7, comp)                  \
    /* Group A: elements offset + {0,8,16,24} paired with offset + {32,40,48,56}. */    \
    z0  = PACK16_LSB((data)[(offset) +  0] << UPSCL, (data)[(offset) + 32] << UPSCL);   \
    z2  = PACK16_LSB((data)[(offset) +  8] << UPSCL, (data)[(offset) + 40] << UPSCL);   \
    z5  = PACK16_LSB((data)[(offset) + 16] << UPSCL, (data)[(offset) + 48] << UPSCL);   \
    z3  = PACK16_LSB((data)[(offset) + 24] << UPSCL, (data)[(offset) + 56] << UPSCL);   \
    /* Group B: the same pattern, four elements further on. */                          \
    zz0 = PACK16_LSB((data)[(offset) +  4] << UPSCL, (data)[(offset) + 36] << UPSCL);   \
    zz2 = PACK16_LSB((data)[(offset) + 12] << UPSCL, (data)[(offset) + 44] << UPSCL);   \
    zz5 = PACK16_LSB((data)[(offset) + 20] << UPSCL, (data)[(offset) + 52] << UPSCL);   \
    zz3 = PACK16_LSB((data)[(offset) + 28] << UPSCL, (data)[(offset) + 60] << UPSCL);   \
                                                                                        \
    /* Group A: terms from z0/z5, with the bias added to tmp20/tmp21. */                \
    tmp22 = IFIR16(z5, MASK1);                                                          \
    tmp23 = IFIR16(z5, MASK2);                                                          \
    tmp20 = IFIR16(z0, MASK3) + TMP_20_21_H_BIAS;                                       \
    tmp21 = IFIR16(z0, MASK4) + TMP_20_21_H_BIAS;                                       \
                                                                                        \
    tmp10 = tmp20 + tmp23;                                                              \
    tmp13 = tmp20 - tmp23;                                                              \
    tmp11 = tmp21 + tmp22;                                                              \
    tmp12 = tmp21 - tmp22;                                                              \
                                                                                        \
    /* Group A: terms from z2/z3. */                                                    \
    tmp0 = IFIR16(z2, D0) + IFIR16(z3, D1);                                             \
    tmp1 = IFIR16(z2, D2) + IFIR16(z3, D3);                                             \
    tmp2 = IFIR16(z2, D4) + IFIR16(z3, D5);                                             \
    tmp3 = IFIR16(z2, D6) + IFIR16(z3, D7);                                             \
                                                                                        \
    /* Group B: identical sequence on zz0/zz5 and zz2/zz3. */                           \
    temp22 = IFIR16(zz5, MASK1);                                                        \
    temp23 = IFIR16(zz5, MASK2);                                                        \
    temp20 = IFIR16(zz0, MASK3) + TMP_20_21_H_BIAS;                                     \
    temp21 = IFIR16(zz0, MASK4) + TMP_20_21_H_BIAS;                                     \
                                                                                        \
    temp10 = temp20 + temp23;                                                           \
    temp13 = temp20 - temp23;                                                           \
    temp11 = temp21 + temp22;                                                           \
    temp12 = temp21 - temp22;                                                           \
                                                                                        \
    temp0 = IFIR16(zz2, D0) + IFIR16(zz3, D1);                                          \
    temp1 = IFIR16(zz2, D2) + IFIR16(zz3, D3);                                          \
    temp2 = IFIR16(zz2, D4) + IFIR16(zz3, D5);                                          \
    temp3 = IFIR16(zz2, D6) + IFIR16(zz3, D7);                                          \
                                                                                        \
    /* Final butterflies; each output packs the group A and group B result. */          \
    r0 = PACK16_MSB(tmp10 + tmp3, temp10 + temp3);                                      \
    r1 = PACK16_MSB(tmp11 + tmp2, temp11 + temp2);                                      \
    r2 = PACK16_MSB(tmp12 + tmp1, temp12 + temp1);                                      \
    r3 = PACK16_MSB(tmp13 + tmp0, temp13 + temp0);                                      \
    r4 = PACK16_MSB(tmp13 - tmp0, temp13 - temp0);                                      \
    r5 = PACK16_MSB(tmp12 - tmp1, temp12 - temp1);                                      \
    r6 = PACK16_MSB(tmp11 - tmp2, temp11 - temp2);                                      \
    r7 = PACK16_MSB(tmp10 - tmp3, temp10 - temp3);

/*
 * vertical_idct(r0, r1, r2, r3, dest1, dest2, dest3, dest4)
 *
 * Second 1-D pass.  Takes four packed words r0..r3 and runs the same
 * IFIR16 / butterfly sequence as the first pass, without the
 * TMP_20_21_H_BIAS term.  `rd` is added to each of the eight results,
 * which are then handed pairwise to combinePred and stored in
 * dest1..dest4 (two results per destination).
 *
 * Uses tmp0..tmp3, tmp10..tmp13, tmp20..tmp23 and `rd` from the scope of
 * the call site; combinePred additionally uses `clip` from that scope.
 * The expansion already ends in `;`, so a trailing semicolon at the call
 * site yields a harmless empty statement.
 */
#define vertical_idct(r0, r1, r2, r3, dest1, dest2, dest3, dest4)        \
 /* Terms from r0 and r2. */ \
 tmp22 = IFIR16(r2, MASK1);       \
 tmp23 = IFIR16(r2, MASK2);       \
 tmp20 = IFIR16(r0, MASK3);       \
 tmp21 = IFIR16(r0, MASK4);       \
 \
 tmp10 = tmp20 + tmp23;        \
 tmp13 = tmp20 - tmp23;        \
 tmp11 = tmp21 + tmp22;        \
 tmp12 = tmp21 - tmp22;        \
 /* Terms from r1 and r3. */ \
 tmp0 = IFIR16(r1, D0) + IFIR16(r3, D1);     \
 tmp1 = IFIR16(r1, D2) + IFIR16(r3, D3);     \
 tmp2 = IFIR16(r1, D4) + IFIR16(r3, D5);     \
 tmp3 = IFIR16(r1, D6) + IFIR16(r3, D7);     \
 /* Sums go to dest1/dest2, differences to dest3/dest4; rd is added to each. */ \
 combinePred(tmp10 + tmp3 + rd, tmp11 + tmp2 + rd, tmp12 + tmp1 + rd,  \
     tmp13 + tmp0 + rd, dest1, dest2)    \
 combinePred(tmp13 - tmp0 + rd, tmp12 - tmp1 + rd, tmp11 - tmp2 + rd,   \
     tmp10 - tmp3 + rd, dest3, dest4 ) ;

/*
 * idct - two-pass 8x8 inverse DCT, performed in place.
 *
 * datain: buffer of 64 shorts.  The first pass (four horiz_idct
 *         expansions) reads elements 0..63; the second pass (eight
 *         vertical_idct expansions) writes 32 longs, dataout[0..31],
 *         over the same memory.
 *
 * NOTE(review): the in-place store assumes 32 longs occupy exactly the
 * 64 input shorts, i.e. sizeof(long) == 2 * sizeof(short) -- presumably
 * true on the intended target; confirm before building elsewhere.
 * NOTE(review): storing through a long* into memory also accessed as
 * short is a strict-aliasing violation in ISO C; kept because the packed
 * two-per-word store is the output format of this routine.
 */
void idct(short * restrict datain)
{
    /*
     * First-pass results.  For horiz_idct call c (0..3), output k (0..3)
     * lands in r<c><k> and output k+4 lands in r<c+4><k>.
     */
    int r00, r01, r02, r03;
    int r10, r11, r12, r13;
    int r20, r21, r22, r23;
    int r30, r31, r32, r33;
    int r40, r41, r42, r43;
    int r50, r51, r52, r53;
    int r60, r61, r62, r63;
    int r70, r71, r72, r73;

    /* Scratch variables assigned by name inside horiz_idct / vertical_idct. */
    int tmp0, tmp1, tmp2, tmp3;
    int tmp10, tmp11, tmp12, tmp13;
    int tmp20, tmp21, tmp22, tmp23;
    int temp0, temp1, temp2, temp3;
    int temp10, temp11, temp12, temp13;
    int temp20, temp21, temp22, temp23;

    int z0, z2, z3, z5;
    int zz0, zz2, zz3, zz5;

    /*
     * Output view of the same buffer.  Deliberately NOT restrict-qualified:
     * it is assigned from the restrict parameter `datain` in the same
     * block and both pointers access the same modified storage, which is
     * undefined behaviour for two restrict pointers (C11 6.7.3.1, Example 4).
     * As a plain pointer it is simply "based on" datain.
     */
    long *dataout;
    int rd;  /* term added to every second-pass result before the DWNSCL shift */

    /* Second operand of dualiclipi in combinePred. */
    const unsigned int clip = 2047;

#pragma TCS_no_caller_save

    rd = 1 << (DWNSCL + 15);
    dataout = (long *)datain;

    /*
     * First pass: call c handles elements c + 8*k and c + 4 + 8*k.
     * The last argument is not expanded by horiz_idct, so H_ROUNDING / 0
     * have no effect here.
     */
    horiz_idct(datain, 0, r00, r01, r02, r03, r40, r41, r42, r43, H_ROUNDING);
    horiz_idct(datain, 1, r10, r11, r12, r13, r50, r51, r52, r53, 0);
    horiz_idct(datain, 2, r20, r21, r22, r23, r60, r61, r62, r63, 0);
    horiz_idct(datain, 3, r30, r31, r32, r33, r70, r71, r72, r73, 0);

    /*
     * Second pass: each expansion combines the same output slot of the
     * four first-pass calls and stores four packed words.
     */
    vertical_idct(r00, r10, r20, r30, dataout[0], dataout[1], dataout[2], dataout[3]);
    vertical_idct(r01, r11, r21, r31, dataout[4], dataout[5], dataout[6], dataout[7]);
    vertical_idct(r02, r12, r22, r32, dataout[8], dataout[9], dataout[10], dataout[11]);
    vertical_idct(r03, r13, r23, r33, dataout[12], dataout[13], dataout[14], dataout[15]);
    vertical_idct(r40, r50, r60, r70, dataout[16], dataout[17], dataout[18], dataout[19]);
    vertical_idct(r41, r51, r61, r71, dataout[20], dataout[21], dataout[22], dataout[23]);
    vertical_idct(r42, r52, r62, r72, dataout[24], dataout[25], dataout[26], dataout[27]);
    vertical_idct(r43, r53, r63, r73, dataout[28], dataout[29], dataout[30], dataout[31]);
}