/* Fast H.263-style quantization and dequantization routines used by the MPEG-4 video encoder/decoder on Philips TriMedia 1500-series SoCs. */
/*
 * Endianness helpers for code that keeps two 16-bit values in one 32-bit
 * word.  Each name maps to a different primitive depending on whether
 * __LITTLE_ENDIAN__ is defined.
 *
 * NOTE(review): PACK16MSB, PACK16LSB, SEX16, ASRI and ROLI are not defined in
 * this file; presumably they are TriMedia custom operations supplied by the
 * toolchain -- confirm against the compiler headers.
 *
 * - PACK16_MSB / PACK16_LSB: swap meaning between the two branches.
 * - PACK2_16LSB(a, b): PACK16LSB with the argument order reversed on the
 *   #else branch.  In this file it is always called with the HIGH16BIT
 *   value first and the LOW16BIT value second.
 * - LOW16BIT / HIGH16BIT: extract one half of a word via SEX16 or
 *   ASRI(16, .); the two definitions swap between the branches, so the names
 *   appear to follow memory order rather than bit position -- TODO confirm.
 * - READ_NORM / READ_REV: either the word unchanged or ROLI(16, .).
 *
 * PACK16_MSB, PACK16_LSB, READ_NORM and READ_REV are not referenced by the
 * code visible in this file.
 */
#if defined(__LITTLE_ENDIAN__)
#define PACK16_MSB(a, b) PACK16MSB(a, b)
#define PACK16_LSB(a, b) PACK16LSB(a, b)
#define PACK2_16LSB(a, b) PACK16LSB(a, b)
#define LOW16BIT(a) SEX16(a)
#define HIGH16BIT(a) ASRI(16, (a))
#define READ_NORM(a) ROLI(16, (a))
#define READ_REV(a) (a)
#else
/* Other byte order: same names, roles of the two halves exchanged. */
#define PACK16_MSB(a, b) PACK16LSB(a, b)
#define PACK16_LSB(a, b) PACK16MSB(a, b)
#define PACK2_16LSB(a, b) PACK16LSB(b, a)
#define LOW16BIT(a) ASRI(16, (a))
#define HIGH16BIT(a) SEX16(a)
#define READ_NORM(a) (a)
#define READ_REV(a) ROLI(16, (a))
#endif
/* Number of fractional bits used by the reciprocal table below. */
#define SCALEBITS 16

/* Truncated value of 2^SCALEBITS / X, bumped up by one. */
#define FIX(X) (((1 << SCALEBITS) / (X)) + 1)

/*
 * Truncated value of 2^SCALEBITS / X without the bump.  The table uses it
 * only where X is a power of two, i.e. where the division is exact.
 */
#define FIX0(X) ((1 << SCALEBITS) / (X))

/*
 * multipliers[i] holds a 16-bit fixed-point approximation of 1 / (2 * i),
 * scaled by 2^SCALEBITS.  Entry 0 is a placeholder (0).  Entry 1 is
 * FIX(2) - 2 = 32767 -- presumably lowered so that it stays within the
 * positive signed 16-bit range expected by the multiply; TODO confirm.
 */
static const unsigned short multipliers[48] = {
    /*  0.. 3 */ 0,        (FIX(2) - 2), FIX0(4),  FIX(6),
    /*  4.. 7 */ FIX0(8),  FIX(10),      FIX(12),  FIX(14),
    /*  8..11 */ FIX0(16), FIX(18),      FIX(20),  FIX(22),
    /* 12..15 */ FIX(24),  FIX(26),      FIX(28),  FIX(30),
    /* 16..19 */ FIX0(32), FIX(34),      FIX(36),  FIX(38),
    /* 20..23 */ FIX(40),  FIX(42),      FIX(44),  FIX(46),
    /* 24..27 */ FIX(48),  FIX(50),      FIX(52),  FIX(54),
    /* 28..31 */ FIX(56),  FIX(58),      FIX(60),  FIX(62),
    /* 32..35 */ FIX0(64), FIX(66),      FIX(68),  FIX(70),
    /* 36..39 */ FIX(72),  FIX(74),      FIX(76),  FIX(78),
    /* 40..43 */ FIX(80),  FIX(82),      FIX(84),  FIX(86),
    /* 44..47 */ FIX(88),  FIX(90),      FIX(92),  FIX(94),
};

/* Not referenced by the code visible in this file. */
#define ZERO 0
/*
 * H263_QUANT_INTRA_DC: quantize the first four words of an intra block,
 * treating the first 16-bit value of word 0 differently from the rest.
 *
 * Expands to bare statements (no enclosing braces) and uses locals of the
 * calling function by name: dataptr, coeffptr, tmp00, tmp00_0, tmp00_1,
 * tmp1..tmp3, sign0..sign3, rightshift, dcscalar, mult and dc_mult.
 *
 * Word 0:
 *   1. sign0 is derived from the unmodified input with DUALASR.
 *   2. The LOW16BIT half is doubled (ASLI by 1) and combined with dcscalar
 *      through ADDSUB; the HIGH16BIT half is left untouched.  Both halves
 *      are then re-packed with PACK2_16LSB.
 *   3. DSPIDUALABS, then DUALIMULM by dc_mult (a different factor per half).
 *   4. The sign is put back with (x ^ sign) followed by DSPIDUALSUB of sign.
 * Words 1..3: steps 1, 3 and 4 only, with mult as the factor.
 * Finally dataptr and coeffptr each advance by four words.
 *
 * NOTE(review): ADDSUB and the DUAL-/DSPI- operations are defined outside
 * this view.  The sign step presumably relies on DUALASR by 15 yielding 0 or
 * all-ones per 16-bit half -- confirm against the toolchain documentation.
 */
#define H263_QUANT_INTRA_DC() \
tmp00 = (dataptr)[0]; \
tmp1 = (dataptr)[1]; \
sign0 = DUALASR(tmp00, rightshift); \
sign1 = DUALASR(tmp1, rightshift); \
tmp00_0 = LOW16BIT(tmp00); \
tmp00_1 = HIGH16BIT(tmp00); \
tmp00_0 = ASLI(1, tmp00_0); \
tmp00_0 = ADDSUB(tmp00_0, dcscalar); \
tmp00 = PACK2_16LSB(tmp00_1, tmp00_0); \
tmp00 = DSPIDUALABS(tmp00); \
tmp1 = DSPIDUALABS(tmp1); \
tmp00 = DUALIMULM(tmp00, dc_mult); \
tmp1 = DUALIMULM(tmp1, mult); \
tmp00 = sign0 ^ tmp00; \
tmp1 = sign1 ^ tmp1; \
tmp00 = DSPIDUALSUB( tmp00, sign0); \
tmp1 = DSPIDUALSUB( tmp1, sign1); \
(coeffptr)[0] = tmp00; \
(coeffptr)[1] = tmp1; \
tmp2 = (dataptr)[2]; \
tmp3 = (dataptr)[3]; \
sign2 = DUALASR(tmp2, rightshift); \
sign3 = DUALASR(tmp3, rightshift); \
tmp2 = DSPIDUALABS(tmp2); \
tmp3 = DSPIDUALABS(tmp3); \
tmp2 = DUALIMULM(tmp2, mult); \
tmp3 = DUALIMULM(tmp3, mult); \
tmp2 = sign2 ^ tmp2; \
tmp3 = sign3 ^ tmp3; \
tmp2 = DSPIDUALSUB( tmp2, sign2); \
tmp3 = DSPIDUALSUB( tmp3, sign3); \
(coeffptr)[2] = tmp2; \
(coeffptr)[3] = tmp3; \
dataptr += 4; \
coeffptr += 4
/*
 * H263_QUANT_INTRA_OTHER: quantize four words of an intra block with the
 * same factor for every value.
 *
 * Expands to bare statements (no enclosing braces) and uses locals of the
 * calling function by name: dataptr, coeffptr, tmp0..tmp3, sign0..sign3,
 * rightshift and mult.
 *
 * For each word: take the sign with DUALASR, take DSPIDUALABS, multiply by
 * mult with DUALIMULM, then put the sign back with (x ^ sign) followed by
 * DSPIDUALSUB of sign, and store to coeffptr.  The words are handled in two
 * pairs (0,1 then 2,3).  Finally dataptr and coeffptr each advance by four
 * words.
 *
 * NOTE(review): the DUAL-/DSPI- operations are defined outside this view;
 * presumably each acts independently on the two 16-bit halves of a word.
 */
#define H263_QUANT_INTRA_OTHER() \
tmp0 = (dataptr)[0]; \
tmp1 = (dataptr)[1]; \
sign0 = DUALASR(tmp0, rightshift); \
sign1 = DUALASR(tmp1, rightshift); \
tmp0 = DSPIDUALABS(tmp0); \
tmp1 = DSPIDUALABS(tmp1); \
tmp0 = DUALIMULM(tmp0, mult); \
tmp1 = DUALIMULM(tmp1, mult); \
tmp0 = sign0 ^ tmp0; \
tmp1 = sign1 ^ tmp1; \
tmp0 = DSPIDUALSUB( tmp0, sign0); \
tmp1 = DSPIDUALSUB( tmp1, sign1); \
(coeffptr)[0] = tmp0; \
(coeffptr)[1] = tmp1; \
tmp2 = (dataptr)[2]; \
tmp3 = (dataptr)[3]; \
sign2 = DUALASR(tmp2, rightshift); \
sign3 = DUALASR(tmp3, rightshift); \
tmp2 = DSPIDUALABS(tmp2); \
tmp3 = DSPIDUALABS(tmp3); \
tmp2 = DUALIMULM(tmp2, mult); \
tmp3 = DUALIMULM(tmp3, mult); \
tmp2 = sign2 ^ tmp2; \
tmp3 = sign3 ^ tmp3; \
tmp2 = DSPIDUALSUB( tmp2, sign2); \
tmp3 = DSPIDUALSUB( tmp3, sign3); \
(coeffptr)[2] = tmp2; \
(coeffptr)[3] = tmp3; \
dataptr += 4; \
coeffptr += 4
/*
 * Quantize one intra block.
 *
 * coeff    - destination buffer; written through a long pointer.
 * data     - source buffer; read through a long pointer.
 * quant    - index into multipliers[] used for every value except the first.
 * dcscalar - index into multipliers[] used for the first value; also the
 *            ADDSUB operand inside H263_QUANT_INTRA_DC.
 *
 * Runs one H263_QUANT_INTRA_DC row followed by seven H263_QUANT_INTRA_OTHER
 * rows, i.e. 32 long words are read and written.  Always returns 0.
 *
 * NOTE(review): quant and dcscalar are not range-checked before indexing the
 * 48-entry multipliers[] table.  Both buffers are accessed as long, so they
 * presumably must be suitably aligned -- confirm with the callers.
 */
unsigned int quant_h263_intra(short * coeff, const short * data, const unsigned int quant, const unsigned int dcscalar)
{
#pragma TCS_no_caller_save
    /* The row macros refer to every local below by name; do not rename. */
    int tmp00, tmp00_0, tmp00_1;
    int tmp0, tmp1, tmp2, tmp3;
    int sign0, sign1, sign2, sign3;
    const unsigned int rightshift = 15;
    long *restrict dataptr = (long *)data;
    long *restrict coeffptr = (long *)coeff;
    /* Same factor in both halves for ordinary words. */
    unsigned int mult = PACK16LSB(multipliers[quant], multipliers[quant]);
    /* Word 0 pairs the quant factor with the dcscalar factor. */
    unsigned int dc_mult = PACK2_16LSB(multipliers[quant], multipliers[dcscalar]);

    H263_QUANT_INTRA_DC();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();
    H263_QUANT_INTRA_OTHER();

    return 0;
}
/*
 * H263_QUANT_INTER: quantize four words of an inter block and accumulate
 * the quantized magnitudes.
 *
 * Expands to bare statements (no enclosing braces) and uses locals of the
 * calling function by name: dataptr, coeffptr, tmp0..tmp3, sign0..sign3,
 * rightshift, dualquant_d_2, mult, sum, sum0 and sum1.
 *
 * For each word:
 *   1. Take the sign with DUALASR and the magnitude with DSPIDUALABS.
 *   2. Subtract dualquant_d_2 with DSPIDUALSUB, then apply DSPIDUALABS
 *      again before the multiply.
 *   3. Multiply by mult with DUALIMULM.
 *   4. Put the sign back with (x ^ sign) followed by DSPIDUALSUB of sign,
 *      and store to coeffptr.
 * The unsigned results of step 3 are added pairwise (sum0, sum1) and folded
 * into sum with DSPIDUALADD.  Finally dataptr and coeffptr each advance by
 * four words.
 *
 * NOTE(review): the second DSPIDUALABS presumably keeps a negative
 * difference from producing a non-zero quotient -- confirm.  The DUAL-/DSPI-
 * operations are defined outside this view.
 */
#define H263_QUANT_INTER() \
tmp0 = (dataptr)[0]; \
tmp1 = (dataptr)[1]; \
sign0 = DUALASR(tmp0, rightshift); \
sign1 = DUALASR(tmp1, rightshift); \
tmp0 = DSPIDUALABS(tmp0); \
tmp1 = DSPIDUALABS(tmp1); \
tmp0 = DSPIDUALSUB( tmp0, dualquant_d_2); \
tmp1 = DSPIDUALSUB( tmp1, dualquant_d_2); \
tmp0 = DSPIDUALABS(tmp0); \
tmp1 = DSPIDUALABS(tmp1); \
tmp0 = DUALIMULM(tmp0, mult); \
tmp1 = DUALIMULM(tmp1, mult); \
sum0 = DSPIDUALADD(tmp0, tmp1); \
tmp0 = sign0 ^ tmp0; \
tmp1 = sign1 ^ tmp1; \
tmp0 = DSPIDUALSUB( tmp0, sign0); \
tmp1 = DSPIDUALSUB( tmp1, sign1); \
(coeffptr)[0] = tmp0; \
(coeffptr)[1] = tmp1; \
sum = DSPIDUALADD(sum, sum0); \
tmp2 = (dataptr)[2]; \
tmp3 = (dataptr)[3]; \
sign2 = DUALASR(tmp2, rightshift); \
sign3 = DUALASR(tmp3, rightshift); \
tmp2 = DSPIDUALABS(tmp2); \
tmp3 = DSPIDUALABS(tmp3); \
tmp2 = DSPIDUALSUB(tmp2, dualquant_d_2); \
tmp3 = DSPIDUALSUB(tmp3, dualquant_d_2); \
tmp2 = DSPIDUALABS(tmp2); \
tmp3 = DSPIDUALABS(tmp3); \
tmp2 = DUALIMULM(tmp2, mult); \
tmp3 = DUALIMULM(tmp3, mult); \
sum1 = DSPIDUALADD(tmp2, tmp3); \
tmp2 = sign2 ^ tmp2; \
tmp3 = sign3 ^ tmp3; \
tmp2 = DSPIDUALSUB( tmp2, sign2); \
tmp3 = DSPIDUALSUB( tmp3, sign3); \
(coeffptr)[2] = tmp2; \
(coeffptr)[3] = tmp3; \
sum = DSPIDUALADD(sum, sum1); \
dataptr += 4; \
coeffptr += 4
/*
 * Quantize one inter block.
 *
 * coeff - destination buffer; written through a long pointer.
 * data  - source buffer; read through a long pointer.
 * quant - index into multipliers[]; quant >> 1 is subtracted from each
 *         magnitude before the multiply.
 *
 * Runs eight H263_QUANT_INTER rows, i.e. 32 long words are read and written.
 * Returns UFIR16 of the accumulated per-half sums with 0x00010001, which
 * presumably adds the two 16-bit halves into one total -- TODO confirm.
 *
 * NOTE(review): quant is not range-checked before indexing the 48-entry
 * multipliers[] table.  Both buffers are accessed as long, so they
 * presumably must be suitably aligned -- confirm with the callers.
 */
unsigned int quant_h263_inter(short * coeff, const short * data, const unsigned int quant)
{
#pragma TCS_no_caller_save
    /* The row macro refers to every local below by name; do not rename. */
    int tmp0, tmp1, tmp2, tmp3;
    int sign0, sign1, sign2, sign3;
    unsigned int sum0, sum1;
    unsigned int sum = 0;
    const unsigned int rightshift = 15;
    const unsigned int sum_count = 0x00010001;
    const unsigned int quant_d_2 = quant >> 1;
    unsigned int dualquant_d_2 = PACK16LSB(quant_d_2, quant_d_2);
    unsigned int mult = PACK16LSB(multipliers[quant], multipliers[quant]);
    long *restrict dataptr = (long *)data;
    long *restrict coeffptr = (long *)coeff;

    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();
    H263_QUANT_INTER();

    /* Collapse the two per-half accumulators into the return value. */
    sum = UFIR16(sum, sum_count);
    return sum;
}
/*
 * H263_DEQUANT_INTRA_DC: dequantize the first four words of an intra block,
 * treating the first 16-bit value of word 0 differently from the rest.
 *
 * Expands to bare statements (no enclosing braces) and uses locals of the
 * calling function by name: coeffptr, dataptr, tmp00, tmp00_0, tmp00_1,
 * tmp1..tmp3, sign1_h..sign3_l, dc_quant_m_2, dualquant_m_2 and quant_add.
 *
 * Word 0:
 *   1. DSPIDUALMUL by dc_quant_m_2 (a different factor per half).
 *   2. The HIGH16BIT half gets quant_add applied through ADDSUB; the
 *      LOW16BIT half keeps the plain product.
 *   3. Both halves are re-packed with PACK2_16LSB, passed through
 *      DUALICLIPI with 2047, and stored to dataptr.
 * Words 1..3: DSPIDUALMUL by dualquant_m_2, split into halves with
 * ASRI(16, .) and SEX16, ADDSUB of quant_add on each half, re-pack with
 * PACK16LSB, DUALICLIPI with 2047, store.
 * Finally dataptr and coeffptr each advance by four words.
 *
 * NOTE(review): ADDSUB, DSPIDUALMUL and DUALICLIPI are defined outside this
 * view.  If PACK16LSB keeps only the low 16 bits of each operand, a product
 * near the 16-bit limit plus quant_add could wrap before the clip -- confirm
 * the expected input range.
 */
#define H263_DEQUANT_INTRA_DC() \
tmp00 = (coeffptr)[0]; \
tmp1 = (coeffptr)[1]; \
tmp00 = DSPIDUALMUL( tmp00, dc_quant_m_2); \
tmp1 = DSPIDUALMUL( tmp1, dualquant_m_2); \
tmp00_1 = HIGH16BIT(tmp00); \
tmp00_0 = LOW16BIT(tmp00); \
sign1_h = ASRI(16, tmp1); \
sign1_l = SEX16(tmp1); \
tmp00_1 = ADDSUB(tmp00_1, quant_add); \
sign1_h = ADDSUB(sign1_h, quant_add); \
sign1_l = ADDSUB(sign1_l, quant_add); \
tmp00 = PACK2_16LSB(tmp00_1, tmp00_0); \
tmp1 = PACK16LSB(sign1_h, sign1_l); \
(dataptr)[0] = DUALICLIPI( tmp00, 2047); \
(dataptr)[1] = DUALICLIPI( tmp1, 2047); \
tmp2 = (coeffptr)[2]; \
tmp3 = (coeffptr)[3]; \
tmp2 = DSPIDUALMUL( tmp2, dualquant_m_2); \
tmp3 = DSPIDUALMUL( tmp3, dualquant_m_2); \
sign2_h = ASRI(16, tmp2); \
sign2_l = SEX16(tmp2); \
sign3_h = ASRI(16, tmp3); \
sign3_l = SEX16(tmp3); \
sign2_h = ADDSUB(sign2_h, quant_add); \
sign2_l = ADDSUB(sign2_l, quant_add); \
sign3_h = ADDSUB(sign3_h, quant_add); \
sign3_l = ADDSUB(sign3_l, quant_add); \
tmp2 = PACK16LSB(sign2_h, sign2_l); \
tmp3 = PACK16LSB(sign3_h, sign3_l); \
(dataptr)[2] = DUALICLIPI( tmp2, 2047); \
(dataptr)[3] = DUALICLIPI( tmp3, 2047); \
dataptr += 4; \
coeffptr += 4
/*
 * H263_DEQUANT_INTRA_OTHER: dequantize four words with the same factor and
 * offset for every value.
 *
 * Expands to bare statements (no enclosing braces) and uses locals of the
 * calling function by name: coeffptr, dataptr, tmp0..tmp3,
 * sign0_h..sign3_l, dualquant_m_2 and quant_add.
 *
 * For each word: DSPIDUALMUL by dualquant_m_2, split into halves with
 * ASRI(16, .) and SEX16, apply quant_add to each half through ADDSUB,
 * re-pack with PACK16LSB, pass through DUALICLIPI with 2047, and store to
 * dataptr.  Despite their names, the signN_h / signN_l locals hold the two
 * halves of the product, not sign masks.  Finally dataptr and coeffptr each
 * advance by four words.
 *
 * NOTE(review): ADDSUB, DSPIDUALMUL and DUALICLIPI are defined outside this
 * view.  If PACK16LSB keeps only the low 16 bits of each operand, a product
 * near the 16-bit limit plus quant_add could wrap before the clip -- confirm
 * the expected input range.
 */
#define H263_DEQUANT_INTRA_OTHER() \
tmp0 = (coeffptr)[0]; \
tmp1 = (coeffptr)[1]; \
tmp0 = DSPIDUALMUL( tmp0, dualquant_m_2); \
tmp1 = DSPIDUALMUL( tmp1, dualquant_m_2); \
sign0_h = ASRI(16, tmp0); \
sign0_l = SEX16(tmp0); \
sign1_h = ASRI(16, tmp1); \
sign1_l = SEX16(tmp1); \
sign0_h = ADDSUB(sign0_h, quant_add); \
sign0_l = ADDSUB(sign0_l, quant_add); \
sign1_h = ADDSUB(sign1_h, quant_add); \
sign1_l = ADDSUB(sign1_l, quant_add); \
tmp0 = PACK16LSB(sign0_h, sign0_l); \
tmp1 = PACK16LSB(sign1_h, sign1_l); \
(dataptr)[0] = DUALICLIPI( tmp0, 2047); \
(dataptr)[1] = DUALICLIPI( tmp1, 2047); \
tmp2 = (coeffptr)[2]; \
tmp3 = (coeffptr)[3]; \
tmp2 = DSPIDUALMUL( tmp2, dualquant_m_2); \
tmp3 = DSPIDUALMUL( tmp3, dualquant_m_2); \
sign2_h = ASRI(16, tmp2); \
sign2_l = SEX16(tmp2); \
sign3_h = ASRI(16, tmp3); \
sign3_l = SEX16(tmp3); \
sign2_h = ADDSUB(sign2_h, quant_add); \
sign2_l = ADDSUB(sign2_l, quant_add); \
sign3_h = ADDSUB(sign3_h, quant_add); \
sign3_l = ADDSUB(sign3_l, quant_add); \
tmp2 = PACK16LSB(sign2_h, sign2_l); \
tmp3 = PACK16LSB(sign3_h, sign3_l); \
(dataptr)[2] = DUALICLIPI( tmp2, 2047); \
(dataptr)[3] = DUALICLIPI( tmp3, 2047); \
dataptr += 4; \
coeffptr += 4
/*
 * Dequantize one intra block.
 *
 * data     - destination buffer; written through a long pointer.
 * coeff    - source buffer; read through a long pointer.
 * quant    - quantizer; each value except the first is multiplied by
 *            2 * quant and then offset by quant_add.
 * dcscalar - factor applied to the first value, which gets no offset.
 *
 * quant_add is quant when quant is odd and quant - 1 otherwise (selected
 * with mux, which is defined outside this view -- presumably
 * mux(c, a, b) == c ? a : b).
 *
 * Runs one H263_DEQUANT_INTRA_DC row followed by seven
 * H263_DEQUANT_INTRA_OTHER rows, i.e. 32 long words are read and written.
 * Always returns 0.
 *
 * NOTE(review): both buffers are accessed as long, so they presumably must
 * be suitably aligned -- confirm with the callers.
 */
unsigned int dequant_h263_intra(short * data, const short * coeff, const unsigned int quant, const unsigned int dcscalar)
{
#pragma TCS_no_caller_save
    /* The row macros refer to every local below by name; do not rename. */
    int tmp0, tmp1, tmp2, tmp3;
    int sign0_h, sign0_l, sign1_h, sign1_l, sign2_h, sign2_l, sign3_h, sign3_l;
    int tmp00, tmp00_0, tmp00_1;
    unsigned int dualquant_m_2, dc_quant_m_2;
    const unsigned int quant_m_2 = quant << 1;
    const unsigned int quant_add = mux((quant & 1) ,(quant) , (quant - 1));
    long *restrict coeffptr;
    long *restrict dataptr;

    coeffptr = (long*)coeff;
    dataptr = (long*)data;
    /* Same factor in both halves for ordinary words. */
    dualquant_m_2 = PACK16LSB(quant_m_2, quant_m_2);
    /* Word 0 pairs the 2 * quant factor with dcscalar. */
    dc_quant_m_2 = PACK2_16LSB(quant_m_2, dcscalar);

    H263_DEQUANT_INTRA_DC();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    H263_DEQUANT_INTRA_OTHER();
    return(0);
}
/*
 * H263_DEQUANT_INTER: dequantize four words of an inter block.
 *
 * The statement sequence is exactly the one in H263_DEQUANT_INTRA_OTHER, so
 * this macro simply forwards to it.  Like that macro it expands to bare
 * statements and uses the caller's locals by name: coeffptr, dataptr,
 * tmp0..tmp3, sign0_h..sign3_l, dualquant_m_2 and quant_add; dataptr and
 * coeffptr each advance by four words per expansion.
 */
#define H263_DEQUANT_INTER() H263_DEQUANT_INTRA_OTHER()
/*
 * Dequantize one inter block.
 *
 * data  - destination buffer; written through a long pointer.
 * coeff - source buffer; read through a long pointer.
 * quant - quantizer; every value is multiplied by 2 * quant and then offset
 *         by quant_add.
 *
 * quant_add is quant when quant is odd and quant - 1 otherwise (selected
 * with mux, which is defined outside this view -- presumably
 * mux(c, a, b) == c ? a : b).
 *
 * Runs eight H263_DEQUANT_INTER rows, i.e. 32 long words are read and
 * written.  Always returns 0.
 *
 * NOTE(review): both buffers are accessed as long, so they presumably must
 * be suitably aligned -- confirm with the callers.
 */
unsigned int dequant_h263_inter(short * data, const short * coeff, const unsigned int quant)
{
#pragma TCS_no_caller_save
    /* The row macro refers to every local below by name; do not rename. */
    int tmp0, tmp1, tmp2, tmp3;
    int sign0_h, sign0_l, sign1_h, sign1_l, sign2_h, sign2_l, sign3_h, sign3_l;
    unsigned int dualquant_m_2;
    const unsigned int quant_m_2 = quant << 1;
    const unsigned int quant_add = mux((quant & 1) ,(quant) , (quant - 1));
    long *restrict coeffptr;
    long *restrict dataptr;

    coeffptr = (long*)coeff;
    dataptr = (long*)data;
    /* Same factor in both halves of every word. */
    dualquant_m_2 = PACK16LSB(quant_m_2, quant_m_2);

    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    H263_DEQUANT_INTER();
    return(0);
}