转置位矩阵
/* * http://www-graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits * * 0010 0100 -- 24 * 0010 0001 -- 21 * 1111 0000 -- F0 * 0111 1111 -- 7F * 1000 0000 -- 80 * 0011 0111 -- 37 * 1111 1111 -- FF * 0001 1111 -- 1F * |||| |||| * 4CFC 898A * 546E CEEE * */ uint8_t t1[ 8 ] = { 0x24, 0x21, 0xf0, 0x7f, 0x80, 0x37, 0xff, 0x1f }; uint8_t t2[ 8 ]; union { uint32_t l[ 2 ]; uint8_t c[ 8 ]; } Tmp; void foo( void ) { long Ll, Lh; uint8_t i; for ( i = 0; i < 8; i++ ) Tmp.c = t1; i = 8; while ( i-- ) { Ll = ( Tmp.l[ 0 ] & 0x01010101 ) * 0x01020408; // 0x08040201 Lh = ( Tmp.l[ 1 ] & 0x01010101 ) * 0x01020408; // Reserved t2 = (uint8_t) ( ( ( Lh & 0x0F000000 ) >> 20 ) | ( ( Ll & 0x0F000000 ) >> 24 ) ); Tmp.l[ 0 ] >>= 1; Tmp.l[ 1 ] >>= 1; } } // 《高效程序的奥秘》翻译 Henry S. Warren. Jr. 的“Hacher's Delight" // 0x57 0x17 0x97 0x13 0x37 0xf6 0x32 0x2a void transpose8( uint8_t i[ 8 ], uint8_t o[ 8 ] ) { uint32_t x, y, t; x = ( i[ 0 ] << 24 ) | ( i[ 1 ] << 16 ) | ( i[ 2 ] << 8 ) | i[ 3 ]; y = ( i[ 4 ] << 24 ) | ( i[ 5 ] << 16 ) | ( i[ 6 ] << 8 ) | i[ 7 ]; t = ( x & 0xf0f0f0f0 ) | ( ( y >> 4 ) & 0x0f0f0f0f ); y = ( ( x << 4 ) & 0xf0f0f0f0 ) | ( y & 0x0f0f0f0f ); x = t; t = ( x ^ ( x >> 14 ) ) & 0x0000cccc; x = x ^ t ^ ( t << 14 ); t = ( y ^ ( y >> 14 ) ) & 0x0000cccc; y = y ^ t ^ ( t << 14 ); t = ( x ^ ( x >> 7 ) ) & 0x00aa00aa; x = x ^ t ^ ( t << 7 ); t = ( y ^ ( y >> 7 ) ) & 0x00aa00aa; y = y ^ t ^ ( t << 7 ); o[ 7 ] = x >> 24; o[ 6 ] = x >> 16; o[ 5 ] = x >> 8; o[ 4 ] = x; o[ 3 ] = y >> 24; o[ 2 ] = y >> 16; o[ 1 ] = y >> 8; o[ 0 ] = y; } int main( void ) { int i; uint8_t t1[ 8 ] = { 0x24, 0x21, 0xf0, 0x7f, 0x80, 0x37, 0xff, 0x1f }; uint8_t t2[ 8 ]; transpose8( t1, t2 ); for ( i = 0; i < 8; i++ ) printf( "0x%x ", t2 ); }