构造最短程序打印自身的 MD5
一,介绍
比赛题目很简单:构造一个程序,在 stdout 上打印出自身的 MD5,程序越短越好。按最终程序文件大小字节数排名,文件越小,排名越靠前。
只能使用 ld-linux-x86-64.so, libc.so, libdl.so, libgcc_s.so, libm.so, libstdc++.so 。
禁止了 socket, shmget, fork, execvc 等 syscall 。
汇编高手如云,本人只做到 752 字节,只拿到 27 名。
但忙活好几天,学到不少东西,也有苦劳,还是值得记录一下。
基本是纯 C 实现,没有动用汇编。
二,用过的一些思路和资源
基于这个 MD5 实现改造
https://gist.github.com/creationix/4710780
https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
参照 musl 和 rt0 ,去除对 glibc 依赖
- musl libc https://www.musl-libc.org/
- rt0 : https://github.com/lpsantil/rt0
三,没有成功的一些思路
-
思路,利用内核的 MD5 计算代码:
grep -i md5 linux-5.6.2
只找到 2处,-
TCP_MD5SIG , CONFIG_TCP_MD5SIG : "TCP: MD5 Signature Option support (RFC2385)"
一个冷门特性,看起来通过 socket 才能执行。 -
AF_ALG Linux 的 crypto api,通过 socket 形式暴露 api, 但是 socket 被禁用了。
https://www.chronox.de/libkcapi.html
-
-
构造碰撞,hardcode 直接 print 出来
https://github.com/corkami/collisions
https://www.rogdham.net/2017/03/12/gif-md5-hashquine.en
https://github.com/Rogdham/gif-md5-hashquine
http://www.win.tue.nl/hashclash/
尝试了 fastcoll_v1 ,但是 fastcoll 构造1个字节的冲突要 2 block = 128 字节 ,则 16 字节就要 2048 字节,还不如 C 语言直接算。 -
通过某些 ipc 机制,绕过评判。
试了简单的 shmget, 不成功。
三. 代码
后续借鉴思路,有优化
#define SYS_write 1
#define SYS_exit 60
inline long syscall3(long n, long a0, long a1, long a2) {
long ret;
__asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0), "S"(a1), "d"(a2) : "rcx", "r11", "memory");
return (ret);
}
inline long syscall1(long n, long a0) {
long ret;
__asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0) : "rcx", "r11", "memory");
return (ret);
}
static int write(int f, const char* d, int l) {
int ret = syscall3(SYS_write, f, (long)(d), l);
return (ret);
}
void _exit(int r) { syscall1(SYS_exit, r); }
typedef unsigned uint32_t;
typedef unsigned char uint8_t;
typedef unsigned long size_t;
#define NULL 0 // In words
#define memcpy(dest, src, n) \
for (size_t i = 0; i < n; ++i) { \
((char*)dest)[i] = ((const char*)src)[i]; \
}
// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
static void md5_hash(uint8_t* initial_msg, const size_t initial_len, uint32_t hash[4]) {
// Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
// r specifies the per-round shift amounts
const static uint8_t r[] = {
7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21,
};
// Use binary integer part of the sines of integers (in radians) as constants// Initialize variables:
uint32_t k[64];
for (int i = 0; i < 64; ++i) {
double f = 1.0 + i;
asm("fsin;\n\t"
"fabs;\n\t"
: "=t"(f)
: "0"(f));
f *= 4294967296.0;
k[i] = (uint32_t)f;
}
// Pre-processing: adding a single 1 bit
// append "1" bit to message
/* Notice: the input bytes are considered as bits strings,
where the first bit is the most significant bit of the byte.[37] */
// Pre-processing: padding with zeros
// append "0" bit until message length in bit ≡ 448 (mod 512)
// append length mod (2 pow 64) to message
const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;
// Message (to prepare)
uint8_t* msg = initial_msg msg[initial_len] = 128; // write the "1" bit
uint32_t bits_len = 8 * initial_len; // note, we append the len
memcpy(msg + new_len, &bits_len, 4); // in bits at the end of the buffer
// Process the message in successive 512-bit chunks:
// for each 512-bit chunk of message:
for (int offset = 0; offset < new_len; offset += 64) {
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
uint32_t* w = (uint32_t*)(msg + offset);
// Initialize hash value for this chunk:
uint32_t a = hash[0];
uint32_t b = hash[1];
uint32_t c = hash[2];
uint32_t d = hash[3];
// Main loop:
uint32_t i;
for (i = 0; i < 64; i++) {
uint32_t f, g;
if (i < 16) {
f = (b & c) | ((~b) & d);
g = i;
} else if (i < 32) {
f = (d & b) | ((~d) & c);
g = (5 * i + 1) % 16;
} else if (i < 48) {
f = b ^ c ^ d;
g = (3 * i + 5) % 16;
} else {
f = c ^ (b | (~d));
g = (7 * i) % 16;
}
uint32_t temp = d;
d = c;
c = b;
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i / 4 / 4 * 4 + i % 4]);
a = temp;
}
// Add this chunk's hash to result so far:
hash[0] += a;
hash[1] += b;
hash[2] += c;
hash[3] += d;
}
}
static char xdigits(char c) {
if ((c >= 0) && (c <= 9)) {
return '0' + c;
}
return 'a' + (c - 10);
}
void _start(void) __attribute__((noreturn));
void _start(void) {
uint32_t hash[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};
char* addr = (char*)0x400000;
const size_t len = 688;
md5_hash(addr, len, hash);
unsigned char* md = (unsigned char*)&hash[0];
char buff[32];
for (int i = 0; i < 32; ++i) {
unsigned x = md[(i >> 1)];
x >>= (i & 1 ? 0 : 4);
buff[i] = xdigits(x & 15);
}
write(1, &buff[0], sizeof(buff));
_exit(0);
}
CFLAGS="-static -Os -Wl,--omagic -D__RT0_WITH_FASTER_SYSCALL__ -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -mfpmath=sse -mfancy-math-387 -finline-functions-called-once -I ./ "
gcc $CFLAGS print_my_md5.c -S -fverbose-asm
gcc $CFLAGS print_my_md5.s -o print_my_md5
strip -R .comment print_my_md5
strip -R .bss print_my_md5
ls -l ./print_my_md5
./ELFkickers/sstrip/sstrip print_my_md5
./print_my_md5
echo
md5sum ./print_my_md5
ls -l ./print_my_md5