构造最短程序打印自身的 MD5

一,介绍

比赛题目很简单:构造一个程序,在 stdout 上打印出自身的 MD5,程序越短越好。按最终程序文件大小字节数排名,文件越小,排名越靠前。
只能使用 ld-linux-x86-64.so, libc.so, libdl.so, libgcc_s.so, libm.so, libstdc++.so 。
禁止了 socket, shmget, fork, execvc 等 syscall 。

汇编高手如云,本人只做到 752 字节,只拿到 27 名。
但忙活好几天,学到不少东西,也有苦劳,还是值得记录一下。

基本是纯 C 实现,没有动用汇编。

二,用过的一些思路和资源

基于这个 MD5 实现改造
https://gist.github.com/creationix/4710780

https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly

参照 musl 和 rt0 ,去除对 glibc 依赖

  1. musl libc https://www.musl-libc.org/
  2. rt0 : https://github.com/lpsantil/rt0

三,没有成功的一些思路

  1. 思路,利用内核的 MD5 计算代码:
    grep -i md5 linux-5.6.2
    只找到 2处,

    1. TCP_MD5SIG , CONFIG_TCP_MD5SIG : "TCP: MD5 Signature Option support (RFC2385)"
      一个冷门特性,看起来通过 socket 才能执行。

    2. AF_ALG Linux 的 crypto api,通过 socket 形式暴露 api, 但是 socket 被禁用了。
      https://www.chronox.de/libkcapi.html

  2. 构造碰撞,hardcode 直接 print 出来

    https://github.com/corkami/collisions

    https://www.rogdham.net/2017/03/12/gif-md5-hashquine.en

    https://github.com/Rogdham/gif-md5-hashquine

    http://www.win.tue.nl/hashclash/
    尝试了 fastcoll_v1 ,但是 fastcoll 构造1个字节的冲突要 2 block = 128 字节 ,则 16 字节就要 2048 字节,还不如 C 语言直接算。

  3. 通过某些 ipc 机制,绕过评判。
    试了简单的 shmget, 不成功。

三. 代码

后续借鉴思路,有优化

#define SYS_write 1
#define SYS_exit 60

inline long syscall3(long n, long a0, long a1, long a2) {
    long ret;
    __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0), "S"(a1), "d"(a2) : "rcx", "r11", "memory");
    return (ret);
}
inline long syscall1(long n, long a0) {
    long ret;
    __asm__ volatile("syscall" : "=a"(ret) : "a"(n), "D"(a0) : "rcx", "r11", "memory");
    return (ret);
}

static int write(int f, const char* d, int l) {
    int ret = syscall3(SYS_write, f, (long)(d), l);
    return (ret);
}

void _exit(int r) { syscall1(SYS_exit, r); }

typedef unsigned uint32_t;
typedef unsigned char uint8_t;
typedef unsigned long size_t;
#define NULL 0  // In words

#define memcpy(dest, src, n)                      \
    for (size_t i = 0; i < n; ++i) {              \
        ((char*)dest)[i] = ((const char*)src)[i]; \
    }

// leftrotate function definition
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))

static void md5_hash(uint8_t* initial_msg, const size_t initial_len, uint32_t hash[4]) {
    // Note: All variables are unsigned 32 bit and wrap modulo 2^32 when calculating

    // r specifies the per-round shift amounts
    const static uint8_t r[] = {
        7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21,
    };

    // Use binary integer part of the sines of integers (in radians) as constants// Initialize variables:
    uint32_t k[64];
    for (int i = 0; i < 64; ++i) {
        double f = 1.0 + i;
        asm("fsin;\n\t"
            "fabs;\n\t"
            : "=t"(f)
            : "0"(f));
        f *= 4294967296.0;
        k[i] = (uint32_t)f;
    }

    // Pre-processing: adding a single 1 bit
    // append "1" bit to message
    /* Notice: the input bytes are considered as bits strings,
       where the first bit is the most significant bit of the byte.[37] */

    // Pre-processing: padding with zeros
    // append "0" bit until message length in bit ≡ 448 (mod 512)
    // append length mod (2 pow 64) to message

    const int new_len = ((((initial_len + 8) / 64) + 1) * 64) - 8;

    // Message (to prepare)
    uint8_t* msg = initial_msg msg[initial_len] = 128;  // write the "1" bit

    uint32_t bits_len = 8 * initial_len;  // note, we append the len
    memcpy(msg + new_len, &bits_len, 4);  // in bits at the end of the buffer

    // Process the message in successive 512-bit chunks:
    // for each 512-bit chunk of message:
    for (int offset = 0; offset < new_len; offset += 64) {
        // break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
        uint32_t* w = (uint32_t*)(msg + offset);

        // Initialize hash value for this chunk:
        uint32_t a = hash[0];
        uint32_t b = hash[1];
        uint32_t c = hash[2];
        uint32_t d = hash[3];

        // Main loop:
        uint32_t i;
        for (i = 0; i < 64; i++) {
            uint32_t f, g;

            if (i < 16) {
                f = (b & c) | ((~b) & d);
                g = i;
            } else if (i < 32) {
                f = (d & b) | ((~d) & c);
                g = (5 * i + 1) % 16;
            } else if (i < 48) {
                f = b ^ c ^ d;
                g = (3 * i + 5) % 16;
            } else {
                f = c ^ (b | (~d));
                g = (7 * i) % 16;
            }

            uint32_t temp = d;
            d = c;
            c = b;
            b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i / 4 / 4 * 4 + i % 4]);
            a = temp;
        }

        // Add this chunk's hash to result so far:

        hash[0] += a;
        hash[1] += b;
        hash[2] += c;
        hash[3] += d;
    }
}

static char xdigits(char c) {
    if ((c >= 0) && (c <= 9)) {
        return '0' + c;
    }
    return 'a' + (c - 10);
}

void _start(void) __attribute__((noreturn));

void _start(void) {
    uint32_t hash[4] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476};

    char* addr = (char*)0x400000;
    const size_t len = 688;
    md5_hash(addr, len, hash);

    unsigned char* md = (unsigned char*)&hash[0];
    char buff[32];
    for (int i = 0; i < 32; ++i) {
        unsigned x = md[(i >> 1)];
        x >>= (i & 1 ? 0 : 4);
        buff[i] = xdigits(x & 15);
    }
    write(1, &buff[0], sizeof(buff));
    _exit(0);
}
CFLAGS="-static -Os -Wl,--omagic -D__RT0_WITH_FASTER_SYSCALL__ -ffunction-sections -Wl,--gc-sections -nostartfiles -nodefaultlibs -nostdlib -nostdinc -Wl,--build-id=none -fomit-frame-pointer -fno-stack-protector -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops -fmerge-all-constants -fno-ident -mfpmath=sse -mfancy-math-387  -finline-functions-called-once -I ./ "

gcc   $CFLAGS  print_my_md5.c  -S -fverbose-asm
gcc   $CFLAGS  print_my_md5.s  -o print_my_md5 

strip -R .comment print_my_md5
strip -R .bss print_my_md5
ls -l ./print_my_md5
./ELFkickers/sstrip/sstrip print_my_md5
./print_my_md5
echo
md5sum ./print_my_md5
ls -l ./print_my_md5

posted on 2020-04-30 18:35  windydays  阅读(632)  评论(0编辑  收藏  举报

导航