通过ptrace跟踪进程

项目地址

https://gitee.com/bunner/linux-binary-analysis

1. 任务环境与目标

1.1 实验机器

Ubuntu 20.04 64位

1.2 任务目标

给定一个可执行文件，该程序调用两次print_string函数，分别输出Hello 1和Hello 2。要求在print_string入口处下断点，并在每次命中断点时输出各寄存器的值

2. 原理

2.1 ptrace

ptrace的函数原型为：long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);

那么有关本任务中涉及到的一些request类型的介绍如下：

PTRACE_TRACEME

创建子进程后，子进程使用该参数调用ptrace，表示子进程要求父进程对其进行跟踪。该进程之后每次成功调用exec()时都会产生一个SIGTRAP信号并暂停，这使得父进程有机会在新程序开始执行前获取对子进程的控制权

ptrace(PTRACE_TRACEME, pid, NULL, NULL);
PTRACE_PEEKTEXT

从指定内存地址中读取数据，按照官方文档是返回一个字的数据，根据源码可以看到是传入了sizeof(unsigned long)

ret = ptrace(PTRACE_PEEKTEXT, pid, addr, NULL);
PTRACE_POKETEXT

修改子进程指定内存地址处的数据

ptrace(PTRACE_POKETEXT, pid, addr, data);
PTRACE_CONT

重启已暂停（stopped）的子进程使其继续执行

ptrace(PTRACE_CONT, pid, NULL, NULL);
PTRACE_GETREGS

获取子进程的寄存器数据

ptrace(PTRACE_GETREGS, pid, NULL, &pt_regs);
PTRACE_SETREGS

设置子进程的寄存器值

ptrace(PTRACE_SETREGS, pid, NULL, &pt_regs);
PTRACE_SINGLESTEP

设置子进程执行一条指令后切换到暂停状态，将控制权交给父进程

ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL);

2.2 wait

父进程跟踪子进程时，自己使用wait等待子进程的信号

wait(int *statloc)
子进程状态发生变化（终止，或在被跟踪时因信号而暂停）后，wait返回：
- 若statloc不为空，则进程的终止状态就存放在statloc指向的单元中
- 若statloc为空，则说明父进程不关心子进程的终止状态
statloc中有一些位表示退出状态，还有四个互斥的宏来取得进程终止的原因

2.3 四个互斥宏

WIFEXITED(status)

判断子进程是否为正常终止，若是，则为真
WIFSIGNALED(status)

判断子进程是否为异常终止，若是，则为真

对于这种情况，可使用WTERMSIG(status)取得子进程终止的信号编号
WIFSTOPPED(status)

若子进程是暂停运行，则为真

对于这种情况，可以使用WSTOPSIG(status)取得使得子进程暂停的信号编号
WIFCONTINUED(status)

若进程暂停后继续运行，则为真

3. 实现

3.1 程序流程

step1: 映射可执行文件到内存空间，获取ELF可执行文件的文件头、段表、节表信息
step2: 通过指定符号名去找符号在可执行文件中的地址
step3: fork子进程，并跟踪子进程，子进程执行该可执行文件
step4: 父进程等待子进程execve，并获得子进程的控制权
step5: 父进程获取子进程的目标符号地址的数据并保存下来
step6: 向目标符号地址处写入0xcc，设下断点，再使子进程继续执行
step7: 子进程进入断点，父进程收到信号后获取寄存器数据并打印
step8: 父进程还原子进程目标符号地址处的数据，并使指令指针重新指向该处(减1)
step9: 父进程使子进程单步执行
step10: 继续从step6执行

3.2 代码实现

tracer.c

$ gcc tracer.c -o tracer

#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

/* State shared between the ELF-parsing code and the tracing loop in main(). */
typedef struct handle {
    /* ELF file header; main() points this at the start of mem. */
    Elf64_Ehdr *ehdr;
    /* Program header table; main() sets it to mem + ehdr->e_phoff. */
    Elf64_Phdr *phdr;
    /* Section header table; main() sets it to mem + ehdr->e_shoff. */
    Elf64_Shdr *shdr;
    /* Base of the read-only mapping of the target file. */
    uint8_t *mem;
    /* Name of the symbol to break on (duplicated from argv[2]). */
    char *symname;
    /* Address of symname as returned by lookup_symbol(). */
    Elf64_Addr symaddr;
    /* Register snapshot of the traced process, filled by PTRACE_GETREGS. */
    struct user_regs_struct pt_reg;
    /* Path of the program to execute and trace (duplicated from argv[1]). */
    char *exec;
} handle_t;

/**
 * @brief Look up the address of a symbol by name in the mapped ELF file.
 *
 * Scans every section of type SHT_SYMTAB. For such a section, sh_link is the
 * section-header index of the string table that holds the symbol names, and
 * each entry's st_name is an offset into that string table.
 *
 * The name must match exactly; a symbol that merely starts with symname
 * (e.g. "print_string_v2" when looking for "print_string") is not a match.
 *
 * @param h        handle whose mem, ehdr and shdr fields describe the mapping
 * @param symname  NUL-terminated name of the symbol to find
 * @return st_value of the first symbol with that name, or 0 if none is found
 */
Elf64_Addr lookup_symbol(handle_t *h, const char *symname) {
    size_t i, j;

    for (i = 0; i < h->ehdr->e_shnum; i++) {
        const Elf64_Shdr *sec = &h->shdr[i];
        const Elf64_Sym *symtab;
        const char *strtab;
        size_t nsyms;

        /* Only the .symtab section (type SHT_SYMTAB) holds Elf64_Sym entries. */
        if (sec->sh_type != SHT_SYMTAB) {
            continue;
        }

        /* sh_link of a symbol table is the index of its string table. */
        strtab = (const char *) (h->mem + h->shdr[sec->sh_link].sh_offset);
        symtab = (const Elf64_Sym *) (h->mem + sec->sh_offset);
        nsyms = sec->sh_size / sizeof(Elf64_Sym);

        for (j = 0; j < nsyms; j++) {
            /* strcmp, not strncmp: a prefix match would return the wrong symbol. */
            if (strcmp(&strtab[symtab[j].st_name], symname) == 0) {
                /* st_value is the symbol's address. */
                return symtab[j].st_value;
            }
        }
    }

    return 0;
}

/**
 * Tracer entry point.
 *
 * Usage: tracer <program> <function>
 *
 * Maps <program>, checks that it is a non-PIE ELF executable with a section
 * header table, resolves <function> through lookup_symbol(), then forks.
 * The child requests tracing and execs <program>; the parent plants an int3
 * (0xcc) breakpoint on the function's first byte and, on every hit, prints
 * the registers, waits for a key press, steps over the original instruction
 * and re-arms the breakpoint.
 *
 * Exits with 0 on normal completion (and after printing the usage text),
 * and with -1 on any error.
 */
int main(int argc, char **argv, char **envp) {
    int fd;
    handle_t h;
    struct stat st;
    long trap, orig;
    int status, pid;
    char *args[2];

    /* Both the program path and the symbol name are mandatory. */
    if (argc < 3) {
        printf("Usage: %s <program> <function>\n", argv[0]);
        exit(0);
    }

    if ((h.exec = strdup(argv[1])) == NULL) {
        perror("strdup");
        exit(-1);
    }

    args[0] = h.exec;
    args[1] = NULL;

    /* Name of the symbol to break on. */
    if ((h.symname = strdup(argv[2])) == NULL) {
        perror("strdup");
        exit(-1);
    }

    /* Open the target file. */
    if ((fd = open(argv[1], O_RDONLY)) < 0) {
        perror("open");
        exit(-1);
    }

    /* Its size is needed for the mapping. */
    if (fstat(fd, &st) < 0) {
        perror("fstat");
        exit(-1);
    }

    /* A file shorter than one ELF header cannot be parsed (and a zero-length
     * file cannot even be mapped). */
    if (st.st_size < (off_t) sizeof(Elf64_Ehdr)) {
        printf("%s is not an ELF file\n", h.exec);
        exit(-1);
    }

    /* Map the target file into this process. */
    h.mem = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (h.mem == MAP_FAILED) {
        perror("mmap");
        exit(-1);
    }

    /* Verify the "\x7fELF" magic before trusting any header field. */
    if (memcmp(h.mem, ELFMAG, SELFMAG) != 0) {
        printf("%s is not an ELF file\n", h.exec);
        exit(-1);
    }

    h.ehdr = (Elf64_Ehdr *) h.mem;
    h.phdr = (Elf64_Phdr *) (h.mem + h.ehdr->e_phoff);
    h.shdr = (Elf64_Shdr *) (h.mem + h.ehdr->e_shoff);

    /* Only fixed-address executables are supported (build with gcc -no-pie),
     * because the symbol value is used directly as the runtime address. */
    if (h.ehdr->e_type != ET_EXEC) {
        printf("%s is not an ELF executable\n", h.exec);
        exit(-1);
    }

    /* The section header table must not have been stripped. */
    if (h.ehdr->e_shstrndx == 0 || h.ehdr->e_shoff == 0 || h.ehdr->e_shnum == 0) {
        printf("Section header table not found\n");
        exit(-1);
    }

    /* Resolve the address of the target symbol. */
    if ((h.symaddr = lookup_symbol(&h, h.symname)) == 0) {
        printf("Unable to find symbol: %s not found in executable\n", h.symname);
        exit(-1);
    }

    close(fd);

    /* fork() returns the child's pid in the parent and 0 in the child. */
    if ((pid = fork()) < 0) {
        perror("fork");
        exit(-1);
    }

    /* Child: ask to be traced, then become the target program. */
    if (pid == 0) {
        if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) < 0) {
            perror("PTRACE_TRACEME");
            exit(-1);
        }
        execve(h.exec, args, envp);
        /* execve() only returns on failure. */
        perror("execve");
        exit(-1);
    }

    /* Parent: wait for the child to stop at exec. */
    if (wait(&status) < 0) {
        perror("wait");
        exit(-1);
    }
    if (!WIFSTOPPED(status)) {
        printf("%s did not stop after exec, nothing to trace\n", h.exec);
        exit(-1);
    }
    printf("Begining analysis of pid: %d at %lx\n", pid, h.symaddr);

    /* Read the word at the symbol address. PEEKTEXT returns the data itself,
     * so -1 is a valid value; failure is only signalled through errno. */
    errno = 0;
    orig = ptrace(PTRACE_PEEKTEXT, pid, (void *) h.symaddr, NULL);
    if (orig == -1 && errno != 0) {
        perror("PTRACE_PEEKTEXT");
        exit(-1);
    }

    /* Replace the low byte with the int3 opcode, 0xcc. */
    trap = (orig & ~0xffL) | 0xcc;

    /* Plant the breakpoint on the function's first instruction. */
    if (ptrace(PTRACE_POKETEXT, pid, (void *) h.symaddr, (void *) trap) < 0) {
        perror("PTRACE_POKETEXT");
        exit(-1);
    }

    for (;;) {
        /* Resume the stopped child. */
        if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
            perror("PTRACE_CONT");
            exit(-1);
        }
        if (wait(&status) < 0) {
            perror("wait");
            exit(-1);
        }

        /* Anything other than a SIGTRAP stop ends the trace. */
        if (!(WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)) {
            break;
        }

        /* Breakpoint hit: fetch and print the registers. */
        if (ptrace(PTRACE_GETREGS, pid, NULL, &h.pt_reg) < 0) {
            perror("PTRACE_GETREGS");
            exit(-1);
        }

        printf("\nExecutable %s (pid: %d) has hit breakpoint 0x%lx\n", h.exec, pid, h.symaddr);
        printf("%%rcx: %llx\n", h.pt_reg.rcx);
        printf("%%rdx: %llx\n", h.pt_reg.rdx);
        printf("%%rbx: %llx\n", h.pt_reg.rbx);
        printf("%%rax: %llx\n", h.pt_reg.rax);
        printf("%%rdi: %llx\n", h.pt_reg.rdi);
        printf("%%rsi: %llx\n", h.pt_reg.rsi);
        printf("%%r8:  %llx\n", h.pt_reg.r8);
        printf("%%r9:  %llx\n", h.pt_reg.r9);
        printf("%%r10: %llx\n", h.pt_reg.r10);
        printf("%%r11: %llx\n", h.pt_reg.r11);
        printf("%%r12: %llx\n", h.pt_reg.r12);
        printf("%%r13: %llx\n", h.pt_reg.r13);
        printf("%%r14: %llx\n", h.pt_reg.r14);
        printf("%%r15: %llx\n", h.pt_reg.r15);
        printf("%%rsp: %llx\n", h.pt_reg.rsp);
        printf("\n Please hit any key to continue: ");
        getchar();

        /* Put the original first instruction back. */
        if (ptrace(PTRACE_POKETEXT, pid, (void *) h.symaddr, (void *) orig) < 0) {
            perror("PTRACE_POKETEXT");
            exit(-1);
        }

        /* int3 already advanced rip by one byte; rewind to the function start. */
        h.pt_reg.rip = h.pt_reg.rip - 1;

        if (ptrace(PTRACE_SETREGS, pid, NULL, &h.pt_reg) < 0) {
            perror("PTRACE_SETREGS");
            exit(-1);
        }

        /* Execute just the restored instruction, then stop again. */
        if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
            perror("PTRACE_SINGLESTEP");
            exit(-1);
        }
        if (wait(&status) < 0) {
            perror("wait");
            exit(-1);
        }
        /* The child may have died during the step; there is nothing to re-arm. */
        if (!WIFSTOPPED(status)) {
            break;
        }

        /* Re-arm the breakpoint for the next call. */
        if (ptrace(PTRACE_POKETEXT, pid, (void *) h.symaddr, (void *) trap) < 0) {
            perror("PTRACE_POKETEXT");
            exit(-1);
        }
    }

    /* Report a normal exit of the traced process. */
    if (WIFEXITED(status)) {
        printf("Completed tracing pid: %d\n", pid);
    }
    exit(0);
}

example.c

$ gcc -no-pie example.c -o example

#include <stdio.h>

/* Write s to stdout followed by a newline. */
void print_string(char *s)
{
    (void) puts(s);
}

/* Call print_string() twice, once for each greeting, then exit with 0. */
int main() {
    char *greetings[] = { "Hello 1", "Hello 2" };
    int idx;

    for (idx = 0; idx < 2; idx++) {
        print_string(greetings[idx]);
    }

    return 0;
}

3.3 运行结果

$ ./tracer example print_string
Begining analysis of pid: 4495 at 401136

Executable example (pid: 4495) has hit breakpoint 0x401136
%rcx: 401180
%rdx: 7ffc0caeee88
%rbx: 401180
%rax: 401155
%rdi: 402004
%rsi: 7ffc0caeee78
%r8:  0
%r9:  7f469cdc2d50
%r10: 7f469cddef68
%r11: 202
%r12: 401050
%r13: 7ffc0caeee70
%r14: 0
%r15: 0
%rsp: 7ffc0caeed78

 Please hit any key to continue: c
Hello 1

Executable example (pid: 4495) has hit breakpoint 0x401136
%rcx: 7f469ccbc1e7
%rdx: 0
%rbx: 401180
%rax: 8
%rdi: 40200c
%rsi: ca12a0
%r8:  8
%r9:  7c
%r10: 7f469cd96be0
%r11: 246
%r12: 401050
%r13: 7ffc0caeee70
%r14: 0
%r15: 0
%rsp: 7ffc0caeed78

Hello 2
 Please hit any key to continue: Completed tracing pid: 4495

4. 参考

《linux二进制分析》
wait()函数的详细分析 - suonikeyinsu - 博客园 (cnblogs.com)

posted @ 2021-06-10 12:17 bunner 阅读(1068) 评论(0) 编辑收藏举报

刷新页面返回顶部

bunner