深入理解计算机系统CSAPP Ch7:静动态库制作与神奇的库打桩机制
创建静态库
/* addvec.c */
/* $begin addvec */
/* Number of times addvec() has been called. */
int addcnt = 0;

/*
 * addvec - element-wise sum of two integer vectors.
 *
 * Writes x[k] + y[k] into z[k] for every k in [0, n) and bumps the
 * global call counter addcnt once per call.
 */
void addvec(int *x, int *y, int *z, int n)
{
    int idx = 0;

    ++addcnt;
    while (idx < n) {
        z[idx] = x[idx] + y[idx];
        ++idx;
    }
}
/* $end addvec */
/* multvec.c */
/* $begin multvec */
/* Number of times multvec() has been called. */
int multcnt = 0;

/*
 * multvec - element-wise product of two integer vectors.
 *
 * Stores x[k] * y[k] in z[k] for every k in [0, n) and increments the
 * global call counter multcnt once per call.
 */
void multvec(int *x, int *y, int *z, int n)
{
    multcnt += 1;

    for (int k = 0; k != n && k < n; ++k) {
        z[k] = x[k] * y[k];
    }
}
/* $end multvec */
我们用上面两个向量运算函数创建一个静态库,使用AR工具:
AR工具简介:我们使用tldr查看它的简要说明
lion@ubuntu:~/csapp/code/link$ tldr ar
ar
Create, modify, and extract from archives (.a, .so, .o).
More information: https://manned.org/ar.
- Extract all members from an archive:
ar -x path/to/file.a
- List the members of an archive:
ar -t path/to/file.a
- Replace or add files to an archive:
ar -r path/to/file.a path/to/file1.o path/to/file2.o
- Insert an object file index (equivalent to using ranlib):
ar -s path/to/file.a
- Create an archive with files and an accompanying object file index:
ar -rs path/to/file.a path/to/file1.o path/to/file2.o
先用 gcc -c addvec.c multvec.c 把两个源文件编译成可重定位目标文件 addvec.o 和 multvec.o,然后运行指令
$ ar rcs libvector.a addvec.o multvec.o
这样就制作完成了一个名为 libvector.a 的静态库。
接下来是主函数:
/* main2.c */
/* $begin main2 */
#include <stdio.h>
#include "vector.h"
/* Two input vectors and the buffer that receives their sum. */
int x[2] = {1, 2};
int y[2] = {3, 4};
int z[2];

/*
 * Adds x and y element-wise into z through addvec() (declared in
 * vector.h) and prints the two resulting elements.
 */
int main(void)
{
    /* Element count derived from the output array; evaluates to 2. */
    addvec(x, y, z, (int)(sizeof z / sizeof z[0]));

    printf("z = [%d %d]\n", z[0], z[1]);

    return 0;
}
/* $end main2 */
先创建可重定位目标文件 main2.o
$ gcc -c main2.c
然后进行链接
$ gcc -static -o prog2c main2.o ./libvector.a
或
$ gcc -static -o prog2c main2.o -L. -lvector
注意:使用 -lvector 这种写法时,库文件名必须以 lib 开头(即 libvector.a),-L. 则让链接器在当前目录中查找该库
这样就生成了prog2c的可执行目标文件。
创建动态库
同理,先使用gcc对源码制作.so文件
$ gcc -shared -fpic -o libvector.so addvec.c multvec.c
-shared 指示链接器创建共享(动态)库,-fpic 指示编译器生成位置无关代码
$ gcc -o prog2l main2.c ./libvector.so
库打桩机制
/*
* hello.c - Example program to demonstrate different ways to
* interpose on the malloc and free functions.
*
* Note: be sure to compile unoptimized (-O0) so that gcc won't
* optimize away the calls to malloc and free.
*/
/* $begin interposemain */
#include <stdio.h>
#include <malloc.h>
/*
 * Performs one 32-byte malloc followed by a free of the same pointer.
 * These two calls are the ones the interposition examples intercept.
 */
int main(void)
{
    int *p;

    p = malloc(32);
    free(p);

    return 0;
}
/* $end interposemain */
/* Local malloc header file */
/* $begin mallocheader */
/*
 * Rewrite every later malloc(...) / free(...) call in the including file
 * into a call to the wrapper functions declared below.
 */
#define malloc(size) mymalloc(size)
#define free(ptr) myfree(ptr)

/*
 * Wrapper prototypes. The definitions are the COMPILETIME section of
 * mymalloc.c. This header includes nothing itself, so size_t must already
 * be visible when it is included (hello.c includes <stdio.h> first).
 */
void *mymalloc(size_t size);
void myfree(void *ptr);
/* $end mallocheader */
/*
* mymalloc.c - Examples of run-time, link-time, and compile-time
* library interpositioning.
*/
/*
* Run-time interpositioning of malloc and free based
* on the dynamic linker's (ld-linux.so) LD_PRELOAD mechanism
*
* Example (Assume a.out calls malloc and free):
* linux> gcc -Wall -DRUNTIME -shared -fpic -o mymalloc.so mymalloc.c -ldl
*
* bash> (LD_PRELOAD="./mymalloc.so" ./a.out)
* ...or
* tcsh> (setenv LD_PRELOAD "./mymalloc.so"; ./a.out; unsetenv LD_PRELOAD)
*/
/* $begin interposer */
#ifdef RUNTIME
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
/* malloc wrapper function */
/*
 * malloc - run-time interposed replacement for libc malloc.
 *
 * Looks up the next definition of "malloc" in library search order
 * (normally libc's), forwards the request to it, logs the size and the
 * returned address on stdout, and returns that address unchanged.
 *
 * If the real malloc cannot be resolved, the dynamic linker's message is
 * written to stderr and the process exits with status 1.
 */
void *malloc(size_t size)
{
    /*
     * Per-thread nesting depth of this wrapper. printf may itself call
     * malloc (stdio allocates its buffer lazily), which would re-enter
     * this function and recurse until the stack overflows. Only the
     * outermost call on a thread is allowed to log.
     */
    static __thread int depth = 0;
    void *(*mallocp)(size_t size);
    void *ptr;

    depth++;

    mallocp = dlsym(RTLD_NEXT, "malloc"); /* Get address of libc malloc */
    /*
     * Test the returned address instead of dlerror(): a stale error left
     * behind by the host program must not abort us, and we must not
     * consume the host's pending error on the success path.
     */
    if (mallocp == NULL) {
        const char *error = dlerror();

        fputs(error != NULL ? error : "dlsym: cannot resolve malloc\n", stderr);
        exit(1);
    }

    ptr = mallocp(size); /* Call libc malloc */
    if (depth == 1) {
        printf("malloc(%d) = %p\n", (int)size, ptr);
    }

    depth--;
    return ptr;
}
/* free wrapper function */
/*
 * free - run-time interposed replacement for libc free.
 *
 * A null pointer is ignored and not logged. Otherwise the next definition
 * of "free" in library search order (normally libc's) is looked up and
 * called with ptr, and the released address is logged on stdout.
 *
 * If the real free cannot be resolved, the dynamic linker's message is
 * written to stderr and the process exits with status 1.
 */
void free(void *ptr)
{
    void (*freep)(void *) = NULL;

    if (!ptr) {
        return;
    }

    freep = dlsym(RTLD_NEXT, "free"); /* Get address of libc free */
    /*
     * Test the returned address instead of dlerror(): a stale error left
     * behind by the host program must not abort us, and we must not
     * consume the host's pending error on the success path.
     */
    if (freep == NULL) {
        const char *error = dlerror();

        fputs(error != NULL ? error : "dlsym: cannot resolve free\n", stderr);
        exit(1);
    }

    freep(ptr); /* Call libc free */
    printf("free(%p)\n", ptr);
}
#endif
/* $end interposer */
/*
* Link-time interposition of malloc and free using the static
* linker's (ld) "--wrap symbol" flag.
*
* Compile the executable using "-Wl,--wrap,malloc -Wl,--wrap,free".
* This tells the linker to resolve references to malloc as
* __wrap_malloc, free as __wrap_free, __real_malloc as malloc, and
* __real_free as free.
*/
/* $begin interposel */
#ifdef LINKTIME
#include <stdio.h>
void *__real_malloc(size_t size);
void __real_free(void *ptr);
/* malloc wrapper function */
/*
 * __wrap_malloc - link-time stand-in for malloc.
 *
 * With "--wrap,malloc" the linker sends references to malloc here, and
 * __real_malloc refers to the original malloc. The request is forwarded,
 * the size and resulting address are logged, and the address is returned.
 */
void *__wrap_malloc(size_t size)
{
    void *block;

    block = __real_malloc(size); /* forward to the real allocator */
    printf("malloc(%d) = %p\n", (int)size, block);

    return block;
}
/* free wrapper function */
/*
 * __wrap_free - link-time stand-in for free.
 *
 * With "--wrap,free" the linker sends references to free here, and
 * __real_free refers to the original free. The block is released first,
 * then its address is logged.
 */
void __wrap_free(void *ptr)
{
    __real_free(ptr); /* forward to the real deallocator */

    printf("free(%p)\n", ptr);
}
#endif
/* $end interposel */
/*
* Compile-time interpositioning of malloc and free using the C
* preprocessor. A local malloc.h file defines malloc and free as
* wrappers mymalloc and myfree respectively.
*/
/* $begin interposec */
#ifdef COMPILETIME
#include <stdio.h>
#include <malloc.h>
/* malloc wrapper function */
/*
 * mymalloc - compile-time wrapper around malloc.
 *
 * Allocates size bytes with malloc, logs the requested size and the
 * resulting address on stdout, and returns that address to the caller.
 */
void *mymalloc(size_t size)
{
    void *block;

    block = malloc(size);
    printf("malloc(%d)=%p\n", (int)size, block);

    return block;
}
/* free wrapper function */
/*
 * myfree - compile-time wrapper around free.
 *
 * Releases ptr with free and then logs the released address on stdout.
 */
void myfree(void *ptr)
{
    free(ptr); /* release first, log afterwards */

    printf("free(%p)\n", ptr);
}
#endif
/* $end interposec */
编译时打桩:
lion@ubuntu:~/csapp/code/link/interpose$ gcc -DCOMPILETIME -c mymalloc.c
lion@ubuntu:~/csapp/code/link/interpose$ gcc -I. -o intc int.c mymalloc.o
lion@ubuntu:~/csapp/code/link/interpose$ ./intc
malloc(32)=0x55f3e2c092a0
free(0x55f3e2c092a0)
其中 -DCOMPILETIME 即通过 -D 参数定义宏 COMPILETIME,用来选择 mymalloc.c 中对应的代码段;-I. 用来实现编译时打桩:它让预处理器在搜索系统目录之前,先在当前目录中查找 malloc.h。
链接时打桩
编译可重定位目标文件
$ gcc -c int.c
$ gcc -DLINKTIME -c mymalloc.c
然后链接
$ gcc -Wl,--wrap,malloc -Wl,--wrap,free -o intl int.o mymalloc.o
其中,-Wl,option 标志会把 option 传递给链接器,option 中的每个逗号都会被替换成一个空格,所以 -Wl,--wrap,malloc 就是把 --wrap malloc 传递给链接器,free 同理。--wrap 标志指示链接器进行链接时打桩
运行
lion@ubuntu:~/csapp/code/link/interpose$ ./intl
malloc(32) = 0x55abc29192a0
free(0x55abc29192a0)
可见完成了对malloc和free的打桩
运行时打桩
编译时打桩需要能访问程序的源代码,链接时打桩需要能访问程序的可重定位目标文件,而运行时打桩只需要能访问可执行目标文件,打桩代码则放在一个单独的 .so 共享库中。
制作运行时打桩的共享库
$ gcc -Wall -DRUNTIME -shared -fpic -o mymalloc.so mymalloc.c -ldl
编译主程序,这里,可见编译主文件时,不需要任何关于动态库的信息。
$ gcc -o intr int.c
运行时需要指定动态库的路径,然而这里出现了问题:
lion@ubuntu:~/csapp/code/link/interpose$ LD_PRELOAD="./mymalloc.so" ./intr
Segmentation fault (core dumped)
人给的现成的代码都能出问题?为什么会这样呢?
运行时打桩的printf与malloc循环调用debug
通过gdb调试,发现
#0 0x00007ffff78591c3 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#1 0x00007ffff7861849 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#2 0x00007ffff7bd582e in malloc (size=1024) at badmalloc.c:19
#3 0x00007ffff7879185 in _IO_file_doallocate () from /lib/x86_64-linux-gnu/libc.so.6
#4 0x00007ffff78874c4 in _IO_doallocbuf () from /lib/x86_64-linux-gnu/libc.so.6
#5 0x00007ffff7886828 in _IO_file_overflow () from /lib/x86_64-linux-gnu/libc.so.6
#6 0x00007ffff78851bd in _IO_file_xsputn () from /lib/x86_64-linux-gnu/libc.so.6
#7 0x00007ffff7859201 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#8 0x00007ffff7861849 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#9 0x00007ffff7bd582e in malloc (size=1024) at badmalloc.c:19
#10 0x00007ffff7879185 in _IO_file_doallocate () from /lib/x86_64-linux-gnu/libc.so.6
#11 0x00007ffff78874c4 in _IO_doallocbuf () from /lib/x86_64-linux-gnu/libc.so.6
#12 0x00007ffff7886828 in _IO_file_overflow () from /lib/x86_64-linux-gnu/libc.so.6
#13 0x00007ffff78851bd in _IO_file_xsputn () from /lib/x86_64-linux-gnu/libc.so.6
#14 0x00007ffff7859201 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#15 0x00007ffff7861849 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#16 0x00007ffff7bd582e in malloc (size=1024) at badmalloc.c:19
#17 0x00007ffff7879185 in _IO_file_doallocate () from /lib/x86_64-linux-gnu/libc.so.6
#18 0x00007ffff78874c4 in _IO_doallocbuf () from /lib/x86_64-linux-gnu/libc.so.6
#19 0x00007ffff7886828 in _IO_file_overflow () from /lib/x86_64-linux-gnu/libc.so.6
#20 0x00007ffff78851bd in _IO_file_xsputn () from /lib/x86_64-linux-gnu/libc.so.6
#21 0x00007ffff7859201 in vfprintf () from /lib/x86_64-linux-gnu/libc.so.6
#22 0x00007ffff7861849 in printf () from /lib/x86_64-linux-gnu/libc.so.6
#23 0x00007ffff7bd582e in malloc (size=1024) at badmalloc.c:19
#24 0x00007ffff7879185 in _IO_file_doallocate () from /lib/x86_64-linux-gnu/libc.so.6
#25 0x00007ffff78874c4 in _IO_doallocbuf () from /lib/x86_64-linux-gnu/libc.so.6
---Type <return> to continue, or q <return> to quit---
从#9到#2(调用关系要倒着看):
malloc->printf->vfprintf->_IO_file_xsputn->_IO_file_overflow->_IO_doallocbuf->_IO_file_doallocate->malloc
我们的malloc函数中调用了printf函数,printf函数又调用了我们的malloc函数,malloc函数又会调用printf函数……这产生了一个调用死循环,调用层次足够深,栈就溢出了。
那么如何打破这个死循环呢?首先要尽量避免在自己写的malloc函数中调用其他标准库函数,毕竟不清楚标准库函数的内部实现机制。但是为了输出一些信息,printf函数还是要保留的,那么怎么办呢?首先考虑单线程的情况,如果在我们自己写的malloc函数中发生了循环调用自己malloc的情况,唯一的可能就是printf调用了malloc。我们可以设置一个静态计数变量count,每次完成执行malloc函数后将count清零,每次进入malloc函数后count自增1,如果count=1,说明现在调用栈上只有对自定义malloc函数的一次调用,这时可以调用printf输出信息;如果count=2,说明此时调用栈上对malloc函数发生了第二次调用,即一个malloc函数还没有执行完,就又进行了一次malloc函数调用,我们认为这个问题出在printf上,此时我们就不再调用printf了。那么多线程情况呢?这个使用__thread修饰符将静态变量设置为thread local的就可以了。最后的malloc函数代码如下:
/*
 * malloc - run-time interposed replacement for libc malloc that is safe
 * against re-entry from printf.
 *
 * Looks up the next definition of "malloc" in library search order
 * (normally libc's), forwards the request to it, logs the size and the
 * returned address on stdout, and returns that address unchanged.
 *
 * If the real malloc cannot be resolved, the dynamic linker's message is
 * written to stderr and the process exits with status 1.
 */
void *malloc(size_t size)
{
    /*
     * Per-thread nesting depth of this wrapper. printf may call malloc
     * itself, which re-enters this function; only the outermost call on a
     * thread logs. The counter is decremented on the way out rather than
     * reset to 0: a reset performed by a nested call would drop the guard
     * while the outer printf is still running, so a second allocation
     * made by that same printf would log re-entrantly.
     */
    static __thread int depth = 0;
    void *(*mallocp)(size_t size);
    void *ptr;

    depth++;

    mallocp = dlsym(RTLD_NEXT, "malloc");
    /*
     * Test the returned address instead of dlerror(): a stale error left
     * behind by the host program must not abort us, and we must not
     * consume the host's pending error on the success path.
     */
    if (mallocp == NULL) {
        const char *error = dlerror();

        fputs(error != NULL ? error : "dlsym: cannot resolve malloc\n", stderr);
        exit(1);
    }

    ptr = mallocp(size);
    if (depth == 1) {
        printf("malloc(%d) = %p\n", (int)size, ptr);
    }

    depth--;
    return ptr;
}
lion@ubuntu:~/csapp/code/link/interpose$ LD_PRELOAD="./mymalloc.so" ./intr
malloc(32) = 0x560aaff2c2a0
free(0x560aaff2c2a0)
此时,程序可以正确运行。
使用LD_PRELOAD对任意可执行程序调用运行时打桩
lion@ubuntu:~/csapp/code/link/interpose$ LD_PRELOAD="./mymalloc.so" /usr/bin/uptime
malloc(37) = 0x5649964a12d0
malloc(472) = 0x5649964a1710
malloc(4096) = 0x5649964a18f0
malloc(1024) = 0x5649964a2900
free(0x5649964a2900)
free(0x5649964a1710)
free(0x5649964a18f0)
malloc(472) = 0x5649964a1710
malloc(1024) = 0x5649964a2900
free(0x5649964a2900)
free(0x5649964a1710)
malloc(472) = 0x5649964a1710
malloc(4096) = 0x5649964a18f0
malloc(1024) = 0x5649964a2900
free(0x5649964a2900)
free(0x5649964a1710)
free(0x5649964a18f0)
malloc(5) = 0x5649964a18f0
free(0x5649964a18f0)
malloc(120) = 0x5649964a1910
malloc(12) = 0x5649964a18f0
malloc(776) = 0x5649964a1990
malloc(112) = 0x5649964a1ca0
malloc(1336) = 0x5649964a1d20
malloc(216) = 0x5649964a2260
malloc(432) = 0x5649964a2340
malloc(104) = 0x5649964a2500
malloc(88) = 0x5649964a2570
malloc(120) = 0x5649964a25d0
malloc(168) = 0x5649964a2650
malloc(104) = 0x5649964a2700
malloc(80) = 0x5649964a2770
malloc(192) = 0x5649964a27d0
malloc(12) = 0x5649964a28a0
malloc(12) = 0x5649964a28c0
malloc(12) = 0x5649964a28e0
malloc(12) = 0x5649964a2d10
malloc(12) = 0x5649964a2d30
malloc(12) = 0x5649964a2d50
malloc(12) = 0x5649964a2d70
malloc(12) = 0x5649964a2d90
malloc(12) = 0x5649964a2db0
malloc(12) = 0x5649964a2dd0
malloc(12) = 0x5649964a2df0
malloc(12) = 0x5649964a2e10
malloc(12) = 0x5649964a2e30
malloc(34) = 0x5649964a2e50
malloc(10) = 0x5649964a2e80
malloc(15) = 0x5649964a2ea0
malloc(472) = 0x5649964a1710
malloc(4096) = 0x5649964a2ec0
malloc(1757) = 0x5649964a3ed0
free(0x5649964a2ec0)
free(0x5649964a1710)
malloc(20) = 0x5649964a2ec0
malloc(20) = 0x5649964a2ee0
malloc(20) = 0x5649964a2f00
malloc(20) = 0x5649964a2f20
malloc(20) = 0x5649964a2f40
malloc(12) = 0x5649964a2f60
malloc(271) = 0x5649964a2f80
free(0x5649964a2df0)
free(0x5649964a2e30)
malloc(12) = 0x5649964a2e30
malloc(12) = 0x5649964a2df0
free(0x5649964a2f80)
free(0x5649964a2f60)
malloc(384) = 0x5649964a30a0
malloc(12) = 0x5649964a2f60
malloc(271) = 0x5649964a2f80
free(0x5649964a2e30)
free(0x5649964a2df0)
malloc(12) = 0x5649964a2df0
malloc(12) = 0x5649964a2e30
free(0x5649964a2f80)
free(0x5649964a2f60)
04:17:18 up 9:29, 1 user, load average: 0.02, 0.04, 0.04
总结
库打桩是Linux链接器支持的一项很强大的技术,它可以截获对共享库函数的调用,用来跟踪调用次数、验证和追踪输入输出,或者将其替换成一个完全不同的实现。可以想象,利用此技术可以做很多非常有趣的事情。
CSAPP 第七章 链接