buffer 大小超出子线程栈大小(1*1024 *1024 -8192)限制,出现SIGSEGV crash
在OpenWrt平台上测试ok的一个demo程序移植到Android平台后,运行出现莫名其妙的SIGSEGV crash。
出现crash后的关键log信息如下:
--------- beginning of crash F/libc ( 1173): Fatal signal 11 (SIGSEGV), code 1, fault addr 0xb6d72c24 in tid 1174 (tcp_cli) I/DEBUG ( 62): *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** I/DEBUG ( 62): Build fingerprint: 'BUILD_FINGERPRINT' I/DEBUG ( 62): Revision: '0' I/DEBUG ( 62): ABI: 'arm' I/DEBUG ( 62): pid: 1173, tid: 1174, name: tcp_cli >>> ./tcp_cli <<< I/DEBUG ( 62): signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xb6d72c24 I/DEBUG ( 62): r0 b6d82d3c r1 00000000 r2 00100000 r3 91902916 I/DEBUG ( 62): r4 b6f1fdd4 r5 b6f41a6c r6 00000003 r7 b6d82d3c I/DEBUG ( 62): r8 b8e590e8 r9 b8e590e8 sl b6eda581 fp b6e82dd0 I/DEBUG ( 62): ip b6f43f7c sp b6d72c28 lr b6f40e45 pc b6ed6c00 cpsr 40070010 I/DEBUG ( 62): I/DEBUG ( 62): backtrace: I/DEBUG ( 62): #00 pc 00012c00 /system/lib/libc.so (memset) I/DEBUG ( 62): #01 pc 00000e41 /dev/tcp_cli I/DEBUG ( 62): #02 pc 000011e1 /dev/tcp_cli I/DEBUG ( 62): #03 pc 0001659f /system/lib/libc.so (__pthread_start(void*)+30) I/DEBUG ( 62): #04 pc 000144cb /system/lib/libc.so (__start_thread+6) W/libbacktrace( 62): virtual bool BacktracePtrace::ReadWord(uintptr_t, word_t*): invalid pointer 0xb6d72be8 reading from tid 1174, ptrace() strerror(errno)=I/O error W/libbacktrace( 62): virtual bool BacktracePtrace::ReadWord(uintptr_t, word_t*): invalid pointer 0xb6d72c28 reading from tid 1174, ptrace() strerror(errno)=I/O error W/libbacktrace( 62): virtual bool BacktracePtrace::ReadWord(uintptr_t, word_t*): invalid pointer 0xb6d72c28 reading from tid 1174, ptrace() strerror(errno)=I/O error
从log上看,是在创建线程并启动之后,线程内调用libc的memset时出现了错误。
使用addr2line工具查看对应的bin文件找到出错地址e41对应的源码所在如下:
./prebuilts/gcc/linux-x86/arm/arm-eabi-4.8/bin/arm-eabi-addr2line -e out/target/product/rk3036/symbols/system/bin/tcp_cli e41 /home/liuxueneng/workCode/rk3036_dongle/external/lollipop_wifi/net/tcp_cli.c:71
找到对应源文件指定行71
68 69 static int do_recv_handle(int sockfd) 70 { 71 char buf[BUFSIZE] = {0}; 72 int numbytes = 0; 73 struct timeval timeout, recvtv, tv;
这是用户自定义BUFSIZE的地方如下,大小为1M
36 37 #define BUFSIZE (1 << 20) 38
出错的地方定义了一个1M的buf,初始化为0。
编译器为该初始化自动生成了memset调用(log中r2 = 0x00100000,正好是1M的长度),在清零这块栈内存时出现crash。
猜测可能因为线程栈大小限制导致,先尝试修改成128K,测试验证通过。
源码分析确认。
一、Android 主线程栈大小
查看Android 主线程栈大小,显示是8M(ulimit -s 的单位是KB,8192KB即8M)
root@rk3036:/ # ulimit -s 8192
二、Android子线程栈大小
线程创建函数pthread_create需要四个参数,
NAME pthread_create - create a new thread SYNOPSIS #include <pthread.h> int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg); Compile and link with -pthread
第二个参数attr可以设置stack大小;如果传递NULL,系统会采用默认值,也可以先调用pthread_attr_init初始化一个attr再传入:
int pthread_attr_init(pthread_attr_t *attr);
查看pthread_create 和pthread_attr_init两个函数的源码,
bionic/libc/bionic/pthread_create.cpp
154 int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr, 155 void* (*start_routine)(void*), void* arg) { 156 ErrnoRestorer errno_restorer; 157 158 // Inform the rest of the C library that at least one thread was created. 159 __isthreaded = 1; 160 161 pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(calloc(sizeof(*thread), 1)); 162 if (thread == NULL) { 163 __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: couldn't allocate thread"); 164 return EAGAIN; 165 } 166 167 if (attr == NULL) { 168 pthread_attr_init(&thread->attr); 169 } else { 170 thread->attr = *attr; 171 attr = NULL; // Prevent misuse below. 172 } 173 174 // Make sure the stack size and guard size are multiples of PAGE_SIZE. 175 thread->attr.stack_size = BIONIC_ALIGN(thread->attr.stack_size, PAGE_SIZE); 176 thread->attr.guard_size = BIONIC_ALIGN(thread->attr.guard_size, PAGE_SIZE); 177 178 if (thread->attr.stack_base == NULL) { 179 // The caller didn't provide a stack, so allocate one. 180 thread->attr.stack_base = __create_thread_stack(thread); 181 if (thread->attr.stack_base == NULL) { 182 free(thread); 183 return EAGAIN; 184 } 185 } else { 186 // The caller did provide a stack, so remember we're not supposed to free it. 187 thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_ALLOCATED_STACK; 188 } 189 190 // Make room for the TLS area. 191 // The child stack is the same address, just growing in the opposite direction. 192 // At offsets >= 0, we have the TLS slots. 193 // At offsets < 0, we have the child stack. 
194 thread->tls = reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(thread->attr.stack_base) + 195 thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*)); 196 void* child_stack = thread->tls; 197 __init_tls(thread); 198 199 // Create a mutex for the thread in TLS to wait on once it starts so we can keep 200 // it from doing anything until after we notify the debugger about it 201 // 202 // This also provides the memory barrier we need to ensure that all 203 // memory accesses previously performed by this thread are visible to 204 // the new thread. 205 pthread_mutex_init(&thread->startup_handshake_mutex, NULL); 206 pthread_mutex_lock(&thread->startup_handshake_mutex);
如果传入的 attr == NULL,pthread_create 会自动调用pthread_attr_init初始化一个attr
再看pthread_attr_init函数
bionic/libc/bionic/pthread_attr.cpp
40 41 int pthread_attr_init(pthread_attr_t* attr) { 42 attr->flags = 0; 43 attr->stack_base = NULL; 44 attr->stack_size = PTHREAD_STACK_SIZE_DEFAULT; 45 attr->guard_size = PAGE_SIZE; 46 attr->sched_policy = SCHED_NORMAL; 47 attr->sched_priority = 0; 48 return 0; 49 }
stack_size是由宏定义PTHREAD_STACK_SIZE_DEFAULT决定的。再看定义宏定义的地方
bionic/libc/bionic/pthread_internal.h
108 /* 109 * Traditionally we gave threads a 1MiB stack. When we started 110 * allocating per-thread alternate signal stacks to ease debugging of 111 * stack overflows, we subtracted the same amount we were using there 112 * from the default thread stack size. This should keep memory usage 113 * roughly constant. 114 */ 115 #define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGSTKSZ)
可以看到栈大小是小于1M的。再追一下SIGSTKSZ是多少
./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/asm/signal.h:93:#define MINSIGSTKSZ 2048 ./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/asm/signal.h:94:#define SIGSTKSZ 8192 ./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/asm-generic/signal.h:89:#define MINSIGSTKSZ 2048 ./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/asm-generic/signal.h:90:#define SIGSTKSZ 8192 ./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/bits/sigstack.h:43:#define MINSIGSTKSZ 2048 ./prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.11-4.6/sysroot/usr/include/bits/sigstack.h:46:#define SIGSTKSZ 8192 ./prebuilts/ndk/8/platforms/android-4/arch-arm/usr/include/asm/signal.h:79:#define MINSIGSTKSZ 2048 ./prebuilts/ndk/8/platforms/android-4/arch-arm/usr/include/asm/signal.h:80:#define SIGSTKSZ 8192 ./prebuilts/ndk/8/platforms/android-5/arch-arm/usr/include/asm/signal.h:79:#define MINSIGSTKSZ 2048 ./prebuilts/ndk/8/platforms/android-5/arch-arm/usr/include/asm/signal.h:80:#define SIGSTKSZ 8192 ./prebuilts/ndk/8/platforms/android-3/arch-arm/usr/include/asm/signal.h:79:#define MINSIGSTKSZ 2048 ./prebuilts/ndk/8/platforms/android-3/arch-arm/usr/include/asm/signal.h:80:#define SIGSTKSZ 8192 ./prebuilts/ndk/8/platforms/android-8/arch-arm/usr/include/asm/signal.h:79:#define MINSIGSTKSZ 2048 ./prebuilts/ndk/8/platforms/android-8/arch-arm/usr/include/asm/signal.h:80:#define SIGSTKSZ 8192 ./prebuilts/ndk/8/platforms/android-9/arch-x86/usr/include/asm/signal.h:91:#define MINSIGSTKSZ 2048 ./prebuilts/ndk/8/platforms/android-9/arch-x86/usr/include/asm/signal.h:92:#define SIGSTKSZ 8192
signal.h 和sigstack.h都有定义,都是8192,也就是8K,可以推测出PTHREAD_STACK_SIZE_DEFAULT 应该是(1024 * 1024 - 8192) = 1040384。
调用pthread_attr_init 写一段代码测试验证一下
77 pthread_attr_t attr = {}; 78 pthread_attr_init(&attr); 79 logw("PTHREAD_STACK_SIZE_DEFAULT:%d SIGSTKSZ:%d",attr.stack_size, (1 * 1024 * 1024) - attr.stack_size);
编译push 执行。
root@rk3036:/ # cd /data/ root@rk3036:/data # chmod 0777 net_cli root@rk3036:/data # ./net_cli W NET_CLI : PTHREAD_STACK_SIZE_DEFAULT:1040384 SIGSTKSZ:8192
综上,确认了Android主线程的栈大小是8M,子线程的默认栈大小是1M - 8192(即1040384字节)。
出错原因是buf大小超出了子线程默认栈限制。
解决办法有两个,
一、减小buf的大小,使其明显小于1040384(线程栈上还要容纳其他局部变量、调用帧以及TLS区域,不能刚好卡在上限),例如前面验证通过的128K。
二、新建线程的时候,先用pthread_attr_init初始化attr,再通过pthread_attr_setstacksize设置attr里面的stack_size,然后把该attr传给pthread_create。