转载:
http://blog.chinaunix.net/uid-30343738-id-5757210.html
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
/* Counter incremented by func1() while it holds lock1. */
static int sequence1 = 0;
/* Counter incremented by func1() and func2() while they hold lock2. */
static int sequence2 = 0;
/*
 * Mutexes initialised and destroyed in main().
 * func1() acquires lock1 then lock2; func2() acquires lock2 then lock1.
 */
pthread_mutex_t lock1;
pthread_mutex_t lock2;
/*
 * Increment sequence1 under lock1, then (still holding lock1) increment
 * sequence2 under lock2.
 *
 * Lock order here is lock1 -> lock2. func2() takes the same two mutexes in
 * the opposite order, so running both concurrently can deadlock. The order
 * is intentionally left as-is: this program exists to demonstrate that
 * deadlock, and the analysis text after the program depends on it.
 *
 * Returns the value of sequence1 produced by this call, captured while
 * lock1 is held so the result is not read after the lock is released.
 */
int func1(void)
{
    int snapshot;

    pthread_mutex_lock(&lock1);
    snapshot = ++sequence1;
    sleep(1);                     /* hold lock1 for a second before asking for lock2 */
    pthread_mutex_lock(&lock2);
    ++sequence2;
    pthread_mutex_unlock(&lock2);
    pthread_mutex_unlock(&lock1);

    return snapshot;
}
/*
 * Increment sequence2 under lock2, then (still holding lock2) acquire lock1,
 * increment sequence2 again and sample sequence1.
 *
 * Lock order here is lock2 -> lock1, the reverse of func1(), so running both
 * concurrently can deadlock. The order is intentionally left as-is: this
 * program exists to demonstrate that deadlock.
 *
 * Returns the value of sequence1 read while lock1 is held. func1() modifies
 * sequence1 under lock1, so reading it after the unlock would be a data race.
 *
 * NOTE(review): the second increment also targets sequence2; by symmetry
 * with func1() it may have been meant to be sequence1 -- confirm intent.
 */
int func2(void)
{
    int snapshot;

    pthread_mutex_lock(&lock2);
    ++sequence2;
    sleep(1);                     /* hold lock2 for a second before asking for lock1 */
    pthread_mutex_lock(&lock1);
    ++sequence2;
    snapshot = sequence1;         /* sample while lock1 is still held */
    pthread_mutex_unlock(&lock1);
    pthread_mutex_unlock(&lock2);

    return snapshot;
}
/*
 * Thread entry point: calls func1() repeatedly and terminates the thread
 * once func1() returns 100000. The argument is unused.
 */
void *thread1(void *arg)
{
    (void)arg;

    for (;;) {
        int result = func1();

        if (result != 100000) {
            continue;
        }
        pthread_exit(NULL);
    }
}
/*
 * Thread entry point: calls func2() repeatedly and terminates the thread
 * once func2() returns 100000. The argument is unused.
 */
void *thread2(void *arg)
{
    (void)arg;

    for (;;) {
        int result = func2();

        if (result != 100000) {
            continue;
        }
        pthread_exit(NULL);
    }
}
/*
 * Thread entry point that takes no locks: sleeps one second per iteration
 * and terminates the thread once the iteration counter has passed 10000.
 * The argument is unused.
 */
void *thread3(void *arg)
{
    int ticks;

    (void)arg;

    for (ticks = 0; ; ++ticks) {
        sleep(1);
        /* ticks holds the pre-increment value the original compared. */
        if (ticks > 10000) {
            pthread_exit(NULL);
        }
    }
}
/*
 * Thread entry point that takes no locks: sleeps one second per iteration
 * and terminates the thread once the iteration counter has passed 10000.
 * The argument is unused.
 */
void *thread4(void *arg)
{
    int ticks;

    (void)arg;

    for (ticks = 0; ; ++ticks) {
        sleep(1);
        /* ticks holds the pre-increment value the original compared. */
        if (ticks > 10000) {
            pthread_exit(NULL);
        }
    }
}
/*
 * Program entry point: initialises lock1 and lock2, starts thread1..thread4,
 * waits five seconds, joins all four threads and destroys the mutexes.
 *
 * Every pthread call that reports an error code is checked. Failure to
 * initialise a mutex or create a thread is reported on stderr and ends the
 * process with status 1 (via _exit, as before); a failed join is reported
 * and the remaining threads are still joined.
 *
 * Returns 0 after all threads have been joined.
 */
int main(void)
{
    /* Start routines in creation order; tid[i] belongs to entry[i]. */
    void *(*const entry[])(void *) = { thread1, thread2, thread3, thread4 };
    enum { THREAD_COUNT = sizeof entry / sizeof entry[0] };
    pthread_t tid[THREAD_COUNT];
    int rc;
    int i;

    rc = pthread_mutex_init(&lock1, NULL);
    if (rc == 0) {
        rc = pthread_mutex_init(&lock2, NULL);
    }
    if (rc != 0) {
        fprintf(stderr, "pthread_mutex_init failed (error %d)\n", rc);
        _exit(1);
    }

    for (i = 0; i < THREAD_COUNT; ++i) {
        rc = pthread_create(&tid[i], NULL, entry[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create for thread%d failed (error %d)\n",
                    i + 1, rc);
            _exit(1);
        }
    }

    sleep(5);

    for (i = 0; i < THREAD_COUNT; ++i) {
        rc = pthread_join(tid[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join for thread%d failed (error %d)\n",
                    i + 1, rc);
        }
    }

    pthread_mutex_destroy(&lock1);
    pthread_mutex_destroy(&lock2);
    return 0;
}
编译执行程序。
gcc -o main main17.c -lpthread -g
使用 pstack 和 gdb 工具对死锁程序进行分析
1、使用pstack
查找测试程序的进程号
root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
root 7197 7179 0 10:04 pts/1 00:00:00 ./main
root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main
对死锁进程第一次执行 pstack(用法:pstack 进程号)的输出结果:
Thread 5 (Thread 0x41e37940 (LWP 6722)):
#0 0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0
#2 0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x0000000000400a9b in func1() ()
#4 0x0000000000400ad7 in thread1(void*) ()
#5 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#6 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 4 (Thread 0x42838940 (LWP 6723)):
#0 0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0
#2 0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x0000000000400a17 in func2() ()
#4 0x0000000000400a53 in thread2(void*) ()
#5 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#6 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 3 (Thread 0x43239940 (LWP 6724)):
#0 0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6
#1 0x0000003d19c9a364 in sleep () from /lib64/libc.so.6
#2 0x00000000004009bc in thread3(void*) ()
#3 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#4 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 2 (Thread 0x43c3a940 (LWP 6725)):
#0 0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6
#1 0x0000003d19c9a364 in sleep () from /lib64/libc.so.6
#2 0x0000000000400976 in thread4(void*) ()
#3 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#4 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x2b984ecabd90 (LWP 6721)):
#0 0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0
#1 0x0000000000400900 in main ()
对死锁进程第二次执行 pstack(用法:pstack 进程号)的输出结果:
Thread 5 (Thread 0x40bd6940 (LWP 6722)):
#0 0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0
#2 0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x0000000000400a87 in func1() ()
#4 0x0000000000400ac3 in thread1(void*) ()
#5 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#6 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 4 (Thread 0x415d7940 (LWP 6723)):
#0 0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0
#2 0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x0000000000400a03 in func2() ()
#4 0x0000000000400a3f in thread2(void*) ()
#5 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#6 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 3 (Thread 0x41fd8940 (LWP 6724)):
#0 0x0000003d19c7aec2 in memset () from /lib64/libc.so.6
#1 0x00000000004009be in thread3(void*) ()
#2 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#3 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 2 (Thread 0x429d9940 (LWP 6725)):
#0 0x0000003d19c7ae0d in memset () from /lib64/libc.so.6
#1 0x0000000000400982 in thread4(void*) ()
#2 0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0
#3 0x0000003d19cd40cd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x2af906fd9d90 (LWP 6721)):
#0 0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0
#1 0x0000000000400900 in main ()
连续多次查看这个进程的函数调用堆栈并进行分析:当进程挂死时,多次使用 pstack 查看进程的函数调用堆栈,发生死锁的线程将一直处于等锁的状态。对比多次的函数调用堆栈输出结果,
确定哪两个线程(或者哪几个线程)的堆栈一直没有变化且一直处于等锁的状态(可能存在两个线程一直没有变化)。
输出分析:
根据上面的输出对比可以发现,线程 2 和线程 3 由第一次 pstack 输出时处在 sleep 函数,变化为第二次 pstack 输出时处在 memset 函数;线程 1(主线程)两次都阻塞在 pthread_join 中等待其他线程结束。而线程 4 和线程 5 一直处在等锁状态(pthread_mutex_lock),
在连续两次的 pstack 信息输出中没有变化,所以我们可以推测线程 4 和线程 5 发生了死锁。
2、使用gdb进行进一步的分析
查找测试程序的进程号
root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
root 7197 7179 0 10:04 pts/1 00:00:00 ./main
root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main
使用gdb 的attach功能
gdb attach 7197
查看当前进程的线程信息
(gdb) info thread
Id Target Id Frame
5 Thread 0xb7539b40 (LWP 7198) "main" 0xb7717424 in __kernel_vsyscall ()
4 Thread 0xb6d38b40 (LWP 7199) "main" 0xb7717424 in __kernel_vsyscall ()
3 Thread 0xb6537b40 (LWP 7200) "main" 0xb7717424 in __kernel_vsyscall ()
2 Thread 0xb5d36b40 (LWP 7201) "main" 0xb7717424 in __kernel_vsyscall ()
* 1 Thread 0xb753a6c0 (LWP 7197) "main" 0xb7717424 in __kernel_vsyscall ()
切换到线程 5 的输出
(gdb) thread 5
[Switching to thread 5 (Thread 0xb7539b40 (LWP 7198))]
#0 0xb7717424 in __kernel_vsyscall ()
(gdb) where
#0 0xb7717424 in __kernel_vsyscall ()
#1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
#2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
#3 0xb76edcf3 in pthread_mutex_lock ()
from /lib/i386-linux-gnu/libpthread.so.0
#4 0x0804864b in func1 () at main17.c:17
#5 0x080486ef in thread1 (arg=0x0) at main17.c:44
#6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
#7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
(gdb) f 4
#4 0x0804864b in func1 () at main17.c:17
warning: Source file is more recent than executable.
17 pthread_mutex_lock(&lock2); ////线程 5 正试图获得锁 lock2
切换到线程4的输出
(gdb) thread 4
[Switching to thread 4 (Thread 0xb6d38b40 (LWP 7199))]
#0 0xb7717424 in __kernel_vsyscall ()
(gdb) where
#0 0xb7717424 in __kernel_vsyscall ()
#1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
#2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
#3 0xb76edcf3 in pthread_mutex_lock ()
from /lib/i386-linux-gnu/libpthread.so.0
#4 0x080486ae in func2 () at main17.c:30
#5 0x0804871c in thread2 (arg=0x0) at main17.c:58
#6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
#7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
(gdb) f 4
#4 0x080486ae in func2 () at main17.c:30
30 pthread_mutex_lock(&lock1); //线程 4 正试图获得锁 lock1
打印锁的信息
(gdb) p lock1
$1 = {__data = {__lock = 2, __count = 0, __owner = 7198, __kind = 0,
__nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
__size = "\002\000\000\000\000\000\000\000\036\034\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}
(gdb) p lock2
$2 = {__data = {__lock = 2, __count = 0, __owner = 7199, __kind = 0,
__nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
__size = "\002\000\000\000\000\000\000\000\037\034\000\000\000\000\000\000\001\000\000\000\000\000\000", __align = 2}
从上面可以发现,线程 4 正试图获得锁 lock1,但是锁 lock1 已经被 LWP 为 7198 的线程持有(__owner = 7198);
线程 5 正试图获得锁 lock2,但是锁 lock2 已经被 LWP 为 7199 的线程持有(__owner = 7199)。从 gdb 的 info thread 输出可以发现,LWP 7198 与线程 5 是对应的,LWP 7199 与线程 4 是对应的。
所以我们可以得出,线程 4 和线程 5 发生了交叉持锁的死锁现象。查看线程的源代码发现,线程 4 和线程 5 同时使用 lock1 和 lock2,且加锁顺序相反(func1 先锁 lock1 再锁 lock2,func2 先锁 lock2 再锁 lock1),申请顺序不合理。