一个死锁问题
很长时间没有写多线程的程序了,这周写了个应用层协议栈的 demo。模型很简单,是基本的 C/S 架构:server 端收到一个连接请求,就创建一个收包线程来处理收到的报文。基本代码如下:
#include <stdio.h>
#include <pthread.h>
#include <sys/socket.h>
#include <netinet/in.h>

#define DEF_MAX_CONNECTION 10
#define DEF_PACK_BUF 512

/* Arguments handed from main() to one threadRcvPkt() instance. */
struct threadPara
{
    int sockFd;                       /* connected socket, filled in by accept() */
    int localSockFd;                  /* listening socket passed to accept() */
    struct sockaddr_in *pstLocalAddr; /* address buffer passed to accept() */
};

pthread_mutex_t thread_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t thread_cond = PTHREAD_COND_INITIALIZER;

/*
 * Packet parser, not part of this listing.
 * NOTE(review): prototype assumed from the call site below -- confirm
 * against the real definition.
 */
int pakcetProcess(char *apBuf, unsigned int *apExistedLen);

/*
 * Set up the listening socket.  The body is omitted in this listing, so
 * neither output argument is written here.
 * Returns 0.
 */
int initNetwork(int *aPSockFd, struct sockaddr_in *apstAddr)
{
    (void)aPSockFd;
    (void)apstAddr;

    return 0;
}

/*
 * Receive packets from the remote termination.
 *
 * aParam points to the struct threadPara prepared by main().  The thread
 * signals thread_cond once it has picked up its argument, accepts one
 * connection on localSockFd, then loops forever receiving data and calling
 * pakcetProcess().
 * Returns NULL if accept() fails; otherwise it does not return.
 */
void *threadRcvPkt(void *aParam)
{
    char szProcessBuf[2 * DEF_PACK_BUF] = {0};
    unsigned int luiExistedLen = 0;
    char lRcvBuf[DEF_PACK_BUF] = {0};
    int lRcvLen = 0;
    socklen_t len = sizeof(struct sockaddr_in); /* accept() takes a socklen_t * */

    pthread_mutex_lock(&thread_mutex);
    struct threadPara *pstPara = (struct threadPara *)aParam;
    pthread_mutex_unlock(&thread_mutex);
    /* Tell main() the argument has been picked up.  If main() is not yet
     * blocked in pthread_cond_wait(), this signal is simply lost. */
    pthread_cond_signal(&thread_cond);

    pstPara->sockFd = accept(pstPara->localSockFd,
                             (struct sockaddr *)pstPara->pstLocalAddr, &len);
    if (0 > pstPara->sockFd)
    {
        printf("accept error \n");
        return NULL;
    }

    while (1)
    {
        lRcvLen = (int)recv(pstPara->sockFd, lRcvBuf, DEF_PACK_BUF, 0);
        if (0 > lRcvLen)
        {
            printf("receive packet failed! \n");
            continue;
        }
        pakcetProcess(szProcessBuf, &luiExistedLen);
    }
}

/*
 * Start DEF_MAX_CONNECTION receiver threads one at a time, waiting on
 * thread_cond after each pthread_create(), then join the first thread.
 * Returns 1 if a thread cannot be created, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    struct sockaddr_in lstAddr;
    pthread_t pid[DEF_MAX_CONNECTION];
    int liSockFd = -1;
    struct threadPara lstFuncPara[DEF_MAX_CONNECTION];

    (void)argc;
    (void)argv;

    initNetwork(&liSockFd, &lstAddr); /* initialise the socket */

    for (int i = 0; i < DEF_MAX_CONNECTION; i++)
    {
        pthread_mutex_lock(&thread_mutex);
        lstFuncPara[i].pstLocalAddr = &lstAddr;
        lstFuncPara[i].localSockFd = liSockFd;
        lstFuncPara[i].sockFd = -1;
        if (0 != pthread_create(&pid[i], NULL, threadRcvPkt, &lstFuncPara[i]))
        {
            printf("create %d-th thread failed \n", i + 1);
            return 1;
        }
        /*
         * NOTE: this unlock-then-wait ordering is the defect that the
         * accompanying text analyses, and it is kept here on purpose.
         * Between the unlock and the wait the new thread can run and call
         * pthread_cond_signal() before main() is waiting, so the wakeup is
         * lost.  pthread_cond_wait() must also be called with the mutex
         * held.  The correct form calls pthread_cond_wait() between
         * pthread_mutex_lock() and pthread_mutex_unlock().
         */
        pthread_mutex_unlock(&thread_mutex);
        pthread_cond_wait(&thread_cond, &thread_mutex);
    }

    pthread_join(pid[0], NULL);
    pthread_mutex_destroy(&thread_mutex);
    pthread_cond_destroy(&thread_cond);
    return 0;
}
通过运行代码发现,程序在创建了两个接收线程以后发生了死锁。跟踪代码发现,主线程一直在等待接收线程发出的条件变量信号。奇怪的是,主线程此时已经在互斥区间以外等待,接收线程不可能获取不到互斥锁,所以应该是接收线程发出的信号没有被主线程收到。查阅 pthread 库的手册发现,POSIX 线程库中的条件变量信号与 Windows 线程库中的事件类似,但有一点很大的不同:POSIX 的条件变量信号在发送时如果没有线程正在等待,就直接失效了;而 Windows 中的事件会一直有效,直到有线程接收。因此,产生此问题的根本原因是主线程没有先进入等待,导致接收线程发出的信号失效。根据以上的代码,应该将 pthread_cond_wait 放在 pthread_mutex_lock 和 pthread_mutex_unlock 之间:pthread_cond_wait 在进入等待时会原子地释放互斥锁,返回前再重新加锁,这样接收线程只有在主线程已经开始等待之后才能拿到锁并发出信号,问题即可迎刃而解。