我们先看一下服务器中存在僵尸进程的情况。
服务器是多进程模型,客户端是单进程。
服务器程序如下:
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Child side of one connection: print everything received on conn and echo
 * it back until the peer closes the connection or an error occurs.
 * Returns the exit status the child process should terminate with.
 */
static int serve_client(int conn)
{
    char recvbuf[1024];

    while (1)
    {
        /* Read one byte less than the buffer holds so the data can always
         * be NUL-terminated before it is handed to fputs(). */
        ssize_t ret = read(conn, recvbuf, sizeof(recvbuf) - 1);

        if (ret == 0)
        {
            printf("peer closed \n");
            return EXIT_SUCCESS;
        }
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            perror("read error");
            return EXIT_FAILURE;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);

        if (write_all(conn, recvbuf, (size_t)ret) < 0)
        {
            perror("write error");
            return EXIT_FAILURE;
        }
    }
}

/*
 * TCP echo server on 192.168.31.128:8001 that forks one child per accepted
 * connection.
 *
 * This version never waits for its children, so every child that exits
 * remains a zombie for as long as the server keeps running.
 */
int main()
{
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(EXIT_FAILURE);
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8001);
    /* Assign htonl(INADDR_ANY) to addr.sin_addr.s_addr instead to listen on
     * every local interface. */
    if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid listen address\n");
        exit(EXIT_FAILURE);
    }

    int optval = 1;
    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0)
    {
        perror("setsockopt error");
        exit(EXIT_FAILURE);
    }

    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
        perror("bind error");
        exit(EXIT_FAILURE);
    }

    if (listen(sockfd, SOMAXCONN) < 0)
    {
        perror("listen error");
        exit(EXIT_FAILURE);
    }

    /* The loop only ends when the process exits. */
    while (1)
    {
        struct sockaddr_in peeraddr;
        /* accept() reads peerlen as the size of peeraddr on entry, so it
         * has to be set before every call. */
        socklen_t peerlen = sizeof(peeraddr);

        int conn = accept(sockfd, (struct sockaddr *)&peeraddr, &peerlen);
        if (conn == -1)
        {
            if (errno == EINTR)
            {
                continue;
            }
            perror("accept error");
            exit(EXIT_FAILURE);
        }

        printf("peeraddr = %s\n peerport = %d\n",
               inet_ntoa(peeraddr.sin_addr), ntohs(peeraddr.sin_port));

        /* Flush now so buffered output is not duplicated into the child. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid == -1)
        {
            perror("fork error");
            exit(EXIT_FAILURE);
        }

        if (pid == 0)
        {
            /* The child only talks to its client; it never accepts. */
            close(sockfd);
            int status = serve_client(conn);
            close(conn);
            exit(status);
        }

        /* The child owns the connection now; release the parent's copy so
         * descriptors do not pile up with every new client. */
        close(conn);
    }
}
客户端程序如下:
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Interactive echo client: connects to 192.168.31.128:8001, sends each line
 * typed on stdin and prints what the server sends back.
 * Ends on end-of-file on stdin, when the server closes the connection, or on
 * an I/O error.
 */
int main()
{
    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(EXIT_FAILURE);
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8001);
    if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid server address\n");
        exit(EXIT_FAILURE);
    }

    if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
    {
        perror("connect error");
        exit(EXIT_FAILURE);
    }

    char recvbuf[1024];
    char sendbuf[1024];
    int status = EXIT_SUCCESS;

    while (fgets(sendbuf, sizeof(sendbuf), stdin) != NULL)
    {
        if (write_all(sockfd, sendbuf, strlen(sendbuf)) < 0)
        {
            perror("write error");
            status = EXIT_FAILURE;
            break;
        }

        /* A single read() per line keeps the example short; it prints
         * whatever part of the echo has arrived so far. One byte is kept
         * free for the terminating NUL that fputs() needs. */
        ssize_t ret = read(sockfd, recvbuf, sizeof(recvbuf) - 1);
        if (ret < 0)
        {
            perror("read error");
            status = EXIT_FAILURE;
            break;
        }
        if (ret == 0)
        {
            fprintf(stderr, "server closed the connection\n");
            break;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);
    }

    close(sockfd);

    return status;
}
执行结果如下:
可以看到,当客户端使用ctrl+c关闭时,服务器中对应的子进程成了僵尸进程。这是因为子进程已经退出,但是父进程没有调用wait/waitpid回收它,也就是没有进程给它收尸。我们可以调用signal(SIGCHLD, SIG_IGN)显式忽略SIGCHLD信号,这样子进程退出时由内核直接回收,也即让内核收尸,不会产生僵尸进程,但是工程中一般不这样做。一般的做法是为SIGCHLD注册信号处理函数,让父进程在处理函数中调用wait来收尸。
修改服务器程序,加入信号处理函数,如下所示:
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/*
 * SIGCHLD handler: announce the event and reap one terminated child.
 *
 * Only async-signal-safe calls are used (write() rather than printf()), and
 * errno is restored so the interrupted code does not see a value changed by
 * the handler.
 */
void handler(int num)
{
    static const char msg[] = "child die\n";
    int saved_errno = errno;

    (void)num;
    if (write(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0)
    {
        /* Nothing useful can be done about a failed write in a handler. */
    }
    wait(NULL);

    errno = saved_errno;
}

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Child side of one connection: print everything received on conn and echo
 * it back until the peer closes the connection or an error occurs.
 * Returns the exit status the child process should terminate with.
 */
static int serve_client(int conn)
{
    char recvbuf[1024];

    while (1)
    {
        /* Read one byte less than the buffer holds so the data can always
         * be NUL-terminated before it is handed to fputs(). */
        ssize_t ret = read(conn, recvbuf, sizeof(recvbuf) - 1);

        if (ret == 0)
        {
            printf("peer closed \n");
            return EXIT_SUCCESS;
        }
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            perror("read error");
            return EXIT_FAILURE;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);

        if (write_all(conn, recvbuf, (size_t)ret) < 0)
        {
            perror("write error");
            return EXIT_FAILURE;
        }
    }
}

/*
 * TCP echo server on 192.168.31.128:8001 that forks one child per accepted
 * connection and reaps exited children from a SIGCHLD handler.
 */
int main()
{
    if (signal(SIGCHLD, handler) == SIG_ERR)
    {
        perror("signal error");
        exit(EXIT_FAILURE);
    }

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(EXIT_FAILURE);
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8001);
    /* Assign htonl(INADDR_ANY) to addr.sin_addr.s_addr instead to listen on
     * every local interface. */
    if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid listen address\n");
        exit(EXIT_FAILURE);
    }

    int optval = 1;
    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0)
    {
        perror("setsockopt error");
        exit(EXIT_FAILURE);
    }

    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
        perror("bind error");
        exit(EXIT_FAILURE);
    }

    if (listen(sockfd, SOMAXCONN) < 0)
    {
        perror("listen error");
        exit(EXIT_FAILURE);
    }

    /* The loop only ends when the process exits. */
    while (1)
    {
        struct sockaddr_in peeraddr;
        /* accept() reads peerlen as the size of peeraddr on entry, so it
         * has to be set before every call. */
        socklen_t peerlen = sizeof(peeraddr);

        int conn = accept(sockfd, (struct sockaddr *)&peeraddr, &peerlen);
        if (conn == -1)
        {
            /* A SIGCHLD arriving while accept() is blocked may interrupt
             * it; that is not an error, just try again. */
            if (errno == EINTR)
            {
                continue;
            }
            perror("accept error");
            exit(EXIT_FAILURE);
        }

        printf("peeraddr = %s\n peerport = %d\n",
               inet_ntoa(peeraddr.sin_addr), ntohs(peeraddr.sin_port));

        /* Flush now so buffered output is not duplicated into the child. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid == -1)
        {
            perror("fork error");
            exit(EXIT_FAILURE);
        }

        if (pid == 0)
        {
            /* The child only talks to its client; it never accepts. */
            close(sockfd);
            int status = serve_client(conn);
            close(conn);
            exit(status);
        }

        /* The child owns the connection now; release the parent's copy so
         * descriptors do not pile up with every new client. */
        close(conn);
    }
}
执行结果如下:
由上图可以看到服务器中子进程死掉后,不存在僵尸进程。而且服务器还正常工作,我们重启客户端,依然可以正常发送数据。
下面我们研究多个客户端连接服务器,并且同时死掉的情况,模型如下:
上图中,5个客户端连接服务器,并且同时死掉,则TCP/IP协议栈会几乎同时向服务器发送5个FIN,服务器中的5个子进程也会几乎同时退出,这时就会向父进程连续发送5个SIGCHLD信号。但是我们知道SIGCHLD信号是不可靠信号(不支持排队):同一个信号在还没有被处理期间再次产生时,多次产生只会被记录为一次,所以有可能会丢失。父进程有可能会收到1个、2个……或者5个,具体收到几个SIGCHLD信号是不确定的。
服务器程序如下:
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/*
 * SIGCHLD handler: announce the event and reap one terminated child.
 *
 * Exactly one child is reaped per invocation. Standard signals are not
 * queued, so when several children exit at nearly the same time the handler
 * may run fewer times than children exited, and the children that were not
 * waited for stay zombies.
 *
 * Only async-signal-safe calls are used (write() rather than printf()), and
 * errno is restored so the interrupted code does not see a value changed by
 * the handler.
 */
void handler(int num)
{
    static const char msg[] = "child die\n";
    int saved_errno = errno;

    (void)num;
    if (write(STDOUT_FILENO, msg, sizeof(msg) - 1) < 0)
    {
        /* Nothing useful can be done about a failed write in a handler. */
    }
    wait(NULL);

    errno = saved_errno;
}

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Child side of one connection: print everything received on conn and echo
 * it back until the peer closes the connection or an error occurs.
 * Returns the exit status the child process should terminate with.
 */
static int serve_client(int conn)
{
    char recvbuf[1024];

    while (1)
    {
        /* Read one byte less than the buffer holds so the data can always
         * be NUL-terminated before it is handed to fputs(). */
        ssize_t ret = read(conn, recvbuf, sizeof(recvbuf) - 1);

        if (ret == 0)
        {
            printf("peer closed \n");
            return EXIT_SUCCESS;
        }
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            perror("read error");
            return EXIT_FAILURE;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);

        if (write_all(conn, recvbuf, (size_t)ret) < 0)
        {
            perror("write error");
            return EXIT_FAILURE;
        }
    }
}

/*
 * TCP echo server on 192.168.31.128:8001 that forks one child per accepted
 * connection and reaps one exited child per delivered SIGCHLD.
 */
int main()
{
    if (signal(SIGCHLD, handler) == SIG_ERR)
    {
        perror("signal error");
        exit(EXIT_FAILURE);
    }

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(EXIT_FAILURE);
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8001);
    /* Assign htonl(INADDR_ANY) to addr.sin_addr.s_addr instead to listen on
     * every local interface. */
    if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid listen address\n");
        exit(EXIT_FAILURE);
    }

    int optval = 1;
    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0)
    {
        perror("setsockopt error");
        exit(EXIT_FAILURE);
    }

    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
        perror("bind error");
        exit(EXIT_FAILURE);
    }

    if (listen(sockfd, SOMAXCONN) < 0)
    {
        perror("listen error");
        exit(EXIT_FAILURE);
    }

    /* The loop only ends when the process exits. */
    while (1)
    {
        struct sockaddr_in peeraddr;
        /* accept() reads peerlen as the size of peeraddr on entry, so it
         * has to be set before every call. */
        socklen_t peerlen = sizeof(peeraddr);

        int conn = accept(sockfd, (struct sockaddr *)&peeraddr, &peerlen);
        if (conn == -1)
        {
            /* A SIGCHLD arriving while accept() is blocked may interrupt
             * it; that is not an error, just try again. */
            if (errno == EINTR)
            {
                continue;
            }
            perror("accept error");
            exit(EXIT_FAILURE);
        }

        printf("peeraddr = %s\n peerport = %d\n",
               inet_ntoa(peeraddr.sin_addr), ntohs(peeraddr.sin_port));

        /* Flush now so buffered output is not duplicated into the child. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid == -1)
        {
            perror("fork error");
            exit(EXIT_FAILURE);
        }

        if (pid == 0)
        {
            /* The child only talks to its client; it never accepts. */
            close(sockfd);
            int status = serve_client(conn);
            close(conn);
            exit(status);
        }

        /* The child owns the connection now; release the parent's copy so
         * descriptors do not pile up with every new client. */
        close(conn);
    }
}
客户端程序如下:
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/* Number of simultaneous connections opened to the server. */
enum { NUM_CONNS = 5 };

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Test client that opens NUM_CONNS connections to 192.168.31.128:8001 but
 * only talks over the first one: each line typed on stdin is sent on
 * sockfd[0] and the reply is printed. The other connections stay idle, so
 * that all of them are closed together when the client terminates.
 */
int main()
{
    int sockfd[NUM_CONNS];

    for (int i = 0; i < NUM_CONNS; i++)
    {
        sockfd[i] = socket(AF_INET, SOCK_STREAM, 0);
        if (sockfd[i] == -1)
        {
            perror("socket error");
            exit(EXIT_FAILURE);
        }

        struct sockaddr_in addr;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(8001);
        if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
        {
            fprintf(stderr, "invalid server address\n");
            exit(EXIT_FAILURE);
        }

        if (connect(sockfd[i], (struct sockaddr *)&addr, sizeof(addr)) == -1)
        {
            perror("connect error");
            exit(EXIT_FAILURE);
        }
    }

    char recvbuf[1024];
    char sendbuf[1024];
    int status = EXIT_SUCCESS;

    while (fgets(sendbuf, sizeof(sendbuf), stdin) != NULL)
    {
        if (write_all(sockfd[0], sendbuf, strlen(sendbuf)) < 0)
        {
            perror("write error");
            status = EXIT_FAILURE;
            break;
        }

        /* A single read() per line keeps the example short; it prints
         * whatever part of the echo has arrived so far. One byte is kept
         * free for the terminating NUL that fputs() needs. */
        ssize_t ret = read(sockfd[0], recvbuf, sizeof(recvbuf) - 1);
        if (ret < 0)
        {
            perror("read error");
            status = EXIT_FAILURE;
            break;
        }
        if (ret == 0)
        {
            fprintf(stderr, "server closed the connection\n");
            break;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);
    }

    for (int i = 0; i < NUM_CONNS; i++)
    {
        close(sockfd[i]);
    }

    return status;
}
客户端中我们建立了5个连接,但是只用第一个连接和服务器通信,执行结果和网络状态如下:
现在我们杀死客户端,结果如下:
从输出可以看到,服务器中的父进程只收到了一个SIGCHLD信号,也只给一个子进程收了尸,其他四个子进程成了僵尸进程。
为了消除这种服务器模型下的僵尸进程,我们改进服务器程序,如下所示:
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/*
 * Print "child <pid> die\n" on standard output using only write(), so that
 * it can be called from a signal handler. pid must be positive.
 */
static void report_child_exit(pid_t pid)
{
    char msg[48];
    char digits[24];
    size_t ndigits = 0;
    size_t len = 0;
    unsigned long value = (unsigned long)pid;

    /* Produce the decimal digits, least significant first. */
    do
    {
        digits[ndigits++] = (char)('0' + value % 10);
        value /= 10;
    } while (value != 0);

    memcpy(msg, "child ", 6);
    len = 6;
    while (ndigits > 0)
    {
        msg[len++] = digits[--ndigits];
    }
    memcpy(msg + len, " die\n", 5);
    len += 5;

    if (write(STDOUT_FILENO, msg, len) < 0)
    {
        /* Nothing useful can be done about a failed write in a handler. */
    }
}

/*
 * SIGCHLD handler: reap every child that has already terminated.
 *
 * waitpid(-1, ..., WNOHANG) is called in a loop until it stops returning a
 * pid, so all exited children are collected in one invocation even when
 * fewer SIGCHLD signals were delivered than children exited. errno is
 * restored so the interrupted code does not see a value changed here.
 */
void handler(int num)
{
    int saved_errno = errno;
    pid_t mypid;

    (void)num;
    while ((mypid = waitpid(-1, NULL, WNOHANG)) > 0)
    {
        report_child_exit(mypid);
    }

    errno = saved_errno;
}

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Child side of one connection: print everything received on conn and echo
 * it back until the peer closes the connection or an error occurs.
 * Returns the exit status the child process should terminate with.
 */
static int serve_client(int conn)
{
    char recvbuf[1024];

    while (1)
    {
        /* Read one byte less than the buffer holds so the data can always
         * be NUL-terminated before it is handed to fputs(). */
        ssize_t ret = read(conn, recvbuf, sizeof(recvbuf) - 1);

        if (ret == 0)
        {
            printf("peer closed \n");
            return EXIT_SUCCESS;
        }
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            perror("read error");
            return EXIT_FAILURE;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);

        if (write_all(conn, recvbuf, (size_t)ret) < 0)
        {
            perror("write error");
            return EXIT_FAILURE;
        }
    }
}

/*
 * TCP echo server on 192.168.31.128:8001 that forks one child per accepted
 * connection and reaps all exited children from a SIGCHLD handler.
 */
int main()
{
    if (signal(SIGCHLD, handler) == SIG_ERR)
    {
        perror("signal error");
        exit(EXIT_FAILURE);
    }

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1)
    {
        perror("socket error");
        exit(EXIT_FAILURE);
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8001);
    /* Assign htonl(INADDR_ANY) to addr.sin_addr.s_addr instead to listen on
     * every local interface. */
    if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid listen address\n");
        exit(EXIT_FAILURE);
    }

    int optval = 1;
    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0)
    {
        perror("setsockopt error");
        exit(EXIT_FAILURE);
    }

    if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
        perror("bind error");
        exit(EXIT_FAILURE);
    }

    if (listen(sockfd, SOMAXCONN) < 0)
    {
        perror("listen error");
        exit(EXIT_FAILURE);
    }

    /* The loop only ends when the process exits. */
    while (1)
    {
        struct sockaddr_in peeraddr;
        /* accept() reads peerlen as the size of peeraddr on entry, so it
         * has to be set before every call. */
        socklen_t peerlen = sizeof(peeraddr);

        int conn = accept(sockfd, (struct sockaddr *)&peeraddr, &peerlen);
        if (conn == -1)
        {
            /* A SIGCHLD arriving while accept() is blocked may interrupt
             * it; that is not an error, just try again. */
            if (errno == EINTR)
            {
                continue;
            }
            perror("accept error");
            exit(EXIT_FAILURE);
        }

        printf("peeraddr = %s\n peerport = %d\n",
               inet_ntoa(peeraddr.sin_addr), ntohs(peeraddr.sin_port));

        /* Flush now so buffered output is not duplicated into the child. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid == -1)
        {
            perror("fork error");
            exit(EXIT_FAILURE);
        }

        if (pid == 0)
        {
            /* The child only talks to its client; it never accepts. */
            close(sockfd);
            int status = serve_client(conn);
            close(conn);
            exit(status);
        }

        /* The child owns the connection now; release the parent's copy so
         * descriptors do not pile up with every new client. */
        close(conn);
    }
}
客户端程序如下:
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> /* superset of previous */

/* Number of simultaneous connections opened to the server. */
enum { NUM_CONNS = 5 };

/*
 * Write all len bytes of buf to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on failure (errno is left as set by write()).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = write(fd, buf, len);
        if (n < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Test client that opens NUM_CONNS connections to 192.168.31.128:8001 and
 * prints the local address of each one, but only talks over the first:
 * each line typed on stdin is sent on sockfd[0] and the reply is printed.
 * The other connections stay idle, so that all of them are closed together
 * when the client terminates.
 */
int main()
{
    int sockfd[NUM_CONNS];

    for (int i = 0; i < NUM_CONNS; i++)
    {
        sockfd[i] = socket(AF_INET, SOCK_STREAM, 0);
        if (sockfd[i] == -1)
        {
            perror("socket error");
            exit(EXIT_FAILURE);
        }

        struct sockaddr_in addr;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(8001);
        if (inet_pton(AF_INET, "192.168.31.128", &addr.sin_addr) != 1)
        {
            fprintf(stderr, "invalid server address\n");
            exit(EXIT_FAILURE);
        }

        if (connect(sockfd[i], (struct sockaddr *)&addr, sizeof(addr)) == -1)
        {
            perror("connect error");
            exit(EXIT_FAILURE);
        }

        /* getsockname() section: this socket was never bound explicitly,
         * so ask which local address and port it ended up with once the
         * connection is established. */
        struct sockaddr_in localaddr;
        socklen_t addrlen = sizeof(localaddr);
        if (getsockname(sockfd[i], (struct sockaddr *)&localaddr, &addrlen) < 0)
        {
            perror("getsockname error");
            exit(EXIT_FAILURE);
        }

        printf("ip=%s port=%d\n",
               inet_ntoa(localaddr.sin_addr), ntohs(localaddr.sin_port));
    }

    char recvbuf[1024];
    char sendbuf[1024];
    int status = EXIT_SUCCESS;

    while (fgets(sendbuf, sizeof(sendbuf), stdin) != NULL)
    {
        if (write_all(sockfd[0], sendbuf, strlen(sendbuf)) < 0)
        {
            perror("write error");
            status = EXIT_FAILURE;
            break;
        }

        /* A single read() per line keeps the example short; it prints
         * whatever part of the echo has arrived so far. One byte is kept
         * free for the terminating NUL that fputs() needs. */
        ssize_t ret = read(sockfd[0], recvbuf, sizeof(recvbuf) - 1);
        if (ret < 0)
        {
            perror("read error");
            status = EXIT_FAILURE;
            break;
        }
        if (ret == 0)
        {
            fprintf(stderr, "server closed the connection\n");
            break;
        }

        recvbuf[ret] = '\0';
        fputs(recvbuf, stdout);
    }

    for (int i = 0; i < NUM_CONNS; i++)
    {
        close(sockfd[i]);
    }

    return status;
}
执行结果如下:
可以看到客户端关掉之后,服务器中的子进程全部退出,而且正常的被父进程收尸了,没有产生僵尸进程。我们改进了服务器端的信号处理函数,调用了waitpid,具体如下:mypid = waitpid(-1, NULL, WNOHANG)
第一个参数-1表示等待任意一个子进程;第三个参数WNOHANG表示不阻塞:如果还有子进程存在,但当前没有已经退出的子进程可以回收,waitpid函数立即返回0;如果已经没有任何子进程,则返回-1,errno为ECHILD。加上while循环,一次信号处理就可以把当时所有已经退出的子进程全部回收,即使中间丢失了若干个SIGCHLD信号,也不会留下僵尸进程。
客户端程序中,在connect成功之后我们增加了对getsockname函数的调用,这个函数用于获取套接字上绑定的本地地址(包括ip地址和端口号)。我们客户端程序一般不调用bind指定本地地址,而是在connect时由内核自动分配;而物理机一般有多个ip,所以想要知道实际使用的本地地址和端口,只能调用这个函数获取。对于这种没有显式bind的套接字,必须在连接建立之后调用,才能得到有效的结果。
getpeername是获取对端的地址,参数也必须是已连接的套接字。假设服务器调用accept函数时,没有传入有效指针来获取对等端IP,但是后面有可能想获取对等端IP,则这时候可以调用getpeername来完成这项工作。