Linux内核bind系统调用源码分析
一、环境说明
内核版本:Linux 3.10
内核源码地址:https://elixir.bootlin.com/linux/v3.10/source (包含各个版本内核源码,且网页可全局搜索函数)
二、应用层-bind()函数
将 socket 套接字绑定到指定的地址:
/* * sockfd:由socket函数返回的套接口描述符 * sockaddr:一个指向特定于协议的地址结构的指针 * socklen_t:该地址结构的长度 * return:若成功则为0,若出错则为-1 */ #include <sys/socket.h> struct sockaddr_in sock_addr; memset(&sock_addr,0,sizeof(sock_addr)); sock_addr.sin_family = AF_INET; sock_addr.sin_addr.s_addr = htonl(INADDR_ANY); sock_addr.sin_port = htons(SERVER_PORT); // int bind(int sockfd, const struct sockaddr *myaddr, socklen_t addrlen); err = bind(sockfd,(struct sockaddr*)(&sock_addr),sizeof(sock_addr));
三、BSD Socket层-sys_socketcall()函数
网络栈专用操作函数集的总入口函数,主要是对请求进行分发,根据调用号调用具体的底层函数进行处理:
// file: net/socket.c
/*
 * socketcall - common entry point that multiplexes the socket system calls.
 *
 * Dispatches on `call` to the matching sys_*() handler and returns that
 * handler's result. The "......" lines mark code omitted from this excerpt;
 * the declarations of err, a0, a1 and a[] are not shown here and are
 * presumably part of the omitted code.
 */
SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
......
	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a0, a1, a[2]);
		break;
	case SYS_BIND:
		/* bind(): a1 is treated as a user-space sockaddr pointer. */
		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a0, a1);
		break;
	case SYS_ACCEPT:
		/* accept() is routed to sys_accept4() with a final argument of 0. */
		err = sys_accept4(a0, (struct sockaddr __user *)a1, (int __user *)a[2], 0);
		break;
......
	}
	return err;
}
四、sys_bind()函数
// file: net/socket.c SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) { struct socket *sock; struct sockaddr_storage address; int err, fput_needed; sock = sockfd_lookup_light(fd, &err, &fput_needed); //获取fd对应的socket结构 if (sock) { err = move_addr_to_kernel(umyaddr, addrlen, &address); //将地址从用户缓冲区复制到内核缓冲区,umyaddr->address if (err >= 0) { err = security_socket_bind(sock,(struct sockaddr *)&address, addrlen); //SElinux相关,跳过 if (!err) err = sock->ops->bind(sock,(struct sockaddr *) &address, addrlen); //调用bind函数 } fput_light(sock->file, fput_needed); } return err; }
在上一篇文章中(socket系统调用分析),我们分析了sock->ops = answer->ops,而answer对应的结构:
// file: net/ipv4/af_inet.c static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, ...... }
// file: net/ipv4/af_inet.c const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, ...... };
因此,sock->ops->bind最终调用的是inet_bind函数。
五、inet_bind()函数
inet_bind()函数的主要工作:
如果具体协议实现了自己的bind函数(如RAW),则直接调用它并返回;
否则校验地址和端口是否合法、端口是否冲突、是否可绑定;
把传入的struct sockaddr中的地址和端口赋值给struct inet_sock(确定了四元组中的源IP、源端口)。
// file: net/ipv4/af_inet.c
/*
 * inet_bind - bind an AF_INET socket to a local address and port.
 *
 * @sock:      socket being bound
 * @uaddr:     address to bind to, interpreted as struct sockaddr_in
 * @addr_len:  length of @uaddr
 *
 * If the protocol provides its own bind method, that method's result is
 * returned directly. Otherwise returns 0 on success, or:
 *   -EINVAL         @addr_len is shorter than struct sockaddr_in, or the
 *                   socket is not in TCP_CLOSE state / already has inet_num set
 *   -EAFNOSUPPORT   family is neither AF_INET nor AF_UNSPEC with INADDR_ANY
 *   -EADDRNOTAVAIL  the address fails the local / multicast / broadcast check
 *                   and none of the non-local bind switches is enabled
 *   -EACCES         non-zero port below PROT_SOCK without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE     the protocol's get_port() returned non-zero
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	unsigned short snum;
	int chk_addr_ret;
	int err;

	/* If the socket has its own bind function then use it. (RAW) */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}
	err = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	if (addr->sin_family != AF_INET) {
		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
		 * only if s_addr is INADDR_ANY.
		 */
		err = -EAFNOSUPPORT;
		if (addr->sin_family != AF_UNSPEC ||
		    addr->sin_addr.s_addr != htonl(INADDR_ANY))
			goto out;
	}

	chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);

	/* Not specified by any standard per-se, however it breaks too
	 * many applications when removed. It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 * is temporarily down)
	 */
	err = -EADDRNOTAVAIL;
	if (!sysctl_ip_nonlocal_bind &&
	    !(inet->freebind || inet->transparent) &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST &&
	    chk_addr_ret != RTN_BROADCAST)
		goto out;

	/* Requested port in host byte order; 0 means no specific port was requested. */
	snum = ntohs(addr->sin_port);
	err = -EACCES;
	if (snum && snum < PROT_SOCK &&
	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
		goto out;

	/* We keep a pair of addresses. rcv_saddr is the one
	 * used by hash lookups, and saddr is used for transmit.
	 *
	 * In the BSD API these are the same except where it
	 * would be illegal to use them (multicast/broadcast) in
	 * which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;
	/* At bind time the state must still be TCP_CLOSE and no port may be set yet. */
	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
		goto out_release_sock;

	/* Bind the address. */
	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->inet_saddr = 0; /* Use device */

	/* Make sure we are allowed to bind here. */
	/* Check whether the port conflicts / can be bound; undo the address assignment on failure. */
	if (sk->sk_prot->get_port(sk, snum)) {
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

	if (inet->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	/* Bind the port. NOTE(review): inet_num is presumably filled in by get_port() - confirm. */
	inet->inet_sport = htons(inet->inet_num);
	inet->inet_daddr = 0;	/* clear the destination address */
	inet->inet_dport = 0;	/* clear the destination port */
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	release_sock(sk);
out:
	return err;
}
在上一篇文章中(socket系统调用分析),我们分析了sk->sk_prot = answer->prot,而answer对应的结构:
// file: net/ipv4/af_inet.c static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }, ...... }
// file: net/ipv4/tcp_ipv4.c struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, .close = tcp_close, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, .get_port = inet_csk_get_port, ...... }
对于TCP,tcp_prot未设置bind函数指针,所以sk->sk_prot->bind为NULL,inet_bind()不会进入"调用协议自有bind函数"的if分支,而是继续执行其后的通用绑定逻辑。
sk->sk_prot->get_port对应的则是inet_csk_get_port()函数,inet_csk_get_port()主要是校验端口是否冲突、是否可绑定。当未指定端口(snum为0)时,它会调用inet_get_local_port_range()函数获取内核设置的本地端口号范围(对应内核参数/proc/sys/net/ipv4/ip_local_port_range),并在该范围内选择一个可用端口。
六、bind代码流程图