Linux内核源代码解析——用户发送数据包的起源之sendto
本文原创为freas_1990,转载请标明出处:http://blog.csdn.net/freas_1990/article/details/10162853
Jack:我想知道用户如何把数据发送到内核空间的?
我:你觉得哪里比较难理解呢?
Jack:一般程序员会在程序里通过socket()调用获得一个文件描述符,然后通过write把定义好的字符串写入到该描述符。
我:是的。你有什么不明白的吗?
Jack:可是,我不知道这个write底层到底会做什么。
我:这个write底层会调用sock_send函数。我给你看一下这个函数的定义。
static int sock_send(int fd, void * buff, int len, unsigned flags) { struct socket *sock; struct file *file; DPRINTF((net_debug, "NET: sock_send(fd = %d, buff = %X, len = %d, flags = %X)\n", fd, buff, len, flags)); if (fd < 0 || fd >= NR_OPEN || ((file = current->filp[fd]) == NULL)) return(-EBADF); if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); return(sock->ops->send(sock, buff, len, (file->f_flags & O_NONBLOCK), flags)); }
sock_send函数通过用户传入的socket描述符fd找到对应的struct socket结构,然后把找到的socket结构(sock)、buff(这是一个逻辑地址)、len,以及文件flag传入传输层的对应函数。
最后一个语句return调用了一个函数指针(这就是函数指针的妙处!)。如果下层对应的传输层协议是UDP协议,这个函数指针最终就会调用到udp_write。
/*
 * Write entry point for UDP: forwards the request to udp_sendto() with no
 * explicit destination (NULL address, zero address length) and returns
 * udp_sendto()'s result unchanged.
 */
static int udp_write(struct sock *sk, unsigned char *buff, int len,
		     int noblock, unsigned flags)
{
	int rc;

	rc = udp_sendto(sk, buff, len, noblock, flags, NULL, 0);
	return rc;
}
udp_write其实只是一个包裹函数,真正干活儿的是udp_sendto。
static int udp_sendto(struct sock *sk, unsigned char *from, int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len) { struct sockaddr_in sin; int tmp; int err; DPRINTF((DBG_UDP, "UDP: sendto(len=%d, flags=%X)\n", len, flags)); /* Check the flags. */ if (flags) return(-EINVAL); if (len < 0) return(-EINVAL); if (len == 0) return(0); /* Get and verify the address. */ if (usin) { if (addr_len < sizeof(sin)) return(-EINVAL); err=verify_area(VERIFY_READ, usin, sizeof(sin)); if(err) return err; memcpy_fromfs(&sin, usin, sizeof(sin)); if (sin.sin_family && sin.sin_family != AF_INET) return(-EINVAL); if (sin.sin_port == 0) return(-EINVAL); } else { if (sk->state != TCP_ESTABLISHED) return(-EINVAL); sin.sin_family = AF_INET; sin.sin_port = sk->dummy_th.dest; sin.sin_addr.s_addr = sk->daddr; } if(!sk->broadcast && chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST) return -EACCES;/* Must turn broadcast on first */ sk->inuse = 1; /* Send the packet. */ tmp = udp_send(sk, &sin, from, len); /* The datagram has been sent off. Release the socket. */ release_sock(sk); return(tmp); }
这其实也是一个包裹函数,真正干活的是udp_send函数。
static int udp_send(struct sock *sk, struct sockaddr_in *sin, unsigned char *from, int len) { struct sk_buff *skb; struct device *dev; struct udphdr *uh; unsigned char *buff; unsigned long saddr; int size, tmp; int err; DPRINTF((DBG_UDP, "UDP: send(dst=%s:%d buff=%X len=%d)\n", in_ntoa(sin->sin_addr.s_addr), ntohs(sin->sin_port), from, len)); err=verify_area(VERIFY_READ, from, len); if(err) return(err); /* Allocate a copy of the packet. */ size = sizeof(struct sk_buff) + sk->prot->max_header + len; skb = sk->prot->wmalloc(sk, size, 0, GFP_KERNEL); if (skb == NULL) return(-ENOMEM); skb->mem_addr = skb; skb->mem_len = size; skb->sk = NULL; /* to avoid changing sk->saddr */ skb->free = 1; skb->arp = 0; /* Now build the IP and MAC header. */ buff = skb->data; saddr = 0; dev = NULL; DPRINTF((DBG_UDP, "UDP: >> IP_Header: %X -> %X dev=%X prot=%X len=%d\n", saddr, sin->sin_addr.s_addr, dev, IPPROTO_UDP, skb->mem_len)); tmp = sk->prot->build_header(skb, saddr, sin->sin_addr.s_addr, &dev, IPPROTO_UDP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl); skb->sk=sk; /* So memory is freed correctly */ if (tmp < 0 ) { sk->prot->wfree(sk, skb->mem_addr, skb->mem_len); return(tmp); } buff += tmp; saddr = dev->pa_addr; DPRINTF((DBG_UDP, "UDP: >> MAC+IP len=%d\n", tmp)); skb->len = tmp + sizeof(struct udphdr) + len; /* len + UDP + IP + MAC */ skb->dev = dev; #ifdef OLD /* * This code used to hack in some form of fragmentation. * I removed that, since it didn't work anyway, and it made the * code a bad thing to read and understand. -FvK */ if (len > dev->mtu) { #else if (skb->len > 4095) { #endif printk("UDP: send: length %d > mtu %d (ignored)\n", len, dev->mtu); sk->prot->wfree(sk, skb->mem_addr, skb->mem_len); return(-EMSGSIZE); } /* Fill in the UDP header. */ uh = (struct udphdr *) buff; uh->len = htons(len + sizeof(struct udphdr)); uh->source = sk->dummy_th.source; uh->dest = sin->sin_port; buff = (unsigned char *) (uh + 1); /* Copy the user data. 
*/ memcpy_fromfs(buff, from, len); /* Set up the UDP checksum. */ udp_send_check(uh, saddr, sin->sin_addr.s_addr, skb->len - tmp, sk); /* Send the datagram to the interface. */ sk->prot->queue_xmit(sk, dev, skb, 1); return(len); }
这个函数里真正干活的是memcpy_fromfs函数,执行完了这个函数,数据就已经从用户空间拷贝到内核空间了。
之后的sk->prot->queue_xmit(sk, dev, skb, 1);通过函数指针把sk上的skb这个数据包排入发送队列。