TCP/IP源码学习(53)——TCP的连接过程的实现(2)

http://blog.chinaunix.net/uid-23629988-id-3179301.html

作者:gfree.wind@gmail.com
博客:blog.focus-linux.net   linuxfocus.blog.chinaunix.net
 
 
本文的copyleft归gfree.wind@gmail.com所有,使用GPL发布,可以自由拷贝,转载。但转载请保持文档的完整性,注明原作者及原链接,严禁用于任何商业用途。
======================================================================================================
昨天写了一篇博文关于TCP的连接过程的实现——主要是接到第一个syn包的处理。那部分代码有不少地方没有看明白,只不过走了一遍流程。惭愧。

今天继续昨天的流程,在回复了syn+ack包后,新创建的request_sock结构被加入到父socket的icsk_accept_queue中。接下来不考虑错误等情况,如重传。接下来就考虑如何处理TCP三次握手中的最后一个ack包。

依然按照前文中的流程,最后一个ack包会进入函数tcp_v4_do_rcv。此时,匹配到的仍然是父socket,即处于listening状态的socket,因此会再次调用tcp_v4_hnd_req——前文并没有对这个函数进行分析,只是说明了对于第一个syn包,该函数返回的仍然是传入的参数sk。

下面看一下tcp_v4_hnd_req的代码
    /*
     * Look up a pending connection request that matches the incoming segment
     * on sk and, if one exists, let tcp_check_req() decide what to do with it.
     *
     * sk:  socket the segment was matched to (the listening parent socket,
     *      according to the surrounding article).
     * skb: the received segment.
     *
     * Returns the result of tcp_check_req() when a request is found. The
     * handling of the "no request found" case is elided in this excerpt.
     */
    static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
    {
        struct tcphdr *th = tcp_hdr(skb);
        const struct iphdr *iph = ip_hdr(skb);
        struct sock *nsk;
        struct request_sock **prev;
        /* Find possible connection requests. */
        /*
         * When the SYN was handled earlier, the matching request_sock was
         * added to listen_opt inside icsk_accept_queue, so this lookup can
         * now find req.
         * Note that the lookup also yields prev, the element that precedes
         * the matched request_sock in the queue. Handing prev on to
         * tcp_check_req() lets it unlink req later without searching again.
         */
        struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
                             iph->saddr, iph->daddr);
        if (req)
            return tcp_check_req(sk, skb, req, prev);

     ...... ......

    }

进入tcp_check_req

    /*
     * Validate a segment received for a pending connection request and, when
     * it is acceptable, create the child socket for the new connection.
     *
     * sk:   parent socket that owns the request.
     * skb:  the received segment.
     * req:  the pending connection request matched to this segment.
     * prev: link preceding req in the request queue, used when unlinking.
     *
     * Returns the newly created child socket on success. Returns NULL when
     * the segment is a retransmitted SYN (the SYN-ACK is sent again), when the
     * child cannot be created, or after the request has been reset and
     * dropped. A number of intermediate checks are elided in this excerpt.
     */
    struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                 struct request_sock *req,
                 struct request_sock **prev)
    {
        struct tcp_options_received tmp_opt;
        const u8 *hash_location;
        struct sock *child;
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
        int paws_reject = 0;

     
     /* saw_tstamp must be reset every time because it depends on each individual TCP segment. */

        tmp_opt.saw_tstamp = 0;
        if (th->doff > (sizeof(struct tcphdr)>>2)) {
            /* The TCP header carries options: parse them. */
            tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

         /* 
         The TCP header contains a Timestamp option.
         This option serves two purposes:
         1. Measuring the RTT
         2. PAWS, i.e. Protection Against Wrapped Sequence numbers
         See RFC 1323.
         */

            if (tmp_opt.saw_tstamp) {
                /* This is where the PAWS check is performed. */
                tmp_opt.ts_recent = req->ts_recent;
                /* We do not store true stamp, but it is not required,
                 * it can be estimated (approximately)
                 * from another data.
                 */
                tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
                paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
            }
        }

        /* Check for pure retransmitted SYN. */
        if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
         flg == TCP_FLAG_SYN &&
         !paws_reject) {
            /* This is a retransmitted SYN (same sequence number): answer with SYN-ACK again. */
            /*
             * RFC793 draws ( It was fixed in RFC1122)
             * this case on figure 6 and figure 8, but formal
             * protocol description says NOTHING.
             * To be more exact, it says that we should send ACK,
             * because this segment (at least, if it has no data)
             * is out of window.
             *
             * CONCLUSION: RFC793 (even with RFC1122) DOES NOT
             * describe SYN-RECV state. All the description
             * is wrong, we cannot believe to it and should
             * rely only on common sense and implementation
             * experience.
             *
             * Enforce "SYN-ACK" according to figure 8, figure 6
             * of RFC793, fixed by RFC1122.
             */
            req->rsk_ops->rtx_syn_ack(sk, req, NULL);
            return NULL;
        }

      /* A large number of checks and validations are omitted here; see the kernel source, whose comments are clear. */
      ...... ......


        /* OK, ACK is valid, create big socket and
         * feed this segment to it. It will repeat all
         * the tests. THIS SEGMENT MUST MOVE SOCKET TO
         * ESTABLISHED STATE. If it will be dropped after
         * socket is created, wait for troubles.
         */
        /*
        For TCP over IPv4 the syn_recv_sock implementation is tcp_v4_syn_recv_sock.
        It is not examined here; its main job is to build a new socket from the
        information in sk, skb and req.
        */
        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
        if (child == NULL)
            goto listen_overflow;

      /* Use prev to remove req from the listen_opt of the accept queue. */

        inet_csk_reqsk_queue_unlink(sk, req, prev);
        inet_csk_reqsk_queue_removed(sk, req);

     
      /* Now req and the new child socket are really added to the accept queue of the parent socket sk. */
      /* Do not confuse this with inet_csk_reqsk_queue_hash_add from the previous article, */
      /* which adds the request_sock to the listen queue. */

        inet_csk_reqsk_queue_add(sk, req, child);
        /* Return the newly created child socket. */
        return child;

    listen_overflow:
        /* Child creation failed: unless abort-on-overflow is enabled, just mark the request as acked and keep it. */
        if (!sysctl_tcp_abort_on_overflow) {
            inet_rsk(req)->acked = 1;
            return NULL;
        }

        /* Otherwise fall through: count the event, send a reset unless the segment itself carried RST, and drop the request. */
    embryonic_reset:
        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
        if (!(flg & TCP_FLAG_RST))
            req->rsk_ops->send_reset(sk, skb);

        inet_csk_reqsk_queue_drop(sk, req, prev);
        return NULL;
    }

对于最后一个ack包,tcp_v4_hnd_req最终返回的就是上面函数中新创建的socket。于是在tcp_v4_do_rcv中,就会进入下面的函数tcp_child_process
    /*
     * Deliver a segment to a freshly created child socket.
     *
     * parent: socket whose sk_data_ready callback is invoked when the child
     *         leaves TCP_SYN_RECV.
     * child:  socket that should receive the segment; it is unlocked with
     *         bh_unlock_sock() and released with sock_put() before returning.
     * skb:    the segment to process.
     *
     * Returns the result of tcp_rcv_state_process(), or 0 when the segment
     * was only queued on the child's backlog.
     */
    int tcp_child_process(struct sock *parent, struct sock *child,
             struct sk_buff *skb)
    {
        const int old_state = child->sk_state;
        int rc = 0;

        if (sock_owned_by_user(child)) {
            /* Alas, it is possible again, because we do lookup
             * in main socket hash table and lock on listening
             * socket does not protect us more.
             */
            __sk_add_backlog(child, skb);
        } else {
            /* The child is not owned by a user context: process the segment now. */
            rc = tcp_rcv_state_process(child, skb, tcp_hdr(skb), skb->len);

            /* Wakeup parent, send SIGIO: the child was in SYN_RECV and has just left it. */
            if (old_state == TCP_SYN_RECV && child->sk_state != TCP_SYN_RECV)
                parent->sk_data_ready(parent, 0);
        }

        bh_unlock_sock(child);
        sock_put(child);
        return rc;
    }

今天有些困了。三次握手中的最后一个ack包的处理还是没有看完。不继续坚持看了,没有效率了。明天继续。在完成被动连接的三次握手后,还会看看主动连接的流程。



posted @ 2016-09-30 10:17  张同光  阅读(181)  评论(0)  编辑  收藏  举报