TCP定时器 之 重传定时器

注:这部分还没有完全分析透彻,先在此记录,后面回顾的时候再进行补充;

启动定时器:

(1) 之前发送的数据段已经得到确认,新发出一个数据段之后设定;

(2) 新建连接发送syn之后设定;

(3) PMTU探测失败之后设定;

(4) 检测到接收方丢弃了此前已通过SACK确认的段(SACK reneging)时设定;

 

定时器回调函数:

重传定时器超时后会调用该回调函数,它根据连接控制块中记录的待处理事件类型(icsk_pending)分别调用不同的函数进行处理;这里我们只关心ICSK_TIME_RETRANS类型(重传类型),其具体的重传处理由函数tcp_retransmit_timer完成;

 1 /* Run the handler for the timer event pending on @sk. Called with bottom-half processing disabled.
 2    Called by tcp_write_timer() */
 3 void tcp_write_timer_handler(struct sock *sk)
 4 {
 5     struct inet_connection_sock *icsk = inet_csk(sk);
 6     int event;
 7 
 8     /* Nothing to do when the socket is in CLOSE or LISTEN state, or when no timer event is pending */
 9     if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
10         !icsk->icsk_pending)
11         goto out;
12 
13     /* Timeout not reached yet: re-arm the timer for icsk_timeout and leave */
14     if (time_after(icsk->icsk_timeout, jiffies)) {
15         sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
16         goto out;
17     }
18 
19     /* Read the pending event type */
20     event = icsk->icsk_pending;
21 
22     switch (event) {
23     case ICSK_TIME_REO_TIMEOUT:
24         tcp_rack_reo_timeout(sk);
25         break;
26     case ICSK_TIME_LOSS_PROBE:
27         tcp_send_loss_probe(sk);
28         break;
29     /* Retransmission timeout: clear the pending flag, then run the retransmit handler */
30     case ICSK_TIME_RETRANS:
31         icsk->icsk_pending = 0;
32         tcp_retransmit_timer(sk);
33         break;
34     case ICSK_TIME_PROBE0:
35         icsk->icsk_pending = 0;
36         tcp_probe_timer(sk);
37         break;
38     }
39 
40 out:
41     sk_mem_reclaim(sk);
42 }

 

tcp_retransmit_timer函数即为超时重传的核心函数,其根据不同的情况决定是否进行重传,并且调整重传次数和退避指数,设定下一次重传定时器等;

  1 /**
  2  *  tcp_retransmit_timer() - The TCP retransmit timeout handler
  3  *  @sk:  Pointer to the current socket.
  4  *
  5  *  This function gets called when the kernel timer for a TCP packet
  6  *  of this socket expires.
  7  *
  8  *  It handles retransmission, timer adjustment and other necessary measures.
  9  *
 10  *  Returns: Nothing (void)
 11  */
 12 void tcp_retransmit_timer(struct sock *sk)
 13 {
 14     struct tcp_sock *tp = tcp_sk(sk);
 15     struct net *net = sock_net(sk);
 16     struct inet_connection_sock *icsk = inet_csk(sk);
 17 
 18     /* A Fast Open request sock is still attached to this socket */
 19     if (tp->fastopen_rsk) {
 20         WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 21                  sk->sk_state != TCP_FIN_WAIT1);
 22         /* Fast Open: hand over to the SYN-ACK timer routine */
 23         tcp_fastopen_synack_timer(sk);
 24         /* Before we receive ACK to our SYN-ACK don't retransmit
 25          * anything else (e.g., data or FIN segments).
 26          */
 27         return;
 28     }
 29 
 30     /* No packets outstanding (packets_out == 0): nothing to retransmit */
 31     if (!tp->packets_out)
 32         goto out;
 33 
 34     WARN_ON(tcp_write_queue_empty(sk));
 35 
 36     tp->tlp_high_seq = 0;
 37 
 38     /* 
 39         The peer's send window is zero, the socket is not DEAD,
 40         and the connection is not in SYN_SENT/SYN_RECV
 41     */
 42     if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 43         !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
 44         /* Receiver dastardly shrinks window. Our retransmits
 45          * become zero probes, but we should not timeout this
 46          * connection. If the socket is an orphan, time it out,
 47          * we cannot allow such beasts to hang infinitely.
 48          */
 49         struct inet_sock *inet = inet_sk(sk);
 50         if (sk->sk_family == AF_INET) {
 51             net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
 52                         &inet->inet_daddr,
 53                         ntohs(inet->inet_dport),
 54                         inet->inet_num,
 55                         tp->snd_una, tp->snd_nxt);
 56         }
 57 #if IS_ENABLED(CONFIG_IPV6)
 58         else if (sk->sk_family == AF_INET6) {
 59             net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
 60                         &sk->sk_v6_daddr,
 61                         ntohs(inet->inet_dport),
 62                         inet->inet_num,
 63                         tp->snd_una, tp->snd_nxt);
 64         }
 65 #endif
 66         /* More than TCP_RTO_MAX has elapsed since tp->rcv_tstamp: report a write error */
 67         if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
 68             tcp_write_err(sk);
 69             goto out;
 70         }
 71 
 72         /* Enter the Loss state */
 73         tcp_enter_loss(sk);
 74 
 75         /* Retransmit the segment at the head of the write queue */
 76         tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
 77 
 78         /* Reset the socket's cached route (dst) */
 79         __sk_dst_reset(sk);
 80         goto out_reset_timer;
 81     }
 82 
 83     /* Timeout check: stop here if tcp_write_timeout() reports the connection is done */
 84     if (tcp_write_timeout(sk))
 85         goto out;
 86 
 87     /* Retransmit count is 0: this is the first retransmission timeout */
 88     if (icsk->icsk_retransmits == 0) {
 89         int mib_idx;
 90 
 91         /* Pick the MIB counter that matches the current congestion state */
 92 
 93         if (icsk->icsk_ca_state == TCP_CA_Recovery) {
 94             if (tcp_is_sack(tp))
 95                 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
 96             else
 97                 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
 98         } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
 99             mib_idx = LINUX_MIB_TCPLOSSFAILURES;
100         } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
101                tp->sacked_out) {
102             if (tcp_is_sack(tp))
103                 mib_idx = LINUX_MIB_TCPSACKFAILURES;
104             else
105                 mib_idx = LINUX_MIB_TCPRENOFAILURES;
106         } else {
107             mib_idx = LINUX_MIB_TCPTIMEOUTS;
108         }
109         __NET_INC_STATS(sock_net(sk), mib_idx);
110     }
111 
112     /* Enter the Loss state */
113     tcp_enter_loss(sk);
114 
115     /* Retransmitting the head of the write queue failed (return value > 0) */
116     if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
117         /* Retransmission failed because of local congestion,
118          * do not backoff.
119          */
120         /* Make sure the retransmit counter is at least 1 */
121         if (!icsk->icsk_retransmits)
122             icsk->icsk_retransmits = 1;
123 
124         /* Re-arm the timer for the next attempt, waiting at most TCP_RESOURCE_PROBE_INTERVAL */
125         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
126                       min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
127                       TCP_RTO_MAX);
128         goto out;
129     }
130 
131     /* Increase the timeout each time we retransmit.  Note that
132      * we do not increase the rtt estimate.  rto is initialized
133      * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
134      * that doubling rto each time is the least we can get away with.
135      * In KA9Q, Karn uses this for the first few times, and then
136      * goes to quadratic.  netBSD doubles, but only goes up to *64,
137      * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
138      * defined in the protocol as the maximum possible RTT.  I guess
139      * we'll have to use something other than TCP to talk to the
140      * University of Mars.
141      *
142      * PAWS allows us longer timeouts and large windows, so once
143      * implemented ftp to mars will work nicely. We will have to fix
144      * the 120 second clamps though!
145      */
146     /* Increment the backoff exponent and the retransmit count */
147     icsk->icsk_backoff++;
148     icsk->icsk_retransmits++;
149 
150 out_reset_timer:
151     /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
152      * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
153      * might be increased if the stream oscillates between thin and thick,
154      * thus the old value might already be too high compared to the value
155      * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
156      * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
157      * exponential backoff behaviour to avoid continue hammering
158      * linear-timeout retransmissions into a black hole
159      */
160 
161     if (sk->sk_state == TCP_ESTABLISHED &&
162         (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
163         tcp_stream_is_thin(tp) &&
164         icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
165         /* Clear the backoff exponent */
166         icsk->icsk_backoff = 0;
167         /* Recompute the RTO via __tcp_set_rto() with no backoff applied, capped at TCP_RTO_MAX */
168         icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
169     } else {
170         /* Use normal (exponential) backoff */
171         /* Double the RTO, capped at TCP_RTO_MAX */
172         icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
173     }
174 
175     /* Re-arm the retransmit timer with the new RTO for the next attempt */
176     inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
177 
178     /* Timed out against a boundary of sysctl_tcp_retries1 + 1: reset the cached route */
179     if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
180         __sk_dst_reset(sk);
181 
182 out:;
183 }

 

tcp_write_timeout用于判断重传是否已经超时:函数根据不同情况获取最大重传次数,并由该次数计算出对应的最大超时时间;若从开始重传起经过的时间超过了该最大超时时间,则报告错误并断开连接;

 1 /* A write timeout has occurred. Process the after effects. */
 2 static int tcp_write_timeout(struct sock *sk)
 3 {
 4     struct inet_connection_sock *icsk = inet_csk(sk);
 5     struct tcp_sock *tp = tcp_sk(sk);
 6     struct net *net = sock_net(sk);
 7     int retry_until;
 8     bool do_reset, syn_set = false;
 9 
10     /* Connection is still being established (SYN_SENT or SYN_RECV) */
11     if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
12         /* At least one retransmission has already happened */
13         if (icsk->icsk_retransmits) {
14 
15             /* Give negative advice on the cached route entry */
16             dst_negative_advice(sk);
17 
18             /* Update the Fast Open cache when the SYN used Fast Open or carried data */
19             if (tp->syn_fastopen || tp->syn_data)
20                 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
21             if (tp->syn_data && icsk->icsk_retransmits == 1)
22                 NET_INC_STATS(sock_net(sk),
23                           LINUX_MIB_TCPFASTOPENACTIVEFAIL);
24         } else if (!tp->syn_data && !tp->syn_fastopen) {
25             sk_rethink_txhash(sk);
26         }
27 
28         /* Max SYN retransmissions: per-socket icsk_syn_retries if non-zero, else the sysctl value */
29         retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
30         syn_set = true;
31     } else {
32         /* Timed out against sysctl_tcp_retries1: possible black hole */
33         if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
34             /* Some middle-boxes may black-hole Fast Open _after_
35              * the handshake. Therefore we conservatively disable
36              * Fast Open on this path on recurring timeouts after
37              * successful Fast Open.
38              */
39             if (tp->syn_data_acked) {
40                 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
41                 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
42                     NET_INC_STATS(sock_net(sk),
43                               LINUX_MIB_TCPFASTOPENACTIVEFAIL);
44             }
45             /* Black hole detection */
46 
47             /* Path MTU probing */
48             tcp_mtu_probing(icsk, sk);
49 
50             /* Give negative advice on the cached route entry */
51             dst_negative_advice(sk);
52         } else {
53             sk_rethink_txhash(sk);
54         }
55 
56         /* Retry limit used once the connection is past the handshake */
57         retry_until = net->ipv4.sysctl_tcp_retries2;
58 
59         /* Socket is flagged SOCK_DEAD (orphaned) */
60         if (sock_flag(sk, SOCK_DEAD)) {
61 
62             /* Considered alive while the RTO is still below TCP_RTO_MAX */
63             const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
64 
65             /* Retry limit for an orphaned socket */
66             retry_until = tcp_orphan_retries(sk, alive);
67 
68             /* do_reset is set when alive, or when retry_until has not been exceeded yet */
69             do_reset = alive ||
70                 !retransmits_timed_out(sk, retry_until, 0, 0);
71 
72             /* Give up when tcp_out_of_resources() returns non-zero (presumably orphan resource limits) */
73             if (tcp_out_of_resources(sk, do_reset))
74                 return 1;
75         }
76     }
77 
78     /* Final check: has the connection timed out? (icsk_user_timeout is only used for non-SYN) */
79     if (retransmits_timed_out(sk, retry_until,
80                   syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
81         /* Has it gone just too far? */
82         tcp_write_err(sk);
83         return 1;
84     }
85     return 0;
86 }

 

 1 /**
 2  *  retransmits_timed_out() - returns true if this connection has timed out
 3  *  @sk:       The current socket
 4  *  @boundary: max number of retransmissions
 5  *  @timeout:  A custom timeout value.
 6  *             If set to 0 the default timeout is calculated and used.
 7  *             Using TCP_RTO_MIN and the number of unsuccessful retransmits.
 8  *  @syn_set:  true if the SYN Bit was set.
 9  *
10  * The default "timeout" value this function can calculate and use
11  * is equivalent to the timeout of a TCP Connection
12  * after "boundary" unsuccessful, exponentially backed-off
13  * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
14  * syn_set flag is set.
15  *
16  */
17 static bool retransmits_timed_out(struct sock *sk,
18                   unsigned int boundary,
19                   unsigned int timeout,
20                   bool syn_set)
21 {
22     unsigned int linear_backoff_thresh, start_ts;
23 
24     /* Base RTO: TCP_TIMEOUT_INIT for SYN segments, TCP_RTO_MIN otherwise */
25     unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
26 
27     /* No retransmission has happened yet: cannot have timed out */
28     if (!inet_csk(sk)->icsk_retransmits)
29         return false;
30 
31     /* Measure elapsed time from tp->retrans_stamp */
32     start_ts = tcp_sk(sk)->retrans_stamp;
33 
34     /* If that stamp is 0, fall back to the timestamp of the skb at the head of the write queue */
35     if (unlikely(!start_ts))
36         start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
37 
38     /* No custom timeout supplied (callers pass 0 for SYNs or when no user timeout applies) */
39     if (likely(timeout == 0)) {
40 
41         /* Number of doublings that take rto_base up to TCP_RTO_MAX */
42         linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
43 
44         /* Default timeout for `boundary` retransmissions: exponential up to the threshold, then TCP_RTO_MAX each */
45         if (boundary <= linear_backoff_thresh)
46             timeout = ((2 << boundary) - 1) * rto_base;
47         else
48             timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
49                 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
50     }
51 
52     /* True when the time elapsed since start_ts has reached the timeout */
53     return (tcp_time_stamp - start_ts) >= timeout;
54 }

 

posted @ 2019-10-27 22:14  AlexAlex  阅读(1481)  评论(0编辑  收藏  举报