TCP状态切换流程 - mylinuxer

公告

enum {
	/*
	 * Description of States:
	 *
	 * TCP_SYN_SENT     sent a connection request, waiting for ack
	 *
	 * TCP_SYN_RECV     received a connection request, sent ack,
	 *                  waiting for final ack in three-way handshake.
	 *
	 * TCP_ESTABLISHED  connection established
	 *
	 * TCP_FIN_WAIT1    our side has shutdown, waiting to complete
	 *                  transmission of remaining buffered data
	 *
	 * TCP_FIN_WAIT2    all buffered data sent, waiting for remote
	 *                  to shutdown
	 *
	 * TCP_CLOSING      both sides have shutdown but we still have
	 *                  data we have to finish sending
	 *
	 * TCP_TIME_WAIT    timeout to catch resent junk before entering
	 *                  closed, can only be entered from FIN_WAIT2
	 *                  or CLOSING. Required because the other end
	 *                  may not have gotten our last ACK causing it
	 *                  to retransmit the data packet (which we ignore)
	 *
	 * TCP_CLOSE_WAIT   remote side has shutdown and is waiting for
	 *                  us to finish writing our data and to shutdown
	 *                  (we have to close() to move on to LAST_ACK)
	 *
	 * TCP_LAST_ACK     our side has shutdown after remote has
	 *                  shutdown. There may still be data in our
	 *                  buffer that we have to finish sending
	 *
	 * TCP_CLOSE        socket is finished
	 */

	/*
	 * The connection is established.
	 * Numbering starts at 1, so 0 is not a valid state value.
	 */
	TCP_ESTABLISHED = 1,
	/*
	 * A SYN has been sent.
	 */
	TCP_SYN_SENT,
	/*
	 * A SYN has been received.
	 */
	TCP_SYN_RECV,
	/*
	 * Active close: our FIN has been sent.
	 */
	TCP_FIN_WAIT1,
	/*
	 * Active close: the peer has ACKed the FIN we sent.
	 */
	TCP_FIN_WAIT2,
	/*
	 * Active close: the peer's FIN has been received and ACKed.
	 */
	TCP_TIME_WAIT,
	/*
	 * Initial state of a connection.
	 */
	TCP_CLOSE,
	/*
	 * Passive close: the peer's FIN has been received and ACKed.
	 */
	TCP_CLOSE_WAIT,
	/*
	 * Passive close: after receiving the peer's FIN, our own FIN
	 * has been sent.
	 */
	TCP_LAST_ACK,
	/*
	 * Listening for incoming connections.
	 */
	TCP_LISTEN,
	/*
	 * Simultaneous close: the peer's FIN arrived after we had
	 * already sent our own FIN.
	 */
	TCP_CLOSING, /* Now a valid state */

	TCP_MAX_STATES /* Leave at the end! */
};

一、主动打开

1、TCP_CLOSE ---->TCP_SYN_SENT

/*
 * tcp_v4_connect - begin an outgoing (active-open) IPv4 TCP connection.
 *
 * @sk:       socket being connected
 * @uaddr:    destination address; must be a struct sockaddr_in with
 *            sin_family == AF_INET
 * @addr_len: size of the buffer behind @uaddr
 *
 * Steps visible in this listing:
 *   1. validate the address length and family;
 *   2. look up a route (through the source-route first hop when the
 *      socket's IP options request one) and reject multicast/broadcast
 *      destinations;
 *   3. fill in source/destination address and port on the socket;
 *   4. set the socket state to TCP_SYN_SENT and call inet_hash_connect();
 *   5. refresh the route for the final ports, pick an initial write_seq
 *      if none is set, and call tcp_connect().
 *
 * Returns 0 on success or a negative errno.  If an error occurs after the
 * state change, the "failure" label sets the state back to TCP_CLOSE, drops
 * the route and clears sk_route_caps and inet_dport.
 */
141 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142 {
143         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
144         struct inet_sock *inet = inet_sk(sk);
145         struct tcp_sock *tp = tcp_sk(sk);
146         __be16 orig_sport, orig_dport;
147         __be32 daddr, nexthop;
148         struct flowi4 *fl4;
149         struct rtable *rt;
150         int err;
151         struct ip_options_rcu *inet_opt;
152 
153         if (addr_len < sizeof(struct sockaddr_in))
154                 return -EINVAL;
155 
156         if (usin->sin_family != AF_INET)
157                 return -EAFNOSUPPORT;
158 
159         nexthop = daddr = usin->sin_addr.s_addr;
160         inet_opt = rcu_dereference_protected(inet->inet_opt,
161                                              sock_owned_by_user(sk));
162         if (inet_opt && inet_opt->opt.srr) { /* source routing: route via opt.faddr */
163                 if (!daddr)
164                         return -EINVAL;
165                 nexthop = inet_opt->opt.faddr;
166         }
167 
168         orig_sport = inet->inet_sport;
169         orig_dport = usin->sin_port;
170         fl4 = &inet->cork.fl.u.ip4;
171         rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173                               IPPROTO_TCP,
174                               orig_sport, orig_dport, sk);
175         if (IS_ERR(rt)) {
176                 err = PTR_ERR(rt);
177                 if (err == -ENETUNREACH)
178                         IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179                 return err;
180         }
181 
182         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
183                 ip_rt_put(rt);
184                 return -ENETUNREACH;
185         }
186 
187         if (!inet_opt || !inet_opt->opt.srr) /* no source route: use the routed destination */
188                 daddr = fl4->daddr;
189 
190         if (!inet->inet_saddr) /* source address not set yet: take the one from the route */
191                 inet->inet_saddr = fl4->saddr;
192         sk_rcv_saddr_set(sk, inet->inet_saddr);
193 
194         if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
195                 /* Reset inherited state */
196                 tp->rx_opt.ts_recent       = 0;
197                 tp->rx_opt.ts_recent_stamp = 0;
198                 if (likely(!tp->repair))
199                         tp->write_seq      = 0;
200         }
201 
202         if (tcp_death_row.sysctl_tw_recycle &&
203             !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
204                 tcp_fetch_timewait_stamp(sk, &rt->dst);
205 
206         inet->inet_dport = usin->sin_port;
207         sk_daddr_set(sk, daddr);
208 
209         inet_csk(sk)->icsk_ext_hdr_len = 0;
210         if (inet_opt)
211                 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
212 
213         tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
214 
215         /* Socket identity is still unknown (sport may be zero).
216          * However we set state to SYN-SENT and not releasing socket
217          * lock select source port, enter ourselves into the hash tables and
218          * complete initialization after this.
219          */
220     tcp_set_state(sk, TCP_SYN_SENT); /* the state transition this listing illustrates */
221         err = inet_hash_connect(&tcp_death_row, sk);
222         if (err)
223                 goto failure;
224 
225         inet_set_txhash(sk);
226 
227         rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228                                inet->inet_sport, inet->inet_dport, sk);
229         if (IS_ERR(rt)) {
230                 err = PTR_ERR(rt);
231                 rt = NULL; /* so the failure path hands NULL, not an error pointer, to ip_rt_put() */
232                 goto failure;
233         }
234         /* OK, now commit destination to socket.  */
235         sk->sk_gso_type = SKB_GSO_TCPV4;
236         sk_setup_caps(sk, &rt->dst);
237 
238         if (!tp->write_seq && likely(!tp->repair))
239                 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240                                                            inet->inet_daddr,
241                                                            inet->inet_sport,
242                                                            usin->sin_port);
243 
244         inet->inet_id = tp->write_seq ^ jiffies;
245 
246         err = tcp_connect(sk); /* presumably builds and sends the SYN; helper not shown here */
247 
248         rt = NULL; /* cleared before the error check, so the failure path passes NULL to ip_rt_put() */
249         if (err)
250                 goto failure;
251 
252         return 0;
253 
254 failure:
255         /*
256          * This unhashes the socket and releases the local port,
257          * if necessary.
258          */
259         tcp_set_state(sk, TCP_CLOSE);
260         ip_rt_put(rt);
261         sk->sk_route_caps = 0;
262         inet->inet_dport = 0;
263         return err;
264 }
265 EXPORT_SYMBOL(tcp_v4_connect);

2、TCP_SYN_SENT ----> TCP_ESTABLISHED

/*
 * tcp_rcv_synsent_state_process - handle a segment received while the
 * socket is in the SYN-SENT state.
 *
 * @sk:  socket that received the segment
 * @skb: the received segment
 * @th:  TCP header of @skb
 * @len: not referenced in this function body
 *
 * Two state transitions happen here:
 *   - segment with ACK and SYN that passes the ack_seq and timestamp
 *     checks: the socket moves to TCP_ESTABLISHED;
 *   - segment with SYN but no ACK (crossed SYNs / simultaneous connect):
 *     the socket moves to TCP_SYN_RECV and a SYN-ACK is sent.
 *
 * Return value:
 *    0  the skb was freed here (segment consumed or discarded);
 *   -1  the socket became ESTABLISHED and an ACK was sent immediately;
 *       the skb is NOT freed here;
 *    1  the ACK was unacceptable (reset_and_undo); parsed options are
 *       cleared and mss_clamp restored.  NOTE(review): the caller
 *       presumably answers with a reset - confirm against the caller.
 *
 * Note that the "discard" label sits inside the delayed-ACK branch and is
 * jumped to from several other places in the function.
 */
5434 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5435                                          struct tcphdr *th, unsigned len)
5436 {
5437         u8 *hash_location;
5438         struct inet_connection_sock *icsk = inet_csk(sk);
5439         struct tcp_sock *tp = tcp_sk(sk);
5440         struct tcp_cookie_values *cvp = tp->cookie_values;
5441         int saved_clamp = tp->rx_opt.mss_clamp; /* restored on the *_and_undo paths */
5442 
5443         tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
5444 
5445         if (th->ack) {
5446                 /* rfc793:
5447                  * "If the state is SYN-SENT then
5448                  *    first check the ACK bit
5449                  *      If the ACK bit is set
5450                  *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
5451                  *        a reset (unless the RST bit is set, if so drop
5452                  *        the segment and return)"
5453                  *
5454                  *  We do not send data with SYN, so that RFC-correct
5455                  *  test reduces to:
5456                  */
5457                 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
5458                         goto reset_and_undo;
5459 
5460                 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5461                     !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5462                              tcp_time_stamp)) {
5463                         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5464                         goto reset_and_undo;
5465                 }
5466 
5467                 /* Now ACK is acceptable.
5468                  *
5469                  * "If the RST bit is set
5470                  *    If the ACK was acceptable then signal the user "error:
5471                  *    connection reset", drop the segment, enter CLOSED state,
5472                  *    delete TCB, and return."
5473                  */
5474 
5475                 if (th->rst) {
5476                         tcp_reset(sk);
5477                         goto discard;
5478                 }
5479 
5480                 /* rfc793:
5481                  *   "fifth, if neither of the SYN or RST bits is set then
5482                  *    drop the segment and return."
5483                  *
5484                  *    See note below!
5485                  *                                        --ANK(990513)
5486                  */
5487                 if (!th->syn)
5488                         goto discard_and_undo;
5489 
5490                 /* rfc793:
5491                  *   "If the SYN bit is on ...
5492                  *    are acceptable then ...
5493                  *    (our SYN has been ACKed), change the connection
5494                  *    state to ESTABLISHED..."
5495                  */
5496 
5497                 TCP_ECN_rcv_synack(tp, th);
5498 
5499                 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5500                 tcp_ack(sk, skb, FLAG_SLOWPATH);
5501 
5502                 /* Ok.. it's good. Set up sequence numbers and
5503                  * move to established.
5504                  */
5505                 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5506                 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5507 
5508                 /* RFC1323: The window in SYN & SYN/ACK segments is
5509                  * never scaled.
5510                  */
5511                 tp->snd_wnd = ntohs(th->window);
5512                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5513 
5514                 if (!tp->rx_opt.wscale_ok) {
5515                         tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5516                         tp->window_clamp = min(tp->window_clamp, 65535U);
5517                 }
5518 
5519                 if (tp->rx_opt.saw_tstamp) {
5520                         tp->rx_opt.tstamp_ok       = 1;
5521                         tp->tcp_header_len =
5522                                 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5523                         tp->advmss          -= TCPOLEN_TSTAMP_ALIGNED;
5524                         tcp_store_ts_recent(tp);
5525                 } else {
5526                         tp->tcp_header_len = sizeof(struct tcphdr);
5527                 }
5528 
5529                 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5530                         tcp_enable_fack(tp);
5531 
5532                 tcp_mtup_init(sk);
5533                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5534                 tcp_initialize_rcv_mss(sk);
5535 
5536                 /* Remember, tcp_poll() does not lock socket!
5537                  * Change state from SYN-SENT only after copied_seq
5538                  * is initialized. */
5539                 tp->copied_seq = tp->rcv_nxt;
5540 
5541                 if (cvp != NULL &&
5542                     cvp->cookie_pair_size > 0 &&
5543                     tp->rx_opt.cookie_plus > 0) {
5544                         int cookie_size = tp->rx_opt.cookie_plus
5545                                         - TCPOLEN_COOKIE_BASE;
5546                         int cookie_pair_size = cookie_size
5547                                              + cvp->cookie_desired;
5548 
5549                         /* A cookie extension option was sent and returned.
5550                          * Note that each incoming SYNACK replaces the
5551                          * Responder cookie.  The initial exchange is most
5552                          * fragile, as protection against spoofing relies
5553                          * entirely upon the sequence and timestamp (above).
5554                          * This replacement strategy allows the correct pair to
5555                          * pass through, while any others will be filtered via
5556                          * Responder verification later.
5557                          */
5558                         if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5559                                 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5560                                        hash_location, cookie_size);
5561                                 cvp->cookie_pair_size = cookie_pair_size;
5562                         }
5563                 }
5564 
5565                 smp_mb(); /* barrier between the copied_seq setup above and the state change below */
5566           tcp_set_state(sk, TCP_ESTABLISHED); /* SYN-SENT -> ESTABLISHED */
5567 
5568                 security_inet_conn_established(sk, skb);
5569 
5570                 /* Make sure socket is routed, for correct metrics.  */
5571                 icsk->icsk_af_ops->rebuild_header(sk);
5572 
5573                 tcp_init_metrics(sk);
5574 
5575                 tcp_init_congestion_control(sk);
5576 
5577                 /* Prevent spurious tcp_cwnd_restart() on first data
5578                  * packet.
5579                  */
5580                 tp->lsndtime = tcp_time_stamp;
5581 
5582                 tcp_init_buffer_space(sk);
5583 
5584                 if (sock_flag(sk, SOCK_KEEPOPEN))
5585                         inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5586 
5587                 if (!tp->rx_opt.snd_wscale)
5588                         __tcp_fast_path_on(tp, tp->snd_wnd);
5589                 else
5590                         tp->pred_flags = 0;
5591 
5592                 if (!sock_flag(sk, SOCK_DEAD)) {
5593                         sk->sk_state_change(sk);
5594                         sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5595                 }
5596 
5597                 if (sk->sk_write_pending ||
5598                     icsk->icsk_accept_queue.rskq_defer_accept ||
5599                     icsk->icsk_ack.pingpong) {
5600                         /* Save one ACK. Data will be ready after
5601                          * several ticks, if write_pending is set.
5602                          *
5603                          * It may be deleted, but with this feature tcpdumps
5604                          * look so _wonderfully_ clever, that I was not able
5605                          * to stand against the temptation 8)     --ANK
5606                          */
5607                         inet_csk_schedule_ack(sk);
5608                         icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5609                         icsk->icsk_ack.ato       = TCP_ATO_MIN;
5610                         tcp_incr_quickack(sk);
5611                         tcp_enter_quickack_mode(sk);
5612                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5613                                                   TCP_DELACK_MAX, TCP_RTO_MAX);
5614 
5615 discard:
5616                         __kfree_skb(skb);
5617                         return 0;
5618                 } else {
5619                         tcp_send_ack(sk);
5620                 }
5621                 return -1; /* skb not freed here; left to the caller */
5622         }
5623 
5624         /* No ACK in the segment */
5625 
5626         if (th->rst) {
5627                 /* rfc793:
5628                  * "If the RST bit is set
5629                  *
5630                  *      Otherwise (no ACK) drop the segment and return."
5631                  */
5632 
5633                 goto discard_and_undo;
5634         }
5635 
5636         /* PAWS check. */
5637         if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5638             tcp_paws_reject(&tp->rx_opt, 0))
5639                 goto discard_and_undo;
5640 
5641         if (th->syn) {
5642                 /* We see SYN without ACK. It is attempt of
5643                  * simultaneous connect with crossed SYNs.
5644                  * Particularly, it can be connect to self.
5645                  */
5646                 tcp_set_state(sk, TCP_SYN_RECV); /* SYN-SENT -> SYN-RECV (simultaneous open) */
5647 
5648                 if (tp->rx_opt.saw_tstamp) {
5649                         tp->rx_opt.tstamp_ok = 1;
5650                         tcp_store_ts_recent(tp);
5651                         tp->tcp_header_len =
5652                                 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5653                 } else {
5654                         tp->tcp_header_len = sizeof(struct tcphdr);
5655                 }
5656 
5657                 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5658                 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5659 
5660                 /* RFC1323: The window in SYN & SYN/ACK segments is
5661                  * never scaled.
5662                  */
5663                 tp->snd_wnd    = ntohs(th->window);
5664                 tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
5665                 tp->max_window = tp->snd_wnd;
5666 
5667                 TCP_ECN_rcv_syn(tp, th);
5668 
5669                 tcp_mtup_init(sk);
5670                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5671                 tcp_initialize_rcv_mss(sk);
5672 
5673                 tcp_send_synack(sk);
5674 #if 0
5675                 /* Note, we could accept data and URG from this segment.
5676                  * There are no obstacles to make this.
5677                  *
5678                  * However, if we ignore data in ACKless segments sometimes,
5679                  * we have no reasons to accept it sometimes.
5680                  * Also, seems the code doing it in step6 of tcp_rcv_state_process
5681                  * is not flawless. So, discard packet for sanity.
5682                  * Uncomment this return to process the data.
5683                  */
5684                 return -1;
5685 #else
5686                 goto discard;
5687 #endif
5688         }
5689         /* "fifth, if neither of the SYN or RST bits is set then
5690          * drop the segment and return."
5691          */
5692 
5693 discard_and_undo:
5694         tcp_clear_options(&tp->rx_opt);
5695         tp->rx_opt.mss_clamp = saved_clamp;
5696         goto discard;
5697 
5698 reset_and_undo:
5699         tcp_clear_options(&tp->rx_opt);
5700         tp->rx_opt.mss_clamp = saved_clamp;
5701         return 1;
5702 }

二、被动打开

1、TCP_CLOSE ----> TCP_LISTEN

/*
 * inet_csk_listen_start - put a connection-oriented socket into the
 * listening state.
 *
 * @sk:               socket to start listening on
 * @nr_table_entries: size passed to reqsk_queue_alloc() for the accept queue
 *
 * Allocates the accept queue, zeroes the backlog counters, sets
 * sk->sk_state to TCP_LISTEN and then asks the protocol's get_port() to
 * validate the local port.  On success the source port is stored, the
 * cached route is reset and the socket is hashed.
 *
 * Returns 0 on success, the reqsk_queue_alloc() error if queue allocation
 * fails, or -EADDRINUSE if get_port() fails; in the latter case the state
 * is put back to TCP_CLOSE and the accept queue is destroyed.
 */
794 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
795 {
796         struct inet_sock *inet = inet_sk(sk);
797         struct inet_connection_sock *icsk = inet_csk(sk);
798         int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
799 
800         if (rc != 0)
801                 return rc;
802 
803         sk->sk_max_ack_backlog = 0;
804         sk->sk_ack_backlog = 0;
805         inet_csk_delack_init(sk);
806 
807         /* There is race window here: we announce ourselves listening,
808          * but this transition is still not validated by get_port().
809          * It is OK, because this socket enters to hash table only
810          * after validation is complete.
811          */
812     sk->sk_state = TCP_LISTEN; /* assigned directly, not through tcp_set_state() */
813         if (!sk->sk_prot->get_port(sk, inet->inet_num)) { /* zero return means the port is usable */
814                 inet->inet_sport = htons(inet->inet_num);
815 
816                 sk_dst_reset(sk);
817                 sk->sk_prot->hash(sk);
818 
819                 return 0;
820         }
821 
822         sk->sk_state = TCP_CLOSE; /* get_port() failed: undo the TCP_LISTEN assignment */
823         __reqsk_queue_destroy(&icsk->icsk_accept_queue);
824         return -EADDRINUSE;
825 }

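2、TCP_LISTEN ----> TCP_SYN_RECV

（注：下面列出的 tcp_rcv_synsent_state_process() 只在套接字处于 TCP_SYN_SENT 状态时被调用，其中的 tcp_set_state(sk, TCP_SYN_RECV) 对应的是双方 SYN 交叉的“同时打开”情形，即 TCP_SYN_SENT ----> TCP_SYN_RECV。）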

/*
 * tcp_rcv_synsent_state_process - handle a segment received while the
 * socket is in the SYN-SENT state.
 *
 * @sk:  socket that received the segment
 * @skb: the received segment
 * @th:  TCP header of @skb
 * @len: not referenced in this function body
 *
 * NOTE(review): the TCP_SYN_RECV transition in this function is taken from
 * SYN-SENT when a bare SYN arrives (crossed SYNs / simultaneous connect);
 * it is not the path a listening socket takes.
 *
 * Two state transitions happen here:
 *   - segment with ACK and SYN that passes the ack_seq and timestamp
 *     checks: the socket moves to TCP_ESTABLISHED;
 *   - segment with SYN but no ACK: the socket moves to TCP_SYN_RECV and a
 *     SYN-ACK is sent.
 *
 * Return value:
 *    0  the skb was freed here (segment consumed or discarded);
 *   -1  the socket became ESTABLISHED and an ACK was sent immediately;
 *       the skb is NOT freed here;
 *    1  the ACK was unacceptable (reset_and_undo); parsed options are
 *       cleared and mss_clamp restored.  NOTE(review): the caller
 *       presumably answers with a reset - confirm against the caller.
 */
5434 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5435                                          struct tcphdr *th, unsigned len)
5436 {
5437         u8 *hash_location;
5438         struct inet_connection_sock *icsk = inet_csk(sk);
5439         struct tcp_sock *tp = tcp_sk(sk);
5440         struct tcp_cookie_values *cvp = tp->cookie_values;
5441         int saved_clamp = tp->rx_opt.mss_clamp; /* restored on the *_and_undo paths */
5442 
5443         tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
5444 
5445         if (th->ack) {
5446                 /* rfc793:
5447                  * "If the state is SYN-SENT then
5448                  *    first check the ACK bit
5449                  *      If the ACK bit is set
5450                  *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
5451                  *        a reset (unless the RST bit is set, if so drop
5452                  *        the segment and return)"
5453                  *
5454                  *  We do not send data with SYN, so that RFC-correct
5455                  *  test reduces to:
5456                  */
5457                 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
5458                         goto reset_and_undo;
5459 
5460                 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
5461                     !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
5462                              tcp_time_stamp)) {
5463                         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
5464                         goto reset_and_undo;
5465                 }
5466 
5467                 /* Now ACK is acceptable.
5468                  *
5469                  * "If the RST bit is set
5470                  *    If the ACK was acceptable then signal the user "error:
5471                  *    connection reset", drop the segment, enter CLOSED state,
5472                  *    delete TCB, and return."
5473                  */
5474 
5475                 if (th->rst) {
5476                         tcp_reset(sk);
5477                         goto discard;
5478                 }
5479 
5480                 /* rfc793:
5481                  *   "fifth, if neither of the SYN or RST bits is set then
5482                  *    drop the segment and return."
5483                  *
5484                  *    See note below!
5485                  *                                        --ANK(990513)
5486                  */
5487                 if (!th->syn)
5488                         goto discard_and_undo;
5489 
5490                 /* rfc793:
5491                  *   "If the SYN bit is on ...
5492                  *    are acceptable then ...
5493                  *    (our SYN has been ACKed), change the connection
5494                  *    state to ESTABLISHED..."
5495                  */
5496 
5497                 TCP_ECN_rcv_synack(tp, th);
5498 
5499                 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
5500                 tcp_ack(sk, skb, FLAG_SLOWPATH);
5501 
5502                 /* Ok.. it's good. Set up sequence numbers and
5503                  * move to established.
5504                  */
5505                 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5506                 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5507 
5508                 /* RFC1323: The window in SYN & SYN/ACK segments is
5509                  * never scaled.
5510                  */
5511                 tp->snd_wnd = ntohs(th->window);
5512                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5513 
5514                 if (!tp->rx_opt.wscale_ok) {
5515                         tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
5516                         tp->window_clamp = min(tp->window_clamp, 65535U);
5517                 }
5518 
5519                 if (tp->rx_opt.saw_tstamp) {
5520                         tp->rx_opt.tstamp_ok       = 1;
5521                         tp->tcp_header_len =
5522                                 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5523                         tp->advmss          -= TCPOLEN_TSTAMP_ALIGNED;
5524                         tcp_store_ts_recent(tp);
5525                 } else {
5526                         tp->tcp_header_len = sizeof(struct tcphdr);
5527                 }
5528 
5529                 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5530                         tcp_enable_fack(tp);
5531 
5532                 tcp_mtup_init(sk);
5533                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5534                 tcp_initialize_rcv_mss(sk);
5535 
5536                 /* Remember, tcp_poll() does not lock socket!
5537                  * Change state from SYN-SENT only after copied_seq
5538                  * is initialized. */
5539                 tp->copied_seq = tp->rcv_nxt;
5540 
5541                 if (cvp != NULL &&
5542                     cvp->cookie_pair_size > 0 &&
5543                     tp->rx_opt.cookie_plus > 0) {
5544                         int cookie_size = tp->rx_opt.cookie_plus
5545                                         - TCPOLEN_COOKIE_BASE;
5546                         int cookie_pair_size = cookie_size
5547                                              + cvp->cookie_desired;
5548 
5549                         /* A cookie extension option was sent and returned.
5550                          * Note that each incoming SYNACK replaces the
5551                          * Responder cookie.  The initial exchange is most
5552                          * fragile, as protection against spoofing relies
5553                          * entirely upon the sequence and timestamp (above).
5554                          * This replacement strategy allows the correct pair to
5555                          * pass through, while any others will be filtered via
5556                          * Responder verification later.
5557                          */
5558                         if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5559                                 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5560                                        hash_location, cookie_size);
5561                                 cvp->cookie_pair_size = cookie_pair_size;
5562                         }
5563                 }
5564 
5565                 smp_mb(); /* barrier between the copied_seq setup above and the state change below */
5566                 tcp_set_state(sk, TCP_ESTABLISHED); /* SYN-SENT -> ESTABLISHED */
5567 
5568                 security_inet_conn_established(sk, skb);
5569 
5570                 /* Make sure socket is routed, for correct metrics.  */
5571                 icsk->icsk_af_ops->rebuild_header(sk);
5572 
5573                 tcp_init_metrics(sk);
5574 
5575                 tcp_init_congestion_control(sk);
5576 
5577                 /* Prevent spurious tcp_cwnd_restart() on first data
5578                  * packet.
5579                  */
5580                 tp->lsndtime = tcp_time_stamp;
5581 
5582                 tcp_init_buffer_space(sk);
5583 
5584                 if (sock_flag(sk, SOCK_KEEPOPEN))
5585                         inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
5586 
5587                 if (!tp->rx_opt.snd_wscale)
5588                         __tcp_fast_path_on(tp, tp->snd_wnd);
5589                 else
5590                         tp->pred_flags = 0;
5591 
5592                 if (!sock_flag(sk, SOCK_DEAD)) {
5593                         sk->sk_state_change(sk);
5594                         sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
5595                 }
5596 
5597                 if (sk->sk_write_pending ||
5598                     icsk->icsk_accept_queue.rskq_defer_accept ||
5599                     icsk->icsk_ack.pingpong) {
5600                         /* Save one ACK. Data will be ready after
5601                          * several ticks, if write_pending is set.
5602                          *
5603                          * It may be deleted, but with this feature tcpdumps
5604                          * look so _wonderfully_ clever, that I was not able
5605                          * to stand against the temptation 8)     --ANK
5606                          */
5607                         inet_csk_schedule_ack(sk);
5608                         icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5609                         icsk->icsk_ack.ato       = TCP_ATO_MIN;
5610                         tcp_incr_quickack(sk);
5611                         tcp_enter_quickack_mode(sk);
5612                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5613                                                   TCP_DELACK_MAX, TCP_RTO_MAX);
5614 
5615 discard:
5616                         __kfree_skb(skb);
5617                         return 0;
5618                 } else {
5619                         tcp_send_ack(sk);
5620                 }
5621                 return -1; /* skb not freed here; left to the caller */
5622         }
5623 
5624         /* No ACK in the segment */
5625 
5626         if (th->rst) {
5627                 /* rfc793:
5628                  * "If the RST bit is set
5629                  *
5630                  *      Otherwise (no ACK) drop the segment and return."
5631                  */
5632 
5633                 goto discard_and_undo;
5634         }
5635 
5636         /* PAWS check. */
5637         if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5638             tcp_paws_reject(&tp->rx_opt, 0))
5639                 goto discard_and_undo;
5640 
5641         if (th->syn) {
5642                 /* We see SYN without ACK. It is attempt of
5643                  * simultaneous connect with crossed SYNs.
5644                  * Particularly, it can be connect to self.
5645                  */
5646           tcp_set_state(sk, TCP_SYN_RECV); /* SYN-SENT -> SYN-RECV (simultaneous open) */
5647 
5648                 if (tp->rx_opt.saw_tstamp) {
5649                         tp->rx_opt.tstamp_ok = 1;
5650                         tcp_store_ts_recent(tp);
5651                         tp->tcp_header_len =
5652                                 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
5653                 } else {
5654                         tp->tcp_header_len = sizeof(struct tcphdr);
5655                 }
5656 
5657                 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
5658                 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
5659 
5660                 /* RFC1323: The window in SYN & SYN/ACK segments is
5661                  * never scaled.
5662                  */
5663                 tp->snd_wnd    = ntohs(th->window);
5664                 tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
5665                 tp->max_window = tp->snd_wnd;
5666 
5667                 TCP_ECN_rcv_syn(tp, th);
5668 
5669                 tcp_mtup_init(sk);
5670                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
5671                 tcp_initialize_rcv_mss(sk);
5672 
5673                 tcp_send_synack(sk);
5674 #if 0
5675                 /* Note, we could accept data and URG from this segment.
5676                  * There are no obstacles to make this.
5677                  *
5678                  * However, if we ignore data in ACKless segments sometimes,
5679                  * we have no reasons to accept it sometimes.
5680                  * Also, seems the code doing it in step6 of tcp_rcv_state_process
5681                  * is not flawless. So, discard packet for sanity.
5682                  * Uncomment this return to process the data.
5683                  */
5684                 return -1;
5685 #else
5686                 goto discard;
5687 #endif
5688         }
5689         /* "fifth, if neither of the SYN or RST bits is set then
5690          * drop the segment and return."
5691          */
5692 
5693 discard_and_undo:
5694         tcp_clear_options(&tp->rx_opt);
5695         tp->rx_opt.mss_clamp = saved_clamp;
5696         goto discard;
5697 
5698 reset_and_undo:
5699         tcp_clear_options(&tp->rx_opt);
5700         tp->rx_opt.mss_clamp = saved_clamp;
5701         return 1;
5702 }

3、TCP_SYN_RECV ----> TCP_ESTABLISHED

/*
 * tcp_rcv_state_process - process one incoming segment according to
 * the current sk->sk_state.
 *
 * Visible flow:
 *   - TCP_CLOSE, TCP_LISTEN and TCP_SYN_SENT are fully handled by the
 *     first switch and never reach the code below it.
 *   - Every other state goes through tcp_validate_incoming(), then the
 *     ACK-driven state transitions ("step 5"), tcp_urg() ("step 6") and
 *     the data queueing switch ("step 7").
 *
 * Return value: 0 on the normal and discard paths; 1 on the abort paths
 * (ACK while listening, conn_request() failure, unacceptable ACK in
 * SYN_RECV, data arriving on a dead/shut-down socket); otherwise the
 * value handed back by tcp_rcv_synsent_state_process() or the negated
 * result of tcp_validate_incoming().
 * NOTE(review): the caller presumably answers a non-zero return with a
 * reset - confirm against the caller, which is not part of this listing.
 */
5711 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5712                           struct tcphdr *th, unsigned len)
5713 {
5714         struct tcp_sock *tp = tcp_sk(sk);
5715         struct inet_connection_sock *icsk = inet_csk(sk);
5716         int queued = 0;
5717         int res;
5718 
5719         tp->rx_opt.saw_tstamp = 0;
5720 
5721         switch (sk->sk_state) {
5722         case TCP_CLOSE:
5723                 goto discard;
5724 
5725         case TCP_LISTEN:
5726                 if (th->ack)
5727                         return 1;
5728 
5729                 if (th->rst)
5730                         goto discard;
5731 
5732                 if (th->syn) {
5733                         if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5734                                 return 1;
5735 
5736                         /* Now we have several options: In theory there is
5737                          * nothing else in the frame. KA9Q has an option to
5738                          * send data with the syn, BSD accepts data with the
5739                          * syn up to the [to be] advertised window and
5740                          * Solaris 2.1 gives you a protocol error. For now
5741                          * we just ignore it, that fits the spec precisely
5742                          * and avoids incompatibilities. It would be nice in
5743                          * future to drop through and process the data.
5744                          *
5745                          * Now that TTCP is starting to be used we ought to
5746                          * queue this data.
5747                          * But, this leaves one open to an easy denial of
5748                          * service attack, and SYN cookies can't defend
5749                          * against this problem. So, we drop the data
5750                          * in the interest of security over speed unless
5751                          * it's still in use.
5752                          */
5753                         kfree_skb(skb);
5754                         return 0;
5755                 }
5756                 goto discard;
5757 
5758         case TCP_SYN_SENT:
5759                 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5760                 if (queued >= 0)
5761                         return queued;
5762 
5763                 /* Do step6 onward by hand. */
5764                 tcp_urg(sk, skb, th);
5765                 __kfree_skb(skb);
5766                 tcp_data_snd_check(sk);
5767                 return 0;
5768         }
5769 
5770         res = tcp_validate_incoming(sk, skb, th, 0);
5771         if (res <= 0)
5772                 return -res; /* res is <= 0 here, so the value returned is >= 0 */
5773 
5774         /* step 5: check the ACK field */
5775         if (th->ack) {
5776                 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5777 
5778                 switch (sk->sk_state) {
5779                 case TCP_SYN_RECV:
5780                         if (acceptable) {
5781                                 tp->copied_seq = tp->rcv_nxt;
5782                                 smp_mb();
5783                      tcp_set_state(sk, TCP_ESTABLISHED); /* SYN_RECV -> ESTABLISHED: the transition this section illustrates */
5784                                 sk->sk_state_change(sk);
5785 
5786                                 /* Note, that this wakeup is only for marginal
5787                                  * crossed SYN case. Passively open sockets
5788                                  * are not waked up, because sk->sk_sleep ==
5789                                  * NULL and sk->sk_socket == NULL.
5790                                  */
5791                                 if (sk->sk_socket)
5792                                         sk_wake_async(sk,
5793                                                       SOCK_WAKE_IO, POLL_OUT);
5794 
5795                                 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5796                                 tp->snd_wnd = ntohs(th->window) <<
5797                                               tp->rx_opt.snd_wscale;
5798                                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5799 
5800                                 /* tcp_ack considers this ACK as duplicate
5801                                  * and does not calculate rtt.
5802                                  * Force it here.
5803                                  */
5804                                 tcp_ack_update_rtt(sk, 0, 0);
5805 
5806                                 if (tp->rx_opt.tstamp_ok)
5807                                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5808 
5809                                 /* Make sure socket is routed, for
5810                                  * correct metrics.
5811                                  */
5812                                 icsk->icsk_af_ops->rebuild_header(sk);
5813 
5814                                 tcp_init_metrics(sk);
5815 
5816                                 tcp_init_congestion_control(sk);
5817 
5818                                 /* Prevent spurious tcp_cwnd_restart() on
5819                                  * first data packet.
5820                                  */
5821                                 tp->lsndtime = tcp_time_stamp;
5822 
5823                                 tcp_mtup_init(sk);
5824                                 tcp_initialize_rcv_mss(sk);
5825                                 tcp_init_buffer_space(sk);
5826                                 tcp_fast_path_on(tp);
5827                         } else {
5828                                 return 1;
5829                         }
5830                         break;
5831 
5832                 case TCP_FIN_WAIT1:
5833                         if (tp->snd_una == tp->write_seq) { /* presumably: everything we wrote, FIN included, has been ACKed - TODO confirm */
5834                                 tcp_set_state(sk, TCP_FIN_WAIT2);
5835                                 sk->sk_shutdown |= SEND_SHUTDOWN;
5836                                 dst_confirm(sk->sk_dst_cache);
5837 
5838                                 if (!sock_flag(sk, SOCK_DEAD))
5839                                         /* Wake up lingering close() */
5840                                         sk->sk_state_change(sk);
5841                                 else {
5842                                         int tmo;
5843 
5844                                         if (tp->linger2 < 0 ||
5845                                             (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5846                                              after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5847                                                 tcp_done(sk);
5848                                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5849                                                 return 1;
5850                                         }
5851 
5852                                         tmo = tcp_fin_time(sk);
5853                                         if (tmo > TCP_TIMEWAIT_LEN) {
5854                                                 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5855                                         } else if (th->fin || sock_owned_by_user(sk)) {
5856                                                 /* Bad case. We could lose such FIN otherwise.
5857                                                  * It is not a big problem, but it looks confusing
5858                                                  * and not so rare event. We still can lose it now,
5859                                                  * if it spins in bh_lock_sock(), but it is really
5860                                                  * marginal case.
5861                                                  */
5862                                                 inet_csk_reset_keepalive_timer(sk, tmo);
5863                                         } else {
5864                                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5865                                                 goto discard;
5866                                         }
5867                                 }
5868                         }
5869                         break;
5870 
5871                 case TCP_CLOSING:
5872                         if (tp->snd_una == tp->write_seq) {
5873                                 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5874                                 goto discard;
5875                         }
5876                         break;
5877 
5878                 case TCP_LAST_ACK:
5879                         if (tp->snd_una == tp->write_seq) {
5880                                 tcp_update_metrics(sk);
5881                                 tcp_done(sk);
5882                                 goto discard;
5883                         }
5884                         break;
5885                 }
5886         } else
5887                 goto discard; /* a segment without ACK is dropped in all remaining states */
5888 
5889         /* step 6: check the URG bit */
5890         tcp_urg(sk, skb, th);
5891 
5892         /* step 7: process the segment text */
5893         switch (sk->sk_state) {
5894         case TCP_CLOSE_WAIT:
5895         case TCP_CLOSING:
5896         case TCP_LAST_ACK:
5897                 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5898                         break;
5899         case TCP_FIN_WAIT1: /* also reached by fall-through from the three states above when seq is before rcv_nxt */
5900         case TCP_FIN_WAIT2:
5901                 /* RFC 793 says to queue data in these states,
5902                  * RFC 1122 says we MUST send a reset.
5903                  * BSD 4.4 also does reset.
5904                  */
5905                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5906                         if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5907                             after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5908                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5909                                 tcp_reset(sk);
5910                                 return 1;
5911                         }
5912                 }
5913                 /* Fall through */
5914         case TCP_ESTABLISHED:
5915                 tcp_data_queue(sk, skb);
5916                 queued = 1; /* skb was handed to tcp_data_queue(); it must not be freed below */
5917                 break;
5918         }
5919 
5920         /* tcp_data could move socket to TIME-WAIT */
5921         if (sk->sk_state != TCP_CLOSE) {
5922                 tcp_data_snd_check(sk);
5923                 tcp_ack_snd_check(sk);
5924         }
5925 
5926         if (!queued) {
5927 discard: /* label sits inside the if-body: "goto discard" frees skb whatever the value of queued */
5928                 __kfree_skb(skb);
5929         }
5930         return 0;
5931 }

三、主动关闭

1、TCP_ESTABLISHED ----> TCP_FIN_WAIT1

/*
 * tcp_close - close a TCP socket.
 * @sk:      socket to close
 * @timeout: handed unchanged to sk_stream_wait_close()
 *
 * Visible sequence:
 *   1. Lock the socket and mark both directions as shut down.
 *   2. A listening socket is moved straight to TCP_CLOSE and its
 *      listen state is torn down.
 *   3. Otherwise the receive queue is drained while counting the bytes
 *      that were never read, and one of four actions is chosen:
 *      disconnect (repair mode), reset (unread data), disconnect
 *      (SOCK_LINGER with zero linger time), or tcp_close_state()
 *      followed by tcp_send_fin() when it asks for a FIN.
 *   4. The socket is orphaned, the socket lock is released and the rest
 *      of the teardown runs with BHs disabled under bh_lock_sock().
 */
2008 void tcp_close(struct sock *sk, long timeout)
2009 {
2010         struct sk_buff *skb;
2011         int data_was_unread = 0;
2012         int state;
2013 
2014         lock_sock(sk);
2015         sk->sk_shutdown = SHUTDOWN_MASK;
2016 
2017         if (sk->sk_state == TCP_LISTEN) {
2018                 tcp_set_state(sk, TCP_CLOSE);
2019 
2020                 /* Special case. */
2021                 inet_csk_listen_stop(sk);
2022 
2023                 goto adjudge_to_death;
2024         }
2025 
2026         /*  We need to flush the recv. buffs.  We do this only on the
2027          *  descriptor close, not protocol-sourced closes, because the
2028          *  reader process may not have drained the data yet!
2029          */
2030         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
2031                 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
2032 
2033                 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
2034                         len--; /* do not count the FIN as unread data (presumably it occupies one sequence number) */
2035                 data_was_unread += len;
2036                 __kfree_skb(skb);
2037         }
2038 
2039         sk_mem_reclaim(sk);
2040 
2041         /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
2042         if (sk->sk_state == TCP_CLOSE)
2043                 goto adjudge_to_death;
2044 
2045         /* As outlined in RFC 2525, section 2.17, we send a RST here because
2046          * data was lost. To witness the awful effects of the old behavior of
2047          * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
2048          * GET in an FTP client, suspend the process, wait for the client to
2049          * advertise a zero window, then kill -9 the FTP client, wheee...
2050          * Note: timeout is always zero in such a case.
2051          */
2052         if (unlikely(tcp_sk(sk)->repair)) {
2053                 sk->sk_prot->disconnect(sk, 0);
2054         } else if (data_was_unread) {
2055                 /* Unread data was tossed, zap the connection. */
2056                 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
2057                 tcp_set_state(sk, TCP_CLOSE);
2058                 tcp_send_active_reset(sk, sk->sk_allocation);
2059         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
2060                 /* Check zero linger _after_ checking for unread data. */
2061                 sk->sk_prot->disconnect(sk, 0);
2062                 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
2063         } else if (tcp_close_state(sk)) { /* the state change itself happens inside tcp_close_state(); non-zero means "send a FIN" */
2064                 /* We FIN if the application ate all the data before
2065                  * zapping the connection.
2066                  */
2067 
2068                 /* RED-PEN. Formally speaking, we have broken TCP state
2069                  * machine. State transitions:
2070                  *
2071                  * TCP_ESTABLISHED -> TCP_FIN_WAIT1
2072                  * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
2073                  * TCP_CLOSE_WAIT -> TCP_LAST_ACK
2074                  *
2075                  * are legal only when FIN has been sent (i.e. in window),
2076                  * rather than queued out of window. Purists blame.
2077                  *
2078                  * F.e. "RFC state" is ESTABLISHED,
2079                  * if Linux state is FIN-WAIT-1, but FIN is still not sent.
2080                  *
2081                  * The visible declinations are that sometimes
2082                  * we enter time-wait state, when it is not required really
2083                  * (harmless), do not send active resets, when they are
2084                  * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
2085                  * they look as CLOSING or LAST_ACK for Linux)
2086                  * Probably, I missed some more holelets.
2087                  *                                              --ANK
2088                  * XXX (TFO) - To start off we don't support SYN+ACK+FIN
2089                  * in a single packet! (May consider it later but will
2090                  * probably need API support or TCP_CORK SYN-ACK until
2091                  * data is written and socket is closed.)
2092                  */
2093                 tcp_send_fin(sk);
2094         }
2095 
2096         sk_stream_wait_close(sk, timeout);
2097 
2098 adjudge_to_death:
2099         state = sk->sk_state; /* snapshot taken before release_sock(); compared with the live state further down */
2100         sock_hold(sk); /* paired with the sock_put() at the end of the function */
2101         sock_orphan(sk);
2102 
2103         /* It is the last release_sock in its life. It will remove backlog. */
2104         release_sock(sk);
2105 
2106 
2107         /* Now socket is owned by kernel and we acquire BH lock
2108            to finish close. No need to check for user refs.
2109          */
2110         local_bh_disable();
2111         bh_lock_sock(sk);
2112         WARN_ON(sock_owned_by_user(sk));
2113 
2114         percpu_counter_inc(sk->sk_prot->orphan_count);
2115 
2116         /* Have we already been destroyed by a softirq or backlog? */
2117         if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
2118                 goto out;
2119 
2120         /*      This is a (useful) BSD violating of the RFC. There is a
2121          *      problem with TCP as specified in that the other end could
2122          *      keep a socket open forever with no application left this end.
2123          *      We use a 1 minute timeout (about the same as BSD) then kill
2124          *      our end. If they send after that then tough - BUT: long enough
2125          *      that we won't make the old 4*rto = almost no time - whoops
2126          *      reset mistake.
2127          *
2128          *      Nope, it was not mistake. It is really desired behaviour
2129          *      f.e. on http servers, when such sockets are useless, but
2130          *      consume significant resources. Let's do it with special
2131          *      linger2 option.                                 --ANK
2132          */
2133 
2134         if (sk->sk_state == TCP_FIN_WAIT2) {
2135                 struct tcp_sock *tp = tcp_sk(sk);
2136                 if (tp->linger2 < 0) {
2137                         tcp_set_state(sk, TCP_CLOSE);
2138                         tcp_send_active_reset(sk, GFP_ATOMIC);
2139                         NET_INC_STATS_BH(sock_net(sk),
2140                                         LINUX_MIB_TCPABORTONLINGER);
2141                 } else {
2142                         const int tmo = tcp_fin_time(sk);
2143 
2144                         if (tmo > TCP_TIMEWAIT_LEN) {
2145                                 inet_csk_reset_keepalive_timer(sk,
2146                                                 tmo - TCP_TIMEWAIT_LEN);
2147                         } else {
2148                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
2149                                 goto out;
2150                         }
2151                 }
2152         }
2153         if (sk->sk_state != TCP_CLOSE) {
2154                 sk_mem_reclaim(sk);
2155                 if (tcp_check_oom(sk, 0)) {
2156                         tcp_set_state(sk, TCP_CLOSE);
2157                         tcp_send_active_reset(sk, GFP_ATOMIC);
2158                         NET_INC_STATS_BH(sock_net(sk),
2159                                         LINUX_MIB_TCPABORTONMEMORY);
2160                 }
2161         }
2162 
2163         if (sk->sk_state == TCP_CLOSE) {
2164                 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
2165                 /* We could get here with a non-NULL req if the socket is
2166                  * aborted (e.g., closed with unread data) before 3WHS
2167                  * finishes.
2168                  */
2169                 if (req)
2170                         reqsk_fastopen_remove(sk, req, false);
2171                 inet_csk_destroy_sock(sk);
2172         }
2173         /* Otherwise, socket is reprieved until protocol close. */
2174 
2175 out:
2176         bh_unlock_sock(sk);
2177         local_bh_enable();
2178         sock_put(sk);
2179 }
2180 EXPORT_SYMBOL(tcp_close);

/*
 * tcp_close_state - advance the socket to the state that follows a close.
 *
 * Looks up the entry for the current sk->sk_state in new_state[], moves
 * the socket to the state encoded in that entry and returns non-zero
 * when the entry has TCP_ACTION_FIN set, i.e. when the caller is expected
 * to send a FIN (both visible callers call tcp_send_fin() in that case).
 */
1959 static int tcp_close_state(struct sock *sk)
1960 {
1961         int next = (int)new_state[sk->sk_state];
1962         int ns = next & TCP_STATE_MASK; /* presumably strips the action bit, leaving only the state number - confirm against new_state[] */
1963 
1964     tcp_set_state(sk, ns);
1965 
1966         return next & TCP_ACTION_FIN;
1967 }

/*
 * tcp_shutdown - handle the sending half of a shutdown request.
 * @sk:  socket being shut down
 * @how: shutdown flags; only SEND_SHUTDOWN is examined here
 *
 * Does nothing unless @how contains SEND_SHUTDOWN and the socket is in
 * ESTABLISHED, SYN_SENT, SYN_RECV or CLOSE_WAIT. In those states the
 * socket is advanced through tcp_close_state() and a FIN is sent when
 * that function returns non-zero.
 */
1974 void tcp_shutdown(struct sock *sk, int how)
1975 {
1976         /*      We need to grab some memory, and put together a FIN,
1977          *      and then put it into the queue to be sent.
1978          *              Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1979          */
1980         if (!(how & SEND_SHUTDOWN))
1981                 return;
1982 
1983         /* If we've already sent a FIN, or it's a closed state, skip this. */
1984         if ((1 << sk->sk_state) & /* turn the state number into a bit so it can be tested against the TCPF_* mask */
1985             (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1986              TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
1987                 /* Clear out any half completed packets.  FIN if needed. */
1988                 if (tcp_close_state(sk))
1989                         tcp_send_fin(sk);
1990         }
1991 }

/*
 * tcp_close_state - advance the socket to the state that follows a close.
 *
 * Reads the entry for the current sk->sk_state from new_state[], sets
 * the socket to the state part of that entry and returns the
 * TCP_ACTION_FIN bit of the entry (non-zero: a FIN should be sent).
 */
1959 static int tcp_close_state(struct sock *sk)
1960 {
1961         int next = (int)new_state[sk->sk_state];
1962         int ns = next & TCP_STATE_MASK; /* presumably the state number without the action bit - confirm against new_state[] */
1963 
1964     tcp_set_state(sk, ns);
1965 
1966         return next & TCP_ACTION_FIN;
1967 }

2、TCP_FIN_WAIT1---->TCP_FIN_WAIT2

/*
 * tcp_rcv_state_process - process one incoming segment according to
 * the current sk->sk_state.
 *
 * The first switch fully handles TCP_CLOSE, TCP_LISTEN and TCP_SYN_SENT.
 * All other states continue with tcp_validate_incoming(), the ACK-driven
 * transitions of "step 5" (SYN_RECV, FIN_WAIT1, CLOSING, LAST_ACK),
 * tcp_urg() and finally the data queueing switch of "step 7".
 *
 * Returns 0 on the normal and discard paths and 1 on the abort paths;
 * the SYN_SENT path may return whatever tcp_rcv_synsent_state_process()
 * returned, and a non-positive tcp_validate_incoming() result is
 * returned negated.
 * NOTE(review): how the caller reacts to a non-zero return is not
 * visible in this listing - confirm against the caller.
 */
5711 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5712                           struct tcphdr *th, unsigned len)
5713 {
5714         struct tcp_sock *tp = tcp_sk(sk);
5715         struct inet_connection_sock *icsk = inet_csk(sk);
5716         int queued = 0;
5717         int res;
5718 
5719         tp->rx_opt.saw_tstamp = 0;
5720 
5721         switch (sk->sk_state) {
5722         case TCP_CLOSE:
5723                 goto discard;
5724 
5725         case TCP_LISTEN:
5726                 if (th->ack)
5727                         return 1;
5728 
5729                 if (th->rst)
5730                         goto discard;
5731 
5732                 if (th->syn) {
5733                         if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5734                                 return 1;
5735 
5736                         /* Now we have several options: In theory there is
5737                          * nothing else in the frame. KA9Q has an option to
5738                          * send data with the syn, BSD accepts data with the
5739                          * syn up to the [to be] advertised window and
5740                          * Solaris 2.1 gives you a protocol error. For now
5741                          * we just ignore it, that fits the spec precisely
5742                          * and avoids incompatibilities. It would be nice in
5743                          * future to drop through and process the data.
5744                          *
5745                          * Now that TTCP is starting to be used we ought to
5746                          * queue this data.
5747                          * But, this leaves one open to an easy denial of
5748                          * service attack, and SYN cookies can't defend
5749                          * against this problem. So, we drop the data
5750                          * in the interest of security over speed unless
5751                          * it's still in use.
5752                          */
5753                         kfree_skb(skb);
5754                         return 0;
5755                 }
5756                 goto discard;
5757 
5758         case TCP_SYN_SENT:
5759                 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5760                 if (queued >= 0)
5761                         return queued;
5762 
5763                 /* Do step6 onward by hand. */
5764                 tcp_urg(sk, skb, th);
5765                 __kfree_skb(skb);
5766                 tcp_data_snd_check(sk);
5767                 return 0;
5768         }
5769 
5770         res = tcp_validate_incoming(sk, skb, th, 0);
5771         if (res <= 0)
5772                 return -res; /* res is <= 0 here, so the value returned is >= 0 */
5773 
5774         /* step 5: check the ACK field */
5775         if (th->ack) {
5776                 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5777 
5778                 switch (sk->sk_state) {
5779                 case TCP_SYN_RECV:
5780                         if (acceptable) {
5781                                 tp->copied_seq = tp->rcv_nxt;
5782                                 smp_mb();
5783                                 tcp_set_state(sk, TCP_ESTABLISHED);
5784                                 sk->sk_state_change(sk);
5785 
5786                                 /* Note, that this wakeup is only for marginal
5787                                  * crossed SYN case. Passively open sockets
5788                                  * are not waked up, because sk->sk_sleep ==
5789                                  * NULL and sk->sk_socket == NULL.
5790                                  */
5791                                 if (sk->sk_socket)
5792                                         sk_wake_async(sk,
5793                                                       SOCK_WAKE_IO, POLL_OUT);
5794 
5795                                 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5796                                 tp->snd_wnd = ntohs(th->window) <<
5797                                               tp->rx_opt.snd_wscale;
5798                                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5799 
5800                                 /* tcp_ack considers this ACK as duplicate
5801                                  * and does not calculate rtt.
5802                                  * Force it here.
5803                                  */
5804                                 tcp_ack_update_rtt(sk, 0, 0);
5805 
5806                                 if (tp->rx_opt.tstamp_ok)
5807                                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5808 
5809                                 /* Make sure socket is routed, for
5810                                  * correct metrics.
5811                                  */
5812                                 icsk->icsk_af_ops->rebuild_header(sk);
5813 
5814                                 tcp_init_metrics(sk);
5815 
5816                                 tcp_init_congestion_control(sk);
5817 
5818                                 /* Prevent spurious tcp_cwnd_restart() on
5819                                  * first data packet.
5820                                  */
5821                                 tp->lsndtime = tcp_time_stamp;
5822 
5823                                 tcp_mtup_init(sk);
5824                                 tcp_initialize_rcv_mss(sk);
5825                                 tcp_init_buffer_space(sk);
5826                                 tcp_fast_path_on(tp);
5827                         } else {
5828                                 return 1;
5829                         }
5830                         break;
5831 
5832                 case TCP_FIN_WAIT1:
5833                         if (tp->snd_una == tp->write_seq) { /* presumably: everything we wrote, FIN included, has been ACKed - TODO confirm */
5834                      tcp_set_state(sk, TCP_FIN_WAIT2); /* FIN_WAIT1 -> FIN_WAIT2: the transition this section illustrates */
5835                                 sk->sk_shutdown |= SEND_SHUTDOWN;
5836                                 dst_confirm(sk->sk_dst_cache);
5837 
5838                                 if (!sock_flag(sk, SOCK_DEAD))
5839                                         /* Wake up lingering close() */
5840                                         sk->sk_state_change(sk);
5841                                 else {
5842                                         int tmo;
5843 
5844                                         if (tp->linger2 < 0 ||
5845                                             (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5846                                              after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5847                                                 tcp_done(sk);
5848                                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5849                                                 return 1;
5850                                         }
5851 
5852                                         tmo = tcp_fin_time(sk);
5853                                         if (tmo > TCP_TIMEWAIT_LEN) {
5854                                                 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5855                                         } else if (th->fin || sock_owned_by_user(sk)) {
5856                                                 /* Bad case. We could lose such FIN otherwise.
5857                                                  * It is not a big problem, but it looks confusing
5858                                                  * and not so rare event. We still can lose it now,
5859                                                  * if it spins in bh_lock_sock(), but it is really
5860                                                  * marginal case.
5861                                                  */
5862                                                 inet_csk_reset_keepalive_timer(sk, tmo);
5863                                         } else {
5864                                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5865                                                 goto discard;
5866                                         }
5867                                 }
5868                         }
5869                         break;
5870 
5871                 case TCP_CLOSING:
5872                         if (tp->snd_una == tp->write_seq) {
5873                                 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5874                                 goto discard;
5875                         }
5876                         break;
5877 
5878                 case TCP_LAST_ACK:
5879                         if (tp->snd_una == tp->write_seq) {
5880                                 tcp_update_metrics(sk);
5881                                 tcp_done(sk);
5882                                 goto discard;
5883                         }
5884                         break;
5885                 }
5886         } else
5887                 goto discard; /* a segment without ACK is dropped in all remaining states */
5888 
5889         /* step 6: check the URG bit */
5890         tcp_urg(sk, skb, th);
5891 
5892         /* step 7: process the segment text */
5893         switch (sk->sk_state) {
5894         case TCP_CLOSE_WAIT:
5895         case TCP_CLOSING:
5896         case TCP_LAST_ACK:
5897                 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5898                         break;
5899         case TCP_FIN_WAIT1: /* also reached by fall-through from the three states above when seq is before rcv_nxt */
5900         case TCP_FIN_WAIT2:
5901                 /* RFC 793 says to queue data in these states,
5902                  * RFC 1122 says we MUST send a reset.
5903                  * BSD 4.4 also does reset.
5904                  */
5905                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5906                         if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5907                             after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5908                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5909                                 tcp_reset(sk);
5910                                 return 1;
5911                         }
5912                 }
5913                 /* Fall through */
5914         case TCP_ESTABLISHED:
5915                 tcp_data_queue(sk, skb);
5916                 queued = 1; /* skb was handed to tcp_data_queue(); it must not be freed below */
5917                 break;
5918         }
5919 
5920         /* tcp_data could move socket to TIME-WAIT */
5921         if (sk->sk_state != TCP_CLOSE) {
5922                 tcp_data_snd_check(sk);
5923                 tcp_ack_snd_check(sk);
5924         }
5925 
5926         if (!queued) {
5927 discard: /* label sits inside the if-body: "goto discard" frees skb whatever the value of queued */
5928                 __kfree_skb(skb);
5929         }
5930         return 0;
5931 }
5932

3、TCP_FIN_WAIT2 ----> TCP_TIME_WAIT

/*
 * tcp_fin - react to a FIN received from the peer.
 * @skb: not referenced in the body shown here
 * @sk:  socket that received the FIN
 * @th:  not referenced in the body shown here
 *
 * Schedules an ACK, marks the receive side as shut down (RCV_SHUTDOWN,
 * SOCK_DONE) and then changes state depending on the current one:
 *   SYN_RECV / ESTABLISHED          -> CLOSE_WAIT
 *   FIN_WAIT1                       -> CLOSING   (after sending an ACK)
 *   FIN_WAIT2                       -> tcp_time_wait() (after sending an ACK)
 *   CLOSE_WAIT / CLOSING / LAST_ACK -> unchanged
 * Afterwards the out-of-order queue is purged, SACK state is reset when
 * SACK is in use, and the owner is woken up unless the socket is dead.
 */
4064 static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
4065 {
4066         struct tcp_sock *tp = tcp_sk(sk);
4067 
4068         inet_csk_schedule_ack(sk);
4069 
4070         sk->sk_shutdown |= RCV_SHUTDOWN;
4071         sock_set_flag(sk, SOCK_DONE);
4072 
4073         switch (sk->sk_state) {
4074         case TCP_SYN_RECV:
4075         case TCP_ESTABLISHED:
4076                 /* Move to CLOSE_WAIT */
4077                 tcp_set_state(sk, TCP_CLOSE_WAIT);
4078                 inet_csk(sk)->icsk_ack.pingpong = 1;
4079                 break;
4080 
4081         case TCP_CLOSE_WAIT:
4082         case TCP_CLOSING:
4083                 /* Received a retransmission of the FIN, do
4084                  * nothing.
4085                  */
4086                 break;
4087         case TCP_LAST_ACK:
4088                 /* RFC793: Remain in the LAST-ACK state. */
4089                 break;
4090 
4091         case TCP_FIN_WAIT1:
4092                 /* This case occurs when a simultaneous close
4093                  * happens, we must ack the received FIN and
4094                  * enter the CLOSING state.
4095                  */
4096                 tcp_send_ack(sk);
4097                 tcp_set_state(sk, TCP_CLOSING);
4098                 break;
4099         case TCP_FIN_WAIT2:
4100                 /* Received a FIN -- send ACK and enter TIME_WAIT. */
4101                 tcp_send_ack(sk);
4102           tcp_time_wait(sk, TCP_TIME_WAIT, 0); /* FIN_WAIT2 -> TIME_WAIT: the transition this section illustrates */
4103                 break;
4104         default:
4105                 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
4106                  * cases we should never reach this piece of code.
4107                  */
4108                 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4109                        __func__, sk->sk_state);
4110                 break;
4111         }
4112 
4113         /* It _is_ possible, that we have something out-of-order _after_ FIN.
4114          * Probably, we should reset in this case. For now drop them.
4115          */
4116         __skb_queue_purge(&tp->out_of_order_queue);
4117         if (tcp_is_sack(tp))
4118                 tcp_sack_reset(&tp->rx_opt);
4119         sk_mem_reclaim(sk);
4120 
4121         if (!sock_flag(sk, SOCK_DEAD)) {
4122                 sk->sk_state_change(sk);
4123 
4124                 /* Do not send POLL_HUP for half duplex close. */
4125                 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4126                     sk->sk_state == TCP_CLOSE)
4127                         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4128                 else
4129                         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4130         }
4131 }
4132

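4、TCP_TIME_WAIT ----> TCP_CLOSE

（本节没有单独的代码。）tcp_time_wait() 会分配一个 timewait 控制块（inet_timewait_sock）来代替原 sock 维持 TIME_WAIT 状态，随后对原 sock 调用 tcp_done()，将其状态置为 TCP_CLOSE；timewait 控制块则在定时器超时（TCP_TIMEWAIT_LEN）后被回收。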

四、被动关闭

1、TCP_ESTABLISHED---->TCP_CLOSE_WAIT

/*
 * tcp_fin() - react to a FIN that has been received on @sk.
 *
 * For the passive-close path the relevant branch is the first one:
 * a socket in SYN_RECV or ESTABLISHED is moved to CLOSE_WAIT and
 * icsk_ack.pingpong is set to 1.
 *
 * Other states handled by the switch:
 *   FIN_WAIT1              -> CLOSING   (ACK sent first)
 *   FIN_WAIT2              -> tcp_time_wait(sk, TCP_TIME_WAIT, 0)
 *                                       (ACK sent first)
 *   CLOSE_WAIT, CLOSING,
 *   LAST_ACK               -> unchanged
 *   anything else          -> logged as impossible
 *
 * Before the switch an ACK is scheduled and the receive direction is
 * marked shut down (RCV_SHUTDOWN, SOCK_DONE). After it the out-of-order
 * queue is purged, SACK state is reset when SACK is in use, memory is
 * reclaimed, and - unless the socket is SOCK_DEAD - waiters are notified.
 *
 * @skb and @th are not referenced in the body.
 */
4064 static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
4065 {
4066         struct tcp_sock *tp = tcp_sk(sk);
4067 
4068         inet_csk_schedule_ack(sk);
4069 
4070         sk->sk_shutdown |= RCV_SHUTDOWN;
4071         sock_set_flag(sk, SOCK_DONE);
4072 
4073         switch (sk->sk_state) {
4074         case TCP_SYN_RECV:
4075         case TCP_ESTABLISHED:
4076                 /* Move to CLOSE_WAIT */
                     /* This is the ESTABLISHED -> CLOSE_WAIT transition. */
4077                 tcp_set_state(sk, TCP_CLOSE_WAIT);
4078                 inet_csk(sk)->icsk_ack.pingpong = 1;
4079                 break;
4080 
4081         case TCP_CLOSE_WAIT:
4082         case TCP_CLOSING:
4083                 /* Received a retransmission of the FIN, do
4084                  * nothing.
4085                  */
4086                 break;
4087         case TCP_LAST_ACK:
4088                 /* RFC793: Remain in the LAST-ACK state. */
4089                 break;
4090 
4091         case TCP_FIN_WAIT1:
4092                 /* This case occurs when a simultaneous close
4093                  * happens, we must ack the received FIN and
4094                  * enter the CLOSING state.
4095                  */
4096                 tcp_send_ack(sk);
4097           tcp_set_state(sk, TCP_CLOSING);
4098                 break;
4099         case TCP_FIN_WAIT2:
4100                 /* Received a FIN -- send ACK and enter TIME_WAIT. */
4101                 tcp_send_ack(sk);
4102                 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
4103                 break;
4104         default:
4105                 /* Only TCP_LISTEN and TCP_CLOSE are left, in these
4106                  * cases we should never reach this piece of code.
4107                  */
4108                 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
4109                        __func__, sk->sk_state);
4110                 break;
4111         }
4112 
4113         /* It _is_ possible, that we have something out-of-order _after_ FIN.
4114          * Probably, we should reset in this case. For now drop them.
4115          */
4116         __skb_queue_purge(&tp->out_of_order_queue);
4117         if (tcp_is_sack(tp))
4118                 tcp_sack_reset(&tp->rx_opt);
4119         sk_mem_reclaim(sk);
4120 
             /* Notify only while the socket is not flagged SOCK_DEAD. */
4121         if (!sock_flag(sk, SOCK_DEAD)) {
4122                 sk->sk_state_change(sk);
4123 
4124                 /* Do not send POLL_HUP for half duplex close. */
                     /* POLL_HUP only when both directions are shut down or
                      * the socket is already TCP_CLOSE; otherwise POLL_IN.
                      */
4125                 if (sk->sk_shutdown == SHUTDOWN_MASK ||
4126                     sk->sk_state == TCP_CLOSE)
4127                         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
4128                 else
4129                         sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4130         }
4131 }
4132

2、TCP_CLOSE_WAIT---->TCP_LAST_ACK

/*
 * tcp_shutdown() - shut down the sending direction of @sk.
 *
 * @how: shutdown flags; nothing happens unless SEND_SHUTDOWN is set.
 *
 * Acts only when the socket is in ESTABLISHED, SYN_SENT, SYN_RECV or
 * CLOSE_WAIT. In those states tcp_close_state() moves the socket to its
 * next state, and a FIN is sent when that helper returns non-zero.
 *
 * NOTE(review): the new_state[] table consulted by tcp_close_state() is
 * not shown here; for CLOSE_WAIT it presumably yields LAST_ACK with the
 * FIN action set - confirm against the table.
 */
1974 void tcp_shutdown(struct sock *sk, int how)
1975 {
1976         /*      We need to grab some memory, and put together a FIN,
1977          *      and then put it into the queue to be sent.
1978          *              Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1979          */
1980         if (!(how & SEND_SHUTDOWN))
1981                 return;
1982 
1983         /* If we've already sent a FIN, or it's a closed state, skip this. */
             /* (1 << state) turns the state number into a bit that is
              * tested against the TCPF_* mask below.
              */
1984         if ((1 << sk->sk_state) &
1985             (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1986              TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
1987                 /* Clear out any half completed packets.  FIN if needed. */
1988                 if (tcp_close_state(sk))
1989                         tcp_send_fin(sk);
1990         }
1991 }


/*
 * tcp_close_state() - advance @sk to the state that follows a close.
 *
 * Looks up new_state[] by the current state. The table entry carries the
 * next state in the bits covered by TCP_STATE_MASK and may additionally
 * have TCP_ACTION_FIN set.
 *
 * Returns non-zero (the TCP_ACTION_FIN bit) when the table entry has
 * that bit set, zero otherwise. tcp_shutdown() sends a FIN on non-zero.
 */
1959 static int tcp_close_state(struct sock *sk)
1960 {
1961         int next = (int)new_state[sk->sk_state];
1962         int ns = next & TCP_STATE_MASK;
1963 
1964         tcp_set_state(sk, ns);
1965 
1966         return next & TCP_ACTION_FIN;
1967 }

3、TCP_LAST_ACK---->TCP_CLOSE

/*
 * tcp_rcv_state_process() - process an incoming segment according to the
 * current state of @sk.
 *
 * @sk:  receiving socket
 * @skb: the received segment
 * @th:  TCP header of @skb
 * @len: passed through to tcp_rcv_synsent_state_process()
 *
 * Flow:
 *  1. CLOSE, LISTEN and SYN_SENT are handled by the first switch and
 *     never reach the common path (CLOSE: discard; LISTEN: SYN goes to
 *     the conn_request hook; SYN_SENT: delegated to
 *     tcp_rcv_synsent_state_process()).
 *  2. tcp_validate_incoming() checks the segment.
 *  3. Step 5 (ACK): segments without ACK are discarded; otherwise
 *     tcp_ack() runs and the per-state transitions happen
 *     (SYN_RECV -> ESTABLISHED, FIN_WAIT1 -> FIN_WAIT2,
 *     CLOSING -> tcp_time_wait(), LAST_ACK -> tcp_done()).
 *  4. Step 6 (URG) and step 7 (segment text / tcp_data_queue()).
 *  5. Unless the socket ended up in TCP_CLOSE, pending data and ACKs
 *     are checked; @skb is freed here if it was not queued.
 *
 * Returns 0 on the paths where the segment was queued or freed here;
 * 1 on several reject paths (ACK while listening, conn_request failure,
 * unacceptable ACK in SYN_RECV, data after shutdown); -res when
 * tcp_validate_incoming() returns res <= 0; or the non-negative result
 * of tcp_rcv_synsent_state_process().
 * NOTE(review): what a non-zero return means to the caller is not
 * visible here - presumably it requests a reset; confirm at call sites.
 */
5711 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5712                           struct tcphdr *th, unsigned len)
5713 {
5714         struct tcp_sock *tp = tcp_sk(sk);
5715         struct inet_connection_sock *icsk = inet_csk(sk);
5716         int queued = 0;
5717         int res;
5718 
5719         tp->rx_opt.saw_tstamp = 0;
5720 
5721         switch (sk->sk_state) {
5722         case TCP_CLOSE:
5723                 goto discard;
5724 
5725         case TCP_LISTEN:
5726                 if (th->ack)
5727                         return 1;
5728 
5729                 if (th->rst)
5730                         goto discard;
5731 
5732                 if (th->syn) {
5733                         if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
5734                                 return 1;
5735 
5736                         /* Now we have several options: In theory there is
5737                          * nothing else in the frame. KA9Q has an option to
5738                          * send data with the syn, BSD accepts data with the
5739                          * syn up to the [to be] advertised window and
5740                          * Solaris 2.1 gives you a protocol error. For now
5741                          * we just ignore it, that fits the spec precisely
5742                          * and avoids incompatibilities. It would be nice in
5743                          * future to drop through and process the data.
5744                          *
5745                          * Now that TTCP is starting to be used we ought to
5746                          * queue this data.
5747                          * But, this leaves one open to an easy denial of
5748                          * service attack, and SYN cookies can't defend
5749                          * against this problem. So, we drop the data
5750                          * in the interest of security over speed unless
5751                          * it's still in use.
5752                          */
5753                         kfree_skb(skb);
5754                         return 0;
5755                 }
5756                 goto discard;
5757 
5758         case TCP_SYN_SENT:
5759                 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5760                 if (queued >= 0)
5761                         return queued;
5762 
5763                 /* Do step6 onward by hand. */
5764                 tcp_urg(sk, skb, th);
5765                 __kfree_skb(skb);
5766                 tcp_data_snd_check(sk);
5767                 return 0;
5768         }
5769 
             /* All remaining states share the validation and ACK path. */
5770         res = tcp_validate_incoming(sk, skb, th, 0);
5771         if (res <= 0)
5772                 return -res;
5773 
5774         /* step 5: check the ACK field */
5775         if (th->ack) {
5776                 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5777 
5778                 switch (sk->sk_state) {
5779                 case TCP_SYN_RECV:
5780                         if (acceptable) {
5781                                 tp->copied_seq = tp->rcv_nxt;
5782                                 smp_mb();
5783                                 tcp_set_state(sk, TCP_ESTABLISHED);
5784                                 sk->sk_state_change(sk);
5785 
5786                                 /* Note, that this wakeup is only for marginal
5787                                  * crossed SYN case. Passively open sockets
5788                                  * are not waked up, because sk->sk_sleep ==
5789                                  * NULL and sk->sk_socket == NULL.
5790                                  */
5791                                 if (sk->sk_socket)
5792                                         sk_wake_async(sk,
5793                                                       SOCK_WAKE_IO, POLL_OUT);
5794 
5795                                 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5796                                 tp->snd_wnd = ntohs(th->window) <<
5797                                               tp->rx_opt.snd_wscale;
5798                                 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5799 
5800                                 /* tcp_ack considers this ACK as duplicate
5801                                  * and does not calculate rtt.
5802                                  * Force it here.
5803                                  */
5804                                 tcp_ack_update_rtt(sk, 0, 0);
5805 
5806                                 if (tp->rx_opt.tstamp_ok)
5807                                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5808 
5809                                 /* Make sure socket is routed, for
5810                                  * correct metrics.
5811                                  */
5812                                 icsk->icsk_af_ops->rebuild_header(sk);
5813 
5814                                 tcp_init_metrics(sk);
5815 
5816                                 tcp_init_congestion_control(sk);
5817 
5818                                 /* Prevent spurious tcp_cwnd_restart() on
5819                                  * first data packet.
5820                                  */
5821                                 tp->lsndtime = tcp_time_stamp;
5822 
5823                                 tcp_mtup_init(sk);
5824                                 tcp_initialize_rcv_mss(sk);
5825                                 tcp_init_buffer_space(sk);
5826                                 tcp_fast_path_on(tp);
5827                         } else {
5828                                 return 1;
5829                         }
5830                         break;
5831 
5832                 case TCP_FIN_WAIT1:
                             /* FIN_WAIT1 -> FIN_WAIT2 once everything written
                              * has been acknowledged (snd_una == write_seq).
                              */
5833                         if (tp->snd_una == tp->write_seq) {
5834                                 tcp_set_state(sk, TCP_FIN_WAIT2);
5835                                 sk->sk_shutdown |= SEND_SHUTDOWN;
5836                                 dst_confirm(sk->sk_dst_cache);
5837 
5838                                 if (!sock_flag(sk, SOCK_DEAD))
5839                                         /* Wake up lingering close() */
5840                                         sk->sk_state_change(sk);
5841                                 else {
5842                                         int tmo;
5843 
5844                                         if (tp->linger2 < 0 ||
5845                                             (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5846                                              after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5847                                                 tcp_done(sk);
5848                                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5849                                                 return 1;
5850                                         }
5851 
5852                                         tmo = tcp_fin_time(sk);
5853                                         if (tmo > TCP_TIMEWAIT_LEN) {
5854                                                 inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
5855                                         } else if (th->fin || sock_owned_by_user(sk)) {
5856                                                 /* Bad case. We could lose such FIN otherwise.
5857                                                  * It is not a big problem, but it looks confusing
5858                                                  * and not so rare event. We still can lose it now,
5859                                                  * if it spins in bh_lock_sock(), but it is really
5860                                                  * marginal case.
5861                                                  */
5862                                                 inet_csk_reset_keepalive_timer(sk, tmo);
5863                                         } else {
5864                                                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
5865                                                 goto discard;
5866                                         }
5867                                 }
5868                         }
5869                         break;
5870 
5871                 case TCP_CLOSING:
                             /* CLOSING -> TIME_WAIT once everything written
                              * has been acknowledged.
                              */
5872                         if (tp->snd_una == tp->write_seq) {
5873                                 tcp_time_wait(sk, TCP_TIME_WAIT, 0);
5874                                 goto discard;
5875                         }
5876                         break;
5877 
5878                 case TCP_LAST_ACK:
                             /* Everything written has been acknowledged:
                              * update metrics and finish the socket with
                              * tcp_done() (presumably leaving it in
                              * TCP_CLOSE - tcp_done() is not shown here).
                              */
5879                         if (tp->snd_una == tp->write_seq) {
5880                                 tcp_update_metrics(sk);
5881                                 tcp_done(sk);
5882                                 goto discard;
5883                         }
5884                         break;
5885                 }
5886         } else
                     /* Segments without the ACK bit are dropped here. */
5887                 goto discard;
5888 
5889         /* step 6: check the URG bit */
5890         tcp_urg(sk, skb, th);
5891 
5892         /* step 7: process the segment text */
5893         switch (sk->sk_state) {
5894         case TCP_CLOSE_WAIT:
5895         case TCP_CLOSING:
5896         case TCP_LAST_ACK:
5897                 if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
5898                         break;
                     /* Otherwise (segment starts before rcv_nxt) this falls
                      * through into the FIN_WAIT1/FIN_WAIT2 handling.
                      */
5899         case TCP_FIN_WAIT1:
5900         case TCP_FIN_WAIT2:
5901                 /* RFC 793 says to queue data in these states,
5902                  * RFC 1122 says we MUST send a reset.
5903                  * BSD 4.4 also does reset.
5904                  */
5905                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5906                         if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5907                             after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5908                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5909                                 tcp_reset(sk);
5910                                 return 1;
5911                         }
5912                 }
5913                 /* Fall through */
5914         case TCP_ESTABLISHED:
5915                 tcp_data_queue(sk, skb);
5916                 queued = 1;
5917                 break;
5918         }
5919 
5920         /* tcp_data could move socket to TIME-WAIT */
5921         if (sk->sk_state != TCP_CLOSE) {
5922                 tcp_data_snd_check(sk);
5923                 tcp_ack_snd_check(sk);
5924         }
5925 
             /* The discard label sits inside the if-body so that both a
              * non-queued segment and every "goto discard" free the skb.
              */
5926         if (!queued) {
5927 discard:
5928                 __kfree_skb(skb);
5929         }
5930         return 0;
5931 }
5932

posted on 2016-02-06 10:36 mylinuxer 阅读(1818) 评论(0) 收藏举报

刷新页面返回顶部