TCP输入 之 tcp_v4_rcv
tcp_v4_rcv函数为TCP的总入口,数据包从IP层传递上来,进入该函数;其协议操作函数结构如下所示,其中handler即为IP层向TCP传递数据包的回调函数,设置为tcp_v4_rcv;
/* struct net_protocol instance for TCP: .handler is tcp_v4_rcv, .err_handler is
 * tcp_v4_err, and both early-demux hooks point at tcp_v4_early_demux.
 * The leading numbers below are the article's listing line numbers. */
1 static struct net_protocol tcp_protocol = { 2 .early_demux = tcp_v4_early_demux, 3 .early_demux_handler = tcp_v4_early_demux, 4 .handler = tcp_v4_rcv, 5 .err_handler = tcp_v4_err, 6 .no_policy = 1, 7 .netns_ok = 1, 8 .icmp_strict_tag_validation = 1, 9 };
在IP层处理本地数据包时,会获取到上述结构的实例,并且调用实例的handler回调,也就是调用了tcp_v4_rcv;
/* Abridged excerpt: looks up the protocol entry in inet_protos[] and invokes its
 * handler on the skb. A negative handler return is negated, stored in protocol,
 * and control jumps to resubmit; otherwise IPSTATS_MIB_INDELIVERS is counted.
 * NOTE(review): ipprot, protocol and the resubmit label are not declared in this
 * listing - they come from the part of the function that was cut. */
1 static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 2 { 3 /* Fetch the protocol handler structure */ 4 ipprot = rcu_dereference(inet_protos[protocol]); 5 if (ipprot) { 6 int ret; 7 8 /* Upper-layer protocol receive handler */ 9 ret = ipprot->handler(skb); 10 if (ret < 0) { 11 protocol = -ret; 12 goto resubmit; 13 } 14 __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); 15 } 16 }
tcp_v4_rcv函数主要做以下几个工作:(1) 设置TCP_CB (2) 查找控制块 (3) 根据控制块状态做不同处理,包括TCP_TIME_WAIT状态处理,TCP_NEW_SYN_RECV状态处理,TCP_LISTEN状态处理 (4) 接收TCP段;
/* tcp_v4_rcv - receive entry point for a TCP segment carried in skb.
 *
 * Visible flow: validate the packet type, header length and checksum; fill in
 * TCP_SKB_CB(skb) (seq, end_seq, ack_seq, flags, DS field); look up the socket;
 * then dispatch on sk->sk_state (TCP_TIME_WAIT, TCP_NEW_SYN_RECV, TCP_LISTEN,
 * everything else).
 *
 * Returns the value of tcp_v4_do_rcv() when that path is taken, otherwise 0
 * (including every discard path, which frees the skb with kfree_skb()).
 *
 * The leading numbers below are the article's listing line numbers. */
1 int tcp_v4_rcv(struct sk_buff *skb) 2 { 3 struct net *net = dev_net(skb->dev); 4 const struct iphdr *iph; 5 const struct tcphdr *th; 6 bool refcounted; 7 struct sock *sk; 8 int ret; 9 10 /* Not addressed to this host */ 11 if (skb->pkt_type != PACKET_HOST) 12 goto discard_it; 13 14 /* Count it even if it's bad */ 15 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 16 17 /* Check the header data; if it is not all there, copy it in from the fragments */ 18 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 19 goto discard_it; 20 21 /* Get the TCP header */ 22 th = (const struct tcphdr *)skb->data; 23 24 /* Header length (doff) too small */ 25 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) 26 goto bad_packet; 27 28 /* Same check again, now for the full header length given by doff */ 29 if (!pskb_may_pull(skb, th->doff * 4)) 30 goto discard_it; 31 32 /* An explanation is required here, I think. 33 * Packet length and doff are validated by header prediction, 34 * provided case of th->doff==0 is eliminated. 35 * So, we defer the checks. */ 36 37 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 38 goto csum_error; 39 40 /* Get the TCP header */ 41 th = (const struct tcphdr *)skb->data; 42 /* Get the IP header */ 43 iph = ip_hdr(skb); 44 /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 45 * barrier() makes sure compiler wont play fool^Waliasing games. 
46 */ 47 /* Move the IPCB */ 48 memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 49 sizeof(struct inet_skb_parm)); 50 barrier(); 51 52 /* Starting sequence number */ 53 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 54 /* Ending sequence number; SYN and FIN each count as one */ 55 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 56 skb->len - th->doff * 4); 57 /* Acknowledgment number */ 58 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 59 /* Flags byte, the 14th byte of the TCP header */ 60 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 61 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 62 /* DS (service) field of the IP header */ 63 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 64 TCP_SKB_CB(skb)->sacked = 0; 65 66 lookup: 67 /* Look up the socket (control block) */ 68 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, 69 th->dest, &refcounted); 70 if (!sk) 71 goto no_tcp_socket; 72 73 process: 74 75 /* TIME_WAIT is handled at do_time_wait */ 76 if (sk->sk_state == TCP_TIME_WAIT) 77 goto do_time_wait; 78 79 /* Handle the TCP_NEW_SYN_RECV state */ 80 if (sk->sk_state == TCP_NEW_SYN_RECV) { 81 struct request_sock *req = inet_reqsk(sk); 82 struct sock *nsk; 83 84 /* Continue with the listener socket recorded in the request */ 85 sk = req->rsk_listener; 86 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { 87 sk_drops_add(sk, skb); 88 reqsk_put(req); 89 goto discard_it; 90 } 91 92 /* Listener is not in the LISTEN state */ 93 if (unlikely(sk->sk_state != TCP_LISTEN)) { 94 /* Remove the request from the connection queue */ 95 inet_csk_reqsk_queue_drop_and_put(sk, req); 96 97 /* Look up the socket again for this skb */ 98 goto lookup; 99 } 100 /* We own a reference on the listener, increase it again 101 * as we might lose it too soon. 
102 */ 103 sock_hold(sk); 104 refcounted = true; 105 106 /* Process the third-handshake ACK; returns the new socket on success */ 107 nsk = tcp_check_req(sk, skb, req, false); 108 109 /* Failure */ 110 if (!nsk) { 111 reqsk_put(req); 112 goto discard_and_relse; 113 } 114 115 /* No new socket was created; continue processing below */ 116 if (nsk == sk) { 117 reqsk_put(req); 118 } 119 /* A new socket was created; initialize it, etc. */ 120 else if (tcp_child_process(sk, nsk, skb)) { 121 /* On failure send a RST */ 122 tcp_v4_send_reset(nsk, skb); 123 goto discard_and_relse; 124 } else { 125 sock_put(sk); 126 return 0; 127 } 128 } 129 130 /* States other than TIME_WAIT and TCP_NEW_SYN_RECV */ 131 132 /* TTL below the socket's min_ttl */ 133 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 134 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 135 goto discard_and_relse; 136 } 137 138 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 139 goto discard_and_relse; 140 141 if (tcp_v4_inbound_md5_hash(sk, skb)) 142 goto discard_and_relse; 143 144 /* Reset the nf (netfilter) members */ 145 nf_reset(skb); 146 147 /* TCP filter */ 148 if (tcp_filter(sk, skb)) 149 goto discard_and_relse; 150 151 /* Re-read the TCP and IP headers */ 152 th = (const struct tcphdr *)skb->data; 153 iph = ip_hdr(skb); 154 155 /* Clear the device pointer */ 156 skb->dev = NULL; 157 158 /* Handle the LISTEN state */ 159 if (sk->sk_state == TCP_LISTEN) { 160 ret = tcp_v4_do_rcv(sk, skb); 161 goto put_and_return; 162 } 163 164 /* States other than TIME_WAIT, TCP_NEW_SYN_RECV and LISTEN */ 165 166 /* Record the incoming CPU */ 167 sk_incoming_cpu_update(sk); 168 169 bh_lock_sock_nested(sk); 170 171 /* Segment statistics */ 172 tcp_segs_in(tcp_sk(sk), skb); 173 ret = 0; 174 175 /* Socket is not locked by the user */ 176 if (!sock_owned_by_user(sk)) { 177 /* Could not be added to the prequeue */ 178 if (!tcp_prequeue(sk, skb)) 179 /* Go on to tcp_v4_do_rcv processing */ 180 ret = tcp_v4_do_rcv(sk, skb); 181 } 182 /* Already locked by the user: add to the backlog */ 183 else if (tcp_add_backlog(sk, skb)) { 184 goto discard_and_relse; 185 } 186 bh_unlock_sock(sk); 187 188 put_and_return: 189 /* Drop the reference count */ 190 if (refcounted) 191 sock_put(sk); 192 193 return ret; 194 195 no_tcp_socket: 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 197 goto discard_it; 198 199 if (tcp_checksum_complete(skb)) { 200 csum_error: 
201 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 202 bad_packet: 203 __TCP_INC_STATS(net, TCP_MIB_INERRS); 204 } else { 205 /* Send a RST */ 206 tcp_v4_send_reset(NULL, skb); 207 } 208 209 discard_it: 210 /* Discard frame. */ 211 kfree_skb(skb); 212 return 0; 213 214 discard_and_relse: 215 sk_drops_add(sk, skb); 216 if (refcounted) 217 sock_put(sk); 218 goto discard_it; 219 220 do_time_wait: 221 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 222 inet_twsk_put(inet_twsk(sk)); 223 goto discard_it; 224 } 225 226 /* Checksum error */ 227 if (tcp_checksum_complete(skb)) { 228 inet_twsk_put(inet_twsk(sk)); 229 goto csum_error; 230 } 231 232 /* TIME_WAIT incoming-packet processing */ 233 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 234 235 /* Received a SYN */ 236 case TCP_TW_SYN: { 237 /* Look up a listening socket */ 238 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 239 &tcp_hashinfo, skb, 240 __tcp_hdrlen(th), 241 iph->saddr, th->source, 242 iph->daddr, th->dest, 243 inet_iif(skb)); 244 245 /* Found one */ 246 if (sk2) { 247 /* Remove the timewait socket */ 248 inet_twsk_deschedule_put(inet_twsk(sk)); 249 /* Continue with the listening socket */ 250 sk = sk2; 251 refcounted = false; 252 253 /* Process it as a new request */ 254 goto process; 255 } 256 /* Fall through to ACK */ 257 } 258 259 /* Send an ACK */ 260 case TCP_TW_ACK: 261 tcp_v4_timewait_ack(sk, skb); 262 break; 263 /* Send a RST */ 264 case TCP_TW_RST: 265 tcp_v4_send_reset(sk, skb); 266 /* Remove the timewait socket */ 267 inet_twsk_deschedule_put(inet_twsk(sk)); 268 goto discard_it; 269 /* Success */ 270 case TCP_TW_SUCCESS:; 271 } 272 goto discard_it; 273 }