当目标socket不存在时发送方会看到什么

intro

移动端设备上的应用被切到后台之后,可能就无法收到对方socket关闭连接时发送的FIN。当应用从后台切回前台之后,可能仍会继续通过这个socket,尝试向对方一个已经不存在的socket发送数据。

这种情况下,该应用网络层将会经历怎样的波折呢?

接收方

如果报文指定的socket不存在,流程会走到no_tcp_socket标签,如果报文的校验和正确,会通过tcp_v4_send_reset函数向发送方发送rst报文。

///@file: tcp_ipv4.c
/*
 *	From tcp_input.c
 */
int tcp_v4_rcv(struct sk_buff *skb)
{

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v4_fill_cb(skb, iph, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

发送RESET的接口

///@file: tcp_ipv4.c
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply buffer: a TCP header followed by room for options. */
	struct {
		struct tcphdr th;
		__be32 opt[REPLY_OPTIONS_LEN];
	} rep;
	const __u8 *md5_hash_location = NULL;
	const struct tcp_ao_hdr *aoh;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key = NULL;
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
	int genhash;
#endif
	u64 transmit_time = 0;
	struct sock *ctl_sk;
	struct net *net;
	u32 txhash = 0;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	/* If the offending segment carried an ACK, its ack_seq becomes the
	 * sequence number of the RST and the RST itself carries no ACK.
	 * Otherwise seq stays 0 (rep was zeroed above), the ACK flag is set,
	 * and ack_seq covers everything the segment occupied: skb->len minus
	 * the TCP header length (doff counts 32-bit words), plus one each for
	 * SYN and FIN.
	 */
	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	/* Start with a bare TCP header; the option code below grows iov_len
	 * (and rep.th.doff) when it appends an option.
	 */
	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	/* Without a socket, take the namespace from the skb's dst device. */
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);

	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
		return;

	/* A non-zero return from the TCP-AO signing helper suppresses the RST. */
	if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt))
		return;

#ifdef CONFIG_TCP_MD5SIG
	/* From here on every exit must go through "out" to drop the RCU lock. */
	rcu_read_lock();
	if (sk && sk_fullsock(sk)) {
		const union tcp_md5_addr *addr;
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
	} else if (md5_hash_location) {
		const union tcp_md5_addr *addr;
		int sdif = tcp_v4_sdif(skb);
		int dif = inet_iif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					     NULL, 0, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), dif, sdif);
		/* don't send rst if it can't find key */
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = sdif ? dif : 0;
		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
		key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);
		if (!key)
			goto out;


		/* Recompute the hash of the incoming segment with the key we
		 * found; stay silent unless it matches the 16 bytes it carried.
		 */
		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;

	}

	if (key) {
		/* Option word: NOP, NOP, MD5SIG kind, MD5SIG length. */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		/* The signature is written right after the option word. */
		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				     key, ip_hdr(skb)->saddr,
				     ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	/* Can't co-exist with TCPMD5, hence check rep.opt[0] */
	if (rep.opt[0] == 0) {
		__be32 mrst = mptcp_reset_option(skb);

		if (mrst) {
			rep.opt[0] = mrst;
			arg.iov[0].iov_len += sizeof(mrst);
			rep.th.doff = arg.iov[0].iov_len / 4;
		}
	}

	/* Pseudo-header checksum seed; note the addresses are swapped
	 * relative to the incoming packet (its daddr is our source).
	 */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	/* Offset of the TCP checksum field, expressed in 16-bit units. */
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	trace_tcp_send_reset(sk, skb, reason);

	/* sk may be a timewait socket; the read of sk_bound_dev_if above
	 * relies on both structures keeping the field at the same offset.
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	/* The reply goes out through a per-CPU control socket, which is
	 * borrowed and reconfigured only inside this BH-disabled section.
	 */
	local_bh_disable();
	ctl_sk = this_cpu_read(ipv4_tcp_sk);
	sock_net_set(ctl_sk, net);
	if (sk) {
		/* Inherit mark, priority and txhash from the socket, reading
		 * the timewait variants when sk is in TCP_TIME_WAIT.
		 */
		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_mark : sk->sk_mark;
		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
				   inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
		transmit_time = tcp_transmit_time(sk);
		xfrm_sk_clone_policy(ctl_sk, sk);
		txhash = (sk->sk_state == TCP_TIME_WAIT) ?
			 inet_twsk(sk)->tw_txhash : sk->sk_txhash;
	} else {
		/* No socket: clear whatever a previous user left behind. */
		ctl_sk->sk_mark = 0;
		ctl_sk->sk_priority = 0;
	}
	ip_send_unicast_reply(ctl_sk,
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len,
			      transmit_time, txhash);

	/* Undo the per-reply state placed on the shared control socket. */
	xfrm_sk_free_policy(ctl_sk);
	sock_net_set(ctl_sk, &init_net);
	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
	local_bh_enable();

#ifdef CONFIG_TCP_MD5SIG
	/* MD5 lookup/verification failures land here: no RST is sent. */
out:
	rcu_read_unlock();
#endif
}

RESET有效性检测

The many ways of handling TCP RST packets

rst报文本身的序列号也需要在合法范围内:

An RST is accepted if the sequence number is in the receiver's window (i.e. RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND). The effect of the RST is to immediately close the connection. This is slightly different from a FIN, which just says that the other endpoint will no longer be transmitting any new data but can still receive some.

There are two types of event that cause a RST to be emitted. A) the connection is explicitly aborted by the endpoint, e.g. the process holding the socket being killed (just closing the socket normally is not grounds for RST, even if there is still unreceived data). B) the TCP stack receiving certain kinds of invalid packets, e.g. a non-RST packet for a connection that doesn't exist or has already been closed.

The RST packet that should be generated is slightly different for these two cases. For case A the sequence number for the RST packet should be SND.NXT of the connection. For case B the sequence number should be set to the sequence number ACKed by the received packet. In the latter case the ACK bit will not be set in the RST. (Where the distinction matters, I'll call the first one RST-ABORT, the second a RST-REPLY).

那么RESET包的seq从哪里来呢?因为收到的报文header中的ack_seq表示的是对方希望收到的下一个序列号,所以直接使用触发RESET的那个报文中的ack_seq字段即可。
如果收到的报文中没有置位ack,那么回包的seq为0(rep已经被memset清零),同时回包会置位ack,ack_seq取收到报文的seq加上该报文占用的序列号长度(数据长度,SYN和FIN各占一个序列号)。这也意味着这种RST报文的seq字段并不携带有意义的序列号。

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
///...
	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}
///...

RESET效果


/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
	int state;
	int err;

	trace_tcp_receive_reset(sk);

	/* An MPTCP socket still gets to look at the options carried by the
	 * reset. The return value is deliberately dropped: MPTCP has no way
	 * of asking us to ignore an RST.
	 */
	if (sk_is_mptcp(sk))
		mptcp_incoming_options(sk, skb);

	state = sk->sk_state;

	/* An already closed socket has nothing left to tear down. */
	if (state == TCP_CLOSE)
		return;

	/* Report the error the way BSD does (which is also what we want). */
	if (state == TCP_SYN_SENT)
		err = ECONNREFUSED;
	else if (state == TCP_CLOSE_WAIT)
		err = EPIPE;
	else
		err = ECONNRESET;
	WRITE_ONCE(sk->sk_err, err);

	/* Pairs with the smp_rmb() in tcp_poll(). */
	smp_wmb();

	/* Drop everything still queued for transmit, then close. */
	tcp_write_queue_purge(sk);
	tcp_done(sk);

	/* Only report the error if the socket is not flagged SOCK_DEAD. */
	if (sock_flag(sk, SOCK_DEAD))
		return;
	sk_error_report(sk);
}

void tcp_done(struct sock *sk)
{
	struct request_sock *fastopen_req;

	/* lockdep_sock_is_held(sk) cannot be used as the protection
	 * condition: this may run on a brand-new socket right after
	 * inet_csk_prepare_forced_close().
	 */
	fastopen_req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);

	/* A connection that dies during the handshake is a failed attempt. */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
		break;
	default:
		break;
	}

	tcp_set_state(sk, TCP_CLOSE);
	tcp_clear_xmit_timers(sk);
	if (fastopen_req)
		reqsk_fastopen_remove(sk, fastopen_req, false);

	/* Mark both directions as shut down. */
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);

	/* A socket flagged SOCK_DEAD is destroyed here; otherwise the
	 * state-change callback is invoked.
	 */
	if (sock_flag(sk, SOCK_DEAD)) {
		inet_csk_destroy_sock(sk);
		return;
	}
	sk->sk_state_change(sk);
}

函数中我们比较关心的是其中的

WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);

操作。其中

///@file: linux/include/net/sock.h
 /* Both flags below combined: RCV_SHUTDOWN | SEND_SHUTDOWN (1 | 2 == 3). */
 #define SHUTDOWN_MASK   3
  /* Presumably marks the receive direction as shut down (inferred from the name). */
  #define RCV_SHUTDOWN    1
  /* Send direction shut down; tcp_sendmsg_locked() tests this bit in sk_shutdown. */
  #define SEND_SHUTDOWN   2

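当通过一个被RESET的socket发送数据时,由于sk_err非0,并且sk_shutdown置位了SEND_SHUTDOWN,下面代码中的判断成立,流程会跳转到do_error,此时err已经被预置为-EPIPE。需要注意的是,do_error之后的错误处理(sk_stream_error)会优先返回sk_err中记录的错误并将其清零,所以RESET之后第一次发送通常看到的是ECONNRESET,再次发送才会得到EPIPE。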

int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
///...
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

一个有意思的细节

内核对RST标志位的处理保护了处于LISTEN状态的socket:侦听socket收到RST报文时只是将其丢弃,因此不能通过伪造RESET报文来关闭(服务器的)侦听socket。

enum skb_drop_reason
tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
///...
	case TCP_LISTEN:
		if (th->ack)
			return SKB_DROP_REASON_TCP_FLAGS;

		if (th->rst) {
			SKB_DR_SET(reason, TCP_RESET);
			goto discard;
		}
///...
}

为什么tcp使用RESET而udp使用ICMP

(一个很好的补充)

According to the RFC 793 Reset Generation rules:

As a general rule, reset (RST) must be sent whenever a segment arrives
which apparently is not intended for the current connection. A reset
must not be sent if it is not clear that this is the case.

There are three groups of states:

  1. If the connection does not exist (CLOSED) then a reset is sent
    in response to any incoming segment except another reset. In
    particular, SYNs addressed to a non-existent connection are rejected
    by this means.
    Since the port is closed (not listening or communicating) there are no connections, and because of that TCP is supposed to reply with a RST packet.

RFC 768 for UDP does not specify any action on a closed port but the ICMP RFC 792 specifies a message Type 3 Code 3, Destination Unreachable: Destination port unreachable that may be sent.

However, ports only actually do this if they are unfiltered. Filtered connections do not reply at all and simply drop the packet. Filtering is usually done by any firewall worthy of the name since it makes attackers jobs harder by providing less information.

但是这依然没有说明两种协议使用不同规则的原因:例如,为什么tcp不使用ICMP?或许就是因为TCP需要更严谨的可靠性?

outro

即使是大家非常熟悉的TCP协议,其中可能也存在一些"dark corner"。

posted on 2024-12-21 17:27  tsecer  阅读(3)  评论(0编辑  收藏  举报

导航