IP_PKTINFO选项

 在多网卡(或配置了多个本地地址)的主机上,UDP 服务器端回复报文的源地址可能与客户端请求报文的目的地址不一致。目前这种情况在 portal 对接 AC 的时候经常出现。

所以问题就是:需要保存本次 UDP 请求的本地地址(local IP)和对端地址(remote IP),回复时以请求报文的目的地址作为源地址。

IP_PKTINFO 这个选项让内核在 recvmsg 时通过辅助数据(msghdr 的 msg_control)返回 IP 报文的信息,包括接收接口索引、本地地址和报文头部的目的地址(对端的源地址仍通过 msg_name 获得)。IP_PKTINFO 和 msghdr 的关系可以在这个 stackoverflow 中找到:

https://stackoverflow.com/questions/3062205/setting-the-source-ip-for-a-udp-socket

 The code in the link uses IP_PKTINFO (or IP_RECVDSTADDR depending on the platform) to get the destination address of a UDP message from the ancillary cmsg(3) data. Paraphrased here:


/* Receive side: recover the destination address of a received UDP datagram
 * from the IP_PKTINFO ancillary data attached to msg.
 * Assumes msg was given a control buffer and IP_PKTINFO was enabled on the
 * socket before recvmsg() -- that setup is not shown in this snippet.
 */
struct msghdr msg;
struct cmsghdr *cmsg;
struct in_addr addr;
// after recvmsg(sd, &msg, flags);
/* Walk every control message attached to msg. */
for(cmsg = CMSG_FIRSTHDR(&msg);
    cmsg != NULL;
    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
  /* Only the IPv4 packet-info control message is of interest. */
  if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_PKTINFO) {
    /* ipi_addr holds the destination address from the packet's IP header. */
    addr = ((struct in_pktinfo*)CMSG_DATA(cmsg))->ipi_addr;
    printf("message received on address %s\n", inet_ntoa(addr));
  }
}

Gene, your question asked how to set the source address on outgoing packets. With IP_PKTINFO it is possible to set the ipi_spec_dst field of the struct in_pktinfo in the ancillary data passed to sendmsg(2). See the post referenced above, cmsg(3), and sendmsg(2) for guidelines on how to create and manipulate the ancillary data in a struct msghdr. An example (no guarantee here) might be: 

/* Send side: build an IP_PKTINFO control message so that one datagram is
 * sent with a chosen source address and, optionally, a chosen output
 * interface. src_interface_index and src_addr are supplied by the caller.
 */
struct msghdr msg;
struct cmsghdr *cmsg;
struct in_pktinfo *pktinfo;
// after initializing msghdr & control data to CMSG_SPACE(sizeof(struct in_pktinfo))
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = IPPROTO_IP;
cmsg->cmsg_type = IP_PKTINFO;
/* ip_cmsg_send() returns -EINVAL unless cmsg_len is exactly this value. */
cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
pktinfo = (struct in_pktinfo*) CMSG_DATA(cmsg);
/* A non-zero index becomes the output interface for the route lookup. */
pktinfo->ipi_ifindex = src_interface_index;
/* A non-zero address is used as the source address for the route lookup. */
pktinfo->ipi_spec_dst = src_addr;
// bytes_sent = sendmsg(sd, &msg, flags);

  结构体in_pktinfo如下,其中ipi_ifindex表示接收报文的接口索引;成员ipi_spec_dst表示报文的本地地址;而ipi_addr表示报文头部的目的地址。在使用sendmsg发送报文时,如果ipi_spec_dst字段不为零,内核查找路由时将以此地址作为源地址进行查询。再者,如果ipi_ifindex不为零,查找路由时将以该索引对应的接口作为出接口。

/* Per-packet IPv4 information carried in an IP_PKTINFO control message. */
struct in_pktinfo {
        /* Interface index: the receiving interface on input; when non-zero
         * on sendmsg it selects the output interface for the route lookup. */
        unsigned int   ipi_ifindex;
        /* Local address of the packet; when non-zero on sendmsg it is used
         * as the source address for the route lookup. */
        struct in_addr ipi_spec_dst;
        /* Destination address taken from the packet's IP header. */
        struct in_addr ipi_addr;
    };

 

IP_PKTINFO信息

在UDP协议的接收函数udp_recvmsg中,如果inet_sock套接口的cmsg_flags标志不为空,则使用函数ip_cmsg_recv_offset添加控制信息;其中设置了IP_CMSG_PKTINFO标志时会调用ip_cmsg_recv_pktinfo。

ip_cmsg_recv_offset--->
    if (flags & IP_CMSG_PKTINFO) {
		ip_cmsg_recv_pktinfo(msg, skb);

static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);

	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;

	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}

  ipi_addr中保存了报文IP头部的目的地址。结构体in_pktinfo中的其它两个字段(ipi_ifindex和ipi_spec_dst)保存在skb的控制缓冲区cb字段(skb->cb)中,参见宏PKTINFO_SKB_CB。

其余的两个字段(ipi_ifindex和ipi_spec_dst)在ipv4_pktinfo_prepare中填充:

/**
 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
 * @sk: socket
 * @skb: buffer
 *
 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
 * destination in skb->cb[] before dst drop.
 * This way, receiver doesn't make cache line misses to read rtable.
 *
 * When the socket did not ask for packet info, or the skb has no route
 * attached, both stored fields are cleared to zero instead.
 */
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
	/* Packet info is wanted if the IPv4 PKTINFO flag is set on the socket
	 * or ipv6_sk_rxinfo() reports true for it.
	 */
	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
		       ipv6_sk_rxinfo(sk);

	if (prepare && skb_rtable(skb)) {
		/* skb->cb is overloaded: prior to this point it is IP{6}CB
		 * which has interface index (iif) as the first member of the
		 * underlying inet{6}_skb_parm struct. This code then overlays
		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
		 * element so the iif is picked up from the prior IPCB. If iif
		 * is the loopback interface, then return the sending interface
		 * (e.g., process binds socket to eth0 for Tx which is
		 * redirected to loopback in the rtable/dst).
		 */
		struct rtable *rt = skb_rtable(skb);
		/* Computed from the IPCB flags; indicates a Layer 3 slave device. */
		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);

		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
			pktinfo->ipi_ifindex = inet_iif(skb);
		/* Loopback case above: inet_iif() returns rt_iif from the route
		 * when it is set, i.e. the original sending interface; otherwise
		 * it returns skb->skb_iif (per the original note, that field is
		 * assigned in __netif_receive_skb_core on the receive path).
		 */
		else if (l3slave && rt && rt->rt_iif)
			pktinfo->ipi_ifindex = rt->rt_iif;

		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
	} else {
		/* No packet info requested or no route: report zeros. */
		pktinfo->ipi_ifindex = 0;
		pktinfo->ipi_spec_dst.s_addr = 0;
	}
	/* The route has been consulted above, so the dst can be dropped now. */
	skb_dst_drop(skb);
}

/*
 * Return the input interface index for @skb.
 *
 * A non-zero rt_iif on the attached route wins; callers use this to recover
 * the original sending interface when the stored index is the loopback
 * interface (LOOPBACK_IFINDEX). Without a route, or when rt_iif is zero,
 * skb->skb_iif is returned instead (per the original note, that field is
 * assigned in __netif_receive_skb_core on the receive path).
 */
static inline int inet_iif(const struct sk_buff *skb)
{
	struct rtable *route = skb_rtable(skb);

	if (!route || !route->rt_iif)
		return skb->skb_iif;

	return route->rt_iif;
}
/*
 * Compute the "specific destination" (local address) to report for @skb,
 * i.e. the value stored in in_pktinfo.ipi_spec_dst by the caller.
 */
__be32 fib_compute_spec_dst(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct rtable *rt;
	struct net *net;
	int scope;
	/* If the route carries only RTCF_LOCAL (neither broadcast nor
	 * multicast), return the destination address from the IP header.
	 * In that case ipi_spec_dst ends up equal to ipi_addr.
	 */
	rt = skb_rtable(skb);
	if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) ==
	    RTCF_LOCAL)
		return ip_hdr(skb)->daddr;

	/* Otherwise (the route is not a plain local one, e.g. broadcast or
	 * multicast): if the packet's source address is not a zeronet address,
	 * do a FIB lookup using that source address as the destination and
	 * return the preferred source of the result. If the lookup fails, or
	 * the source address is zeronet (scope narrowed to RT_SCOPE_LINK),
	 * fall back to inet_select_addr(), again passing the packet's source
	 * address as the destination to match against.
	 */
	in_dev = __in_dev_get_rcu(dev);

	net = dev_net(dev);

	scope = RT_SCOPE_UNIVERSE;
	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
		/* The skb mark is used in the lookup only when IN_DEV_SRC_VMARK
		 * is true for the device.
		 */
		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
		struct flowi4 fl4 = {
			.flowi4_iif = LOOPBACK_IFINDEX,
			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
			.daddr = ip_hdr(skb)->saddr,
			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
			.flowi4_scope = scope,
			.flowi4_mark = vmark ? skb->mark : 0,
		};
		/* fib_lookup() returns zero on success. */
		if (!fib_lookup(net, &fl4, &res, 0))
			return fib_result_prefsrc(net, &res);
	} else {
		scope = RT_SCOPE_LINK;
	}

	return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
}

 

UDP_SEND IP_PKTINFO控制信息

如下以UDP协议为例,在函数udp_sendmsg中,如果控制消息的长度不为零,首先处理UDP层的控制消息,如果其返回值大于零,表明需要进行IP层控制消息的处理,调用处理函数ip_cmsg_send。

在函数ip_cmsg_send中,将in_pktinfo结构中的成员ipi_ifindex(仅当其不为零时)和ipi_spec_dst分别赋值给ipcm_cookie结构的成员oif和addr。

/* Excerpt from ip_cmsg_send(): handling of an IP_PKTINFO control message. */
case IP_PKTINFO:
		{
			struct in_pktinfo *info;
			/* The payload must be exactly one struct in_pktinfo. */
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
				return -EINVAL;
			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
			/* Override the output interface only when one was given. */
			if (info->ipi_ifindex)
				ipc->oif = info->ipi_ifindex;
			/* The address is copied unconditionally, even when zero. */
			ipc->addr = info->ipi_spec_dst.s_addr;
			break;
		}

继续看函数udp_sendmsg的后续处理,对于flowi4结构类型变量fl4,其成员flowi4_oif和saddr,赋予了ipc中的oif和addr值,作为查询路由时使用的出接口和源IP地址。

/* Excerpt from udp_sendmsg(): route lookup using the values in ipc.
 * ipc.addr / ipc.oif carry ipi_spec_dst / ipi_ifindex when an IP_PKTINFO
 * control message was processed by ip_cmsg_send().
 */
saddr = ipc.addr;
/* No route in hand: build a flow key and look one up. */
if (!rt) {
		struct net *net = sock_net(sk);
		__u8 flow_flags = inet_sk_flowi_flags(sk);

		fl4 = &fl4_stack;

		/* ipc.oif and saddr are passed in as the output interface and
		 * source address of the flow used for the lookup.
		 */
		flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
				   flow_flags,
				   faddr, saddr, dport, inet->inet_sport,
				   sk->sk_uid);

		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
		rt = ip_route_output_flow(net, fl4, sk);
		if (IS_ERR(rt)) {
			/* Lookup failed: keep the error code, clear rt, and
			 * count unreachable-network failures.
			 */
			err = PTR_ERR(rt);
			rt = NULL;
			if (err == -ENETUNREACH)
				IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
			goto out;
		}

		/* A broadcast route is refused with -EACCES unless the socket
		 * has the SOCK_BROADCAST flag set.
		 */
		err = -EACCES;
		if ((rt->rt_flags & RTCF_BROADCAST) &&
		    !sock_flag(sk, SOCK_BROADCAST))
			goto out;
		/* When connected is set, store a cloned dst on the socket. */
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->dst));
	}

 

posted @ 2024-01-17 15:17  codestacklinuxer  阅读(177)  评论(0编辑  收藏  举报