Linux rndis 速度低及规避方案(目前仅部分厂商存在问题)

关键字

rndis_host, linux, kernel, modem

综述

rndis 是微软定义的一套通讯方案。类似的协议还有 qmi/mbim/ecm/ncm。
rndis 协议足够简单,可靠。所以最近在使用一款 quectel 公司模块时采用的就是 rndis 模式。在linux 下 对应驱动是 rndis_host 驱动。windows 10下自带rndis 驱动!
拿到模块首先测速度!发现模块的下行速度在 Windows 上比在 Linux 上高很多,而上行速度两者差不多!单独看 Linux,上行速度又比下行高很多……问题很奇怪!

分析

分析上行发包逻辑:
Linux rndis_host 发包函数代码


/*
 * rndis_tx_fixup - prepend an RNDIS data-packet header to an outgoing skb
 * @dev:   usbnet device (not referenced by this function)
 * @skb:   packet to be sent
 * @flags: allocation flags passed to skb_copy_expand() if a copy is needed
 *
 * The header is placed in front of the payload.  If the skb is not cloned
 * and already has enough headroom it is used as-is; if headroom plus
 * tailroom together are enough, the payload is shifted inside the existing
 * buffer; otherwise an expanded copy is made and the original skb is freed.
 *
 * Return: the skb carrying header + payload, or NULL when the expanded
 * copy could not be allocated.
 */
struct sk_buff *
rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
{
	const unsigned payload_len = skb->len;
	struct rndis_data_hdr *hdr;
	int need_copy = 1;

	if (likely(!skb_cloned(skb))) {
		int avail = skb_headroom(skb);

		if (unlikely(sizeof(*hdr) <= avail)) {
			/* enough head room as-is */
			need_copy = 0;
		} else {
			/* enough room overall, but the payload must move? */
			avail += skb_tailroom(skb);
			if (likely(sizeof(*hdr) <= avail)) {
				skb->data = memmove(skb->head + sizeof(*hdr),
						    skb->data, payload_len);
				skb_set_tail_pointer(skb, payload_len);
				need_copy = 0;
			}
		}
	}

	if (need_copy) {
		/* build a new skb with room for the header (and tailpad) */
		struct sk_buff *expanded;

		expanded = skb_copy_expand(skb, sizeof(*hdr), 1, flags);
		dev_kfree_skb_any(skb);
		if (unlikely(!expanded))
			return expanded;
		skb = expanded;
	}

	/*
	 * Fill out the RNDIS header.  No attempt is made to batch packets;
	 * Linux minimizes wasted bandwidth through tx queues.
	 */
	hdr = __skb_push(skb, sizeof(*hdr));
	memset(hdr, 0, sizeof(*hdr));
	hdr->msg_type = cpu_to_le32(RNDIS_MSG_PACKET);
	/* skb->len now covers header + payload */
	hdr->msg_len = cpu_to_le32(skb->len);
	/* the offset is stored relative to byte 8 of the header */
	hdr->data_offset = cpu_to_le32(sizeof(*hdr) - 8);
	hdr->data_len = cpu_to_le32(payload_len);

	/* FIXME make the last packet always be short ... */
	return skb;
}
EXPORT_SYMBOL_GPL(rndis_tx_fixup);

上述函数很短,可以看到发包函数就是把上层传过来的数据包加上 rndis 协议报文头发出去,并没有别的处理! 需要注意的是,rndis 是支持报文聚合的!!!意思就是调用一次USB BULK OUT可以发送/接收多个IP报文!
所以可以看出,即使在上行未发生聚合的情况下,下行还比上行低,再结合Windows 下下行速度比较高那么问题就很明显了,一定是驱动收包有问题!

分析下行收包逻辑:

/*
 * DATA -- host must not write zlps
 *
 * rndis_rx_fixup - split a received buffer into its RNDIS data messages
 * @dev: usbnet device the buffer was received on
 * @skb: received buffer; may hold several RNDIS messages back to back
 *
 * Walks the RNDIS messages in @skb.  For each message that is not treated
 * as the last one, a clone trimmed to the payload is handed to
 * usbnet_skb_return(); the last payload is left in @skb itself.
 *
 * Return: 0 when the buffer is shorter than hard_header_len or a message
 * fails validation, 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	/* not referenced anywhere else in this function */
	int tm = 0;
	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		/* skb->data is expected to point at an RNDIS message header here */
		struct rndis_data_hdr	*hdr = (void *)skb->data;
		struct sk_buff		*skb2;
		u32			msg_type, msg_len, data_offset, data_len;

		/* header fields are little-endian on the wire */
		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/* don't choke if we see oob, per-packet data, etc */
		if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len
				|| (data_offset + data_len + 8) > msg_len)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}
		/* advance to the payload: it starts 8 + data_offset bytes in */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left? */
		/*
		 * NOTE(review): data_len is u32; if skb->len is unsigned as
		 * well, this subtraction wraps whenever skb->len > data_len,
		 * so the test would hold only when both are equal -- confirm
		 * the intended operand order.
		 */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!skb2))
			break;
		/*
		 * NOTE: 8 + data_offset bytes were already pulled above, so
		 * this lands on the next message header only when
		 * data_offset == sizeof *hdr - 8.  For any other data_offset
		 * the total pulled differs from msg_len.
		 */
		skb_pull(skb, msg_len - sizeof *hdr);
		/* the clone keeps just this message's payload */
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);

收包代码稍微复杂点,因为收包需要考虑到聚合报文的情况!因此起了一个while循环判断。while 里面就是剥离rndis 报文头,并调用网卡收包函数的过程!
这里对skb 有两次偏移操作:

  1. skb_pull(skb, 8 + data_offset); 这一步从skb 去除当前消息的 rndis 报文头!
  2. skb_pull(skb, msg_len - sizeof *hdr); 因为skb payload 部分已经在skb2 有了一份clone,那么skb 当前的payload 就不重要了。因此,这里实际要做的是继续从skb剥离当前rndis 报文的数据部分(报文头已经剥离掉了)。这一步操作后,skb 将指向下一个rndis 报文的 rndis 报文头!
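    但是这里第2步的逻辑错了,直接减去 rndis 报文头长度(sizeof *hdr)是错的!因为 rndis 报文的 payload 之前并不一定只有协议头,payload 的位置是由头部的 data_offset 字段定义的。第1步已经剥离了 8 + data_offset 字节,所以当前报文剩余需要跳过的长度应为 msg_len - (8 + data_offset);只有当 data_offset 恰好等于 sizeof *hdr - 8 时,两种算法的结果才相同。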

解决方案

方案很简单,修改偏移计算逻辑!

/*
 * DATA -- host must not write zlps
 *
 * rndis_rx_fixup - split a received buffer into its RNDIS data messages
 * @dev: usbnet device the buffer was received on
 * @skb: received buffer; may hold several RNDIS messages back to back
 *
 * Walks the RNDIS messages in @skb.  For each message that is not treated
 * as the last one, a clone trimmed to the payload is handed to
 * usbnet_skb_return(); the last payload is left in @skb itself.
 *
 * Return: 0 when the buffer is shorter than hard_header_len or a message
 * fails validation, 1 otherwise.
 */
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
	/* This check is no longer done by usbnet */
	if (skb->len < dev->net->hard_header_len)
		return 0;

	/* peripheral may have batched packets to us... */
	while (likely(skb->len)) {
		struct rndis_data_hdr	*hdr = (void *)skb->data;
		struct sk_buff		*skb2;
		u32			msg_type, msg_len, data_offset, data_len;

		/* header fields are little-endian on the wire */
		msg_type = le32_to_cpu(hdr->msg_type);
		msg_len = le32_to_cpu(hdr->msg_len);
		data_offset = le32_to_cpu(hdr->data_offset);
		data_len = le32_to_cpu(hdr->data_len);

		/*
		 * don't choke if we see oob, per-packet data, etc
		 *
		 * The bound 8 + data_offset + data_len <= msg_len is checked
		 * by subtraction so that device-supplied values cannot wrap
		 * around in u32 and slip through.  Once it holds,
		 * msg_len - data_offset - 8 below cannot underflow.
		 */
		if (unlikely(msg_type != RNDIS_MSG_PACKET || skb->len < msg_len
				|| msg_len < 8
				|| data_offset > msg_len - 8
				|| data_len > msg_len - 8 - data_offset)) {
			dev->net->stats.rx_frame_errors++;
			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
				   le32_to_cpu(hdr->msg_type),
				   msg_len, data_offset, data_len, skb->len);
			return 0;
		}
		/* advance to the payload: it starts 8 + data_offset bytes in */
		skb_pull(skb, 8 + data_offset);

		/* at most one packet left? */
		if (likely((data_len - skb->len) <= sizeof *hdr)) {
			skb_trim(skb, data_len);
			break;
		}

		/* try to return all the packets in the batch */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (unlikely(!skb2))
			break;
		/*
		 * The fix: 8 + data_offset bytes of this message are already
		 * gone, so msg_len - data_offset - 8 bytes remain before the
		 * next message header.  (msg_len - sizeof *hdr is only right
		 * when data_offset == sizeof *hdr - 8.)
		 */
		skb_pull(skb, msg_len - data_offset - 8);
		/* the clone keeps just this message's payload */
		skb_trim(skb2, data_len);
		usbnet_skb_return(dev, skb2);
	}

	/* caller will usbnet_skb_return the remaining packet */
	return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);

注:这里的修改仅是规避方案,根本原因还是模块侧封包逻辑的问题

posted @ 2020-07-31 18:06  sinpo828  阅读(4488)  评论(4编辑  收藏  举报