DPDK-22.11.2 [四] 官方basicfwd编译运行讲解

步骤概览

  • 编译安装dpdk
  • 绑定网卡到vfio驱动,必须有偶数个且至少两个网口(程序按 0<->1、2<->3 成对转发)
  • 配置LD_LIBRARY_PATH和PKG_CONFIG_PATH
  • 编译运行

源码

basicfwd.c

这个程序是从一个网口获取数据,然后发送到另一个网口,所以收发消息的代码都有了,可以根据这个程序编写自己的dpdk应用。

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024

#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32

/* basicfwd.c: Basic DPDK skeleton forwarding example. */

/*
 * Initializes a given port using global settings and with the RX buffers
 * coming from the mbuf_pool passed as a parameter.
 */

/* Main functional part of port initialization. 8< */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf port_conf;
	const uint16_t rx_rings = 1, tx_rings = 1;
	uint16_t nb_rxd = RX_RING_SIZE;
	uint16_t nb_txd = TX_RING_SIZE;
	int retval;
	uint16_t q;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	/* Reject port ids that do not refer to a valid attached device. */
	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	memset(&port_conf, 0, sizeof(struct rte_eth_conf));

	/* Query the device's capabilities and default configuration. */
	retval = rte_eth_dev_info_get(port, &dev_info);
	if (retval != 0) {
		printf("Error during getting device (port %u) info: %s\n",
				port, strerror(-retval));
		return retval;
	}

	/*
	 * If the NIC supports RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE, enable it:
	 * transmitted mbufs can then be returned to their pool quickly,
	 * without extra waiting or locking.
	 */
	if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

	/* Configure the Ethernet device. */
	/* Sets the number of RX queues (rx_rings) and TX queues (tx_rings). */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0)
		return retval;
	/*
	 * Clamp the requested descriptor counts to the device limits, e.g.
	 * if nb_rxd is RX_RING_SIZE (1024) but the device supports only 512,
	 * both the ring and nb_rxd are adjusted to 512 (updated in place).
	 */
	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
	if (retval != 0)
		return retval;

	/* Set up the receive queue(s). */
	/* Allocate and set up 1 RX queue per Ethernet port. */
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
		if (retval < 0)
			return retval;
	}

	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	/* Allocate and set up 1 TX queue per Ethernet port. */
	/*
	 * Unlike the RX queue, no mempool is passed here: TX hands buffers
	 * straight to the NIC, while RX needs a pool to hold incoming
	 * packets for the application.
	 */
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, nb_txd,
				rte_eth_dev_socket_id(port), &txconf);
		if (retval < 0)
			return retval;
	}

	/* Starting Ethernet port. 8< */
	retval = rte_eth_dev_start(port);
	/* >8 End of starting of ethernet port. */
	if (retval < 0)
		return retval;

	/* Display the port MAC address. */
	struct rte_ether_addr addr;
	retval = rte_eth_macaddr_get(port, &addr);
	if (retval != 0)
		return retval;

	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
			   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
			port, RTE_ETHER_ADDR_BYTES(&addr));

	/* Enable RX in promiscuous mode for the Ethernet device. */
	retval = rte_eth_promiscuous_enable(port);
	/* End of setting RX port in promiscuous mode. */
	if (retval != 0)
		return retval;

	return 0;
}
/* >8 End of main functional part of port initialization. */

/*
 * The lcore main. This is the main thread that does the work, reading from
 * an input port and writing to an output port.
 */

 /* Basic forwarding application lcore. 8< */
static __rte_noreturn void
lcore_main(void)
{
	uint16_t portid;

	/*
	 * Warn if any polled port lives on a different NUMA node than the
	 * polling thread; cross-node access degrades forwarding throughput.
	 */
	RTE_ETH_FOREACH_DEV(portid) {
		int dev_socket = rte_eth_dev_socket_id(portid);

		if (dev_socket >= 0 && dev_socket != (int)rte_socket_id())
			printf("WARNING, port %u is on remote NUMA node to "
					"polling thread.\n\tPerformance will "
					"not be optimal.\n", portid);
	}

	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
			rte_lcore_id());

	/* Main work of application loop. 8< */
	for (;;) {
		/*
		 * Poll every port and forward each received burst to its
		 * pair: 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
		 */
		RTE_ETH_FOREACH_DEV(portid) {
			struct rte_mbuf *pkts[BURST_SIZE];
			uint16_t i;

			/* Pull a burst of packets from this port. */
			const uint16_t rx_count = rte_eth_rx_burst(portid, 0,
					pkts, BURST_SIZE);

			/* Nothing arrived; poll the next port. */
			if (unlikely(rx_count == 0))
				continue;

			/*
			 * Transmit on the paired port. rte_eth_tx_burst
			 * takes ownership of (and frees) every mbuf it
			 * accepts, so only the leftovers are ours to free.
			 */
			const uint16_t tx_count = rte_eth_tx_burst(portid ^ 1,
					0, pkts, rx_count);

			/* Free any packets the TX ring did not accept. */
			for (i = tx_count; i < rx_count; i++)
				rte_pktmbuf_free(pkts[i]);
		}
	}
	/* >8 End of loop. */
}
/* >8 End Basic forwarding application lcore. */

/*
 * The main function, which does initialization and calls the per-lcore
 * functions.
 */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned nb_ports;
	uint16_t portid;

	/* Initializing the Environment Abstraction Layer (EAL). 8< */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	/* >8 End of initialization the Environment Abstraction Layer (EAL). */

	/* Skip the arguments consumed by rte_eal_init(); what remains are
	 * the application's own arguments. */
	argc -= ret;
	argv += ret;

	/* Check that there is an even number of ports to send/receive on. */
	/* Ports are forwarded in pairs (0<->1, 2<->3, ...), so an odd count
	 * would leave one port without a peer. */
	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports < 2 || (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");

	/* Creates a new mempool in memory to hold the mbufs. */

	/* Allocates mempool to hold the mbufs: NUM_MBUFS per port. 8< */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	/* >8 End of allocating mempool to hold mbuf. */

	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* Initializing all ports. 8< */
	RTE_ETH_FOREACH_DEV(portid)
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
					portid);
	/* >8 End of initializing all ports. */

	if (rte_lcore_count() > 1)
		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");

	/* Call lcore_main on the main core only. Called on single lcore. 8< */
	lcore_main();
	/* >8 End of called on single lcore. */

	/* clean up the EAL (never reached: lcore_main does not return) */
	rte_eal_cleanup();

	return 0;
}

重要api解析

rte_eal_init

rte_eal_init()
int rte_eal_init 	( 	int  	argc,
		char **  	argv 
	) 	

Initialize the Environment Abstraction Layer (EAL).

This function is to be executed on the MAIN lcore only, as soon as possible in the application's main() function. It puts the WORKER lcores in the WAIT state.

初始化程序,需要在主线程调用,尽可能优先调用。

argc、argv与C语言main函数的参数一致,argc表示参数个数,argv是参数列表。

支持的参数

官方文档给定了rte_eal_init的参数说明,比如常见的:

-l <core list>

List of cores to run on

The argument format is <c1>[-c2][,c3[-c4],...] where c1, c2, etc are core indexes between 0 and 128.

具体可以参考官方资料 http://doc.dpdk.org/guides-22.11/linux_gsg/linux_eal_parameters.html

返回值表示rte_eal_init使用了几个参数,后续通过

	argc -= ret;
	argv += ret;

跳过使用过的参数,然后解析自己程序需要的参数。

所以dpdk中的示例代码大部分都有这套逻辑,并且官方有说明,rte_eal_init需要的参数要放在前面,后面跟--,然后是自己的参数。rte_eal_init解析到--就会结束,返回解析了多少参数,然后跳过对应的参数,再解析自己的参数。

l2fwd

http://doc.dpdk.org/guides-22.11/sample_app_ug/l2_forward_real_virtual.html
我们可以找一个示例确认一下,比如l2fwd,代码中有相同逻辑

	/* Init EAL. 8< */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
	argc -= ret;
	argv += ret;

	force_quit = false;
	signal(SIGINT, signal_handler);
	signal(SIGTERM, signal_handler);

	/* parse application arguments (after the EAL ones) */
	ret = l2fwd_parse_args(argc, argv);

官方文档也介绍了用法:

./<build_dir>/examples/dpdk-l2fwd [EAL options] -- -p PORTMASK
                               [-P]
                               [-q NQ]
                               --[no-]mac-updating
                               [--portmap="(port, port)[,(port, port)]"]

前面是EAL options,中间使用--分割开,后面是l2fwd自己的参数,由l2fwd_parse_args自己进行解析。

rte_pktmbuf_pool_create

创建内存池

	/* Creates a new mempool in memory to hold the mbufs. */

	/* Allocates mempool to hold the mbufs. 8< */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	/* >8 End of allocating mempool to hold mbuf. */
rte_pktmbuf_pool_create()
struct rte_mempool* rte_pktmbuf_pool_create 	( 	const char *  	name,
		unsigned  	n,
		unsigned  	cache_size,
		uint16_t  	priv_size,
		uint16_t  	data_room_size,
		int  	socket_id 
	) 		

Create a mbuf pool.

This function creates and initializes a packet mbuf pool. It is a wrapper to rte_mempool functions.

Parameters
    name	The name of the mbuf pool. 内存池的名称
    n	The number of elements in the mbuf pool. The optimum size (in terms of memory usage) for a mempool is when n is a power of two minus one: n = (2^q - 1). 内存池元素的个数,最好是2的q次方减1。代码中NUM_MBUFS定义的就是2^13-1,然后乘以可用的网口数,也就是为每一个网口申请一个包含NUM_MBUFS个元素的内存池。
    cache_size	Size of the per-core object cache. See rte_mempool_create() for details. cache大小,具体如何设置,可以参考rte_mempool_create的介绍。代码中设定的是MBUF_CACHE_SIZE,大小是250。
    priv_size	Size of application private area between the rte_mbuf structure and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN. 私有程序的大小,必须使用RTE_MBUF_PRIV_ALIGN对齐。代码中设置的是0。
    data_room_size	Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM. 每一个mbuf的大小。代码中设置的是RTE_MBUF_DEFAULT_BUF_SIZE
    socket_id	The socket identifier where the memory should be allocated. The value can be SOCKET_ID_ANY if there is no NUMA constraint for the reserved zone. 内存应该在哪个socket上申请,这里就是前面知识介绍的CPU的socket,也可以说在哪个numa的node上申请。代码中设置的是rte_socket_id(),获取当前程序运行的socket id。建议程序运行,和网卡和内存都在同一个socket上。前面文章也有介绍过。

rte_lcore_count

获取当前程序可用的lcore(逻辑核)数量,当前程序只需要1个,有的程序可能需要多个。

	if (rte_lcore_count() > 1)
		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");

test_order_common

比如这个测试case,就需要3个,一个用作生产数据,一个用作处理数据,一个是主线程运行。

	/* 1 producer + N workers + main */
	if (rte_lcore_count() < 3) {
		evt_err("test need minimum 3 lcores");
		return -1;
	}

rte_eth_rx_queue_setup

申请并设置接收队列

rte_eth_rx_queue_setup()
int rte_eth_rx_queue_setup 	( 	uint16_t  	port_id,
		uint16_t  	rx_queue_id,
		uint16_t  	nb_rx_desc,
		unsigned int  	socket_id,
		const struct rte_eth_rxconf *  	rx_conf,
		struct rte_mempool *  	mb_pool 
	) 		

Allocate and set up a receive queue for an Ethernet device.

The function allocates a contiguous block of memory for nb_rx_desc receive descriptors from a memory zone associated with socket_id and initializes each receive descriptor with a network buffer allocated from the memory pool mb_pool.

Parameters
    port_id	The port identifier of the Ethernet device. 网口id
    rx_queue_id	The index of the receive queue to set up. The value must be in the range [0, nb_rx_queue - 1] previously supplied to rte_eth_dev_configure(). 队列id
    nb_rx_desc	The number of receive descriptors to allocate for the receive ring. 接收ring的descriptors数量
    socket_id	The socket_id argument is the socket identifier in case of NUMA. The value can be SOCKET_ID_ANY if there is no NUMA constraint for the DMA memory allocated for the receive descriptors of the ring.
    rx_conf	The pointer to the configuration data to be used for the receive queue. NULL value is allowed, in which case default Rx configuration will be used. The rx_conf structure contains an rx_thresh structure with the values of the Prefetch, Host, and Write-Back threshold registers of the receive ring. In addition it contains the hardware offloads features to activate using the RTE_ETH_RX_OFFLOAD_* flags. If an offloading set in rx_conf->offloads hasn't been set in the input argument eth_conf->rxmode.offloads to rte_eth_dev_configure(), it is a new added offloading, it must be per-queue type and it is enabled for the queue. No need to repeat any bit in rx_conf->offloads which has already been enabled in rte_eth_dev_configure() at port level. An offloading enabled at port level can't be disabled at queue level. The configuration structure also contains the pointer to the array of the receiving buffer segment descriptions, see rx_seg and rx_nseg fields, this extended configuration might be used by split offloads like RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT. If mb_pool is not NULL, the extended configuration fields must be set to NULL and zero.
    mb_pool	The pointer to the memory pool from which to allocate rte_mbuf network memory buffers to populate each descriptor of the receive ring. There are two options to provide Rx buffer configuration:

        single pool: mb_pool is not NULL, rx_conf.rx_nseg is 0.
        multiple segments description: mb_pool is NULL, rx_conf.rx_seg is not NULL, rx_conf.rx_nseg is not 0. Taken only if flag RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT is set in offloads.

http://doc.dpdk.org/api-22.11/index.html
http://doc.dpdk.org/guides-22.11/index.html

posted @ 2023-08-23 11:05  秋来叶黄  阅读(336)  评论(0编辑  收藏  举报