* [RFC 2/4] test: add test for af_packet
2026-01-28 17:30 [RFC 0/4] net/af_packet: cleanups and optimizations Stephen Hemminger
2026-01-28 17:30 ` [RFC 1/4] net/af_packet: remove volatile from statistics Stephen Hemminger
@ 2026-01-28 17:30 ` Stephen Hemminger
2026-01-28 20:36 ` Scott Mitchell
2026-01-28 17:30 ` [RFC 3/4] net/af_packet: fix indentation Stephen Hemminger
2026-01-28 17:30 ` [RFC 4/4] net/af_packet: add VPP-style prefetching to receive path Stephen Hemminger
3 siblings, 1 reply; 18+ messages in thread
From: Stephen Hemminger @ 2026-01-28 17:30 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Add a test for AF_PACKET PMD.
This test was generated with Claude AI based off of existing
test_pmd_ring.c with some cleanup afterwards.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
app/test/meson.build | 1 +
app/test/test_pmd_af_packet.c | 875 ++++++++++++++++++++++++++++++++++
2 files changed, 876 insertions(+)
create mode 100644 app/test/test_pmd_af_packet.c
diff --git a/app/test/meson.build b/app/test/meson.build
index f4d04a6e42..9fb7067b4b 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -141,6 +141,7 @@ source_file_deps = {
'test_per_lcore.c': [],
'test_pflock.c': [],
'test_pie.c': ['sched'],
+ 'test_pmd_af_packet.c': ['net_af_packet', 'ethdev', 'bus_vdev'],
'test_pmd_perf.c': ['ethdev', 'net'] + packet_burst_generator_deps,
'test_pmd_ring.c': ['net_ring', 'ethdev', 'bus_vdev'],
'test_pmd_ring_perf.c': ['ethdev', 'net_ring', 'bus_vdev'],
diff --git a/app/test/test_pmd_af_packet.c b/app/test/test_pmd_af_packet.c
new file mode 100644
index 0000000000..e6426adf0c
--- /dev/null
+++ b/app/test/test_pmd_af_packet.c
@@ -0,0 +1,875 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2026 Stephen Hemminger
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/sockios.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include <rte_bus_vdev.h>
+#include <rte_config.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+
+#include "test.h"
+
+#ifndef RTE_EXEC_ENV_LINUX
+static int
+test_pmd_af_packet(void)
+{
+ printf("af_packet only supported on Linux, skipping test\n");
+ return TEST_SKIPPED;
+}
+
+#else
+
+#define NUM_MBUFS 512
+#define MBUF_CACHE_SIZE 32
+#define BURST_SIZE 32
+#define RING_SIZE 256
+
+/* Test device names */
+#define AF_PACKET_DEV_NAME "net_af_packet_test"
+#define TAP_DEV_NAME "dpdkafptest"
+
+static struct rte_mempool *mp;
+static uint16_t port_id = RTE_MAX_ETHPORTS;
+static int tap_fd = -1;
+static bool tap_created;
+static bool port_created;
+static bool port_started;
+
+/*
+ * Create a TAP interface for testing.
+ * Returns fd on success, -1 on failure.
+ */
+static int
+create_tap_interface(const char *name)
+{
+ struct ifreq ifr;
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ printf("Cannot open /dev/net/tun: %s\n", strerror(errno));
+ printf("(Are you running as root?)\n");
+ return -1;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+ ret = ioctl(fd, TUNSETIFF, &ifr);
+ if (ret < 0) {
+ printf("Cannot create TAP interface '%s': %s\n",
+ name, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ /* Bring the interface up */
+ int sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock >= 0) {
+ memset(&ifr, 0, sizeof(ifr));
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+ /* Get current flags */
+ if (ioctl(sock, SIOCGIFFLAGS, &ifr) == 0) {
+ ifr.ifr_flags |= IFF_UP;
+ ioctl(sock, SIOCSIFFLAGS, &ifr);
+ }
+ close(sock);
+ }
+
+ printf("Created TAP interface '%s'\n", name);
+ return fd;
+}
+
+static void
+destroy_tap_interface(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+}
+
+static int
+create_af_packet_port(const char *name, const char *args, uint16_t *out_port_id)
+{
+ int ret;
+
+ ret = rte_vdev_init(name, args);
+ if (ret != 0) {
+ printf("Failed to create af_packet device '%s': %d\n", name, ret);
+ return ret;
+ }
+
+ ret = rte_eth_dev_get_port_by_name(name, out_port_id);
+ if (ret != 0) {
+ printf("Failed to get port id for '%s': %d\n", name, ret);
+ rte_vdev_uninit(name);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+configure_af_packet_port(uint16_t pid, uint16_t nb_rx_q, uint16_t nb_tx_q)
+{
+ struct rte_eth_conf port_conf = {0};
+ struct rte_eth_dev_info dev_info;
+ int ret;
+ uint16_t q;
+
+ ret = rte_eth_dev_info_get(pid, &dev_info);
+ if (ret != 0) {
+ printf("Failed to get device info for port %u: %d\n", pid, ret);
+ return ret;
+ }
+
+ ret = rte_eth_dev_configure(pid, nb_rx_q, nb_tx_q, &port_conf);
+ if (ret != 0) {
+ printf("Failed to configure port %u: %d\n", pid, ret);
+ return ret;
+ }
+
+ for (q = 0; q < nb_rx_q; q++) {
+ ret = rte_eth_rx_queue_setup(pid, q, RING_SIZE,
+ rte_eth_dev_socket_id(pid),
+ NULL, mp);
+ if (ret != 0) {
+ printf("Failed to setup RX queue %u for port %u: %d\n",
+ q, pid, ret);
+ return ret;
+ }
+ }
+
+ for (q = 0; q < nb_tx_q; q++) {
+ ret = rte_eth_tx_queue_setup(pid, q, RING_SIZE,
+ rte_eth_dev_socket_id(pid),
+ NULL);
+ if (ret != 0) {
+ printf("Failed to setup TX queue %u for port %u: %d\n",
+ q, pid, ret);
+ return ret;
+ }
+ }
+
+ ret = rte_eth_dev_start(pid);
+ if (ret != 0) {
+ printf("Failed to start port %u: %d\n", pid, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+test_af_packet_setup(void)
+{
+ char devargs[256];
+ int ret;
+
+ /* Create mempool for mbufs */
+ mp = rte_pktmbuf_pool_create("af_packet_test_pool", NUM_MBUFS,
+ MBUF_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_socket_id());
+ if (mp == NULL) {
+ printf("Failed to create mempool\n");
+ return -1;
+ }
+
+ /* Create TAP interface for testing */
+ tap_fd = create_tap_interface(TAP_DEV_NAME);
+ if (tap_fd >= 0)
+ tap_created = true;
+ else {
+ printf("TAP interface creation failed - tests will be skipped\n");
+ return 0; /* Return success to allow skipped tests */
+ }
+
+ /* Create and configure af_packet port */
+ snprintf(devargs, sizeof(devargs), "iface=%s", TAP_DEV_NAME);
+ ret = create_af_packet_port(AF_PACKET_DEV_NAME, devargs, &port_id);
+ if (ret != 0) {
+ printf("Failed to create af_packet port\n");
+ return -1;
+ }
+ port_created = true;
+
+ ret = configure_af_packet_port(port_id, 1, 1);
+ if (ret != 0) {
+ printf("Failed to configure af_packet port\n");
+ return -1;
+ }
+ port_started = true;
+
+ return 0;
+}
+
+static void
+test_af_packet_teardown(void)
+{
+ /* Stop and close test port */
+ if (port_started) {
+ rte_eth_dev_stop(port_id);
+ port_started = false;
+ }
+
+ if (port_created) {
+ rte_eth_dev_close(port_id);
+ rte_vdev_uninit(AF_PACKET_DEV_NAME);
+ port_id = RTE_MAX_ETHPORTS;
+ port_created = false;
+ }
+
+ /* Destroy TAP interface */
+ if (tap_created) {
+ destroy_tap_interface(tap_fd);
+ tap_fd = -1;
+ tap_created = false;
+ }
+
+ if (mp != NULL) {
+ rte_mempool_free(mp);
+ mp = NULL;
+ }
+}
+
+/*
+ * Test: Device info verification
+ */
+static int
+test_af_packet_dev_info(void)
+{
+ struct rte_eth_dev_info dev_info;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ ret = rte_eth_dev_info_get(port_id, &dev_info);
+ TEST_ASSERT(ret == 0, "Failed to get device info");
+
+ /* Verify expected device info values */
+ TEST_ASSERT(dev_info.max_mac_addrs == 1,
+ "Expected max_mac_addrs=1, got %u", dev_info.max_mac_addrs);
+ TEST_ASSERT(dev_info.max_rx_pktlen == RTE_ETHER_MAX_LEN,
+ "Unexpected max_rx_pktlen: %u", dev_info.max_rx_pktlen);
+ TEST_ASSERT(dev_info.min_rx_bufsize == 0,
+ "Expected min_rx_bufsize=0, got %u", dev_info.min_rx_bufsize);
+ TEST_ASSERT(dev_info.max_rx_queues >= 1, "No RX queues available");
+ TEST_ASSERT(dev_info.max_tx_queues >= 1, "No TX queues available");
+ TEST_ASSERT(dev_info.if_index > 0, "Invalid interface index");
+
+ /* Check TX offload capabilities */
+ TEST_ASSERT(dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MULTI_SEGS,
+ "Expected MULTI_SEGS TX offload capability");
+ TEST_ASSERT(dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VLAN_INSERT,
+ "Expected VLAN_INSERT TX offload capability");
+
+ /* Check RX offload capabilities */
+ TEST_ASSERT(dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP,
+ "Expected VLAN_STRIP RX offload capability");
+ TEST_ASSERT(dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP,
+ "Expected TIMESTAMP RX offload capability");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Link status
+ * Note: af_packet PMD link status reflects the underlying interface state,
+ * not the DPDK device start/stop state.
+ */
+static int
+test_af_packet_link_status(void)
+{
+ struct rte_eth_link link;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ ret = rte_eth_link_get_nowait(port_id, &link);
+ TEST_ASSERT(ret == 0, "Failed to get link status");
+
+ /* TAP interface was brought up during setup, so link should be UP */
+ TEST_ASSERT(link.link_status == RTE_ETH_LINK_UP,
+ "Expected link UP (TAP interface is up)");
+ TEST_ASSERT(link.link_speed == RTE_ETH_SPEED_NUM_10G,
+ "Expected 10G link speed");
+ TEST_ASSERT(link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX,
+ "Expected full duplex");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Statistics initial state
+ */
+static int
+test_af_packet_stats_init(void)
+{
+ struct rte_eth_stats stats;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Reset stats */
+ ret = rte_eth_stats_reset(port_id);
+ TEST_ASSERT(ret == 0, "Failed to reset stats");
+
+ /* Get initial stats */
+ ret = rte_eth_stats_get(port_id, &stats);
+ TEST_ASSERT(ret == 0, "Failed to get stats");
+
+ /* After reset, stats should be zero */
+ TEST_ASSERT(stats.ipackets == 0,
+ "Expected ipackets=0 after reset, got %"PRIu64,
+ stats.ipackets);
+ TEST_ASSERT(stats.opackets == 0,
+ "Expected opackets=0 after reset, got %"PRIu64,
+ stats.opackets);
+ TEST_ASSERT(stats.ibytes == 0,
+ "Expected ibytes=0 after reset, got %"PRIu64,
+ stats.ibytes);
+ TEST_ASSERT(stats.obytes == 0,
+ "Expected obytes=0 after reset, got %"PRIu64,
+ stats.obytes);
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: TX packets (packets will be sent to TAP interface)
+ */
+static int
+test_af_packet_tx(void)
+{
+ struct rte_mbuf *bufs[BURST_SIZE];
+ struct rte_eth_stats stats;
+ uint16_t nb_tx;
+ unsigned int i;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Reset stats */
+ ret = rte_eth_stats_reset(port_id);
+ TEST_ASSERT(ret == 0, "Failed to reset stats");
+
+ /* Allocate and prepare mbufs for TX */
+ for (i = 0; i < BURST_SIZE; i++) {
+ bufs[i] = rte_pktmbuf_alloc(mp);
+ TEST_ASSERT(bufs[i] != NULL, "Failed to allocate mbuf");
+
+ /* Create a minimal Ethernet frame */
+ struct rte_ether_hdr *eth_hdr;
+ eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_append(bufs[i],
+ sizeof(struct rte_ether_hdr) + 46);
+ TEST_ASSERT(eth_hdr != NULL, "Failed to append data to mbuf");
+
+ /* Set destination and source MAC */
+ memset(ð_hdr->dst_addr, 0xFF, RTE_ETHER_ADDR_LEN);
+ memset(ð_hdr->src_addr, 0x00, RTE_ETHER_ADDR_LEN);
+ eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+ }
+
+ /* TX burst */
+ nb_tx = rte_eth_tx_burst(port_id, 0, bufs, BURST_SIZE);
+
+ /* Free any unsent mbufs */
+ for (i = nb_tx; i < BURST_SIZE; i++)
+ rte_pktmbuf_free(bufs[i]);
+
+ /* Small delay to allow stats update */
+ rte_delay_us_block(1000);
+
+ /* Get stats */
+ ret = rte_eth_stats_get(port_id, &stats);
+ TEST_ASSERT(ret == 0, "Failed to get stats");
+
+ /* Verify some packets were sent */
+ if (nb_tx > 0) {
+ TEST_ASSERT(stats.opackets > 0,
+ "Expected opackets > 0 after TX");
+ }
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: RX packets (non-blocking, may not receive anything)
+ */
+static int
+test_af_packet_rx(void)
+{
+ struct rte_mbuf *bufs[BURST_SIZE];
+ uint16_t nb_rx;
+ unsigned int i;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Try to receive packets (non-blocking) */
+ nb_rx = rte_eth_rx_burst(port_id, 0, bufs, BURST_SIZE);
+
+ /* Free any received mbufs */
+ for (i = 0; i < nb_rx; i++)
+ rte_pktmbuf_free(bufs[i]);
+
+ /* RX from tap interface without external traffic will return 0 */
+ /* This test just verifies the RX path doesn't crash */
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Promiscuous mode
+ */
+static int
+test_af_packet_promiscuous(void)
+{
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Enable promiscuous mode */
+ ret = rte_eth_promiscuous_enable(port_id);
+ TEST_ASSERT(ret == 0, "Failed to enable promiscuous mode");
+
+ ret = rte_eth_promiscuous_get(port_id);
+ TEST_ASSERT(ret == 1, "Expected promiscuous mode enabled");
+
+ /* Disable promiscuous mode */
+ ret = rte_eth_promiscuous_disable(port_id);
+ TEST_ASSERT(ret == 0, "Failed to disable promiscuous mode");
+
+ ret = rte_eth_promiscuous_get(port_id);
+ TEST_ASSERT(ret == 0, "Expected promiscuous mode disabled");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: MAC address operations
+ */
+static int
+test_af_packet_mac_addr(void)
+{
+ struct rte_ether_addr mac_addr;
+ struct rte_ether_addr new_mac = {
+ .addr_bytes = {0x02, 0x11, 0x22, 0x33, 0x44, 0x55}
+ };
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Get current MAC address */
+ ret = rte_eth_macaddr_get(port_id, &mac_addr);
+ TEST_ASSERT(ret == 0, "Failed to get MAC address");
+
+ /* Set new MAC address (use locally administered address) */
+ ret = rte_eth_dev_default_mac_addr_set(port_id, &new_mac);
+ TEST_ASSERT(ret == 0, "Failed to set MAC address");
+
+ /* Verify MAC was set */
+ ret = rte_eth_macaddr_get(port_id, &mac_addr);
+ TEST_ASSERT(ret == 0, "Failed to get MAC address after set");
+
+ TEST_ASSERT(memcmp(&mac_addr, &new_mac, sizeof(mac_addr)) == 0,
+ "MAC address mismatch after set");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: MTU operations
+ */
+static int
+test_af_packet_mtu(void)
+{
+ uint16_t mtu;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Get current MTU */
+ ret = rte_eth_dev_get_mtu(port_id, &mtu);
+ TEST_ASSERT(ret == 0, "Failed to get MTU");
+
+ /* Try to set a smaller MTU */
+ ret = rte_eth_dev_set_mtu(port_id, 1400);
+ if (ret == 0) {
+ ret = rte_eth_dev_get_mtu(port_id, &mtu);
+ TEST_ASSERT(ret == 0, "Failed to get MTU after set");
+
+ /* Restore original MTU */
+ rte_eth_dev_set_mtu(port_id, 1500);
+ }
+ /* MTU set may fail depending on permissions, that's OK */
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Stats reset verification
+ */
+static int
+test_af_packet_stats_reset(void)
+{
+ struct rte_eth_stats stats;
+ struct rte_mbuf *bufs[BURST_SIZE / 2];
+ uint16_t nb_tx;
+ unsigned int i;
+ int ret;
+
+ if (!tap_created || port_id >= RTE_MAX_ETHPORTS) {
+ printf("SKIPPED: Port not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Generate some TX traffic */
+ for (i = 0; i < BURST_SIZE / 2; i++) {
+ bufs[i] = rte_pktmbuf_alloc(mp);
+ if (bufs[i] == NULL)
+ break;
+
+ struct rte_ether_hdr *eth_hdr;
+ eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_append(bufs[i],
+ sizeof(struct rte_ether_hdr) + 46);
+ if (eth_hdr == NULL) {
+ rte_pktmbuf_free(bufs[i]);
+ break;
+ }
+
+ memset(ð_hdr->dst_addr, 0xFF, RTE_ETHER_ADDR_LEN);
+ memset(ð_hdr->src_addr, 0x00, RTE_ETHER_ADDR_LEN);
+ eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+ }
+
+ nb_tx = rte_eth_tx_burst(port_id, 0, bufs, i);
+ for (; nb_tx < i; nb_tx++)
+ rte_pktmbuf_free(bufs[nb_tx]);
+
+ /* Small delay */
+ rte_delay_us_block(1000);
+
+ /* Reset stats */
+ ret = rte_eth_stats_reset(port_id);
+ TEST_ASSERT(ret == 0, "Failed to reset stats");
+
+ /* Verify stats are zero */
+ ret = rte_eth_stats_get(port_id, &stats);
+ TEST_ASSERT(ret == 0, "Failed to get stats after reset");
+
+ TEST_ASSERT(stats.ipackets == 0,
+ "Expected ipackets=0, got %"PRIu64, stats.ipackets);
+ TEST_ASSERT(stats.opackets == 0,
+ "Expected opackets=0, got %"PRIu64, stats.opackets);
+ TEST_ASSERT(stats.ibytes == 0,
+ "Expected ibytes=0, got %"PRIu64, stats.ibytes);
+ TEST_ASSERT(stats.obytes == 0,
+ "Expected obytes=0, got %"PRIu64, stats.obytes);
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Invalid configuration handling - missing iface
+ */
+static int
+test_af_packet_invalid_no_iface(void)
+{
+ int ret;
+
+ /* Test without iface argument (should fail) */
+ ret = rte_vdev_init("net_af_packet_invalid1", "");
+ TEST_ASSERT(ret != 0, "Expected failure without iface argument");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Invalid configuration handling - non-existent interface
+ */
+static int
+test_af_packet_invalid_bad_iface(void)
+{
+ int ret;
+
+ /* Test with non-existent interface (should fail) */
+ ret = rte_vdev_init("net_af_packet_invalid2",
+ "iface=nonexistent_iface_12345");
+ TEST_ASSERT(ret != 0, "Expected failure with non-existent interface");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Invalid configuration handling - invalid qpairs
+ */
+static int
+test_af_packet_invalid_qpairs(void)
+{
+ int ret;
+
+ if (!tap_created) {
+ printf("SKIPPED: TAP interface not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Test with invalid qpairs (should fail) */
+ ret = rte_vdev_init("net_af_packet_invalid3",
+ "iface=" TAP_DEV_NAME ",qpairs=0");
+ TEST_ASSERT(ret != 0, "Expected failure with qpairs=0");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Custom frame size configuration
+ */
+static int
+test_af_packet_frame_config(void)
+{
+ struct rte_eth_dev_info dev_info;
+ uint16_t test_port;
+ char devargs[256];
+ int ret;
+
+ if (!tap_created) {
+ printf("SKIPPED: TAP interface not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Create with custom frame parameters */
+ snprintf(devargs, sizeof(devargs),
+ "iface=%s,blocksz=4096,framesz=2048,framecnt=256",
+ TAP_DEV_NAME);
+
+ ret = rte_vdev_init("net_af_packet_frame_test", devargs);
+ if (ret != 0) {
+ printf("SKIPPED: Could not create port with custom frame config\n");
+ return TEST_SKIPPED;
+ }
+
+ ret = rte_eth_dev_get_port_by_name("net_af_packet_frame_test",
+ &test_port);
+ TEST_ASSERT(ret == 0, "Failed to get port id");
+
+ ret = rte_eth_dev_info_get(test_port, &dev_info);
+ TEST_ASSERT(ret == 0, "Failed to get device info");
+
+ /* Cleanup */
+ rte_eth_dev_close(test_port);
+ rte_vdev_uninit("net_af_packet_frame_test");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Qdisc bypass configuration
+ */
+static int
+test_af_packet_qdisc_bypass(void)
+{
+ uint16_t test_port;
+ char devargs[256];
+ int ret;
+
+ if (!tap_created) {
+ printf("SKIPPED: TAP interface not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ /* Create with qdisc_bypass enabled */
+ snprintf(devargs, sizeof(devargs), "iface=%s,qdisc_bypass=1",
+ TAP_DEV_NAME);
+
+ ret = rte_vdev_init("net_af_packet_qdisc_test", devargs);
+ if (ret != 0) {
+ printf("SKIPPED: qdisc_bypass may not be supported\n");
+ return TEST_SKIPPED;
+ }
+
+ ret = rte_eth_dev_get_port_by_name("net_af_packet_qdisc_test",
+ &test_port);
+ TEST_ASSERT(ret == 0, "Failed to get port id");
+
+ /* Cleanup */
+ rte_eth_dev_close(test_port);
+ rte_vdev_uninit("net_af_packet_qdisc_test");
+
+ return TEST_SUCCESS;
+}
+
+/*
+ * Test: Multiple queue pairs
+ */
+static int
+test_af_packet_multi_queue(void)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_conf port_conf = {0};
+ struct rte_mbuf *bufs[BURST_SIZE];
+ uint16_t test_port;
+ char devargs[256];
+ const uint16_t num_queues = 2;
+ uint16_t nb_tx;
+ unsigned int i, q;
+ int ret;
+
+ if (!tap_created) {
+ printf("SKIPPED: TAP interface not available (need root)\n");
+ return TEST_SKIPPED;
+ }
+
+ snprintf(devargs, sizeof(devargs), "iface=%s,qpairs=%u",
+ TAP_DEV_NAME, num_queues);
+ ret = rte_vdev_init("net_af_packet_multi_q", devargs);
+ if (ret != 0) {
+ printf("SKIPPED: Could not create multi-queue port\n");
+ return TEST_SKIPPED;
+ }
+
+ ret = rte_eth_dev_get_port_by_name("net_af_packet_multi_q", &test_port);
+ TEST_ASSERT(ret == 0, "Failed to get port id for multi-queue port");
+
+ ret = rte_eth_dev_info_get(test_port, &dev_info);
+ TEST_ASSERT(ret == 0, "Failed to get device info");
+
+ TEST_ASSERT(dev_info.max_rx_queues >= num_queues,
+ "Expected at least %u RX queues, got %u",
+ num_queues, dev_info.max_rx_queues);
+ TEST_ASSERT(dev_info.max_tx_queues >= num_queues,
+ "Expected at least %u TX queues, got %u",
+ num_queues, dev_info.max_tx_queues);
+
+ /* Configure the port */
+ ret = rte_eth_dev_configure(test_port, num_queues, num_queues, &port_conf);
+ TEST_ASSERT(ret == 0, "Failed to configure multi-queue port");
+
+ for (q = 0; q < num_queues; q++) {
+ ret = rte_eth_rx_queue_setup(test_port, q, RING_SIZE,
+ rte_eth_dev_socket_id(test_port),
+ NULL, mp);
+ TEST_ASSERT(ret == 0, "Failed to setup RX queue %u", q);
+
+ ret = rte_eth_tx_queue_setup(test_port, q, RING_SIZE,
+ rte_eth_dev_socket_id(test_port),
+ NULL);
+ TEST_ASSERT(ret == 0, "Failed to setup TX queue %u", q);
+ }
+
+ ret = rte_eth_dev_start(test_port);
+ TEST_ASSERT(ret == 0, "Failed to start multi-queue port");
+
+ /* Test TX on different queues */
+ for (q = 0; q < num_queues; q++) {
+ for (i = 0; i < BURST_SIZE / 2; i++) {
+ bufs[i] = rte_pktmbuf_alloc(mp);
+ if (bufs[i] == NULL)
+ break;
+
+ struct rte_ether_hdr *eth_hdr;
+ eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_append(
+ bufs[i], sizeof(struct rte_ether_hdr) + 46);
+ if (eth_hdr == NULL) {
+ rte_pktmbuf_free(bufs[i]);
+ break;
+ }
+
+ memset(ð_hdr->dst_addr, 0xFF, RTE_ETHER_ADDR_LEN);
+ memset(ð_hdr->src_addr, 0x00, RTE_ETHER_ADDR_LEN);
+ eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+ }
+
+ nb_tx = rte_eth_tx_burst(test_port, q, bufs, i);
+
+ /* Free unsent mbufs */
+ for (; nb_tx < i; nb_tx++)
+ rte_pktmbuf_free(bufs[nb_tx]);
+ }
+
+ /* Cleanup */
+ rte_eth_dev_stop(test_port);
+ rte_eth_dev_close(test_port);
+ rte_vdev_uninit("net_af_packet_multi_q");
+
+ return TEST_SUCCESS;
+}
+
+static struct unit_test_suite af_packet_test_suite = {
+ .suite_name = "AF_PACKET PMD Unit Test Suite",
+ .setup = test_af_packet_setup,
+ .teardown = test_af_packet_teardown,
+ .unit_test_cases = {
+ /* Tests that don't modify device state */
+ TEST_CASE(test_af_packet_dev_info),
+ TEST_CASE(test_af_packet_link_status),
+ TEST_CASE(test_af_packet_stats_init),
+ TEST_CASE(test_af_packet_tx),
+ TEST_CASE(test_af_packet_rx),
+ TEST_CASE(test_af_packet_promiscuous),
+ TEST_CASE(test_af_packet_mac_addr),
+ TEST_CASE(test_af_packet_mtu),
+ TEST_CASE(test_af_packet_stats_reset),
+
+ /* Tests that create their own devices */
+ TEST_CASE(test_af_packet_invalid_no_iface),
+ TEST_CASE(test_af_packet_invalid_bad_iface),
+ TEST_CASE(test_af_packet_invalid_qpairs),
+ TEST_CASE(test_af_packet_frame_config),
+ TEST_CASE(test_af_packet_qdisc_bypass),
+ TEST_CASE(test_af_packet_multi_queue),
+
+ TEST_CASES_END() /**< NULL terminate unit test array */
+ }
+};
+
+static int
+test_pmd_af_packet(void)
+{
+ return unit_test_suite_runner(&af_packet_test_suite);
+}
+#endif
+
+REGISTER_FAST_TEST(af_packet_pmd_autotest, NOHUGE_OK, ASAN_OK, test_pmd_af_packet);
--
2.51.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [RFC 3/4] net/af_packet: fix indentation
2026-01-28 17:30 [RFC 0/4] net/af_packet: cleanups and optimizations Stephen Hemminger
2026-01-28 17:30 ` [RFC 1/4] net/af_packet: remove volatile from statistics Stephen Hemminger
2026-01-28 17:30 ` [RFC 2/4] test: add test for af_packet Stephen Hemminger
@ 2026-01-28 17:30 ` Stephen Hemminger
2026-01-28 17:30 ` [RFC 4/4] net/af_packet: add VPP-style prefetching to receive path Stephen Hemminger
3 siblings, 0 replies; 18+ messages in thread
From: Stephen Hemminger @ 2026-01-28 17:30 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, John W. Linville
DPDK code is supposed to be indented with TABS not spaces.
Also, used "unsigned int" to keep checkpatch happy.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
drivers/net/af_packet/rte_eth_af_packet.c | 64 +++++++++++------------
1 file changed, 32 insertions(+), 32 deletions(-)
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 158393dd70..5847e14d80 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -78,7 +78,7 @@ struct __rte_cache_aligned pkt_tx_queue {
};
struct pmd_internals {
- unsigned nb_queues;
+ unsigned int nb_queues;
int if_index;
char *if_name;
@@ -537,7 +537,7 @@ eth_dev_close(struct rte_eth_dev *dev)
static int
eth_link_update(struct rte_eth_dev *dev,
- int wait_to_complete __rte_unused)
+ int wait_to_complete __rte_unused)
{
const struct pmd_internals *internals = dev->data->dev_private;
struct rte_eth_link *dev_link = &dev->data->dev_link;
@@ -557,11 +557,11 @@ eth_link_update(struct rte_eth_dev *dev,
static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
- uint16_t rx_queue_id,
- uint16_t nb_rx_desc __rte_unused,
- unsigned int socket_id __rte_unused,
- const struct rte_eth_rxconf *rx_conf __rte_unused,
- struct rte_mempool *mb_pool)
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc __rte_unused,
+ unsigned int socket_id __rte_unused,
+ const struct rte_eth_rxconf *rx_conf __rte_unused,
+ struct rte_mempool *mb_pool)
{
struct pmd_internals *internals = dev->data->dev_private;
struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id];
@@ -592,10 +592,10 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
- uint16_t tx_queue_id,
- uint16_t nb_tx_desc __rte_unused,
- unsigned int socket_id __rte_unused,
- const struct rte_eth_txconf *tx_conf __rte_unused)
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc __rte_unused,
+ unsigned int socket_id __rte_unused,
+ const struct rte_eth_txconf *tx_conf __rte_unused)
{
struct pmd_internals *internals = dev->data->dev_private;
@@ -722,8 +722,8 @@ static const struct eth_dev_ops ops = {
*/
static int
open_packet_iface(const char *key __rte_unused,
- const char *value __rte_unused,
- void *extra_args)
+ const char *value __rte_unused,
+ void *extra_args)
{
int *sockfd = extra_args;
@@ -786,17 +786,17 @@ get_fanout(const char *fanout_mode, int if_index)
static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
- const int sockfd,
- const unsigned nb_queues,
- unsigned int blocksize,
- unsigned int blockcnt,
- unsigned int framesize,
- unsigned int framecnt,
+ const int sockfd,
+ const unsigned int nb_queues,
+ unsigned int blocksize,
+ unsigned int blockcnt,
+ unsigned int framesize,
+ unsigned int framecnt,
unsigned int qdisc_bypass,
const char *fanout_mode,
- struct pmd_internals **internals,
- struct rte_eth_dev **eth_dev,
- struct rte_kvargs *kvlist)
+ struct pmd_internals **internals,
+ struct rte_eth_dev **eth_dev,
+ struct rte_kvargs *kvlist)
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node;
@@ -822,7 +822,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
if (pair == NULL) {
PMD_LOG(ERR,
"%s: no interface specified for AF_PACKET ethdev",
- name);
+ name);
return -1;
}
@@ -831,7 +831,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
name, numa_node);
*internals = rte_zmalloc_socket(name, sizeof(**internals),
- 0, numa_node);
+ 0, numa_node);
if (*internals == NULL)
return -1;
@@ -1072,8 +1072,8 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
static int
rte_eth_from_packet(struct rte_vdev_device *dev,
- int const *sockfd,
- struct rte_kvargs *kvlist)
+ int const *sockfd,
+ struct rte_kvargs *kvlist)
{
const char *name = rte_vdev_device_name(dev);
struct pmd_internals *internals = NULL;
@@ -1104,7 +1104,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (qpairs < 1) {
PMD_LOG(ERR,
"%s: invalid qpairs value",
- name);
+ name);
return -1;
}
continue;
@@ -1114,7 +1114,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (!blocksize) {
PMD_LOG(ERR,
"%s: invalid blocksize value",
- name);
+ name);
return -1;
}
continue;
@@ -1124,7 +1124,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (!framesize) {
PMD_LOG(ERR,
"%s: invalid framesize value",
- name);
+ name);
return -1;
}
continue;
@@ -1134,7 +1134,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (!framecount) {
PMD_LOG(ERR,
"%s: invalid framecount value",
- name);
+ name);
return -1;
}
continue;
@@ -1158,7 +1158,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (framesize > blocksize) {
PMD_LOG(ERR,
"%s: AF_PACKET MMAP frame size exceeds block size!",
- name);
+ name);
return -1;
}
@@ -1233,7 +1233,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
if (rte_kvargs_count(kvlist, ETH_AF_PACKET_IFACE_ARG) == 1) {
ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG,
- &open_packet_iface, &sockfd);
+ &open_packet_iface, &sockfd);
if (ret < 0)
goto exit;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [RFC 4/4] net/af_packet: add VPP-style prefetching to receive path
2026-01-28 17:30 [RFC 0/4] net/af_packet: cleanups and optimizations Stephen Hemminger
` (2 preceding siblings ...)
2026-01-28 17:30 ` [RFC 3/4] net/af_packet: fix indentation Stephen Hemminger
@ 2026-01-28 17:30 ` Stephen Hemminger
2026-01-29 1:06 ` Stephen Hemminger
3 siblings, 1 reply; 18+ messages in thread
From: Stephen Hemminger @ 2026-01-28 17:30 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger, John W. Linville
Implement the single/dual/quad loop design pattern from FD.IO VPP to
improve cache efficiency in the af_packet PMD receive path.
The original implementation processes packets one at a time in a simple
loop, which can result in cache misses when accessing frame headers and
packet data. The new implementation:
- Processes packets in batches of 4 (quad), 2 (dual), and 1 (single)
- Prefetches next batch of frame headers while processing current batch
- Prefetches packet data before memcpy to hide memory latency
- Reduces loop overhead through partial unrolling
Two helper functions are introduced:
- af_packet_get_frame(): Returns frame pointer at index with wraparound
- af_packet_rx_one(): Common per-packet processing (mbuf alloc, memcpy,
VLAN handling, timestamp offload)
The quad loop checks availability of all 4 frames before processing,
falling through to dual/single loops when fewer frames are ready. Early
exit paths (out_advance1/2/3) ensure correct frame index tracking when
mbuf allocation fails mid-batch.
Prefetch strategy:
- Frame headers: prefetch N+4..N+7 while processing N..N+3
- Packet data: prefetch at tp_mac offset before memcpy
This pattern is well-established in high-performance packet processing
and should improve throughput by better utilizing CPU cache hierarchy,
particularly beneficial when processing bursts of packets.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
drivers/net/af_packet/rte_eth_af_packet.c | 208 +++++++++++++++++-----
1 file changed, 164 insertions(+), 44 deletions(-)
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 5847e14d80..946c21d878 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -14,6 +14,7 @@
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <bus_vdev_driver.h>
+#include <rte_prefetch.h>
#include <errno.h>
#include <linux/if_ether.h>
@@ -120,75 +121,194 @@ RTE_LOG_REGISTER_DEFAULT(af_packet_logtype, NOTICE);
RTE_LOG_LINE(level, AFPACKET, "%s(): " fmt ":%s", __func__, \
## __VA_ARGS__, strerror(errno))
-static uint16_t
-eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+/*
+ * Helper to get the frame pointer at a given index with wraparound
+ */
+static inline struct tpacket2_hdr *
+af_packet_get_frame(struct pkt_rx_queue *pkt_q, unsigned int idx)
+{
+ if (idx >= pkt_q->framecount)
+ idx -= pkt_q->framecount;
+ return (struct tpacket2_hdr *)pkt_q->rd[idx].iov_base;
+}
+
+/*
+ * Process a single received packet - common code for all loop variants
+ */
+static inline int
+af_packet_rx_one(struct pkt_rx_queue *pkt_q,
+ struct tpacket2_hdr *ppd,
+ struct rte_mbuf **mbuf_out,
+ unsigned long *rx_bytes)
{
- unsigned i;
- struct tpacket2_hdr *ppd;
struct rte_mbuf *mbuf;
uint8_t *pbuf;
+
+ mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
+ if (unlikely(mbuf == NULL)) {
+ pkt_q->rx_nombuf++;
+ return -1;
+ }
+
+ rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
+ pbuf = (uint8_t *)ppd + ppd->tp_mac;
+ memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));
+
+ if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
+ mbuf->vlan_tci = ppd->tp_vlan_tci;
+ mbuf->ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
+ if (!pkt_q->vlan_strip && rte_vlan_insert(&mbuf))
+ PMD_LOG(ERR, "Failed to reinsert VLAN tag");
+ }
+
+ if (pkt_q->timestamp_offloading) {
+ *RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
+ rte_mbuf_timestamp_t *) =
+ (uint64_t)ppd->tp_sec * 1000000000 + ppd->tp_nsec;
+ mbuf->ol_flags |= timestamp_dynflag;
+ }
+
+ mbuf->port = pkt_q->in_port;
+ *mbuf_out = mbuf;
+ *rx_bytes += mbuf->pkt_len;
+ ppd->tp_status = TP_STATUS_KERNEL;
+
+ return 0;
+}
+
+/*
+ * Receive packets using VPP-style single/dual/quad loop pattern with prefetching.
+ */
+static uint16_t
+eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
struct pkt_rx_queue *pkt_q = queue;
+ struct tpacket2_hdr *ppd0, *ppd1, *ppd2, *ppd3;
uint16_t num_rx = 0;
unsigned long num_rx_bytes = 0;
unsigned int framecount, framenum;
+ uint16_t n_left;
if (unlikely(nb_pkts == 0))
return 0;
- /*
- * Reads the given number of packets from the AF_PACKET socket one by
- * one and copies the packet data into a newly allocated mbuf.
- */
framecount = pkt_q->framecount;
framenum = pkt_q->framenum;
- for (i = 0; i < nb_pkts; i++) {
- /* point at the next incoming frame */
- ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base;
- if ((ppd->tp_status & TP_STATUS_USER) == 0)
+ n_left = nb_pkts;
+
+ /* Quad loop: Process 4 packets at a time with prefetching */
+ while (n_left >= 4) {
+ ppd0 = af_packet_get_frame(pkt_q, framenum);
+ ppd1 = af_packet_get_frame(pkt_q, framenum + 1);
+ ppd2 = af_packet_get_frame(pkt_q, framenum + 2);
+ ppd3 = af_packet_get_frame(pkt_q, framenum + 3);
+
+ if ((ppd0->tp_status & TP_STATUS_USER) == 0)
break;
+ if ((ppd1->tp_status & TP_STATUS_USER) == 0)
+ goto dual_loop;
+ if ((ppd2->tp_status & TP_STATUS_USER) == 0)
+ goto dual_loop;
+ if ((ppd3->tp_status & TP_STATUS_USER) == 0)
+ goto dual_loop;
+
+ /* Prefetch next 4 frame headers */
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 4));
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 5));
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 6));
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 7));
+
+ /* Prefetch packet data */
+ rte_prefetch0((uint8_t *)ppd0 + ppd0->tp_mac);
+ rte_prefetch0((uint8_t *)ppd1 + ppd1->tp_mac);
+ rte_prefetch0((uint8_t *)ppd2 + ppd2->tp_mac);
+ rte_prefetch0((uint8_t *)ppd3 + ppd3->tp_mac);
+
+ if (unlikely(af_packet_rx_one(pkt_q, ppd0, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out;
+ num_rx++;
+ if (unlikely(af_packet_rx_one(pkt_q, ppd1, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out_advance1;
+ num_rx++;
+ if (unlikely(af_packet_rx_one(pkt_q, ppd2, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out_advance2;
+ num_rx++;
+ if (unlikely(af_packet_rx_one(pkt_q, ppd3, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out_advance3;
+ num_rx++;
- /* allocate the next mbuf */
- mbuf = rte_pktmbuf_alloc(pkt_q->mb_pool);
- if (unlikely(mbuf == NULL)) {
- pkt_q->rx_nombuf++;
+ framenum += 4;
+ if (framenum >= framecount)
+ framenum -= framecount;
+ n_left -= 4;
+ }
+
+dual_loop:
+ /* Dual loop: Process 2 packets at a time */
+ while (n_left >= 2) {
+ ppd0 = af_packet_get_frame(pkt_q, framenum);
+ ppd1 = af_packet_get_frame(pkt_q, framenum + 1);
+
+ if ((ppd0->tp_status & TP_STATUS_USER) == 0)
break;
- }
+ if ((ppd1->tp_status & TP_STATUS_USER) == 0)
+ goto single_loop;
- /* packet will fit in the mbuf, go ahead and receive it */
- rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf) = ppd->tp_snaplen;
- pbuf = (uint8_t *) ppd + ppd->tp_mac;
- memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf));
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 2));
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 3));
+ rte_prefetch0((uint8_t *)ppd0 + ppd0->tp_mac);
+ rte_prefetch0((uint8_t *)ppd1 + ppd1->tp_mac);
- /* check for vlan info */
- if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
- mbuf->vlan_tci = ppd->tp_vlan_tci;
- mbuf->ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
+ if (unlikely(af_packet_rx_one(pkt_q, ppd0, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out;
+ num_rx++;
+ if (unlikely(af_packet_rx_one(pkt_q, ppd1, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out_advance1;
+ num_rx++;
- if (!pkt_q->vlan_strip && rte_vlan_insert(&mbuf))
- PMD_LOG(ERR, "Failed to reinsert VLAN tag");
- }
+ framenum += 2;
+ if (framenum >= framecount)
+ framenum -= framecount;
+ n_left -= 2;
+ }
- /* add kernel provided timestamp when offloading is enabled */
- if (pkt_q->timestamp_offloading) {
- /* since TPACKET_V2 timestamps are provided in nanoseconds resolution */
- *RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
- rte_mbuf_timestamp_t *) =
- (uint64_t)ppd->tp_sec * 1000000000 + ppd->tp_nsec;
+single_loop:
+ /* Single loop: Process remaining packets */
+ while (n_left >= 1) {
+ ppd0 = af_packet_get_frame(pkt_q, framenum);
- mbuf->ol_flags |= timestamp_dynflag;
- }
+ if ((ppd0->tp_status & TP_STATUS_USER) == 0)
+ break;
- /* release incoming frame and advance ring buffer */
- ppd->tp_status = TP_STATUS_KERNEL;
- if (++framenum >= framecount)
- framenum = 0;
- mbuf->port = pkt_q->in_port;
+ rte_prefetch0(af_packet_get_frame(pkt_q, framenum + 1));
+ rte_prefetch0((uint8_t *)ppd0 + ppd0->tp_mac);
- /* account for the receive frame */
- bufs[i] = mbuf;
+ if (unlikely(af_packet_rx_one(pkt_q, ppd0, &bufs[num_rx], &num_rx_bytes) < 0))
+ goto out;
num_rx++;
- num_rx_bytes += mbuf->pkt_len;
+
+ if (++framenum >= framecount)
+ framenum = 0;
+ n_left--;
}
+
+ goto out;
+
+out_advance3:
+ framenum += 3;
+ if (framenum >= framecount)
+ framenum -= framecount;
+ goto out;
+out_advance2:
+ framenum += 2;
+ if (framenum >= framecount)
+ framenum -= framecount;
+ goto out;
+out_advance1:
+ framenum += 1;
+ if (framenum >= framecount)
+ framenum -= framecount;
+out:
pkt_q->framenum = framenum;
pkt_q->rx_pkts += num_rx;
pkt_q->rx_bytes += num_rx_bytes;
--
2.51.0
^ permalink raw reply related [flat|nested] 18+ messages in thread