DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 01/23] net/sxe2: remove software statistics devargs
From: liujie5 @ 2026-06-24  2:02 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260622092731.3092201-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

Remove the optional drv-sw-stats device argument and make software
statistics always-on. Per-queue software statistics are point-in-time
measurements used for accumulation at queue stop/dump, so atomic
operations with rte_memory_order_relaxed add unnecessary overhead
without correctness benefit.

Also rename high_performance_mode field to no_sched_mode to match
the devargs string definition.

Changes:
- Remove sw_stats_en field from struct sxe2_devargs
- Remove RTE_ATOMIC qualifiers from sxe2_rxq_sw_stats fields
- Replace rte_atomic_fetch_add_explicit(relaxed) with plain addition
- Replace rte_atomic_store/load_explicit(relaxed) with plain assignment
- Remove sw_stats_en conditional checks in Rx fast path
- Always pass umbcast_flags to vec Rx functions
- Remove unused #include <rte_stdatomic.h>
- Rename high_performance_mode → no_sched_mode in devargs struct
- Fix int → int32_t for return type in sxe2_parse_eth_devargs

Signed-off-by: Jie Liu <liujie5@linkdatatechnology.com>
---
 drivers/net/sxe2/sxe2_ethdev.c          |  2 +-
 drivers/net/sxe2/sxe2_ethdev.h          |  3 +-
 drivers/net/sxe2/sxe2_queue.h           | 15 ++++---
 drivers/net/sxe2/sxe2_rx.c              | 55 +++++++------------------
 drivers/net/sxe2/sxe2_txrx_poll.c       | 38 ++++++-----------
 drivers/net/sxe2/sxe2_txrx_vec_common.h | 52 ++++++++++-------------
 drivers/net/sxe2/sxe2_txrx_vec_sse.c    | 29 +------------
 7 files changed, 61 insertions(+), 133 deletions(-)

diff --git a/drivers/net/sxe2/sxe2_ethdev.c b/drivers/net/sxe2/sxe2_ethdev.c
index b6cc8703a7..066e1faf7e 100644
--- a/drivers/net/sxe2/sxe2_ethdev.c
+++ b/drivers/net/sxe2/sxe2_ethdev.c
@@ -891,7 +891,7 @@ static int32_t sxe2_eth_pmd_probe_pf(struct sxe2_common_device *cdev,
 static int32_t sxe2_parse_eth_devargs(struct rte_device *dev,
 			  struct rte_eth_devargs *eth_da)
 {
-	int ret = 0;
+	int32_t ret = 0;
 
 	if (dev->devargs == NULL)
 		return 0;
diff --git a/drivers/net/sxe2/sxe2_ethdev.h b/drivers/net/sxe2/sxe2_ethdev.h
index a3706945e8..8015d9a064 100644
--- a/drivers/net/sxe2/sxe2_ethdev.h
+++ b/drivers/net/sxe2/sxe2_ethdev.h
@@ -130,9 +130,8 @@ struct sxe2_devargs {
 	uint8_t flow_dup_pattern_mode;
 	uint8_t func_flow_direct_en;
 	uint8_t fnav_stat_type;
-	uint8_t high_performance_mode;
+	uint8_t no_sched_mode;
 	uint8_t sched_layer_mode;
-	uint8_t sw_stats_en;
 	uint8_t rx_low_latency;
 };
 
diff --git a/drivers/net/sxe2/sxe2_queue.h b/drivers/net/sxe2/sxe2_queue.h
index adb4be1214..a300b66771 100644
--- a/drivers/net/sxe2/sxe2_queue.h
+++ b/drivers/net/sxe2/sxe2_queue.h
@@ -7,7 +7,6 @@
 
 #include <rte_ethdev.h>
 #include <rte_io.h>
-#include <rte_stdatomic.h>
 #include <ethdev_driver.h>
 
 #include "sxe2_drv_cmd.h"
@@ -123,13 +122,13 @@ struct sxe2_rxq_stats {
 };
 
 struct sxe2_rxq_sw_stats {
-	RTE_ATOMIC(uint64_t)pkts;
-	RTE_ATOMIC(uint64_t)bytes;
-	RTE_ATOMIC(uint64_t)drop_pkts;
-	RTE_ATOMIC(uint64_t)drop_bytes;
-	RTE_ATOMIC(uint64_t)unicast_pkts;
-	RTE_ATOMIC(uint64_t)multicast_pkts;
-	RTE_ATOMIC(uint64_t)broadcast_pkts;
+	uint64_t pkts;
+	uint64_t bytes;
+	uint64_t drop_pkts;
+	uint64_t drop_bytes;
+	uint64_t unicast_pkts;
+	uint64_t multicast_pkts;
+	uint64_t broadcast_pkts;
 };
 
 struct sxe2_rx_queue {
diff --git a/drivers/net/sxe2/sxe2_rx.c b/drivers/net/sxe2/sxe2_rx.c
index 28832d5f71..543d825166 100644
--- a/drivers/net/sxe2/sxe2_rx.c
+++ b/drivers/net/sxe2/sxe2_rx.c
@@ -479,20 +479,13 @@ int32_t __rte_cold sxe2_rxqs_all_start(struct rte_eth_dev *dev)
 			goto l_free_started_queue;
 		}
 
-		rte_atomic_store_explicit(&rxq->sw_stats.pkts, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.bytes, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.drop_pkts, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.drop_bytes, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.unicast_pkts, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.broadcast_pkts, 0,
-			rte_memory_order_relaxed);
-		rte_atomic_store_explicit(&rxq->sw_stats.multicast_pkts, 0,
-			rte_memory_order_relaxed);
+		rxq->sw_stats.pkts = 0;
+		rxq->sw_stats.bytes = 0;
+		rxq->sw_stats.drop_pkts = 0;
+		rxq->sw_stats.drop_bytes = 0;
+		rxq->sw_stats.unicast_pkts = 0;
+		rxq->sw_stats.broadcast_pkts = 0;
+		rxq->sw_stats.multicast_pkts = 0;
 	}
 	ret = 0;
 	goto l_end;
@@ -524,31 +517,15 @@ void __rte_cold sxe2_rxqs_all_stop(struct rte_eth_dev *dev)
 
 		rxq = dev->data->rx_queues[nb_rxq];
 		if (rxq) {
-			sw_stats_prev->ipackets +=
-				rte_atomic_load_explicit(&rxq->sw_stats.pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->ierrors +=
-				rte_atomic_load_explicit(&rxq->sw_stats.drop_pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->ibytes +=
-				rte_atomic_load_explicit(&rxq->sw_stats.bytes,
-					rte_memory_order_relaxed);
-
-			sw_stats_prev->rx_sw_unicast_packets +=
-				rte_atomic_load_explicit(&rxq->sw_stats.unicast_pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->rx_sw_broadcast_packets +=
-				rte_atomic_load_explicit(&rxq->sw_stats.broadcast_pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->rx_sw_multicast_packets +=
-				rte_atomic_load_explicit(&rxq->sw_stats.multicast_pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->rx_sw_drop_packets +=
-				rte_atomic_load_explicit(&rxq->sw_stats.drop_pkts,
-					rte_memory_order_relaxed);
-			sw_stats_prev->rx_sw_drop_bytes +=
-				rte_atomic_load_explicit(&rxq->sw_stats.drop_bytes,
-					rte_memory_order_relaxed);
+			sw_stats_prev->ipackets += rxq->sw_stats.pkts;
+			sw_stats_prev->ierrors += rxq->sw_stats.drop_pkts;
+			sw_stats_prev->ibytes += rxq->sw_stats.bytes;
+
+			sw_stats_prev->rx_sw_unicast_packets += rxq->sw_stats.unicast_pkts;
+			sw_stats_prev->rx_sw_broadcast_packets += rxq->sw_stats.broadcast_pkts;
+			sw_stats_prev->rx_sw_multicast_packets += rxq->sw_stats.multicast_pkts;
+			sw_stats_prev->rx_sw_drop_packets += rxq->sw_stats.drop_pkts;
+			sw_stats_prev->rx_sw_drop_bytes += rxq->sw_stats.drop_bytes;
 		}
 	}
 }
diff --git a/drivers/net/sxe2/sxe2_txrx_poll.c b/drivers/net/sxe2/sxe2_txrx_poll.c
index b9d34afb31..947a5247ed 100644
--- a/drivers/net/sxe2/sxe2_txrx_poll.c
+++ b/drivers/net/sxe2/sxe2_txrx_poll.c
@@ -682,23 +682,17 @@ sxe2_rx_sw_stats_update(struct sxe2_rx_queue *rxq, struct rte_mbuf *mbuf,
 		union sxe2_rx_desc *rxd)
 {
 	uint64_t qword1 = rte_le_to_cpu_64(rxd->wb.status_err_ptype_len);
-	rte_atomic_fetch_add_explicit(&rxq->sw_stats.pkts, 1,
-		rte_memory_order_relaxed);
-	rte_atomic_fetch_add_explicit(&rxq->sw_stats.bytes,
-			mbuf->pkt_len + RTE_ETHER_CRC_LEN,
-			rte_memory_order_relaxed);
+	rxq->sw_stats.pkts += 1;
+	rxq->sw_stats.bytes += mbuf->pkt_len + RTE_ETHER_CRC_LEN;
 	switch (SXE2_RX_DESC_STATUS_UMBCAST_VAL_GET(qword1)) {
 	case SXE2_RX_DESC_STATUS_UNICAST:
-		rte_atomic_fetch_add_explicit(&rxq->sw_stats.unicast_pkts, 1,
-			rte_memory_order_relaxed);
+		rxq->sw_stats.unicast_pkts += 1;
 		break;
 	case SXE2_RX_DESC_STATUS_MULTICAST:
-		rte_atomic_fetch_add_explicit(&rxq->sw_stats.multicast_pkts, 1,
-			rte_memory_order_relaxed);
+		rxq->sw_stats.multicast_pkts += 1;
 		break;
 	case SXE2_RX_DESC_STATUS_BROADCAST:
-		rte_atomic_fetch_add_explicit(&rxq->sw_stats.broadcast_pkts, 1,
-			rte_memory_order_relaxed);
+		rxq->sw_stats.broadcast_pkts += 1;
 		break;
 	default:
 		break;
@@ -787,11 +781,9 @@ uint16_t sxe2_rx_pkts_scattered(void *rx_queue, struct rte_mbuf **rx_pkts, uint1
 
 		if (unlikely(qword1 & SXE2_RX_DESC_ERROR_RXE_MASK) ||
 			unlikely(qword1 & SXE2_RX_DESC_ERROR_OVERSIZE_MASK)) {
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-				rte_memory_order_relaxed);
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-				first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN,
-				rte_memory_order_relaxed);
+			rxq->sw_stats.drop_pkts += 1;
+			rxq->sw_stats.drop_bytes +=
+				first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN;
 			rte_pktmbuf_free(first_seg);
 			first_seg = NULL;
 			continue;
@@ -822,8 +814,7 @@ uint16_t sxe2_rx_pkts_scattered(void *rx_queue, struct rte_mbuf **rx_pkts, uint1
 
 		sxe2_rx_mbuf_common_fields_fill(rxq, first_seg, &desc_tmp);
 
-		if (rxq->vsi->adapter->devargs.sw_stats_en)
-			sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
+		sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
 
 		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, first_seg->data_off));
 
@@ -990,11 +981,9 @@ uint16_t sxe2_rx_pkts_scattered_split(void *rx_queue, struct rte_mbuf **rx_pkts,
 
 		if (unlikely(qword1 & SXE2_RX_DESC_ERROR_RXE_MASK) ||
 			unlikely(qword1 & SXE2_RX_DESC_ERROR_OVERSIZE_MASK)) {
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-				rte_memory_order_relaxed);
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-				first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN,
-				rte_memory_order_relaxed);
+			rxq->sw_stats.drop_pkts += 1;
+			rxq->sw_stats.drop_bytes +=
+				first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN;
 			rte_pktmbuf_free(first_seg);
 			first_seg = NULL;
 			continue;
@@ -1023,8 +1012,7 @@ uint16_t sxe2_rx_pkts_scattered_split(void *rx_queue, struct rte_mbuf **rx_pkts,
 		first_seg->port = rxq->port_id;
 		sxe2_rx_mbuf_common_fields_fill(rxq, first_seg, &desc_tmp);
 
-		if (rxq->vsi->adapter->devargs.sw_stats_en)
-			sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
+		sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
 
 		rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, first_seg->data_off));
 
diff --git a/drivers/net/sxe2/sxe2_txrx_vec_common.h b/drivers/net/sxe2/sxe2_txrx_vec_common.h
index 6b1649c390..cc74f6e582 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec_common.h
+++ b/drivers/net/sxe2/sxe2_txrx_vec_common.h
@@ -130,27 +130,20 @@ sxe2_tx_desc_fill_offloads(struct rte_mbuf *mbuf, uint64_t *desc_qw1)
 static inline void sxe2_vf_rx_vec_sw_stats_cnt(struct sxe2_rx_queue *rxq,
 		struct rte_mbuf *mbuf, uint8_t umbcast_flag)
 {
-	if (rxq->vsi->adapter->devargs.sw_stats_en) {
-		rte_atomic_fetch_add_explicit(&rxq->sw_stats.pkts, 1,
-					rte_memory_order_relaxed);
-		rte_atomic_fetch_add_explicit(&rxq->sw_stats.bytes,
-				 mbuf->pkt_len + RTE_ETHER_CRC_LEN, rte_memory_order_relaxed);
-		switch (SXE2_RX_UMBCAST_FLAGS_VAL_GET(umbcast_flag)) {
-		case SXE2_RX_DESC_STATUS_UNICAST:
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.unicast_pkts, 1,
-					rte_memory_order_relaxed);
-			break;
-		case SXE2_RX_DESC_STATUS_MULTICAST:
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.multicast_pkts, 1,
-					rte_memory_order_relaxed);
-			break;
-		case SXE2_RX_DESC_STATUS_BROADCAST:
-			rte_atomic_fetch_add_explicit(&rxq->sw_stats.broadcast_pkts, 1,
-					rte_memory_order_relaxed);
-			break;
-		default:
-			break;
-		}
+	rxq->sw_stats.pkts += 1;
+	rxq->sw_stats.bytes += mbuf->pkt_len + RTE_ETHER_CRC_LEN;
+	switch (SXE2_RX_UMBCAST_FLAGS_VAL_GET(umbcast_flag)) {
+	case SXE2_RX_DESC_STATUS_UNICAST:
+		rxq->sw_stats.unicast_pkts += 1;
+		break;
+	case SXE2_RX_DESC_STATUS_MULTICAST:
+		rxq->sw_stats.multicast_pkts += 1;
+		break;
+	case SXE2_RX_DESC_STATUS_BROADCAST:
+		rxq->sw_stats.broadcast_pkts += 1;
+		break;
+	default:
+		break;
 	}
 }
 
@@ -196,11 +189,9 @@ sxe2_rx_pkts_refactor(struct sxe2_rx_queue *rxq,
 			} else if (split_rxe_flags[buf_idx] & SXE2_RX_DESC_STATUS_EOP_MASK) {
 				continue;
 			} else {
-				rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-					rte_memory_order_relaxed);
-				rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-				 first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN,
-				 rte_memory_order_relaxed);
+				rxq->sw_stats.drop_pkts += 1;
+				rxq->sw_stats.drop_bytes +=
+					first_seg->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN;
 				rte_pktmbuf_free(first_seg);
 				first_seg = NULL;
 				last_seg  = NULL;
@@ -218,11 +209,10 @@ sxe2_rx_pkts_refactor(struct sxe2_rx_queue *rxq,
 				mbuf_bufs[buf_idx]->data_len += rxq->crc_len;
 				mbuf_bufs[buf_idx]->pkt_len  += rxq->crc_len;
 			} else {
-				rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-					rte_memory_order_relaxed);
-				rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-				 mbuf_bufs[buf_idx]->pkt_len - rxq->crc_len + RTE_ETHER_CRC_LEN,
-				 rte_memory_order_relaxed);
+				rxq->sw_stats.drop_pkts += 1;
+				rxq->sw_stats.drop_bytes +=
+					mbuf_bufs[buf_idx]->pkt_len - rxq->crc_len +
+					RTE_ETHER_CRC_LEN;
 				rte_pktmbuf_free_seg(mbuf_bufs[buf_idx]);
 				continue;
 			}
diff --git a/drivers/net/sxe2/sxe2_txrx_vec_sse.c b/drivers/net/sxe2/sxe2_txrx_vec_sse.c
index f6e3f45937..182a7dfc17 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec_sse.c
+++ b/drivers/net/sxe2/sxe2_txrx_vec_sse.c
@@ -483,41 +483,16 @@ static __rte_always_inline uint16_t
 sxe2_rx_pkts_scattered_batch_vec_sse(struct sxe2_rx_queue *rxq,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-	const uint64_t *split_rxe_flags64;
 	uint8_t split_rxe_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
 	uint8_t umbcast_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
 	uint16_t rx_done_num;
 	uint16_t rx_pkt_done_num;
 	rx_pkt_done_num = 0;
 
-	if (rxq->vsi->adapter->devargs.sw_stats_en) {
-		rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
-				nb_pkts, split_rxe_flags, umbcast_flags);
-	} else {
-		rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
-				nb_pkts, split_rxe_flags, NULL);
-	}
+	rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
+			nb_pkts, split_rxe_flags, umbcast_flags);
 	if (rx_done_num == 0)
 		goto l_end;
-	if (!rxq->vsi->adapter->devargs.sw_stats_en) {
-		split_rxe_flags64 = (uint64_t *)split_rxe_flags;
-		if (rxq->pkt_first_seg == NULL &&
-			split_rxe_flags64[0] == 0 &&
-			split_rxe_flags64[1] == 0 &&
-			split_rxe_flags64[2] == 0 &&
-			split_rxe_flags64[3] == 0) {
-			rx_pkt_done_num = rx_done_num;
-			goto l_end;
-		}
-		if (rxq->pkt_first_seg == NULL) {
-			while (rx_pkt_done_num < rx_done_num &&
-			       split_rxe_flags[rx_pkt_done_num] == 0)
-				rx_pkt_done_num++;
-			if (rx_pkt_done_num == rx_done_num)
-				goto l_end;
-			rxq->pkt_first_seg = rx_pkts[rx_pkt_done_num];
-		}
-	}
 	rx_pkt_done_num += sxe2_rx_pkts_refactor(rxq, &rx_pkts[rx_pkt_done_num],
 			rx_done_num - rx_pkt_done_num, &split_rxe_flags[rx_pkt_done_num],
 			&umbcast_flags[rx_pkt_done_num]);
-- 
2.52.0


^ permalink raw reply related

* [PATCH v6 00/23] net/sxe2: added Linkdata sxe2 ethernet driver
From: liujie5 @ 2026-06-24  2:02 UTC (permalink / raw)
  To: stephen; +Cc: dev, Jie Liu
In-Reply-To: <20260622092731.3092201-1-liujie5@linkdatatechnology.com>

From: Jie Liu <liujie5@linkdatatechnology.com>

This patch set implements core functionality for the SXE2 PMD,
including basic driver framework, data path setup, and advanced
offload features (VLAN, RSS, TM, PTP, etc.).

V6:
	Refactored sxe2_ptype_tbl from adapter-indirection pattern (adapter->ptype_tbl[]) 
	to extern const direct-access pattern, matching txgbe PMD convention

	All vector/SIMD Rx paths (SSE, AVX2, AVX512, NEON) index sxe2_ptype_tbl[] directly without local pointer indirection

	remove flow_dup_pattern_mode devarg

Jie Liu (23):
  net/sxe2: remove software statistics devargs
  net/sxe2: add Rx framework and packet types callback
  net/sxe2: support AVX512 vectorized path for Rx and Tx
  net/sxe2: add AVX2 vector data path for Rx and Tx
  net/sxe2: add link update callback
  net/sxe2: support L2 filtering and MAC config
  drivers: support RSS feature
  net/sxe2: support TM hierarchy and shaping
  net/sxe2: support IPsec inline protocol offload
  net/sxe2: support statistics and multi-process
  drivers: interrupt handling
  net/sxe2: add NEON vec Rx/Tx burst functions
  drivers: add support for VF representors
  net/sxe2: add support for custom UDP tunnel ports
  net/sxe2: support firmware version reading
  net/sxe2: implement get monitor address
  common/sxe2: add shared SFP module definitions
  net/sxe2: support SFP module info and EEPROM access
  net/sxe2: implement private dump info
  net/sxe2: add mbuf validation in Tx debug mode
  common/sxe2: add callback for memory event handling
  net/sxe2: add private devargs parsing
  net/sxe2: update sxe2 feature matrix docs

 doc/guides/nics/features/sxe2.ini          |   56 +
 doc/guides/nics/sxe2.rst                   |  147 ++
 drivers/common/sxe2/sxe2_common.c          |  156 ++
 drivers/common/sxe2/sxe2_common.h          |    4 +
 drivers/common/sxe2/sxe2_flow_public.h     |  633 +++++++
 drivers/common/sxe2/sxe2_ioctl_chnl.c      |  178 +-
 drivers/common/sxe2/sxe2_ioctl_chnl_func.h |   18 +
 drivers/common/sxe2/sxe2_msg.h             |  118 ++
 drivers/net/sxe2/meson.build               |   52 +
 drivers/net/sxe2/sxe2_cmd_chnl.c           | 1587 +++++++++++++++-
 drivers/net/sxe2/sxe2_cmd_chnl.h           |  139 ++
 drivers/net/sxe2/sxe2_drv_cmd.h            |  523 +++++-
 drivers/net/sxe2/sxe2_dump.c               |  300 +++
 drivers/net/sxe2/sxe2_dump.h               |   12 +
 drivers/net/sxe2/sxe2_ethdev.c             | 1468 ++++++++++++++-
 drivers/net/sxe2/sxe2_ethdev.h             |  111 +-
 drivers/net/sxe2/sxe2_ethdev_repr.c        |  609 ++++++
 drivers/net/sxe2/sxe2_ethdev_repr.h        |   32 +
 drivers/net/sxe2/sxe2_filter.c             |  895 +++++++++
 drivers/net/sxe2/sxe2_filter.h             |  100 +
 drivers/net/sxe2/sxe2_flow.c               | 1391 ++++++++++++++
 drivers/net/sxe2/sxe2_flow.h               |   30 +
 drivers/net/sxe2/sxe2_flow_define.h        |  144 ++
 drivers/net/sxe2/sxe2_flow_parse_action.c  | 1182 ++++++++++++
 drivers/net/sxe2/sxe2_flow_parse_action.h  |   23 +
 drivers/net/sxe2/sxe2_flow_parse_engine.c  |  106 ++
 drivers/net/sxe2/sxe2_flow_parse_engine.h  |   13 +
 drivers/net/sxe2/sxe2_flow_parse_pattern.c | 1935 +++++++++++++++++++
 drivers/net/sxe2/sxe2_flow_parse_pattern.h |   46 +
 drivers/net/sxe2/sxe2_ipsec.c              | 1565 ++++++++++++++++
 drivers/net/sxe2/sxe2_ipsec.h              |  254 +++
 drivers/net/sxe2/sxe2_irq.c                | 1026 ++++++++++
 drivers/net/sxe2/sxe2_irq.h                |   25 +
 drivers/net/sxe2/sxe2_mac.c                |  530 ++++++
 drivers/net/sxe2/sxe2_mac.h                |   84 +
 drivers/net/sxe2/sxe2_mp.c                 |  414 +++++
 drivers/net/sxe2/sxe2_mp.h                 |   67 +
 drivers/net/sxe2/sxe2_queue.c              |   17 +-
 drivers/net/sxe2/sxe2_queue.h              |   15 +-
 drivers/net/sxe2/sxe2_rss.c                |  584 ++++++
 drivers/net/sxe2/sxe2_rss.h                |   81 +
 drivers/net/sxe2/sxe2_rx.c                 |   93 +-
 drivers/net/sxe2/sxe2_rx.h                 |    2 +
 drivers/net/sxe2/sxe2_security.c           |  335 ++++
 drivers/net/sxe2/sxe2_security.h           |   77 +
 drivers/net/sxe2/sxe2_stats.c              |  586 ++++++
 drivers/net/sxe2/sxe2_stats.h              |   39 +
 drivers/net/sxe2/sxe2_switchdev.c          |  332 ++++
 drivers/net/sxe2/sxe2_switchdev.h          |   33 +
 drivers/net/sxe2/sxe2_tm.c                 | 1151 ++++++++++++
 drivers/net/sxe2/sxe2_tm.h                 |   76 +
 drivers/net/sxe2/sxe2_tx.c                 |    7 +
 drivers/net/sxe2/sxe2_txrx.c               | 1958 +++++++++++++++++++-
 drivers/net/sxe2/sxe2_txrx.h               |    8 +
 drivers/net/sxe2/sxe2_txrx_check_mbuf.c    |  595 ++++++
 drivers/net/sxe2/sxe2_txrx_check_mbuf.h    |   38 +
 drivers/net/sxe2/sxe2_txrx_poll.c          |  284 ++-
 drivers/net/sxe2/sxe2_txrx_vec.c           |   46 +-
 drivers/net/sxe2/sxe2_txrx_vec.h           |   38 +-
 drivers/net/sxe2/sxe2_txrx_vec_avx2.c      |  747 ++++++++
 drivers/net/sxe2/sxe2_txrx_vec_avx512.c    |  867 +++++++++
 drivers/net/sxe2/sxe2_txrx_vec_common.h    |   54 +-
 drivers/net/sxe2/sxe2_txrx_vec_neon.c      |  689 +++++++
 drivers/net/sxe2/sxe2_txrx_vec_sse.c       |   38 +-
 drivers/net/sxe2/sxe2_vsi.c                |  146 ++
 drivers/net/sxe2/sxe2_vsi.h                |   12 +-
 drivers/net/sxe2/sxe2vf_regs.h             |   85 +
 67 files changed, 24733 insertions(+), 273 deletions(-)
 create mode 100644 drivers/common/sxe2/sxe2_flow_public.h
 create mode 100644 drivers/common/sxe2/sxe2_msg.h
 create mode 100644 drivers/net/sxe2/sxe2_dump.c
 create mode 100644 drivers/net/sxe2/sxe2_dump.h
 create mode 100644 drivers/net/sxe2/sxe2_ethdev_repr.c
 create mode 100644 drivers/net/sxe2/sxe2_ethdev_repr.h
 create mode 100644 drivers/net/sxe2/sxe2_filter.c
 create mode 100644 drivers/net/sxe2/sxe2_filter.h
 create mode 100644 drivers/net/sxe2/sxe2_flow.c
 create mode 100644 drivers/net/sxe2/sxe2_flow.h
 create mode 100644 drivers/net/sxe2/sxe2_flow_define.h
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_action.c
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_action.h
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_engine.c
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_engine.h
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_pattern.c
 create mode 100644 drivers/net/sxe2/sxe2_flow_parse_pattern.h
 create mode 100644 drivers/net/sxe2/sxe2_ipsec.c
 create mode 100644 drivers/net/sxe2/sxe2_ipsec.h
 create mode 100644 drivers/net/sxe2/sxe2_irq.c
 create mode 100644 drivers/net/sxe2/sxe2_mac.c
 create mode 100644 drivers/net/sxe2/sxe2_mac.h
 create mode 100644 drivers/net/sxe2/sxe2_mp.c
 create mode 100644 drivers/net/sxe2/sxe2_mp.h
 create mode 100644 drivers/net/sxe2/sxe2_rss.c
 create mode 100644 drivers/net/sxe2/sxe2_rss.h
 create mode 100644 drivers/net/sxe2/sxe2_security.c
 create mode 100644 drivers/net/sxe2/sxe2_security.h
 create mode 100644 drivers/net/sxe2/sxe2_stats.c
 create mode 100644 drivers/net/sxe2/sxe2_stats.h
 create mode 100644 drivers/net/sxe2/sxe2_switchdev.c
 create mode 100644 drivers/net/sxe2/sxe2_switchdev.h
 create mode 100644 drivers/net/sxe2/sxe2_tm.c
 create mode 100644 drivers/net/sxe2/sxe2_tm.h
 create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.c
 create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.h
 create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_avx2.c
 create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_avx512.c
 create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_neon.c
 create mode 100644 drivers/net/sxe2/sxe2vf_regs.h

-- 
2.52.0


^ permalink raw reply

* Re: [PATCH 0/3] lib: remove use of strncpy
From: fengchengwen @ 2026-06-24  1:53 UTC (permalink / raw)
  To: Bruce Richardson, dev
In-Reply-To: <20260623141930.704771-1-bruce.richardson@intel.com>

Series-reviewed-by: Chengwen Feng <fengchengwen@huawei.com>

On 6/23/2026 10:19 PM, Bruce Richardson wrote:
> Taking a lead from the kernel, which has just finished a multi-year
> effort to remove use of strncpy[1], rework DPDK to remove use of the
> same function. This series removes all remaining uses of strncpy
> in lib directory.
> 
> [1] https://www.phoronix.com/news/Linux-7.2-Drops-strncpy
> 
> Bruce Richardson (3):
>   ethdev: remove use of strncpy
>   eventdev: improve bounds checks for names in adapter create
>   vhost: remove use of strncpy
> 
>  lib/ethdev/ethdev_driver.c              |  7 ++++++-
>  lib/eventdev/rte_event_eth_tx_adapter.c |  4 ++--
>  lib/vhost/socket.c                      |  4 +---
>  lib/vhost/vduse.c                       |  2 +-
>  lib/vhost/vhost.c                       | 12 +++---------
>  lib/vhost/vhost.h                       |  2 +-
>  6 files changed, 14 insertions(+), 17 deletions(-)
> 
> --
> 2.53.0
> 


^ permalink raw reply

* Re: [PATCH v2] dts: fix port info getter when port is not found
From: Patrick Robb @ 2026-06-24  1:44 UTC (permalink / raw)
  To: Riley Fletcher; +Cc: dev, stable, Luca.Vizzarro
In-Reply-To: <20260422183524.2012369-1-rileyf@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 33 bytes --]

Merged to dpdk-next-dts, thanks.

[-- Attachment #2: Type: text/html, Size: 54 bytes --]

^ permalink raw reply

* Re: [PATCH v3 04/11] bus/pci: fix mapping leak in bus cleanup
From: fengchengwen @ 2026-06-24  1:24 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, stephen, bruce.richardson, longli, hemant.agrawal, stable,
	Chenbo Xia, Nipun Gupta, Morten Brørup, Kevin Laatz
In-Reply-To: <20260623105439.2144694-5-david.marchand@redhat.com>

On 6/23/2026 6:54 PM, David Marchand wrote:
> When calling this bus cleanup, PCI resources were not unmapped.
> 
> Fixes: 1cab1a40ea9b ("bus: cleanup devices on shutdown")
> Cc: stable@dpdk.org
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  drivers/bus/pci/pci_common.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
> index fd18b8772b..791e9a7b49 100644
> --- a/drivers/bus/pci/pci_common.c
> +++ b/drivers/bus/pci/pci_common.c
> @@ -344,6 +344,10 @@ pci_cleanup(void)
>  			rte_errno = errno;
>  			error = -1;
>  		}
> +
> +		if (drv->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
> +			rte_pci_unmap_device(dev);

rte_pci_unmap_device() also invoked in some drivers remove callback, e.g.
  eth_virtio_pci_uninit()
  ifcvf_pci_remove()
  ...

In this case it may double-free.

> +
>  		dev->device.driver = NULL;
>  
>  free:


^ permalink raw reply

* Re: [PATCH v3 03/11] bus/vdev: remove driver setting in probe
From: fengchengwen @ 2026-06-24  1:15 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, stephen, bruce.richardson, longli, hemant.agrawal
In-Reply-To: <20260623105439.2144694-4-david.marchand@redhat.com>

Acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 6/23/2026 6:54 PM, David Marchand wrote:
> Setting the device driver field is not the responsibility of the
> probe_device callback anymore, but that of EAL (see local_dev_probe).
> Yet, because of the VDEV API, rte_vdev_init() must be updated to mark
> the device as probed.
> 
> Fixes: f282771a04ef ("bus: factorize driver reference")
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>


^ permalink raw reply

* Re: [PATCH v3 01/11] bus: fix reference to plug callback
From: fengchengwen @ 2026-06-24  1:14 UTC (permalink / raw)
  To: David Marchand, dev
  Cc: thomas, stephen, bruce.richardson, longli, hemant.agrawal
In-Reply-To: <20260623105439.2144694-2-david.marchand@redhat.com>

Acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 6/23/2026 6:54 PM, David Marchand wrote:
> Remove now unused typedef, update documentation
> and some log following the callback rename.
> 
> Fixes: 76622feba9e6 ("bus: refactor device probe")
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>


^ permalink raw reply

* Re: [PATCH] app/dma_perf: skip case if worker maps to main lcore
From: fengchengwen @ 2026-06-24  0:57 UTC (permalink / raw)
  To: Bruce Richardson, Rupesh Chiluka
  Cc: Cheng Jiang, dev, gakhil, anoobj, ktejasree
In-Reply-To: <ajo-ra8UUPRXKoWF@bricha3-mobl1.ger.corp.intel.com>

On 6/23/2026 4:07 PM, Bruce Richardson wrote:
> On Tue, Jun 23, 2026 at 10:28:41AM +0530, Rupesh Chiluka wrote:
>> Refuse to run DMA/CPU mem-copy cases when any worker is bound to the
>> EAL main lcore.
>>
> 
> Can you explain a bit more why?

+1
I understand this bugfix: with the following config, the test will get stuck
because the main lcore is the same as a worker lcore.

[GLOBAL]
eal_args=--in-memory --file-prefix=test -l 10-12

[case4]
type=CPU_MEM_COPY
mem_size=10
buf_size=64,8192,2,MUL
src_numa_node=0
dst_numa_node=1
lcore = 10, 11

So please add more explanation, as Bruce suggested, and also add Fixes/Cc tags.

> 
>> Signed-off-by: Rupesh Chiluka <rchiluka@marvell.com>
>> ---
>>  app/test-dma-perf/main.c | 9 +++++++++
>>  1 file changed, 9 insertions(+)
>>
>> diff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c
>> index 4249dcfd3d..b6aa5b8401 100644
>> --- a/app/test-dma-perf/main.c
>> +++ b/app/test-dma-perf/main.c
>> @@ -109,6 +109,7 @@ run_test_case(struct test_configure *case_cfg)
>>  static void
>>  run_test(uint32_t case_id, struct test_configure *case_cfg)
>>  {
>> +	uint32_t main_lcore = rte_get_main_lcore();
>>  	uint32_t nb_lcores = rte_lcore_count();
>>  	struct test_configure_entry *mem_size = &case_cfg->mem_size;
>>  	struct test_configure_entry *buf_size = &case_cfg->buf_size;
>> @@ -122,6 +123,14 @@ run_test(uint32_t case_id, struct test_configure *case_cfg)
>>  		return;
>>  	}
>>  
>> +	for (uint32_t i = 0; i < case_cfg->num_worker; i++) {

Please declare the variables at the start of the function.

Thanks

>> +		if (case_cfg->dma_config[i].lcore_dma_map.lcore == main_lcore) {
>> +			printf("Case %u: worker %u cannot run on the EAL main lcore (%u).\n",
>> +			       case_id, i, main_lcore);
>> +			return;
>> +		}
>> +	}
>> +
>>  	printf("Number of used lcores: %u.\n", nb_lcores);
>>  
>>  	if (mem_size->incr != 0)
>> -- 
>> 2.48.1
>>
> 


^ permalink raw reply

* Re: [PATCH v2 1/3] dma/ae4dma: introduce AMD AE4DMA DMA PMD
From: fengchengwen @ 2026-06-24  0:38 UTC (permalink / raw)
  To: David Marchand, Raghavendra Ningoji
  Cc: dev, Thomas Monjalon, Bhagyada Modali, Robin Jarry,
	Selwin.Sebastian
In-Reply-To: <CAJFAV8w_67sp9iGW9+Gpwxx0ZkDYc4Zc2JKDtsPFFccU0UHePg@mail.gmail.com>

On 6/22/2026 8:06 PM, David Marchand wrote:
> On Mon, 25 May 2026 at 20:43, Raghavendra Ningoji
> <raghavendra.ningoji@amd.com> wrote:
>> Add the skeleton of a new dmadev poll-mode driver for the AMD AE4DMA
>> hardware DMA engine, providing only PCI probe/remove and per-queue
>> hardware initialisation. An AE4DMA engine exposes 16 hardware command
>> queues, each with a 32-entry descriptor ring; the PMD maps each
>> hardware channel to its own dmadev with a single virtual channel,
>> so a PCI function appears as 16 dmadevs named "<pci-bdf>-ch0" ..
>> "<pci-bdf>-ch15".
> I am not familiar with DMA drivers, I am not sure it is something acceptable.
> @Chengwen for info.

This is acceptable. For a DMA controller (which may be a PCI device), there
may be multiple hardware channels, and each hardware channel is presented as
a dmadev device. The device name can be in the format of BDF-chX.


^ permalink raw reply

* Re: [PATCH v2 1/2] test/dma: update the sg test to verify wrap around case
From: fengchengwen @ 2026-06-24  0:29 UTC (permalink / raw)
  To: Tejasree Kondoj, Akhil Goyal, Kevin Laatz, Bruce Richardson
  Cc: Vidya Sagar Velumuri, Anoob Joseph, dev
In-Reply-To: <20260622135208.87697-2-ktejasree@marvell.com>

On 6/22/2026 9:52 PM, Tejasree Kondoj wrote:
> Run the sg test in a loop to verify wrap around case.
> Total number commands submitted to be more than the number descriptors
> allocated to verify the scenario.
> 
> Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
> Signed-off-by: Tejasree Kondoj <ktejasree@marvell.com>
> ---
>  app/test/test_dmadev.c     | 45 ++++++++++++++++++++++++--------------
>  app/test/test_dmadev_api.c |  1 -
>  app/test/test_dmadev_api.h |  2 ++
>  3 files changed, 31 insertions(+), 17 deletions(-)
> 
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
> index 5488a1af33..b30f2214e5 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -393,36 +393,28 @@ test_stop_start(int16_t dev_id, uint16_t vchan)
>  }
>  
>  static int
> -test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
> +test_enqueue_sg(int16_t dev_id, uint16_t vchan, unsigned int n_sge, unsigned int test_len)
>  {
> -	unsigned int src_len, dst_len, n_sge, len, i, j, k;
>  	char orig_src[COPY_LEN], orig_dst[COPY_LEN];
> -	struct rte_dma_info info = { 0 };
> +	unsigned int src_len, dst_len, i, j, k;
>  	enum rte_dma_status_code status;
>  	uint16_t id, n_src, n_dst;
>  
> -	if (rte_dma_info_get(dev_id, &info) < 0)
> -		ERR_RETURN("Failed to get dev info");
> -
> -	if (info.max_sges < 2)
> -		ERR_RETURN("Test needs minimum 2 SG pointers");
> -
> -	n_sge = info.max_sges;
> -
>  	for (n_src = 1; n_src <= n_sge; n_src++) {
>  		for (n_dst = 1; n_dst <= n_sge; n_dst++) {
>  			/* Normalize SG buffer lengths */
> -			len = COPY_LEN;
> -			len -= (len % (n_src * n_dst));
> -			dst_len = len / n_dst;
> -			src_len = len / n_src;
> -
>  			struct rte_dma_sge *sg_src = alloca(sizeof(struct rte_dma_sge) * n_sge);
>  			struct rte_dma_sge *sg_dst = alloca(sizeof(struct rte_dma_sge) * n_sge);
>  			struct rte_mbuf **src = alloca(sizeof(struct rte_mbuf *) * n_sge);
>  			struct rte_mbuf **dst = alloca(sizeof(struct rte_mbuf *) * n_sge);
>  			char **src_data = alloca(sizeof(char *) * n_sge);
>  			char **dst_data = alloca(sizeof(char *) * n_sge);
> +			unsigned int len = test_len - (test_len % (n_src * n_dst));
> +
> +			dst_len = len / n_dst;
> +			src_len = len / n_src;
> +			if (dst_len == 0 || src_len == 0)
> +				continue;
>  
>  			for (i = 0 ; i < len; i++)
>  				orig_src[i] = rte_rand() & 0xFF;
> @@ -514,6 +506,27 @@ test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
>  	return 0;
>  }
>  
> +static int
> +test_enqueue_sg_copies(int16_t dev_id, uint16_t vchan)
> +{
> +	struct rte_dma_info info = { 0 };
> +	unsigned int n_sge, len;
> +	int loop_count = 0;
> +
> +	if (rte_dma_info_get(dev_id, &info) < 0)
> +		ERR_RETURN("Failed to get dev info");
> +
> +	n_sge = RTE_MIN(info.max_sges, TEST_SG_MAX);

test_enqueue_sg() has protection:
+			if (dst_len == 0 || src_len == 0)
+				continue;

So there is no need for RTE_MIN; just use info.max_sges, so that the test exercises the device's capability.

> +	len = COPY_LEN;
> +
> +	do {
> +		test_enqueue_sg(dev_id, vchan, n_sge, len);

We need to check the return code of test_enqueue_sg(); otherwise this case will return OK
even if the copy failed.

> +		loop_count++;
> +	} while (loop_count * n_sge * n_sge < TEST_RINGSIZE * 3);
> +
> +	return 0;
> +}
> +
>  static int
>  test_single_sva_copy(int16_t dev_id, uint16_t vchan, const char *mem_src,
>  		     char *src, char *dst, uint32_t len)
> diff --git a/app/test/test_dmadev_api.c b/app/test/test_dmadev_api.c
> index 1ba053696b..4bb8f9e820 100644
> --- a/app/test/test_dmadev_api.c
> +++ b/app/test/test_dmadev_api.c
> @@ -16,7 +16,6 @@ extern int test_dma_api(uint16_t dev_id);
>  
>  #define TEST_MEMCPY_SIZE	1024
>  #define TEST_WAIT_US_VAL	50000
> -#define TEST_SG_MAX		64
>  
>  static int16_t test_dev_id;
>  static int16_t invalid_dev_id;
> diff --git a/app/test/test_dmadev_api.h b/app/test/test_dmadev_api.h
> index 33fbc5bd41..a03f7acd4f 100644
> --- a/app/test/test_dmadev_api.h
> +++ b/app/test/test_dmadev_api.h
> @@ -2,4 +2,6 @@
>   * Copyright(c) 2021 HiSilicon Limited
>   */
>  
> +#define TEST_SG_MAX		64

No need to make this macro public.

> +
>  int test_dma_api(uint16_t dev_id);


^ permalink raw reply

* RE: [EXTERNAL] Re: [PATCH 1/2] eal: return error on devargs truncation in hotplug MP messages
From: Long Li @ 2026-06-24  0:06 UTC (permalink / raw)
  To: David Marchand
  Cc: dev@dpdk.org, bruce.richardson@intel.com,
	stephen@networkplumber.org, Burakov, Anatoly
In-Reply-To: <CAJFAV8ye5NG=-xkXjNX5+jBpJgV1S5oe02_+rAa9BYwNv_wmAA@mail.gmail.com>

> On Wed, 25 Mar 2026 at 02:45, Long Li <longli@microsoft.com> wrote:
> >
> > The EAL hotplug multi-process messaging uses a fixed-size buffer
> > (EAL_DEV_MP_DEV_ARGS_MAX_LEN, 128 bytes) for device arguments.
> > When devargs exceeds this limit, strlcpy silently truncates the
> > string. This causes secondary processes to receive incomplete devargs
> > during hotplug re-add, leading to failed port re-initialization.
> >
> > For example, a MANA PCI device with 6 mac= arguments:
> >
> >   mac=AA:BB:CC:DD:EE:01,mac=AA:BB:CC:DD:EE:02,
> >   mac=AA:BB:CC:DD:EE:03,mac=AA:BB:CC:DD:EE:04,
> >   mac=AA:BB:CC:DD:EE:05,mac=AA:BB:CC:DD:EE:06
> >
> > produces a 131-byte devargs string that gets silently truncated to 127
> > bytes, losing the last MAC address.
> >
> > Return -E2BIG from rte_dev_probe() and rte_dev_remove() when devargs
> > would be truncated, instead of silently corrupting data.
> >
> > Signed-off-by: Long Li <longli@microsoft.com>
> 
> Worth a Fixes: tag and Cc: stable.
> 
> > ---
> >  lib/eal/common/eal_common_dev.c | 11 +++++++++++
> >  1 file changed, 11 insertions(+)
> >
> > diff --git a/lib/eal/common/eal_common_dev.c
> > b/lib/eal/common/eal_common_dev.c index 7185de0cb9..de24d14d28
> 100644
> > --- a/lib/eal/common/eal_common_dev.c
> > +++ b/lib/eal/common/eal_common_dev.c
> > @@ -250,6 +250,11 @@ rte_dev_probe(const char *devargs)
> >
> >         memset(&req, 0, sizeof(req));
> >         req.t = EAL_DEV_REQ_TYPE_ATTACH;
> > +       if (strlen(devargs) >= EAL_DEV_MP_DEV_ARGS_MAX_LEN) {
> > +               EAL_LOG(ERR, "devargs truncated (len %zu, max %d)",
> > +                       strlen(devargs), EAL_DEV_MP_DEV_ARGS_MAX_LEN);
> > +               return -E2BIG;
> > +       }
> >         strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
> 
> Please move the check before the memset().
> 
> >
> >         if (rte_eal_process_type() != RTE_PROC_PRIMARY) { @@ -397,6
> > +402,12 @@ rte_dev_remove(struct rte_device *dev)
> >
> >         memset(&req, 0, sizeof(req));
> >         req.t = EAL_DEV_REQ_TYPE_DETACH;
> > +       if (strlen(devargs) >= EAL_DEV_MP_DEV_ARGS_MAX_LEN) {
> > +               EAL_LOG(ERR, "devargs truncated (len %zu, max %d)",
> > +                       strlen(devargs), EAL_DEV_MP_DEV_ARGS_MAX_LEN);
> > +               free(devargs);
> > +               return -E2BIG;
> > +       }
> >         strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
> >         free(devargs);
> >
> 
> Why do we need to validate devargs on cleanup?
> Its length should have been validated during probe.
> 
> 
> --
> David Marchand

I have sent v2 with all the comments addressed.

Thanks,

Long


^ permalink raw reply

* [PATCH v2] eal: return error on devargs truncation in hotplug MP messages
From: Long Li @ 2026-06-24  0:05 UTC (permalink / raw)
  To: dev; +Cc: stable, david.marchand, Long Li
In-Reply-To: <20260325014506.1866374-1-longli@microsoft.com>

The EAL hotplug multi-process messaging uses a fixed-size buffer
(EAL_DEV_MP_DEV_ARGS_MAX_LEN, 128 bytes) for device arguments.
When devargs exceeds this limit, strlcpy silently truncates the
string. This causes secondary processes to receive incomplete
devargs during hotplug re-add, leading to failed port
re-initialization.

For example, a MANA PCI device with 6 mac= arguments:

  mac=AA:BB:CC:DD:EE:01,mac=AA:BB:CC:DD:EE:02,
  mac=AA:BB:CC:DD:EE:03,mac=AA:BB:CC:DD:EE:04,
  mac=AA:BB:CC:DD:EE:05,mac=AA:BB:CC:DD:EE:06

produces a 131-byte devargs string that gets silently truncated
to 127 bytes, losing the last MAC address.

Return -E2BIG from rte_dev_probe() when devargs would be truncated,
instead of silently corrupting data. rte_dev_remove() does not need
the same check because the length was already validated at probe time.

Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
Cc: stable@dpdk.org

Signed-off-by: Long Li <longli@microsoft.com>
---
v2:
 - Added Fixes: tag and Cc: stable@dpdk.org.
 - Moved the length check before memset() in rte_dev_probe().
 - Removed the redundant length check from rte_dev_remove();
   devargs length is already validated at probe time.
 - Dropped the [2/2] meson-options patch from this series; it will
   be sent separately.

 lib/eal/common/eal_common_dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/eal/common/eal_common_dev.c b/lib/eal/common/eal_common_dev.c
index 48b631532a..f3fc4d585e 100644
--- a/lib/eal/common/eal_common_dev.c
+++ b/lib/eal/common/eal_common_dev.c
@@ -271,6 +271,12 @@ rte_dev_probe(const char *devargs)
 	struct rte_device *dev;
 	int ret;
 
+	if (strlen(devargs) >= EAL_DEV_MP_DEV_ARGS_MAX_LEN) {
+		EAL_LOG(ERR, "devargs truncated (len %zu, max %d)",
+			strlen(devargs), EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+		return -E2BIG;
+	}
+
 	memset(&req, 0, sizeof(req));
 	req.t = EAL_DEV_REQ_TYPE_ATTACH;
 	strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v4 7/7] test/bpf: check that bpf_convert can be JIT'd
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Konstantin Ananyev, Marat Khalili
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

Run each converted filter through both the interpreter and the JIT and
check they agree, catching JIT miscompiles.

test_bpf_filter and test_bpf_match did nearly the same thing: compile,
load and run a filter against the dummy packet. Combine them into
test_bpf_match, which now builds the packet itself and returns whether
the filter matched. Callers run it for both load methods.

The dummy packet is a UDP packet to a fixed destination MAC, source
and destination ports, so the filter results are deterministic. None
of the sample filters should match it, so assert that; a convert or
JIT bug that flips a result is then caught. The destination MAC and
source port are chosen so the negative ethernet and port filters do
not match, and "port not 53 and not arp" is dropped as it matches
any non-ARP packet that lacks port 53.

Reduce log output to make it easier to match which expression might be
causing issues.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/test_bpf.c | 171 ++++++++++++++++++++++++++------------------
 1 file changed, 100 insertions(+), 71 deletions(-)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 9adffcce64..8934c98c3c 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -32,6 +32,7 @@ test_bpf(void)
 #include <rte_bpf.h>
 #include <rte_ether.h>
 #include <rte_ip.h>
+#include <rte_udp.h>
 
 
 /* Tests of most simple BPF programs (no instructions, one instruction etc.) */
@@ -4763,11 +4764,13 @@ load_cbpf_program_convert(struct bpf_program *cbpf_program, const char *str)
 		return NULL;
 	}
 
+#ifdef DEBUG
 	printf("bpf convert(\"%s\") produced:\n", str);
 	rte_bpf_dump(stdout, prm->ins, prm->nb_ins);
 
 	printf("%s \"%s\"\n", __func__, str);
 	test_bpf_dump(cbpf_program, prm);
+#endif
 
 	bpf = rte_bpf_load(prm);
 	rte_free(prm);
@@ -4792,18 +4795,65 @@ load_cbpf_program_direct(struct bpf_program *cbpf_program, const char *str __rte
 	});
 }
 
+static const load_cbpf_program_t cbpf_program_loaders[] = {
+	load_cbpf_program_convert,
+	load_cbpf_program_direct,
+};
+
+/* Setup Ethernet/IP/UDP headers in a dummy packet buffer for filter tests */
+static void
+dummy_ip_prep(void *data, uint16_t plen)
+{
+	struct {
+		struct rte_ether_hdr eth_hdr;
+		struct rte_ipv4_hdr ip_hdr;
+		struct rte_udp_hdr udp_hdr;
+	} *hdr = data;
+
+	hdr->eth_hdr = (struct rte_ether_hdr) {
+		.dst_addr.addr_bytes = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e },
+		.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4),
+	};
+	hdr->ip_hdr = (struct rte_ipv4_hdr) {
+		.version_ihl = RTE_IPV4_VHL_DEF,
+		.total_length = rte_cpu_to_be_16(plen),
+		.time_to_live = IPDEFTTL,
+		.next_proto_id = IPPROTO_UDP,
+		.src_addr = rte_cpu_to_be_32(RTE_IPV4_LOOPBACK),
+		.dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST),
+	};
+	hdr->udp_hdr = (struct rte_udp_hdr) {
+		.src_port = rte_cpu_to_be_16(49152),	/* fixed, avoids filter ports */
+		.dst_port = rte_cpu_to_be_16(9),	/* discard port */
+		.dgram_len = rte_cpu_to_be_16(plen - sizeof(struct rte_ipv4_hdr)),
+		.dgram_cksum = 0,
+	};
+}
+
+/*
+ * Compile a pcap filter, load it with the given loader, then run it against
+ * a standard dummy packet with both the interpreter and (when available) the
+ * JIT, checking the two agree.
+ *
+ * Returns 1 if the filter matched, 0 if it did not, and -1 on any error
+ * (compile, load, or interpreter/JIT mismatch).
+ */
 static int
-test_bpf_match(pcap_t *pcap, const char *str, struct rte_mbuf *mb,
+test_bpf_match(pcap_t *pcap, const char *str,
 	load_cbpf_program_t load_cbpf_program)
 {
+	uint8_t tbuf[RTE_MBUF_DEFAULT_BUF_SIZE];
+	const uint32_t plen = 100;
 	struct bpf_program fcode;
-	struct rte_bpf *bpf;
+	struct rte_mbuf mb = { 0 };
+	struct rte_bpf *bpf = NULL;
 	int ret = -1;
 	uint64_t rc;
 
+	printf("%s '%s'\n", __func__, str);
 	if (pcap_compile(pcap, &fcode, str, 1, PCAP_NETMASK_UNKNOWN)) {
 		printf("%s@%d: pcap_compile(\"%s\") failed: %s;\n",
-		       __func__, __LINE__,  str, pcap_geterr(pcap));
+		       __func__, __LINE__, str, pcap_geterr(pcap));
 		return -1;
 	}
 
@@ -4811,15 +4861,41 @@ test_bpf_match(pcap_t *pcap, const char *str, struct rte_mbuf *mb,
 	if (bpf == NULL) {
 		printf("%s@%d: failed to load cbpf program for \"%s\", error=%d(%s);\n",
 			__func__, __LINE__, str, rte_errno, strerror(rte_errno));
+		test_bpf_dump(&fcode, NULL);
 		goto error;
 	}
 
-	rc = rte_bpf_exec(bpf, mb);
-	/* The return code from bpf capture filter is non-zero if matched */
-	ret = (rc == 0);
+	dummy_mbuf_prep(&mb, tbuf, sizeof(tbuf), plen);
+	dummy_ip_prep(rte_pktmbuf_mtod(&mb, void *), plen);
+
+	rc = rte_bpf_exec(bpf, &mb);
+
+	/* Verify the JIT, when available, produces the same result. */
+	{
+		struct rte_bpf_jit jit;
+
+		rte_bpf_get_jit(bpf, &jit);
+		if (jit.func != NULL) {
+			fflush(stdout);
+			if (jit.func(&mb) != rc) {
+				printf("%s@%d: JIT return code does not match\n",
+				       __func__, __LINE__);
+				goto error;
+			}
+		}
+#ifdef RTE_BPF_JIT_SUPPORTED
+		else {
+			printf("%s@%d: no JIT code generated\n",
+			       __func__, __LINE__);
+			goto error;
+		}
+#endif
+	}
+
+	/* The return code from a bpf capture filter is non-zero if matched. */
+	ret = (rc != 0);
 error:
-	if (bpf)
-		rte_bpf_destroy(bpf);
+	rte_bpf_destroy(bpf);
 	pcap_freecode(&fcode);
 	return ret;
 }
@@ -4828,44 +4904,13 @@ test_bpf_match(pcap_t *pcap, const char *str, struct rte_mbuf *mb,
 static int
 test_bpf_filter_sanity(pcap_t *pcap)
 {
-	static const load_cbpf_program_t cbpf_program_loaders[] = {
-		load_cbpf_program_convert,
-		load_cbpf_program_direct,
-	};
-
-	const uint32_t plen = 100;
-	struct rte_mbuf mb, *m;
-	uint8_t tbuf[RTE_MBUF_DEFAULT_BUF_SIZE];
-	struct {
-		struct rte_ether_hdr eth_hdr;
-		struct rte_ipv4_hdr ip_hdr;
-	} *hdr;
-
-	memset(&mb, 0, sizeof(mb));
-	dummy_mbuf_prep(&mb, tbuf, sizeof(tbuf), plen);
-	m = &mb;
-
-	hdr = rte_pktmbuf_mtod(m, typeof(hdr));
-	hdr->eth_hdr = (struct rte_ether_hdr) {
-		.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
-		.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4),
-	};
-	hdr->ip_hdr = (struct rte_ipv4_hdr) {
-		.version_ihl = RTE_IPV4_VHL_DEF,
-		.total_length = rte_cpu_to_be_16(plen),
-		.time_to_live = IPDEFTTL,
-		.next_proto_id = IPPROTO_RAW,
-		.src_addr = rte_cpu_to_be_32(RTE_IPV4_LOOPBACK),
-		.dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST),
-	};
-
-	for (int li = 0; li != RTE_DIM(cbpf_program_loaders); ++li) {
-		if (test_bpf_match(pcap, "ip", m, cbpf_program_loaders[li]) != 0) {
+	for (unsigned int li = 0; li != RTE_DIM(cbpf_program_loaders); ++li) {
+		if (test_bpf_match(pcap, "ip", cbpf_program_loaders[li]) != 1) {
 			printf("%s@%d: filter \"ip\" doesn't match test data\n",
 			       __func__, __LINE__);
 			return -1;
 		}
-		if (test_bpf_match(pcap, "not ip", m, cbpf_program_loaders[li]) == 0) {
+		if (test_bpf_match(pcap, "not ip", cbpf_program_loaders[li]) != 0) {
 			printf("%s@%d: filter \"not ip\" does match test data\n",
 			       __func__, __LINE__);
 			return -1;
@@ -4889,7 +4934,6 @@ static const char * const sample_filters[] = {
 	"port 53",
 	"host 192.0.2.1 and not (port 80 or port 25)",
 	"host 2001:4b98:db0::8 and not port 80 and not port 25",
-	"port not 53 and not arp",
 	"(tcp[0:2] > 1500 and tcp[0:2] < 1550) or (tcp[2:2] > 1500 and tcp[2:2] < 1550)",
 	"ether proto 0x888e",
 	"ether[0] & 1 = 0 and ip[16] >= 224",
@@ -4916,35 +4960,10 @@ static const char * const sample_filters[] = {
 	"or host 192.0.2.1 or host 192.0.2.100 or host 192.0.2.200"),
 };
 
-static int
-test_bpf_filter(pcap_t *pcap, const char *s, load_cbpf_program_t load_cbpf_program)
-{
-	struct bpf_program fcode;
-	struct rte_bpf *bpf;
-
-	if (pcap_compile(pcap, &fcode, s, 1, PCAP_NETMASK_UNKNOWN)) {
-		printf("%s@%d: pcap_compile(\"%s\") failed: %s;\n",
-		       __func__, __LINE__, s, pcap_geterr(pcap));
-		return -1;
-	}
-
-	bpf = load_cbpf_program(&fcode, s);
-	if (bpf == NULL) {
-		printf("%s@%d: failed to load cbpf program for \"%s\", error=%d(%s);\n",
-			__func__, __LINE__, s, rte_errno, strerror(rte_errno));
-		test_bpf_dump(&fcode, NULL);
-	}
-
-	rte_bpf_destroy(bpf);
-
-	pcap_freecode(&fcode);
-	return (bpf == NULL) ? -1 : 0;
-}
-
 static int
 test_bpf_convert(void)
 {
-	unsigned int i;
+	unsigned int i, li;
 	pcap_t *pcap;
 	int rc;
 
@@ -4956,8 +4975,18 @@ test_bpf_convert(void)
 
 	rc = test_bpf_filter_sanity(pcap);
 	for (i = 0; i < RTE_DIM(sample_filters); i++) {
-		rc |= test_bpf_filter(pcap, sample_filters[i], load_cbpf_program_convert);
-		rc |= test_bpf_filter(pcap, sample_filters[i], load_cbpf_program_direct);
+		for (li = 0; li < RTE_DIM(cbpf_program_loaders); li++) {
+			int m = test_bpf_match(pcap, sample_filters[i],
+					       cbpf_program_loaders[li]);
+
+			/* None of the sample filters match the dummy packet. */
+			if (m != 0) {
+				if (m > 0)
+					printf("%s@%d: filter \"%s\" unexpectedly matched\n",
+					       __func__, __LINE__, sample_filters[i]);
+				rc = -1;
+			}
+		}
 	}
 
 	pcap_close(pcap);
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 6/7] bpf/arm64: add BPF_ABS/BPF_IND packet load support
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, Wathsala Vithanage, Konstantin Ananyev,
	Marat Khalili
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

The arm64 JIT rejected BPF_LD | BPF_ABS and BPF_LD | BPF_IND with
"invalid opcode", so cBPF programs converted by rte_bpf_convert() could
not be JITed. Add these opcodes, mirroring the x86 JIT: a fast path for
data held in the first mbuf segment, and a __rte_pktmbuf_read() slow
path for everything else.

The forward branches over the call cannot use fixed distances:
emit_call() materializes the helper address with a variable number of
mov/movk instructions, so the block sizes are not known up front. Size
the three blocks (fast path, slow path, common tail) in a dry run, then
emit for real with the branches resolved from the measured offsets.

Programs using these opcodes use the call register layout, since the
slow path makes a function call.

Bugzilla ID: 1427

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/bpf/bpf_jit_arm64.c | 149 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 148 insertions(+), 1 deletion(-)

diff --git a/lib/bpf/bpf_jit_arm64.c b/lib/bpf/bpf_jit_arm64.c
index 776d7c8e97..7b2a1595e8 100644
--- a/lib/bpf/bpf_jit_arm64.c
+++ b/lib/bpf/bpf_jit_arm64.c
@@ -1125,6 +1125,135 @@ emit_branch(struct a64_jit_ctx *ctx, uint8_t op, uint32_t i, int16_t off)
 	emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off));
 }
 
+/* LD_ABS/LD_IND code block offsets (in arm64 instructions) */
+enum {
+	LDMB_FAST_OFS, /* fast path */
+	LDMB_SLOW_OFS, /* slow path */
+	LDMB_FIN_OFS,  /* common tail */
+	LDMB_OFS_NUM
+};
+
+/*
+ * Helper for emit_ld_mbuf(): fast path.
+ * Compute the packet offset; if it lies inside the first segment leave the
+ * data pointer in R0, otherwise branch to the slow path.
+ */
+static void
+emit_ldmb_fast_path(struct a64_jit_ctx *ctx, uint8_t src, uint8_t mode,
+		    uint32_t sz, int32_t imm, const uint32_t ofs[LDMB_OFS_NUM])
+{
+	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
+	uint8_t r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
+	uint8_t tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
+	uint8_t tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
+	uint8_t tmp3 = ebpf_to_a64_reg(ctx, TMP_REG_3);
+
+	/* off = imm (+ src for BPF_IND) */
+	emit_mov_imm(ctx, 1, tmp1, imm);
+	if (mode == BPF_IND)
+		emit_add(ctx, 1, tmp1, src);
+
+	/* if ((int64_t)(mbuf->data_len - off) < sz) goto slow_path */
+	emit_mov_imm(ctx, 1, tmp2, offsetof(struct rte_mbuf, data_len));
+	emit_ldr(ctx, BPF_H, tmp2, r6, tmp2);
+	emit_sub(ctx, 1, tmp2, tmp1);
+	emit_mov_imm(ctx, 1, tmp3, sz);
+	emit_cmp(ctx, 1, tmp2, tmp3);
+	emit_b_cond(ctx, A64_LT, (int32_t)(ofs[LDMB_SLOW_OFS] - ctx->idx));
+
+	/* R0 = mbuf->buf_addr + mbuf->data_off + off */
+	emit_mov_imm(ctx, 1, tmp2, offsetof(struct rte_mbuf, data_off));
+	emit_ldr(ctx, BPF_H, tmp2, r6, tmp2);
+	emit_mov_imm(ctx, 1, r0, offsetof(struct rte_mbuf, buf_addr));
+	emit_ldr(ctx, EBPF_DW, r0, r6, r0);
+	emit_add(ctx, 1, r0, tmp2);
+	emit_add(ctx, 1, r0, tmp1);
+
+	emit_b(ctx, (int32_t)(ofs[LDMB_FIN_OFS] - ctx->idx));
+}
+
+/*
+ * Helper for emit_ld_mbuf(): slow path.
+ * R0 = __rte_pktmbuf_read(mbuf, off, sz, buf); return 0 if NULL.
+ * The scratch buffer is the space reserved by __rte_bpf_validate() at the
+ * bottom of the eBPF stack frame, i.e. (frame_pointer - stack_ofs).
+ */
+static void
+emit_ldmb_slow_path(struct a64_jit_ctx *ctx, uint32_t sz, uint32_t stack_ofs)
+{
+	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
+	uint8_t r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
+	uint8_t fp = ebpf_to_a64_reg(ctx, EBPF_FP);
+	uint8_t tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
+
+	/* arguments of __rte_pktmbuf_read(mbuf, off, len, buf) */
+	emit_mov_64(ctx, A64_R(1), tmp1);		/* off (held in tmp1) */
+	emit_mov_64(ctx, A64_R(0), r6);			/* mbuf */
+	emit_mov_imm(ctx, 0, A64_R(2), sz);		/* len */
+	emit_sub_imm_64(ctx, A64_R(3), fp, stack_ofs);	/* buf */
+
+	emit_call(ctx, tmp1, (void *)(uintptr_t)__rte_pktmbuf_read);
+	emit_return_zero_if_src_zero(ctx, 1, r0);
+}
+
+/*
+ * Helper for emit_ld_mbuf(): common tail.
+ * Load the value pointed to by R0 and convert from network byte order.
+ */
+static void
+emit_ldmb_fin(struct a64_jit_ctx *ctx, uint8_t opsz, uint32_t sz)
+{
+	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
+
+	emit_ldr(ctx, opsz, r0, r0, A64_ZR);
+	if (opsz != BPF_B)
+		emit_be(ctx, r0, sz * 8);
+}
+
+/*
+ * emit code for BPF_ABS/BPF_IND load.
+ * generates the following construction:
+ * fast_path:
+ *   off = src + imm
+ *   if (mbuf->data_len - off < sz)
+ *      goto slow_path;
+ *   ptr = mbuf->buf_addr + mbuf->data_off + off;
+ *   goto fin_part;
+ * slow_path:
+ *   typeof(sz) buf;  // scratch space reserved on the eBPF stack
+ *   ptr = __rte_pktmbuf_read(mbuf, off, sz, &buf);
+ *   if (ptr == NULL)
+ *      return 0;
+ * fin_part:
+ *   res = *(typeof(sz))ptr;
+ *   res = ntoh(res);
+ */
+static void
+emit_ld_mbuf(struct a64_jit_ctx *ctx, uint8_t op, uint8_t src, int32_t imm,
+	     uint32_t stack_ofs)
+{
+	uint8_t mode = BPF_MODE(op);
+	uint8_t opsz = BPF_SIZE(op);
+	uint32_t sz = bpf_size(opsz);
+	uint32_t ofs[LDMB_OFS_NUM];
+
+	/* seed offsets so the dry-run branches stay in range */
+	ofs[LDMB_FAST_OFS] = ofs[LDMB_SLOW_OFS] = ofs[LDMB_FIN_OFS] = ctx->idx;
+
+	/* dry run to record block offsets */
+	emit_ldmb_fast_path(ctx, src, mode, sz, imm, ofs);
+	ofs[LDMB_SLOW_OFS] = ctx->idx;
+	emit_ldmb_slow_path(ctx, sz, stack_ofs);
+	ofs[LDMB_FIN_OFS] = ctx->idx;
+	emit_ldmb_fin(ctx, opsz, sz);
+
+	/* rewind and emit for real with resolved offsets */
+	ctx->idx = ofs[LDMB_FAST_OFS];
+	emit_ldmb_fast_path(ctx, src, mode, sz, imm, ofs);
+	emit_ldmb_slow_path(ctx, sz, stack_ofs);
+	emit_ldmb_fin(ctx, opsz, sz);
+}
+
 static void
 check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 {
@@ -1137,8 +1266,17 @@ check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 		op = ins->code;
 
 		switch (op) {
-		/* Call imm */
+		/*
+		 * BPF_ABS/BPF_IND can fall through to __rte_pktmbuf_read(),
+		 * so they need the call-clobbered register layout as well.
+		 */
 		case (BPF_JMP | EBPF_CALL):
+		case (BPF_LD | BPF_ABS | BPF_B):
+		case (BPF_LD | BPF_ABS | BPF_H):
+		case (BPF_LD | BPF_ABS | BPF_W):
+		case (BPF_LD | BPF_IND | BPF_B):
+		case (BPF_LD | BPF_IND | BPF_H):
+		case (BPF_LD | BPF_IND | BPF_W):
 			ctx->foundcall = 1;
 			return;
 		}
@@ -1340,6 +1478,15 @@ emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
 			emit_mov_imm(ctx, 1, dst, u64);
 			i++;
 			break;
+		/* R0 = ntoh(*(size *)(mbuf data + (src) + imm)) */
+		case (BPF_LD | BPF_ABS | BPF_B):
+		case (BPF_LD | BPF_ABS | BPF_H):
+		case (BPF_LD | BPF_ABS | BPF_W):
+		case (BPF_LD | BPF_IND | BPF_B):
+		case (BPF_LD | BPF_IND | BPF_H):
+		case (BPF_LD | BPF_IND | BPF_W):
+			emit_ld_mbuf(ctx, op, src, imm, bpf->stack_sz);
+			break;
 		/* *(size *)(dst + off) = src */
 		case (BPF_STX | BPF_MEM | BPF_B):
 		case (BPF_STX | BPF_MEM | BPF_H):
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 5/7] test/bpf: check that JIT was generated
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Marat Khalili, Konstantin Ananyev
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

Avoid silently ignoring JIT failures. The test cases should
all succeed JIT compilation; if not it is a bug in the JIT
implementation and should be reported.

Introduce a configuration setting RTE_BPF_JIT_SUPPORTED
which is cleaner than adding arch-specific #ifdefs.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Marat Khalili <marat.khalili@huawei.com>
---
 app/test/test_bpf.c | 8 ++++++++
 lib/bpf/meson.build | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index b54e36910b..9adffcce64 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -3656,6 +3656,14 @@ run_test(const struct bpf_test *tst)
 				rv, strerror(rv));
 		}
 	}
+#ifdef RTE_BPF_JIT_SUPPORTED
+	else {
+		/* a JIT backend exists for this arch, so it must compile */
+		printf("%s@%d: %s: no JIT code generated;\n",
+			__func__, __LINE__, tst->name);
+		ret = -1;
+	}
+#endif
 
 	rte_bpf_destroy(bpf);
 	return ret;
diff --git a/lib/bpf/meson.build b/lib/bpf/meson.build
index 7e8a300e3f..04ede96689 100644
--- a/lib/bpf/meson.build
+++ b/lib/bpf/meson.build
@@ -27,8 +27,10 @@ sources = files(
 )
 
 if arch_subdir == 'x86' and dpdk_conf.get('RTE_ARCH_64')
+    dpdk_conf.set('RTE_BPF_JIT_SUPPORTED', 1)
     sources += files('bpf_jit_x86.c')
 elif dpdk_conf.has('RTE_ARCH_ARM64')
+    dpdk_conf.set('RTE_BPF_JIT_SUPPORTED', 1)
     sources += files('bpf_jit_arm64.c')
 endif
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 4/7] bpf/arm64: fix offset type to allow a negative jump
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev
  Cc: Christophe Fontaine, stable, Stephen Hemminger,
	Wathsala Vithanage, Konstantin Ananyev, Marat Khalili,
	Jerin Jacob
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

From: Christophe Fontaine <cfontain@redhat.com>

The DPDK BPF JIT standalone test test_ld_mbuf1 fails on arm64.
It does:
	r6 = r1                    // mbuf
	r0 = *(u8 *)pkt[0]         // BPF_ABS
	if ((r0 & 0xf0) == 0x40)
		goto parse
	r0 = 0
	exit                       // epilogue E0
parse:
	r0 = *(u8 *)pkt[r0 + 3]    // BPF_IND
	...
	exit

emit_return_zero_if_src_zero() returns 0 by branching to a function
epilogue. The target may be a previous epilogue, so the branch
might be backwards; therefore the offset must be allowed to be negative.

The offset was stored in a uint16_t, so a negative value wrapped to a
large positive number; emit_b() then branched past the end of the
program and faulted at run time.

Fixes: 111e2a747a4f ("bpf/arm: add basic arithmetic operations")
Cc: stable@dpdk.org

Signed-off-by: Christophe Fontaine <cfontain@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/bpf/bpf_jit_arm64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/bpf/bpf_jit_arm64.c b/lib/bpf/bpf_jit_arm64.c
index ba7ae4d680..776d7c8e97 100644
--- a/lib/bpf/bpf_jit_arm64.c
+++ b/lib/bpf/bpf_jit_arm64.c
@@ -957,10 +957,12 @@ static void
 emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
 {
 	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
-	uint16_t jump_to_epilogue;
+	int32_t jump_to_epilogue;
 
 	emit_cbnz(ctx, is64, src, 3);
 	emit_mov_imm(ctx, is64, r0, 0);
+
+	/* maybe backwards branch to earlier epilogue */
 	jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
 	emit_b(ctx, jump_to_epilogue);
 }
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 3/7] test/bpf: add test for large shift
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Konstantin Ananyev, Marat Khalili
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

The JIT compiler had issues with immediate values on shift instructions
so add a new test to cover that case.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/test/test_bpf.c | 66 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 232e9e2a98..b54e36910b 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -2005,6 +2005,58 @@ test_div1_check(uint64_t rc, const void *arg)
 	return cmp_res(__func__, 0, rc, dve.out, dvt->out, sizeof(dve.out));
 }
 
+/*
+ * Shift by an immediate that doesn't fit in a signed byte: the C1 shift
+ * group takes a fixed 1-byte immediate, but imm_size() returns 4 for
+ * counts >= 128, so the x86 JIT emits 3 stray bytes and desyncs the
+ * instruction stream. The shift results are discarded (a count >= 64 is
+ * UB in the interpreter); the test returns a known constant, which the
+ * corrupted stream fails to produce.
+ */
+static const struct ebpf_insn test_shift_big_imm_prog[] = {
+	{
+		.code = (BPF_ALU | EBPF_MOV | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 0x1,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_LSH | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 137,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_RSH | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 200,
+	},
+	{
+		.code = (EBPF_ALU64 | EBPF_ARSH | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 255,
+	},
+	/* known result; a desynced stream won't reproduce it */
+	{
+		.code = (BPF_ALU | EBPF_MOV | BPF_K),
+		.dst_reg = EBPF_REG_0,
+		.imm = 0x55,
+	},
+	{
+		.code = (BPF_JMP | EBPF_EXIT),
+	},
+};
+
+static void
+test_shift_big_imm_prepare(void *arg)
+{
+	memset(arg, 0, sizeof(struct dummy_offset));
+}
+
+static int
+test_shift_big_imm_check(uint64_t rc, const void *arg)
+{
+	return cmp_res(__func__, 0x55, rc, arg, arg, 0);
+}
+
 /* call test-cases */
 static const struct ebpf_insn test_call1_prog[] = {
 
@@ -3409,6 +3461,20 @@ static const struct bpf_test tests[] = {
 		.prepare = test_mul1_prepare,
 		.check_result = test_div1_check,
 	},
+	{
+		.name = "test_shift_big_imm",
+		.arg_sz = sizeof(struct dummy_offset),
+		.prm = {
+			.ins = test_shift_big_imm_prog,
+			.nb_ins = RTE_DIM(test_shift_big_imm_prog),
+			.prog_arg = {
+				.type = RTE_BPF_ARG_PTR,
+				.size = sizeof(struct dummy_offset),
+			},
+		},
+		.prepare = test_shift_big_imm_prepare,
+		.check_result = test_shift_big_imm_check,
+	},
 	{
 		.name = "test_call1",
 		.arg_sz = sizeof(struct dummy_offset),
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 2/7] test/bpf: add JSET test with small immediate
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Marat Khalili, Konstantin Ananyev
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

The existing jump test only used a 32-bit JSET mask,
so the broken imm8 encoding of TEST in the x86 JIT was never exercised.
Add a case with a byte-sized mask;
run_test() runs it through the interpreter and the JIT.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Marat Khalili <marat.khalili@huawei.com>
---
 app/test/test_bpf.c | 82 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 6b07e72295..232e9e2a98 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -3158,7 +3158,89 @@ static const struct ebpf_insn test_ld_mbuf3_prog[] = {
 };
 
 /* all bpf test cases */
+/*
+ * JSET with a byte-sized mask: exercises the imm8 path of the TEST
+ * encoding in the x86 JIT (a 32-bit mask takes a different path).
+ */
+static const struct ebpf_insn test_jset1_prog[] = {
+	{
+		.code = (BPF_ALU | EBPF_MOV | BPF_K),
+		.dst_reg = EBPF_REG_0,
+		.imm = 0,
+	},
+	{
+		.code = (BPF_LDX | BPF_MEM | BPF_B),
+		.dst_reg = EBPF_REG_2,
+		.src_reg = EBPF_REG_1,
+		.off = offsetof(struct dummy_offset, u8),
+	},
+	/* bit 0 is set in the input: branch is taken */
+	{
+		.code = (BPF_JMP | BPF_JSET | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 0x1,
+		.off = 1,
+	},
+	{
+		.code = (BPF_JMP | BPF_JA),
+		.off = 1,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_OR | BPF_K),
+		.dst_reg = EBPF_REG_0,
+		.imm = 0x1,
+	},
+	/* bit 1 is clear in the input: branch is not taken */
+	{
+		.code = (BPF_JMP | BPF_JSET | BPF_K),
+		.dst_reg = EBPF_REG_2,
+		.imm = 0x2,
+		.off = 1,
+	},
+	{
+		.code = (BPF_JMP | BPF_JA),
+		.off = 1,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_OR | BPF_K),
+		.dst_reg = EBPF_REG_0,
+		.imm = 0x2,
+	},
+	{
+		.code = (BPF_JMP | EBPF_EXIT),
+	},
+};
+
+static void
+test_jset1_prepare(void *arg)
+{
+	struct dummy_offset *df = arg;
+
+	memset(df, 0, sizeof(*df));
+	df->u8 = 0x1;	/* bit 0 set, bit 1 clear */
+}
+
+static int
+test_jset1_check(uint64_t rc, const void *arg)
+{
+	return cmp_res(__func__, 0x1, rc, arg, arg, 0);
+}
+
 static const struct bpf_test tests[] = {
+	{
+		.name = "test_jset1",
+		.arg_sz = sizeof(struct dummy_offset),
+		.prm = {
+			.ins = test_jset1_prog,
+			.nb_ins = RTE_DIM(test_jset1_prog),
+			.prog_arg = {
+				.type = RTE_BPF_ARG_PTR,
+				.size = sizeof(struct dummy_offset),
+			},
+		},
+		.prepare = test_jset1_prepare,
+		.check_result = test_jset1_check,
+	},
 	{
 		.name = "test_store1",
 		.arg_sz = sizeof(struct dummy_offset),
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 1/7] bpf/x86: fix JIT encoding of fixed-width immediates
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, stable, Marat Khalili, Konstantin Ananyev,
	Ferruh Yigit
In-Reply-To: <20260623232522.257208-1-stephen@networkplumber.org>

Several places in the x86 JIT size an immediate with imm_size(), which
returns 1 or 4 bytes depending on the value. That is wrong for opcodes
whose immediate width is fixed by the encoding, and it breaks in both
directions.

TEST (0xF7 /0, used for BPF_JSET) has no imm8 form; the immediate is
always 32 bits. For a small mask such as BPF_JSET | BPF_K #0x1,
imm_size() returns 1, so the JIT emits a 1-byte immediate. The CPU
still consumes 4, swallowing 3 bytes of the following Jcc. The
instruction stream desyncs and the program crashes.

ROR and the shifts (0xC1 group) have the opposite problem: their
immediate is always imm8. For a count >= 128, imm_size() returns 4 and
the JIT emits 3 stray bytes, again desyncing the stream.

Size each immediate by its encoding: 32 bits for TEST, 8 bits for ROR
and the shifts.

Bugzilla ID: 1959
Fixes: cc752e43e079 ("bpf: add JIT compilation for x86_64 ISA")
Cc: stable@dpdk.org

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Marat Khalili <marat.khalili@huawei.com>
---
 lib/bpf/bpf_jit_x86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/bpf/bpf_jit_x86.c b/lib/bpf/bpf_jit_x86.c
index 54eb279643..912d3f69bc 100644
--- a/lib/bpf/bpf_jit_x86.c
+++ b/lib/bpf/bpf_jit_x86.c
@@ -300,7 +300,7 @@ emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
 	emit_rex(st, BPF_ALU, 0, dreg);
 	emit_bytes(st, &ops, sizeof(ops));
 	emit_modregrm(st, MOD_DIRECT, mods, dreg);
-	emit_imm(st, imm, imm_size(imm));
+	emit_imm(st, imm, sizeof(uint8_t));
 }
 
 /*
@@ -441,7 +441,7 @@ emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
 	uint32_t imm)
 {
 	emit_shift(st, op, dreg);
-	emit_imm(st, imm, imm_size(imm));
+	emit_imm(st, imm, sizeof(uint8_t));
 }
 
 /*
@@ -921,7 +921,7 @@ emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
 	emit_rex(st, op, 0, dreg);
 	emit_bytes(st, &ops, sizeof(ops));
 	emit_modregrm(st, MOD_DIRECT, mods, dreg);
-	emit_imm(st, imm, imm_size(imm));
+	emit_imm(st, imm, sizeof(int32_t));
 }
 
 static void
-- 
2.53.0


^ permalink raw reply related

* [PATCH v4 0/7] bpf: JIT related bug fixes
From: Stephen Hemminger @ 2026-06-23 23:23 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger
In-Reply-To: <20260608203322.1116296-1-stephen@networkplumber.org>

While implementing JIT for packet capture, I ran into several issues:
  1. The x86 JIT had a pre-existing bug which would crash.
  2. The ARM64 BPF JIT was missing instructions for packet access,
     which had been discovered previously [1].
  3. Tests related to JIT were not being run or were missing coverage.

Fixed all of these. Patches are ordered so that most urgent fix
is first, followed by the test that should have caught the problem.

The arm64 epilogue branch fix (patch 4) was originally posted by
Christophe Fontaine [1]; that series stalled, so it is carried here
with his authorship.

Changes since v3:
 - incorporate review feedback
 - rebase to current main
 - extend the x86 fix to all fixed-width immediates, not just JSET:
   TEST is always imm32, while ROR and the shift group are always
   imm8; patch 1 retitled to match
 - add a regression test for a large shift count (patch 3)

Changes since v2:
 - found more places where the x86 JIT emitted invalid opcodes for
   fixed-width immediates

Changes since v1:
 - add the x86 JSET encoding fix and its regression test, found once
   the convert test ran generated code through the JIT
 - carry Christophe's arm64 epilogue fix with his sign-off
 - convert test now runs the converted filters through the JIT, not
   just loading them
 - kept Marat's ack on the "check JIT was generated" patch; dropped it
   on the convert test since that changed substantially

[1] https://inbox.dpdk.org/dev/20260319114500.9757-2-cfontain@redhat.com/

Christophe Fontaine (1):
  bpf/arm64: fix offset type to allow a negative jump

Stephen Hemminger (6):
  bpf/x86: fix JIT encoding of fixed-width immediates
  test/bpf: add JSET test with small immediate
  test/bpf: add test for large shift
  test/bpf: check that JIT was generated
  bpf/arm64: add BPF_ABS/BPF_IND packet load support
  test/bpf: check that bpf_convert can be JIT'd

 app/test/test_bpf.c     | 327 +++++++++++++++++++++++++++++++---------
 lib/bpf/bpf_jit_arm64.c | 153 ++++++++++++++++++-
 lib/bpf/bpf_jit_x86.c   |   6 +-
 lib/bpf/meson.build     |   2 +
 4 files changed, 412 insertions(+), 76 deletions(-)

-- 
2.53.0


^ permalink raw reply

* Re: [PATCH v1 1/2] dts: update parsing for cryptodev latency
From: Patrick Robb @ 2026-06-23 22:44 UTC (permalink / raw)
  To: Andrew Bailey; +Cc: luca.vizzarro, dev, lylavoie, ahassick, knimoji
In-Reply-To: <20260513152715.133381-1-abailey@iol.unh.edu>

[-- Attachment #1: Type: text/plain, Size: 54 bytes --]

Reviewed-by: Patrick Robb <patrickrobb1997@gmail.com>

[-- Attachment #2: Type: text/html, Size: 128 bytes --]

^ permalink raw reply

* Re: [PATCH v2] dts: update dts check format script and resolve errors
From: Patrick Robb @ 2026-06-23 22:34 UTC (permalink / raw)
  To: Koushik Bhargav Nimoji
  Cc: luca.vizzarro, dev, abailey, ahassick, lylavoie,
	NBU-Contact-Thomas Monjalon
In-Reply-To: <20260618204525.1010218-1-knimoji@iol.unh.edu>

[-- Attachment #1: Type: text/plain, Size: 1255 bytes --]

Looks like your patch is failing some of the checks on patchwork, including
doc build: https://github.com/ovsrobot/dpdk/actions/runs/27789008643

Remember to run a doc build locally before sending any patches:

meson setup build
ninja -C build doc

Otherwise, please provide a little more info regarding your patch. So, you
have updated some of the dependencies used in the dts check format script.
From a quick look, I think the relevant part is:

-mypy = "^1.13.0"
+mypy = "^2.1.0"
 toml = "^0.10.2"
-ruff = "^0.8.1"
-types-paramiko = "^3.5.0.20240928"
+ruff = "^0.15.16"
+types-paramiko = "^4.0.0.20260518"
 types-invoke = "^2.0.0.10"
-types-pyyaml = "^6.0.12.20240917"
+types-pyyaml = "^6.0.12.20260518"

What is being done broadly? Are all dependencies covered by poetry being
updated, or just the subset included in the format checks? Are dependencies
being brought to the current latest versions, or something different?

I remember Thomas mentioning that DTS was not checking the
dts-check-format.sh at DPDK Summit and that confused me. Perhaps he is
running DTS and dts-check-format.sh outside of poetry (which we have said
is okay to do) and he is on newer versions of the formatting dependencies
than what we currently have committed to the poetry.lock.

[-- Attachment #2: Type: text/html, Size: 1633 bytes --]

^ permalink raw reply

* Re: [PATCH] net/af_xdp: add Rx metadata and dynamic timestamping support
From: Stephen Hemminger @ 2026-06-23 22:06 UTC (permalink / raw)
  To: Mark Blasko
  Cc: dev, Ciara Loftus, Maryam Tahhan, Joshua Washington,
	Jasper Tran O'Leary
In-Reply-To: <20260623215325.814776-1-blasko@google.com>

On Tue, 23 Jun 2026 21:53:24 +0000
Mark Blasko <blasko@google.com> wrote:

> +		if (rxq->rx_timestamp_enabled &&
> +		    timestamp_dynfield_offset >= 0) {
> +			struct af_xdp_rx_metadata *meta;
> +
> +			meta = (struct af_xdp_rx_metadata *)
> +				((char *)rte_pktmbuf_mtod(bufs[i], void *) -
> +				 sizeof(struct af_xdp_rx_metadata));
> +			*RTE_MBUF_DYNFIELD(bufs[i],
> +					   timestamp_dynfield_offset,
> +					   uint64_t *) = meta->rx_timestamp;
> +			bufs[i]->ol_flags |= timestamp_dynflag;
> +		}
> +

Why does the XDP timestamp need to be different from how other drivers
already do timestamps? See the AF_PACKET and TAP devices.

This should not be driver-specific.

^ permalink raw reply

* [PATCH] net/af_xdp: add Rx metadata and dynamic timestamping support
From: Mark Blasko @ 2026-06-23 21:53 UTC (permalink / raw)
  To: dev, Ciara Loftus, Maryam Tahhan
  Cc: Mark Blasko, Joshua Washington, Jasper Tran O'Leary

Enable dynamic RX timestamping in the AF_XDP Poll Mode Driver.
This extracts the ingress timestamp prepended to the packet
headroom by the XDP program and populates it in the mbuf.

Signed-off-by: Mark Blasko <blasko@google.com>
Reviewed-by: Joshua Washington <joshwash@google.com>
Reviewed-by: Jasper Tran O'Leary <jtranoleary@google.com>
---
 doc/guides/rel_notes/release_26_07.rst |  5 +++
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 56 +++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 6eba91a5e9..727442258f 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -63,6 +63,11 @@ New Features
     ``rte_eal_init`` and the application is responsible for probing each device,
   * ``--auto-probing`` enables the initial bus probing, which is the current default behavior.
 
+* **Updated AF_XDP ethernet driver.**
+
+  * Added support for dynamic RX metadata and timestamping offload
+    (``RTE_ETH_RX_OFFLOAD_TIMESTAMP``).
+
 * **Added LinkData sxe2 ethernet driver.**
 
   Added network driver for the LinkData network adapters.
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 2cdb533276..c90e232d57 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -62,6 +62,13 @@
 #define PF_XDP AF_XDP
 #endif
 
+struct af_xdp_rx_metadata {
+	uint64_t rx_timestamp;
+};
+
+static int timestamp_dynfield_offset = -1;
+static uint64_t timestamp_dynflag;
+
 RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
 #define RTE_LOGTYPE_NET_AF_XDP af_xdp_logtype
 
@@ -144,6 +151,7 @@ struct pkt_rx_queue {
 	struct pollfd fds[1];
 	int xsk_queue_idx;
 	int busy_budget;
+	bool rx_timestamp_enabled;
 };
 
 struct tx_stats {
@@ -398,6 +406,20 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 		rte_pktmbuf_pkt_len(bufs[i]) = len;
 		rte_pktmbuf_data_len(bufs[i]) = len;
+
+		if (rxq->rx_timestamp_enabled &&
+		    timestamp_dynfield_offset >= 0) {
+			struct af_xdp_rx_metadata *meta;
+
+			meta = (struct af_xdp_rx_metadata *)
+				((char *)rte_pktmbuf_mtod(bufs[i], void *) -
+				 sizeof(struct af_xdp_rx_metadata));
+			*RTE_MBUF_DYNFIELD(bufs[i],
+					   timestamp_dynfield_offset,
+					   uint64_t *) = meta->rx_timestamp;
+			bufs[i]->ol_flags |= timestamp_dynflag;
+		}
+
 		rx_bytes += len;
 	}
 
@@ -457,6 +479,18 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
 
+		if (rxq->rx_timestamp_enabled &&
+		    timestamp_dynfield_offset >= 0) {
+			struct af_xdp_rx_metadata *meta;
+
+			meta = (struct af_xdp_rx_metadata *)((char *)pkt -
+				sizeof(struct af_xdp_rx_metadata));
+			*RTE_MBUF_DYNFIELD(mbufs[i],
+					   timestamp_dynfield_offset,
+					   uint64_t *) = meta->rx_timestamp;
+			mbufs[i]->ol_flags |= timestamp_dynflag;
+		}
+
 		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
 		rte_ring_enqueue(umem->buf_ring, (void *)addr);
 		rte_pktmbuf_pkt_len(mbufs[i]) = len;
@@ -743,6 +777,23 @@ eth_dev_start(struct rte_eth_dev *dev)
 {
 	uint16_t i;
 
+	if (dev->data->dev_conf.rxmode.offloads &
+	    RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		int rc;
+
+		rc = rte_mbuf_dyn_rx_timestamp_register(
+				&timestamp_dynfield_offset,
+				&timestamp_dynflag);
+		if (rc) {
+			AF_XDP_LOG_LINE(ERR,
+				"Failed to register mbuf timestamp field");
+			return rc;
+		}
+		AF_XDP_LOG_LINE(INFO,
+			"Registered mbuf timestamp field, offset: %d",
+			timestamp_dynfield_offset);
+	}
+
 	dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
 	for (i = 0; i < dev->data->nb_rx_queues; i++) {
 		dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
@@ -870,6 +921,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->max_rx_queues = internals->queue_cnt;
 	dev_info->max_tx_queues = internals->queue_cnt;
 
+	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_TIMESTAMP;
+
 	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	dev_info->max_rx_pktlen = getpagesize() -
@@ -1873,7 +1926,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 	process_private->rxq_xsk_fds[rx_queue_id] = rxq->fds[0].fd;
 
 	rxq->port = dev->data->port_id;
-
+	rxq->rx_timestamp_enabled = !!(dev->data->dev_conf.rxmode.offloads &
+					RTE_ETH_RX_OFFLOAD_TIMESTAMP);
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;
 
-- 
2.55.0.rc0.799.gd6f94ed593-goog


^ permalink raw reply related

* RE: [EXTERNAL] [PATCH v3 11/11] bus/vmbus: support unplug
From: Long Li @ 2026-06-23 21:30 UTC (permalink / raw)
  To: David Marchand, dev@dpdk.org
  Cc: thomas@monjalon.net, stephen@networkplumber.org,
	bruce.richardson@intel.com, fengchengwen@huawei.com,
	hemant.agrawal@nxp.com, Wei Hu
In-Reply-To: <20260623105439.2144694-12-david.marchand@redhat.com>

> Add .unplug callback to handle driver removal, device unmapping, and
> interrupt cleanup. This enables use of the generic bus cleanup helper.
> 
> The cleanup function was already performing these operations, so it seems
> safe to expose them through the unplug operation.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>

Reviewed-by: Long Li <longli@microsoft.com>


> ---
>  doc/guides/rel_notes/release_26_07.rst |  4 +++
>  drivers/bus/vmbus/vmbus_common.c       | 41 ++++++++++++--------------
>  2 files changed, 23 insertions(+), 22 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_26_07.rst
> b/doc/guides/rel_notes/release_26_07.rst
> index 5d7aa8d1bf..55d3b44527 100644
> --- a/doc/guides/rel_notes/release_26_07.rst
> +++ b/doc/guides/rel_notes/release_26_07.rst
> @@ -114,6 +114,10 @@ New Features
> 
>    Added no-IOMMU mode for devices without or not enabling IOMMU/SVA.
> 
> +* **Added unplug operation support to VMBUS bus.**
> +
> +  Implemented device unplug operation to allow runtime removal of VMBUS
> devices.
> +
>  * **Added selective Rx in ethdev API.**
> 
>    Some parts of packets may be discarded in Rx diff --git
> a/drivers/bus/vmbus/vmbus_common.c
> b/drivers/bus/vmbus/vmbus_common.c
> index a6e3a24a7c..cd6e851e4c 100644
> --- a/drivers/bus/vmbus/vmbus_common.c
> +++ b/drivers/bus/vmbus/vmbus_common.c
> @@ -144,34 +144,29 @@ rte_vmbus_probe(void)  }
> 
>  static int
> -rte_vmbus_cleanup(struct rte_bus *bus)
> +vmbus_unplug_device(struct rte_device *rte_dev)
>  {
> -	struct rte_vmbus_device *dev;
> -	int error = 0;
> -
> -	RTE_BUS_FOREACH_DEV(dev, bus) {
> -		const struct rte_vmbus_driver *drv;
> -		int ret;
> -
> -		if (!rte_dev_is_probed(&dev->device))
> -			continue;
> -		drv = RTE_BUS_DRIVER(dev->device.driver, *drv);
> -		if (drv->remove == NULL)
> -			continue;
> +	const struct rte_vmbus_driver *drv = RTE_BUS_DRIVER(rte_dev-
> >driver, *drv);
> +	struct rte_vmbus_device *dev = RTE_BUS_DEVICE(rte_dev, *dev);
> +	int ret = 0;
> 
> +	if (drv->remove != NULL) {
>  		ret = drv->remove(dev);
>  		if (ret < 0)
> -			error = -1;
> +			return ret;
> +	}
> 
> -		rte_vmbus_unmap_device(dev);
> -		rte_intr_instance_free(dev->intr_handle);
> +	rte_vmbus_unmap_device(dev);
> +	rte_intr_instance_free(dev->intr_handle);
> +	dev->intr_handle = NULL;
> 
> -		dev->device.driver = NULL;
> -		rte_bus_remove_device(bus, &dev->device);
> -		free(dev);
> -	}
> +	return 0;
> +}
> 
> -	return error;
> +static void
> +vmbus_free_device(struct rte_device *dev) {
> +	free(RTE_BUS_DEVICE(dev, struct rte_vmbus_device));
>  }
> 
>  static int
> @@ -222,10 +217,12 @@ rte_vmbus_unregister(struct rte_vmbus_driver
> *driver)  struct rte_bus rte_vmbus_bus = {
>  	.scan = rte_vmbus_scan,
>  	.probe = rte_bus_generic_probe,
> -	.cleanup = rte_vmbus_cleanup,
> +	.free_device = vmbus_free_device,
> +	.cleanup = rte_bus_generic_cleanup,
>  	.find_device = rte_bus_generic_find_device,
>  	.match = vmbus_bus_match,
>  	.probe_device = vmbus_probe_device,
> +	.unplug_device = vmbus_unplug_device,
>  	.parse = vmbus_parse,
>  	.dev_compare = vmbus_dev_compare,
>  };
> --
> 2.54.0


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox