DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/4] net/ngbe: add USO support
From: Zaiyu Wang @ 2026-06-17 10:59 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang, Jiawen Wu
In-Reply-To: <20260617105959.10764-1-zaiyuwang@trustnetic.com>

USO (UDP Segmentation Offload), also known as UFO (UDP Fragmentation
Offload), is a hardware offload rarely seen in DPDK. Its implementation
is similar to TSO (TCP Segmentation Offload), so the driver enables
USO based on existing TSO support.

Note:
USO segments UDP packets, requiring hardware to recalculate both IP
and UDP checksums due to length change. Thus, USO implicitly requires
IP and UDP checksum offloads, same as TSO.

Signed-off-by: Zaiyu Wang <zaiyuwang@trustnetic.com>
---
 drivers/net/ngbe/ngbe_rxtx.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ngbe/ngbe_rxtx.c b/drivers/net/ngbe/ngbe_rxtx.c
index 91e215694c..a1389de9c0 100644
--- a/drivers/net/ngbe/ngbe_rxtx.c
+++ b/drivers/net/ngbe/ngbe_rxtx.c
@@ -30,6 +30,7 @@ static const u64 NGBE_TX_OFFLOAD_MASK = (RTE_MBUF_F_TX_IP_CKSUM |
 		RTE_MBUF_F_TX_VLAN |
 		RTE_MBUF_F_TX_L4_MASK |
 		RTE_MBUF_F_TX_TCP_SEG |
+		RTE_MBUF_F_TX_UDP_SEG |
 		NGBE_TX_IEEE1588_TMST);
 
 #define NGBE_TX_OFFLOAD_NOTSUP_MASK \
@@ -317,7 +318,7 @@ ngbe_set_xmit_ctx(struct ngbe_tx_queue *txq,
 	type_tucmd_mlhl |= NGBE_TXD_PTID(tx_offload.ptid);
 
 	/* check if TCP segmentation required for this packet */
-	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+	if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
 		tx_offload_mask.l2_len |= ~0;
 		tx_offload_mask.l3_len |= ~0;
 		tx_offload_mask.l4_len |= ~0;
@@ -427,7 +428,7 @@ tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
 		tmp |= NGBE_TXD_CC;
 		tmp |= NGBE_TXD_EIPCS;
 	}
-	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+	if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
 		tmp |= NGBE_TXD_CC;
 		/* implies IPv4 cksum */
 		if (ol_flags & RTE_MBUF_F_TX_IPV4)
@@ -447,7 +448,7 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
 
 	if (ol_flags & RTE_MBUF_F_TX_VLAN)
 		cmdtype |= NGBE_TXD_VLE;
-	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
+	if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG))
 		cmdtype |= NGBE_TXD_TSE;
 	return cmdtype;
 }
@@ -483,6 +484,8 @@ tx_desc_ol_flags_to_ptype(uint64_t oflags)
 
 	if (oflags & RTE_MBUF_F_TX_TCP_SEG)
 		ptype |= RTE_PTYPE_L4_TCP;
+	else if (oflags & RTE_MBUF_F_TX_UDP_SEG)
+		ptype |= RTE_PTYPE_L4_UDP;
 
 	return ptype;
 }
@@ -764,7 +767,7 @@ ngbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		olinfo_status = 0;
 		if (tx_ol_req) {
-			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+			if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
 				/* when TSO is on, paylen in descriptor is the
 				 * not the packet len but the tcp payload len
 				 */
@@ -1991,7 +1994,7 @@ ngbe_get_tx_port_offloads(struct rte_eth_dev *dev)
 		RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
 		RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
 		RTE_ETH_TX_OFFLOAD_TCP_TSO     |
-		RTE_ETH_TX_OFFLOAD_UDP_TSO	   |
+		RTE_ETH_TX_OFFLOAD_UDP_TSO     |
 		RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
 
 	if (hw->is_pf)
-- 
2.21.0.windows.1


^ permalink raw reply related

* [PATCH 0/4] Wangxun new feature
From: Zaiyu Wang @ 2026-06-17 10:59 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang

This patchset introduces three new features and critical fixes for our
recent release cycle.

Patches 1 and 2 add support for UDP Segmentation Offload (USO) to improve
large-packet transmission performance for UDP workloads.

Patch 3 enables VFs to sense PF ifconfig down/up events, allowing
better fault tolerance and fast recovery in virtualized environments.

Patch 4 adds the missing VF support for the Amber-Lite 40G NICs, which
was previously omitted in the initial integration.

Zaiyu Wang (4):
  net/ngbe: add USO support
  net/txgbe: add USO support
  net/txgbe: add support for VF sensing PF down
  net/txgbe: add VF support for Amber-Lite 40G NIC

 drivers/net/ngbe/ngbe_rxtx.c          | 13 +++---
 drivers/net/txgbe/base/txgbe_devids.h |  2 +
 drivers/net/txgbe/base/txgbe_hw.c     |  7 ++++
 drivers/net/txgbe/base/txgbe_regs.h   |  7 +++-
 drivers/net/txgbe/base/txgbe_type.h   |  2 +
 drivers/net/txgbe/base/txgbe_vf.c     |  7 ++--
 drivers/net/txgbe/txgbe_ethdev.c      |  4 +-
 drivers/net/txgbe/txgbe_ethdev_vf.c   | 60 +++++++++++++++++++++++----
 drivers/net/txgbe/txgbe_rxtx.c        | 13 +++---
 9 files changed, 92 insertions(+), 23 deletions(-)

-- 
2.21.0.windows.1

^ permalink raw reply

* [PATCH v1 1/1] net/i40e: do not reject RSS types parameter
From: Anatoly Burakov @ 2026-06-17 10:40 UTC (permalink / raw)
  To: dev, Bruce Richardson

After the recent refactor, global RSS configuration started rejecting the
RSS types parameter because it was not used for anything.

However, because testpmd will specify RSS types by default if omitted from
the flow command (i.e. `actions rss queues 0 1 end` vs `actions rss queues
0 1 end types end`), RSS action based flows that are created without
mentioning RSS types will still have RSS types as non-zero, causing flow
creation failures not directly related to the pattern.

Fix it by printing a warning but allowing spurious RSS types as opposed to
rejecting them outright.

Fixes: 0185303c2e24 ("net/i40e: refactor RSS flow parameter checks")

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/i40e/i40e_hash.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/intel/i40e/i40e_hash.c b/drivers/net/intel/i40e/i40e_hash.c
index 8b80d0a91c..5a6543a9ec 100644
--- a/drivers/net/intel/i40e/i40e_hash.c
+++ b/drivers/net/intel/i40e/i40e_hash.c
@@ -1089,12 +1089,18 @@ i40e_hash_validate_rss_common(const struct rte_flow_action_rss *rss_act,
 				"Symmetric hash function not supported without specific patterns");
 	}
 
-	/* hash types are not supported for global RSS configuration */
-	if (rss_act->types != 0) {
-		return rte_flow_error_set(error, EINVAL,
-				RTE_FLOW_ERROR_TYPE_ACTION_CONF, rss_act,
-				"RSS types not supported without a pattern");
-	}
+	/*
+	 * When RSS types is not specified in testpmd, it will set up a default
+	 * RSS types value for the flow. Even though no hash engine part calling
+	 * this particular function will use RSS types parameter for anything,
+	 * we cannot reject having it because it is extra effort for testpmd
+	 * user to avoid specifying it.
+	 *
+	 * So, instead, accept types value even though we are not using it for
+	 * anything, but produce a warning for the user.
+	 */
+	if (rss_act->types != 0)
+		PMD_DRV_LOG(WARNING, "RSS types specified but will not be used");
 
 	/* check RSS key length if it is specified */
 	if (rss_act->key_len != 0 && rss_act->key_len != I40E_RSS_KEY_LEN) {
-- 
2.47.3


^ permalink raw reply related

* [PATCH v1 5/5] test: use new lcore role enum names
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong
In-Reply-To: <20260617102834.2343356-1-lihuisong@huawei.com>

Replace old lcore role enum names with new RTE_LCORE_ prefixed names
in test applications.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 app/test/test_lcores.c  | 2 +-
 app/test/test_mempool.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 13842615d5..60354b3f7f 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -396,7 +396,7 @@ test_lcores(void)
 	unsigned int i;
 
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (!rte_lcore_has_role(i, ROLE_OFF))
+		if (!rte_lcore_has_role(i, RTE_LCORE_ROLE_OFF))
 			eal_threads_count++;
 	}
 	if (eal_threads_count == 0) {
diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index e54249ce61..38f0b6e712 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -353,7 +353,7 @@ test_mempool_sp_sc(void)
 		ret = -1;
 		goto err;
 	}
-	if (rte_eal_lcore_role(lcore_next) != ROLE_RTE) {
+	if (rte_eal_lcore_role(lcore_next) != RTE_LCORE_ROLE_RTE) {
 		ret = -1;
 		goto err;
 	}
-- 
2.33.0


^ permalink raw reply related

* [PATCH v1 4/5] net/softnic: use new lcore role enum names
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong
In-Reply-To: <20260617102834.2343356-1-lihuisong@huawei.com>

Replace old lcore role enum names with new RTE_LCORE_ prefixed names
in softnic driver.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 drivers/net/softnic/rte_eth_softnic_thread.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/softnic/rte_eth_softnic_thread.c b/drivers/net/softnic/rte_eth_softnic_thread.c
index f72c836199..a6d47c8b33 100644
--- a/drivers/net/softnic/rte_eth_softnic_thread.c
+++ b/drivers/net/softnic/rte_eth_softnic_thread.c
@@ -98,9 +98,9 @@ thread_is_valid(struct pmd_internals *softnic, uint32_t thread_id)
 	if (thread_id == rte_get_main_lcore())
 		return 0; /* FALSE */
 
-	if (softnic->params.sc && rte_lcore_has_role(thread_id, ROLE_SERVICE))
+	if (softnic->params.sc && rte_lcore_has_role(thread_id, RTE_LCORE_ROLE_SERVICE))
 		return 1; /* TRUE */
-	if (!softnic->params.sc && rte_lcore_has_role(thread_id, ROLE_RTE))
+	if (!softnic->params.sc && rte_lcore_has_role(thread_id, RTE_LCORE_ROLE_RTE))
 		return 1; /* TRUE */
 
 	return 0; /* FALSE */
-- 
2.33.0


^ permalink raw reply related

* [PATCH v1 2/5] eal: use new lcore role enum names
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong
In-Reply-To: <20260617102834.2343356-1-lihuisong@huawei.com>

Replace old lcore role enum names with new RTE_LCORE_ prefixed names
in EAL common code.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 lib/eal/common/eal_common_lcore.c   | 34 ++++++++++++++---------------
 lib/eal/common/eal_common_options.c | 28 ++++++++++++------------
 lib/eal/common/eal_private.h        |  4 ++--
 lib/eal/common/rte_service.c        | 12 +++++-----
 4 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c
index 39411f9370..021687599b 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -76,7 +76,7 @@ rte_eal_lcore_role(unsigned int lcore_id)
 	struct rte_config *cfg = rte_eal_get_configuration();
 
 	if (lcore_id >= RTE_MAX_LCORE)
-		return ROLE_OFF;
+		return RTE_LCORE_ROLE_OFF;
 	return cfg->lcore_role[lcore_id];
 }
 
@@ -99,7 +99,7 @@ int rte_lcore_is_enabled(unsigned int lcore_id)
 
 	if (lcore_id >= RTE_MAX_LCORE)
 		return 0;
-	return cfg->lcore_role[lcore_id] == ROLE_RTE;
+	return cfg->lcore_role[lcore_id] == RTE_LCORE_ROLE_RTE;
 }
 
 RTE_EXPORT_SYMBOL(rte_get_next_lcore)
@@ -176,7 +176,7 @@ rte_eal_cpu_init(void)
 		lcore_to_socket_id[lcore_id] = socket_id;
 
 		if (eal_cpu_detected(lcore_id) == 0) {
-			config->lcore_role[lcore_id] = ROLE_OFF;
+			config->lcore_role[lcore_id] = RTE_LCORE_ROLE_OFF;
 			lcore_config[lcore_id].core_index = -1;
 			continue;
 		}
@@ -185,8 +185,8 @@ rte_eal_cpu_init(void)
 		CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);
 
 		/* By default, each detected core is enabled */
-		config->lcore_role[lcore_id] = ROLE_RTE;
-		lcore_config[lcore_id].core_role = ROLE_RTE;
+		config->lcore_role[lcore_id] = RTE_LCORE_ROLE_RTE;
+		lcore_config[lcore_id].core_role = RTE_LCORE_ROLE_RTE;
 		lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
 		lcore_config[lcore_id].numa_id = socket_id;
 		EAL_LOG(DEBUG, "Detected lcore %u as "
@@ -314,7 +314,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 	if (callback->init == NULL)
 		goto no_init;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+		if (cfg->lcore_role[lcore_id] == RTE_LCORE_ROLE_OFF)
 			continue;
 		if (callback_init(callback, lcore_id) == 0)
 			continue;
@@ -322,7 +322,7 @@ rte_lcore_callback_register(const char *name, rte_lcore_init_cb init,
 		 * previous lcore.
 		 */
 		while (lcore_id-- != 0) {
-			if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+			if (cfg->lcore_role[lcore_id] == RTE_LCORE_ROLE_OFF)
 				continue;
 			callback_uninit(callback, lcore_id);
 		}
@@ -354,7 +354,7 @@ rte_lcore_callback_unregister(void *handle)
 	if (callback->uninit == NULL)
 		goto no_uninit;
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+		if (cfg->lcore_role[lcore_id] == RTE_LCORE_ROLE_OFF)
 			continue;
 		callback_uninit(callback, lcore_id);
 	}
@@ -376,9 +376,9 @@ eal_lcore_non_eal_allocate(void)
 
 	rte_rwlock_write_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (cfg->lcore_role[lcore_id] != ROLE_OFF)
+		if (cfg->lcore_role[lcore_id] != RTE_LCORE_ROLE_OFF)
 			continue;
-		cfg->lcore_role[lcore_id] = ROLE_NON_EAL;
+		cfg->lcore_role[lcore_id] = RTE_LCORE_ROLE_NON_EAL;
 		cfg->lcore_count++;
 		break;
 	}
@@ -399,7 +399,7 @@ eal_lcore_non_eal_allocate(void)
 		}
 		EAL_LOG(DEBUG, "Initialization refused for lcore %u.",
 			lcore_id);
-		cfg->lcore_role[lcore_id] = ROLE_OFF;
+		cfg->lcore_role[lcore_id] = RTE_LCORE_ROLE_OFF;
 		cfg->lcore_count--;
 		lcore_id = RTE_MAX_LCORE;
 		goto out;
@@ -416,11 +416,11 @@ eal_lcore_non_eal_release(unsigned int lcore_id)
 	struct lcore_callback *callback;
 
 	rte_rwlock_write_lock(&lcore_lock);
-	if (cfg->lcore_role[lcore_id] != ROLE_NON_EAL)
+	if (cfg->lcore_role[lcore_id] != RTE_LCORE_ROLE_NON_EAL)
 		goto out;
 	TAILQ_FOREACH(callback, &lcore_callbacks, next)
 		callback_uninit(callback, lcore_id);
-	cfg->lcore_role[lcore_id] = ROLE_OFF;
+	cfg->lcore_role[lcore_id] = RTE_LCORE_ROLE_OFF;
 	cfg->lcore_count--;
 out:
 	rte_rwlock_write_unlock(&lcore_lock);
@@ -436,7 +436,7 @@ rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg)
 
 	rte_rwlock_read_lock(&lcore_lock);
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (cfg->lcore_role[lcore_id] == ROLE_OFF)
+		if (cfg->lcore_role[lcore_id] == RTE_LCORE_ROLE_OFF)
 			continue;
 		ret = cb(lcore_id, arg);
 		if (ret != 0)
@@ -450,11 +450,11 @@ static const char *
 lcore_role_str(enum rte_lcore_role_t role)
 {
 	switch (role) {
-	case ROLE_RTE:
+	case RTE_LCORE_ROLE_RTE:
 		return "RTE";
-	case ROLE_SERVICE:
+	case RTE_LCORE_ROLE_SERVICE:
 		return "SERVICE";
-	case ROLE_NON_EAL:
+	case RTE_LCORE_ROLE_NON_EAL:
 		return "NON_EAL";
 	default:
 		return "UNKNOWN";
diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index 1049838d73..6dd748e37e 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -898,10 +898,10 @@ eal_parse_service_coremask(const char *coremask)
 					return -1;
 				}
 
-				if (cfg->lcore_role[idx] == ROLE_RTE)
+				if (cfg->lcore_role[idx] == RTE_LCORE_ROLE_RTE)
 					taken_lcore_count++;
 
-				lcore_config[idx].core_role = ROLE_SERVICE;
+				lcore_config[idx].core_role = RTE_LCORE_ROLE_SERVICE;
 				count++;
 			}
 		}
@@ -938,7 +938,7 @@ update_lcore_config(const rte_cpuset_t *cpuset, bool remap, uint16_t remap_base)
 
 	/* set everything to disabled first, then set up values */
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		cfg->lcore_role[i] = ROLE_OFF;
+		cfg->lcore_role[i] = RTE_LCORE_ROLE_OFF;
 		lcore_config[i].core_index = -1;
 	}
 
@@ -966,7 +966,7 @@ update_lcore_config(const rte_cpuset_t *cpuset, bool remap, uint16_t remap_base)
 				continue;
 			}
 
-			cfg->lcore_role[lcore_id] = ROLE_RTE;
+			cfg->lcore_role[lcore_id] = RTE_LCORE_ROLE_RTE;
 			lcore_config[lcore_id].core_index = count;
 			CPU_ZERO(&lcore_config[lcore_id].cpuset);
 			CPU_SET(i, &lcore_config[lcore_id].cpuset);
@@ -1138,12 +1138,12 @@ eal_parse_service_corelist(const char *corelist)
 			if (min == RTE_MAX_LCORE)
 				min = idx;
 			for (idx = min; idx <= max; idx++) {
-				if (cfg->lcore_role[idx] != ROLE_SERVICE) {
-					if (cfg->lcore_role[idx] == ROLE_RTE)
+				if (cfg->lcore_role[idx] != RTE_LCORE_ROLE_SERVICE) {
+					if (cfg->lcore_role[idx] == RTE_LCORE_ROLE_RTE)
 						taken_lcore_count++;
 
 					lcore_config[idx].core_role =
-							ROLE_SERVICE;
+							RTE_LCORE_ROLE_SERVICE;
 					count++;
 				}
 			}
@@ -1166,7 +1166,7 @@ eal_parse_service_corelist(const char *corelist)
 	rte_cpuset_t service_cpuset;
 	CPU_ZERO(&service_cpuset);
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (lcore_config[i].core_role == ROLE_SERVICE)
+		if (lcore_config[i].core_role == RTE_LCORE_ROLE_SERVICE)
 			CPU_SET(i, &service_cpuset);
 	}
 	if (CPU_COUNT(&service_cpuset) > 0) {
@@ -1195,12 +1195,12 @@ eal_parse_main_lcore(const char *arg)
 		return -1;
 
 	/* ensure main core is not used as service core */
-	if (lcore_config[cfg->main_lcore].core_role == ROLE_SERVICE) {
+	if (lcore_config[cfg->main_lcore].core_role == RTE_LCORE_ROLE_SERVICE) {
 		EAL_LOG(ERR, "Error: Main lcore is used as a service core");
 		return -1;
 	}
 	/* check that we have the core recorded in the core list */
-	if (cfg->lcore_role[cfg->main_lcore] != ROLE_RTE) {
+	if (cfg->lcore_role[cfg->main_lcore] != RTE_LCORE_ROLE_RTE) {
 		EAL_LOG(ERR, "Error: Main lcore is not enabled for DPDK");
 		return -1;
 	}
@@ -1389,7 +1389,7 @@ eal_parse_lcores(const char *lcores)
 
 	/* Reset lcore config */
 	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
-		cfg->lcore_role[idx] = ROLE_OFF;
+		cfg->lcore_role[idx] = RTE_LCORE_ROLE_OFF;
 		lcore_config[idx].core_index = -1;
 		CPU_ZERO(&lcore_config[idx].cpuset);
 	}
@@ -1451,9 +1451,9 @@ eal_parse_lcores(const char *lcores)
 				continue;
 			set_count--;
 
-			if (cfg->lcore_role[idx] != ROLE_RTE) {
+			if (cfg->lcore_role[idx] != RTE_LCORE_ROLE_RTE) {
 				lcore_config[idx].core_index = count;
-				cfg->lcore_role[idx] = ROLE_RTE;
+				cfg->lcore_role[idx] = RTE_LCORE_ROLE_RTE;
 				count++;
 			}
 
@@ -2432,7 +2432,7 @@ compute_ctrl_threads_cpuset(struct internal_config *internal_cfg)
 	unsigned int lcore_id;
 
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (rte_lcore_has_role(lcore_id, ROLE_OFF))
+		if (rte_lcore_has_role(lcore_id, RTE_LCORE_ROLE_OFF))
 			continue;
 		RTE_CPU_OR(cpuset, cpuset, &lcore_config[lcore_id].cpuset);
 	}
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 0c0544beaf..dff3565099 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -430,7 +430,7 @@ uint64_t get_tsc_freq_arch(void);
  * Allocate a free lcore to associate to a non-EAL thread.
  *
  * @return
- *   - the id of a lcore with role ROLE_NON_EAL on success.
+ *   - the id of a lcore with role RTE_LCORE_ROLE_NON_EAL on success.
  *   - RTE_MAX_LCORE if none was available or initializing was refused (see
  *     rte_lcore_callback_register).
  */
@@ -441,7 +441,7 @@ unsigned int eal_lcore_non_eal_allocate(void);
  * Counterpart of eal_lcore_non_eal_allocate().
  *
  * @param lcore_id
- *   The lcore with role ROLE_NON_EAL to release.
+ *   The lcore with role RTE_LCORE_ROLE_NON_EAL to release.
  */
 void eal_lcore_non_eal_release(unsigned int lcore_id);
 
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index d2ac9d3f14..5c3a350ae8 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -107,7 +107,7 @@ rte_service_init(void)
 	int i;
 	struct rte_config *cfg = rte_eal_get_configuration();
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (lcore_config[i].core_role == ROLE_SERVICE) {
+		if (lcore_config[i].core_role == RTE_LCORE_ROLE_SERVICE) {
 			if ((unsigned int)i == cfg->main_lcore)
 				continue;
 			rte_service_lcore_add(i);
@@ -718,7 +718,7 @@ set_lcore_state(uint32_t lcore, int32_t state)
 	lcore_config[lcore].core_role = state;
 
 	/* update per-lcore optimized state tracking */
-	cs->is_service_core = (state == ROLE_SERVICE);
+	cs->is_service_core = (state == RTE_LCORE_ROLE_SERVICE);
 
 	rte_eal_trace_service_lcore_state_change(lcore, state);
 }
@@ -734,7 +734,7 @@ rte_service_lcore_reset_all(void)
 
 		if (cs->is_service_core) {
 			rte_bitset_clear_all(cs->mapped_services, RTE_SERVICE_NUM_MAX);
-			set_lcore_state(i, ROLE_RTE);
+			set_lcore_state(i, RTE_LCORE_ROLE_RTE);
 			/* runstate act as guard variable Use
 			 * store-release memory order here to synchronize
 			 * with load-acquire in runstate read functions.
@@ -761,7 +761,7 @@ rte_service_lcore_add(uint32_t lcore)
 	if (cs->is_service_core)
 		return -EALREADY;
 
-	set_lcore_state(lcore, ROLE_SERVICE);
+	set_lcore_state(lcore, RTE_LCORE_ROLE_SERVICE);
 
 	/* ensure that after adding a core the mask and state are defaults */
 	rte_bitset_clear_all(cs->mapped_services, RTE_SERVICE_NUM_MAX);
@@ -793,7 +793,7 @@ rte_service_lcore_del(uint32_t lcore)
 			RUNSTATE_STOPPED)
 		return -EBUSY;
 
-	set_lcore_state(lcore, ROLE_RTE);
+	set_lcore_state(lcore, RTE_LCORE_ROLE_RTE);
 
 	rte_smp_wmb();
 	return 0;
@@ -1126,7 +1126,7 @@ rte_service_dump(FILE *f, uint32_t id)
 
 	fprintf(f, "Service Cores Summary\n");
 	for (i = 0; i < RTE_MAX_LCORE; i++) {
-		if (lcore_config[i].core_role != ROLE_SERVICE)
+		if (lcore_config[i].core_role != RTE_LCORE_ROLE_SERVICE)
 			continue;
 
 		service_dump_calls_per_lcore(f, i);
-- 
2.33.0


^ permalink raw reply related

* [PATCH v1 3/5] graph: use new lcore role enum names
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong
In-Reply-To: <20260617102834.2343356-1-lihuisong@huawei.com>

Replace old lcore role enum names with new RTE_LCORE_ prefixed names
in graph library.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 lib/graph/graph.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/graph/graph.c b/lib/graph/graph.c
index 5f8ada2185..8165a0a932 100644
--- a/lib/graph/graph.c
+++ b/lib/graph/graph.c
@@ -359,7 +359,7 @@ rte_graph_model_mcore_dispatch_core_bind(rte_graph_t id, int lcore)
 		goto fail;
 	}
 
-	if (rte_lcore_has_role(lcore, ROLE_OFF))
+	if (rte_lcore_has_role(lcore, RTE_LCORE_ROLE_OFF))
 		SET_ERR_JMP(ENOLINK, fail, "lcore %d is invalid", lcore);
 
 	STAILQ_FOREACH(graph, &graph_list, next)
-- 
2.33.0


^ permalink raw reply related

* [PATCH v1 1/5] eal: prefix lcore role enum values
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong
In-Reply-To: <20260617102834.2343356-1-lihuisong@huawei.com>

Add the RTE_LCORE_ prefix to the lcore role enum values in
rte_lcore_role_t to follow DPDK naming conventions.

- ROLE_RTE      -> RTE_LCORE_ROLE_RTE
- ROLE_OFF      -> RTE_LCORE_ROLE_OFF
- ROLE_SERVICE  -> RTE_LCORE_ROLE_SERVICE
- ROLE_NON_EAL  -> RTE_LCORE_ROLE_NON_EAL

Old names are kept as macros aliasing to the new names to preserve
backward compatibility.

Suggested-by: Thomas Monjalon <thomas@monjalon.net>
Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 lib/eal/include/rte_lcore.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
index 10f965b4f0..2fc4d0b15b 100644
--- a/lib/eal/include/rte_lcore.h
+++ b/lib/eal/include/rte_lcore.h
@@ -31,12 +31,18 @@ RTE_DECLARE_PER_LCORE(unsigned, _lcore_id);  /**< Per thread "lcore id". */
  * The lcore role (used in RTE or not).
  */
 enum rte_lcore_role_t {
-	ROLE_RTE,
-	ROLE_OFF,
-	ROLE_SERVICE,
-	ROLE_NON_EAL,
+	RTE_LCORE_ROLE_RTE,
+	RTE_LCORE_ROLE_OFF,
+	RTE_LCORE_ROLE_SERVICE,
+	RTE_LCORE_ROLE_NON_EAL,
 };
 
+/* Old lcore role aliases for backward compatibility. */
+#define ROLE_RTE	RTE_LCORE_ROLE_RTE
+#define ROLE_OFF	RTE_LCORE_ROLE_OFF
+#define ROLE_SERVICE	RTE_LCORE_ROLE_SERVICE
+#define ROLE_NON_EAL	RTE_LCORE_ROLE_NON_EAL
+
 /**
  * Get a lcore's role.
  *
@@ -308,7 +314,8 @@ rte_lcore_callback_unregister(void *handle);
 typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg);
 
 /**
- * Iterate on all active lcores (ROLE_RTE, ROLE_SERVICE and ROLE_NON_EAL).
+ * Iterate on all active lcores (RTE_LCORE_ROLE_RTE, RTE_LCORE_ROLE_SERVICE
+ * and RTE_LCORE_ROLE_NON_EAL).
  * No modification on the lcore states is allowed in the callback.
  *
  * Note: as opposed to init/uninit callbacks, iteration callbacks can be
-- 
2.33.0


^ permalink raw reply related

* [PATCH v1 0/5] prefix lcore role enum values
From: Huisong Li @ 2026-06-17 10:28 UTC (permalink / raw)
  To: thomas; +Cc: mb, andrew.rybchenko, dev, zhanjie9, lihuisong

Add the RTE_LCORE_ prefix to the lcore role enum values in rte_lcore_role_t
to follow DPDK naming conventions.

- ROLE_RTE      -> RTE_LCORE_ROLE_RTE
- ROLE_OFF      -> RTE_LCORE_ROLE_OFF
- ROLE_SERVICE  -> RTE_LCORE_ROLE_SERVICE
- ROLE_NON_EAL  -> RTE_LCORE_ROLE_NON_EAL

Old names are kept as macros aliasing to the new names to preserve
backward compatibility.

Huisong Li (5):
  eal: prefix lcore role enum values
  eal: use new lcore role enum names
  graph: use new lcore role enum names
  net/softnic: use new lcore role enum names
  test: use new lcore role enum names

 app/test/test_lcores.c                       |  2 +-
 app/test/test_mempool.c                      |  2 +-
 drivers/net/softnic/rte_eth_softnic_thread.c |  4 +--
 lib/eal/common/eal_common_lcore.c            | 34 ++++++++++----------
 lib/eal/common/eal_common_options.c          | 28 ++++++++--------
 lib/eal/common/eal_private.h                 |  4 +--
 lib/eal/common/rte_service.c                 | 12 +++----
 lib/eal/include/rte_lcore.h                  | 17 +++++++---
 lib/graph/graph.c                            |  2 +-
 9 files changed, 56 insertions(+), 49 deletions(-)

-- 
2.33.0


^ permalink raw reply

* RE: [EXTERNAL] [PATCH 00/13] Bus cleanup infrastructure and fixes
From: Hemant Agrawal @ 2026-06-17  9:16 UTC (permalink / raw)
  To: David Marchand
  Cc: dev@dpdk.org, thomas@monjalon.net, stephen@networkplumber.org,
	bruce.richardson@intel.com, fengchengwen@huawei.com, Long Li
In-Reply-To: <CAJFAV8z9XmpRfbidbqmrpFtQW_+04V5Qx7GWAP1p9-hY+xu_yw@mail.gmail.com>


> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: 16 June 2026 13:17
> To: Hemant Agrawal <hemant.agrawal@nxp.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; stephen@networkplumber.org;
> bruce.richardson@intel.com; fengchengwen@huawei.com; Long Li
> <longli@microsoft.com>
> Subject: Re: [EXTERNAL] [PATCH 00/13] Bus cleanup infrastructure and fixes
> Importance: High
> 
> On Tue, 16 Jun 2026 at 08:55, David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > On Tue, 16 Jun 2026 at 01:55, Long Li <longli@microsoft.com> wrote:
> > >
> > > >
> > > > > This series refactors the bus cleanup infrastructure to reduce
> > > > > code duplication and fix resource leaks in several bus drivers.
> > > > > It should address the leak Thomas pointed at.
> > > > >
> > > > > The first part of the series (patches 1-8) addresses several
> > > > > bugs and
> > > > > inconsistencies:
> > > > > - Documentation and log message inconsistencies from earlier bus
> > > > >   refactoring
> > > > > - Device list management issues in dma/idxd and bus/vdev
> > > > > - Resource leaks in PCI and VMBUS bus cleanup (mappings and
> > > > > interrupts)
> > > > > - Simplified device freeing in NXP buses (DPAA and FSLMC)
> > > > > - Deferred interrupt allocation to probe time (NXP buses, VMBUS)
> > > > >
> > > > > The core infrastructure changes (patches 9-10) introduce the
> > > > > generic cleanup
> > > > > framework:
> > > > > - Refactors unplug operations to be the counterpart of
> > > > > probe_device
> > > > > - Implements rte_bus_generic_cleanup() to centralize cleanup
> > > > > logic
> > > > > - Adds .free_device operation to struct rte_bus
> > > > > - Adds compile-time verification that rte_device is at offset 0
> > > > >
> > > > > The final patches (11-13) convert remaining buses to use the
> > > > > generic cleanup
> > > > > helper:
> > > > > - DPAA bus: add unplug support
> > > > > - VMBUS bus: switch to embedded device name and add unplug
> > > > > support
> > > >
> > > > There is a hang on vmbus during device shutdown after applying the
> > > > series, I'm looking into it.
> > >
> > > Turned out to be a test issue. Please see my comments on patch 08; the
> > > patch set tested well after that fix.
> >
> > Thanks a lot for testing!
> >
> > I'll fix this regression in the next revision.
> 
> Fyi Hemant, this series has a similar regression for dpaa/fslmc bus (interrupt
> handle allocated too late in the device probing flow).
> The implications seem greater than fixing vmbus though, as I am now finding
> bugs on the cleanup side (interrupt eventfd are never closed, for example).
> 
> I'll think about how to fix it in the next revision, one option may be to leave
> dpaa/fslmc alone.. ?
> But in the long run, all bus drivers should behave consistently.
> 
> I'll get back in this thread once I have a better view of the situation.
> 

Hi David,
	Give me some time to get this tested on the hardware.

Regards
Hemant

> 
> --
> David Marchand


^ permalink raw reply

* [PATCH v6 4/4] net/zxdh: optimize Tx xmit pkts performance
From: Junlong Wang @ 2026-06-17  8:28 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260617082828.1058127-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 19733 bytes --]

Add simple Tx xmit functions (zxdh_xmit_pkts_simple)
for single-segment packet xmit.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c |  11 +-
 drivers/net/zxdh/zxdh_queue.h  |   2 +-
 drivers/net/zxdh/zxdh_rxtx.c   | 347 +++++++++++++++++++++++++--------
 drivers/net/zxdh/zxdh_rxtx.h   |  11 +-
 4 files changed, 277 insertions(+), 94 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index fe76139f3d..43f823253d 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -490,7 +490,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
 		if (!vq)
 			continue;
 		while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
-			rte_pktmbuf_free(buf);
+			rte_pktmbuf_free_seg(buf);
 		PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
 		"rxq", i * 2);
 	}
@@ -499,7 +499,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
 		if (!vq)
 			continue;
 		while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
-			rte_pktmbuf_free(buf);
+			rte_pktmbuf_free_seg(buf);
 		PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
 		"txq", i * 2 + 1);
 	}
@@ -1291,10 +1291,15 @@ static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
 static int32_t
 zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
+	uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
+
 	eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
 	eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
 
-	eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+	if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS))
+		eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+	else
+		eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
 
 	if (eth_dev->data->scattered_rx)
 		eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index b079272162..091d1f25db 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -374,7 +374,7 @@ zxdh_queue_full(const struct zxdh_virtqueue *vq)
 }
 
 static inline void
-zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
+zxdh_queue_store_flags_packed(volatile struct zxdh_vring_packed_desc *dp, uint16_t flags)
 {
 	rte_io_wmb();
 	dp->flags = flags;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index ab0510a753..4581dbe83a 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,22 @@
 		RTE_MBUF_F_TX_SEC_OFFLOAD |     \
 		RTE_MBUF_F_TX_UDP_SEG)
 
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B   8
+#define NEXT_CACHELINE_OFF_8B   16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B   4
+#define NEXT_CACHELINE_OFF_8B    8
+#else
+#define NEXT_CACHELINE_OFF_16B  (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B   (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP  NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
+#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
+
 uint32_t zxdh_outer_l2_type[16] = {
 	0,
 	RTE_PTYPE_L2_ETHER,
@@ -201,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
 	0,
 };
 
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
-	uint16_t used_idx = 0;
-	uint16_t id       = 0;
-	uint16_t curr_id  = 0;
-	uint16_t free_cnt = 0;
-	uint16_t size     = vq->vq_nentries;
-	struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
-	struct zxdh_vq_desc_extra     *dxp  = NULL;
-
-	used_idx = vq->vq_used_cons_idx;
-	/* desc_is_used has a load-acquire or rte_io_rmb inside
-	 * and wait for used desc in virtqueue.
-	 */
-	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
-		id = desc[used_idx].id;
-		do {
-			curr_id = used_idx;
-			dxp = &vq->vq_descx[used_idx];
-			used_idx += dxp->ndescs;
-			free_cnt += dxp->ndescs;
-			num -= dxp->ndescs;
-			if (used_idx >= size) {
-				used_idx -= size;
-				vq->used_wrap_counter ^= 1;
-			}
-			if (dxp->cookie != NULL) {
-				rte_pktmbuf_free(dxp->cookie);
-				dxp->cookie = NULL;
-			}
-		} while (curr_id != id);
-	}
-	vq->vq_used_cons_idx = used_idx;
-	vq->vq_free_cnt += free_cnt;
-}
-
 static inline uint16_t
 zxdh_get_mtu(struct zxdh_virtqueue *vq)
 {
@@ -334,7 +313,7 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
 }
 
 static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
 						struct rte_mbuf *cookie)
 {
 	struct zxdh_virtqueue *vq = txvq->vq;
@@ -345,7 +324,6 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
 	struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
 
-	dxp->ndescs = 1;
 	dxp->cookie = cookie;
 	hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
 	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -362,52 +340,57 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 }
 
 static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
 						struct rte_mbuf *cookie,
 						uint16_t needed)
 {
 	struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
 	struct zxdh_virtqueue *vq = txvq->vq;
-	uint16_t id = vq->vq_avail_idx;
-	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+	struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
 	uint16_t head_idx = vq->vq_avail_idx;
 	uint16_t idx = head_idx;
 	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
 	struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
 	struct zxdh_net_hdr_dl *hdr = NULL;
-
-	uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+	uint16_t id = vq->vq_avail_idx;
+	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+	uint16_t head_flags = 0;
 
-	dxp->ndescs = needed;
-	dxp->cookie = cookie;
-	head_flags |= vq->cached_flags;
+	/*
+	 * IMPORTANT: For multi-seg packets, we set the head descriptor's cookie to NULL
+	 * and store each segment's mbuf in its corresponding vq_descx[idx].cookie.
+	 * This is required for the per-descriptor mbuf free in zxdh_xmit_fast_flush()
+	 * which uses rte_pktmbuf_free_seg() to free individual segments.
+	 * Any code path that attempts to read vq_descx[head_id].cookie will see NULL
+	 * and must handle this case appropriately.
+	 */
+	dxp->cookie = NULL;
 
+	/* setup first tx ring slot to point to header stored in reserved region. */
 	start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
 	start_dp[idx].len  = hdr_len;
-	head_flags |= ZXDH_VRING_DESC_F_NEXT;
+	start_dp[idx].id = idx;
+	head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
 	hdr = (void *)&txr[idx].tx_hdr;
 
-	rte_prefetch1(hdr);
+	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
 	idx++;
 	if (idx >= vq->vq_nentries) {
 		idx -= vq->vq_nentries;
 		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 
-	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
 	do {
 		start_dp[idx].addr = rte_pktmbuf_iova(cookie);
 		start_dp[idx].len  = cookie->data_len;
-		start_dp[idx].id = id;
-		if (likely(idx != head_idx)) {
-			uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
-			flags |= vq->cached_flags;
-			start_dp[idx].flags = flags;
-		}
+		start_dp[idx].id = idx;
 
+		dep[idx].cookie = cookie;
+		uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+		flags |= vq->cached_flags;
+		start_dp[idx].flags = flags;
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
@@ -417,7 +400,6 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
 	vq->vq_avail_idx = idx;
-
 	zxdh_queue_store_flags_packed(head_dp, head_flags);
 }
 
@@ -456,7 +438,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
 }
 
 static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
 {
 	uint16_t id       = 0;
 	uint16_t curr_id  = 0;
@@ -472,20 +454,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
 	 * for a used descriptor in the virtqueue.
 	 */
 	while (desc_is_used(&desc[used_idx], vq)) {
+		rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
 		id = desc[used_idx].id;
 		do {
+			desc[used_idx].id = used_idx;
 			curr_id = used_idx;
 			dxp = &vq->vq_descx[used_idx];
-			used_idx += dxp->ndescs;
-			free_cnt += dxp->ndescs;
-			if (used_idx >= size) {
-				used_idx -= size;
-				vq->used_wrap_counter ^= 1;
-			}
 			if (dxp->cookie != NULL) {
-				rte_pktmbuf_free(dxp->cookie);
+				rte_pktmbuf_free_seg(dxp->cookie);
 				dxp->cookie = NULL;
 			}
+			used_idx += 1;
+			free_cnt += 1;
+			if (unlikely(used_idx == size)) {
+				used_idx = 0;
+				vq->used_wrap_counter ^= 1;
+			}
 		} while (curr_id != id);
 	}
 	vq->vq_used_cons_idx = used_idx;
@@ -499,13 +483,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
 	struct zxdh_virtqueue  *vq   = txvq->vq;
 	uint16_t nb_tx = 0;
 
-	zxdh_xmit_flush(vq);
+	zxdh_xmit_fast_flush(vq);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *txm = tx_pkts[nb_tx];
 		int32_t can_push     = 0;
 		int32_t slots        = 0;
-		int32_t need         = 0;
 
 		rte_prefetch0(txm);
 		/* optimize ring usage */
@@ -522,26 +505,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
 		 * default    => number of segments + 1
 		 **/
 		slots = txm->nb_segs + !can_push;
-		need = slots - vq->vq_free_cnt;
 		/* Positive value indicates it need free vring descriptors */
-		if (unlikely(need > 0)) {
-			zxdh_xmit_cleanup_inorder_packed(vq, need);
-			need = slots - vq->vq_free_cnt;
-			if (unlikely(need > 0)) {
-				PMD_TX_LOG(ERR,
-						" No enough %d free tx descriptors to transmit."
-						"freecnt %d",
-						need,
-						vq->vq_free_cnt);
-				break;
-			}
-		}
+		if (unlikely(slots >  vq->vq_free_cnt))
+			break;
 
 		/* Enqueue Packet buffers */
 		if (can_push)
-			zxdh_enqueue_xmit_packed_fast(txvq, txm);
+			zxdh_xmit_enqueue_push(txvq, txm);
 		else
-			zxdh_enqueue_xmit_packed(txvq, txm, slots);
+			zxdh_xmit_enqueue_append(txvq, txm, slots);
 		zxdh_update_packet_stats(&txvq->stats, txm);
 	}
 	txvq->stats.packets += nb_tx;
@@ -1070,7 +1042,6 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
 
 		if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
 			continue;
-		rcv_pkts[nb_rx] = rxm;
 		zxdh_update_packet_stats(&rxvq->stats, rxm);
 		nb_rx++;
 	}
@@ -1084,3 +1055,209 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
 	}
 	return nb_rx;
 }
+
+static inline void pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+	uint16_t mtu_or_mss = 0;
+	uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
+	uint16_t l3_offset;
+	uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+	uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+	struct zxdh_pi_hdr *pi_hdr;
+	struct zxdh_pd_hdr_dl *pd_hdr;
+	struct zxdh_net_hdr_dl *net_hdr_dl = hw->net_hdr_dl;
+	uint8_t hdr_len = hw->dl_net_hdr_len;
+	uint16_t ol_flag = 0;
+	struct zxdh_net_hdr_dl *hdr;
+
+	hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
+	rte_memcpy(hdr, net_hdr_dl, hdr_len);
+
+	/* Update mbuf to reflect the prepended header */
+	cookie->data_off -= hdr_len;
+	cookie->data_len += hdr_len;
+	cookie->pkt_len += hdr_len;
+
+	if (hw->has_tx_offload) {
+		pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+		pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+		pcode = ZXDH_PCODE_IP_PKT_TYPE;
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+			l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+		else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+			l3_ptype = ZXDH_PI_L3TYPE_IP;
+		else
+			pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+			mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS) ?
+				cookie->tso_segsz : ZXDH_MIN_MSS;
+			pi_hdr->pkt_flag_hi8  |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+		} else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+			mtu_or_mss = hw->eth_dev->data->mtu;
+			mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+		} else {
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+		}
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+		if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		} else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+			RTE_MBUF_F_TX_TCP_CKSUM) {
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		}
+		pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+		pi_hdr->pkt_flag_lw16 = rte_be_to_cpu_16(pkt_flag_lw16);
+		pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+		l3_offset = hdr_len + cookie->l2_len;
+		l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+					cookie->outer_l2_len + cookie->outer_l3_len : 0;
+		pi_hdr->l3_offset = rte_be_to_cpu_16(l3_offset);
+		pi_hdr->l4_offset = rte_be_to_cpu_16(l3_offset + cookie->l3_len);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+			ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+	} else {
+		pd_hdr = &hdr->pd_hdr;
+	}
+
+	pd_hdr->dst_vfid = rte_be_to_cpu_16(cookie->port);
+
+	if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+		ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+		pd_hdr->cvlan_insert = rte_be_to_cpu_16(cookie->vlan_tci);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
+			ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+			pd_hdr->svlan_insert = rte_be_to_cpu_16(cookie->vlan_tci_outer);
+		}
+	}
+
+	pd_hdr->ol_flag = rte_be_to_cpu_16(ol_flag);
+}
+
+/*
+ * Populate N_PER_LOOP descriptors with data from N_PER_LOOP single-segment mbufs.
+ * Note: The simple transmit path (zxdh_xmit_pkts_simple) is selected only when
+ * RTE_ETH_TX_OFFLOAD_MULTI_SEGS is disabled, so all packets handled here are
+ * guaranteed to be single-segment.
+ */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf **pkts, uint16_t start_id)
+{
+	uint16_t flags = vq->cached_flags;
+	int i;
+	for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts) {
+		/* write data to descriptor */
+		txdp->addr = rte_mbuf_data_iova(*pkts);
+		txdp->len = (*pkts)->data_len;
+		txdp->id = start_id + i;
+		txdp->flags = flags;
+	}
+}
+
+/* Populate 1 descriptor with data from 1 single-segment mbuf */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf *pkts, uint16_t id)
+{
+	uint16_t flags = vq->cached_flags;
+	txdp->addr = rte_mbuf_data_iova(pkts);
+	txdp->len = pkts->data_len;
+	txdp->id = id;
+	zxdh_queue_store_flags_packed(txdp, flags);
+}
+
+static void submit_to_backend_simple(struct zxdh_virtqueue  *vq,
+			struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_hw *hw = vq->hw;
+	struct rte_mbuf *m = NULL;
+	uint16_t id =  vq->vq_avail_idx;
+	struct zxdh_vring_packed_desc *txdp = &vq->vq_packed.ring.desc[id];
+	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+	int mainpart, leftover;
+	int i, j;
+
+	/*
+	 * Process most of the packets in chunks of N pkts.  Any
+	 * leftover packets will get processed one at a time.
+	 */
+	mainpart = (nb_pkts & ~N_PER_LOOP_MASK);
+	leftover = (nb_pkts & N_PER_LOOP_MASK);
+
+	for (i = 0; i < mainpart; i += N_PER_LOOP) {
+		rte_prefetch0(dxp + i);
+		rte_prefetch0(tx_pkts + i);
+		for (j = 0; j < N_PER_LOOP; ++j) {
+			m  = *(tx_pkts + i + j);
+			pkt_padding(m, hw);
+			(dxp + i + j)->cookie = (void *)m;
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+		/* write data to descriptor */
+		tx_bunch(vq, txdp + i, tx_pkts + i, id + i);
+	}
+
+	if (leftover > 0) {
+		rte_prefetch0(dxp + mainpart);
+		rte_prefetch0(tx_pkts + mainpart);
+
+		for (i = 0; i < leftover; ++i) {
+			m =  *(tx_pkts + mainpart + i);
+			pkt_padding(m, hw);
+			(dxp + mainpart + i)->cookie = m;
+			tx1(vq, txdp + mainpart + i, *(tx_pkts + mainpart + i), id + mainpart + i);
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+	}
+}
+
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_tx *txvq = tx_queue;
+	struct zxdh_virtqueue  *vq   = txq_get_vq(txvq);
+	uint16_t nb_tx = 0, nb_tx_left;
+
+	zxdh_xmit_fast_flush(vq);
+
+	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+	if (unlikely(nb_pkts == 0)) {
+		txvq->stats.idle++;
+		return 0;
+	}
+
+	nb_tx_left = nb_pkts;
+	if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+		nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+		nb_tx_left = nb_pkts - nb_tx;
+		submit_to_backend_simple(vq, tx_pkts, nb_tx);
+		vq->vq_avail_idx = 0;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+		vq->vq_free_cnt -= nb_tx;
+		tx_pkts += nb_tx;
+	}
+	if (nb_tx_left) {
+		submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
+		vq->vq_avail_idx  += nb_tx_left;
+		vq->vq_free_cnt  -= nb_tx_left;
+	}
+
+	zxdh_queue_notify(vq);
+	txvq->stats.packets += nb_pkts;
+
+	return nb_pkts;
+}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index dba9567414..783fb456de 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -56,18 +56,19 @@ struct __rte_cache_aligned zxdh_virtnet_rx {
 
 struct __rte_cache_aligned zxdh_virtnet_tx {
 	struct zxdh_virtqueue         *vq;
-
-	rte_iova_t                zxdh_net_hdr_mem; /* hdr for each xmit packet */
-	uint16_t                  queue_id;           /* DPDK queue index. */
-	uint16_t                  port_id;            /* Device port identifier. */
+	const struct rte_memzone *zxdh_net_hdr_mz;  /* memzone to populate hdr. */
+	rte_iova_t               zxdh_net_hdr_mem; /* hdr for each xmit packet */
 	struct zxdh_virtnet_stats      stats;
 	const struct rte_memzone *mz;                 /* mem zone to populate TX ring. */
-	const struct rte_memzone *zxdh_net_hdr_mz;  /* memzone to populate hdr. */
+	uint64_t offloads;
+	uint16_t                  queue_id;           /* DPDK queue index. */
+	uint16_t                  port_id;            /* Device port identifier. */
 };
 
 uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 
 #endif  /* ZXDH_RXTX_H */
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 49044 bytes --]

^ permalink raw reply related

* [PATCH v6 3/4] net/zxdh: optimize Rx recv pkts performance
From: Junlong Wang @ 2026-06-17  8:28 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260617082828.1058127-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 16239 bytes --]

1. Add simple RX recv functions (zxdh_recv_single_pkts)
   for single-segment packet recv.
2. Optimize Rx recv pkts packed ops.
3. Remove unnecessary ZXDH_NET_F_MRG_RXBUF negotiation check and
   some unnecessary statistical counters from the xstats name tables.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c     |  39 +++++--
 drivers/net/zxdh/zxdh_ethdev_ops.c |  23 ++--
 drivers/net/zxdh/zxdh_ethdev_ops.h |   4 +
 drivers/net/zxdh/zxdh_rxtx.c       | 174 +++++++++++++++++++++++------
 drivers/net/zxdh/zxdh_rxtx.h       |  16 +--
 5 files changed, 193 insertions(+), 63 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index a383619419..fe76139f3d 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
 	return ret;
 }
 
-static int32_t
-zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
 {
-	struct zxdh_hw *hw = eth_dev->data->dev_private;
+	uint16_t buf_size;
 
-	if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
-		PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
-		return -1;
+	if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+		eth_dev->data->lro = 1;
+		return 1;
 	}
+
+	if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+		return 1;
+
+	PMD_DRV_LOG(DEBUG, "port %u min_rx_buf_size %u",
+		eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+	buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+	if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+		return 1;
+
+	return 0;
+}
+
+static int32_t
+zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
 	eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+	eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
 	eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
-	eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+
+	if (eth_dev->data->scattered_rx)
+		eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+	else
+		eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
 
 	return 0;
 }
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..9a8e05e941 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
 	{"good_bytes",             offsetof(struct zxdh_virtnet_rx, stats.bytes)},
 	{"errors",                 offsetof(struct zxdh_virtnet_rx, stats.errors)},
 	{"idle",                   offsetof(struct zxdh_virtnet_rx, stats.idle)},
-	{"full",                   offsetof(struct zxdh_virtnet_rx, stats.full)},
-	{"norefill",               offsetof(struct zxdh_virtnet_rx, stats.norefill)},
-	{"multicast_packets",      offsetof(struct zxdh_virtnet_rx, stats.multicast)},
-	{"broadcast_packets",      offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
 	{"truncated_err",          offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
 	{"offload_cfg_err",        offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
 	{"invalid_hdr_len_err",    offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
 	{"good_packets",           offsetof(struct zxdh_virtnet_tx, stats.packets)},
 	{"good_bytes",             offsetof(struct zxdh_virtnet_tx, stats.bytes)},
 	{"errors",                 offsetof(struct zxdh_virtnet_tx, stats.errors)},
-	{"idle",                   offsetof(struct zxdh_virtnet_tx, stats.idle)},
-	{"norefill",               offsetof(struct zxdh_virtnet_tx, stats.norefill)},
-	{"multicast_packets",      offsetof(struct zxdh_virtnet_tx, stats.multicast)},
-	{"broadcast_packets",      offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+	{"idle",                 offsetof(struct zxdh_virtnet_tx, stats.idle)},
 	{"truncated_err",          offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
 	{"offload_cfg_err",        offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
 	{"invalid_hdr_len_err",    offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
 	{"no_segs_err",            offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+	{"no_free_tx_desc_err",    offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
 	{"undersize_packets",      offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
 	{"size_64_packets",        offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
 	{"size_65_127_packets",    offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,19 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
 	uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
 	int ret;
 
+	/* If device is started, refuse mtu that requires the support of
+	 * scattered packets when this feature has not been enabled before.
+	 */
+	if (dev->data->dev_started) {
+		uint32_t buf_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+		uint8_t need_scatter = (uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) > buf_size;
+
+		if (need_scatter != dev->data->scattered_rx) {
+			PMD_DRV_LOG(ERR, "Stop port first.");
+			return -EINVAL;
+		}
+	}
+
 	if (hw->is_pf) {
 		ret = zxdh_get_panel_attr(dev, &panel);
 		if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
 #define ZXDH_SPM_SPEED_4X_100G         RTE_BIT32(10)
 #define ZXDH_SPM_SPEED_4X_200G         RTE_BIT32(11)
 
+#define ZXDH_VLAN_TAG_LEN   4
+#define ZXDH_ETH_OVERHEAD  (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
 struct zxdh_np_stats_data {
 	uint64_t n_pkts_dropped;
 	uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 93506a4b49..ab0510a753 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -613,10 +613,12 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 	uint16_t i, used_idx;
 	uint16_t id;
 
+	used_idx = vq->vq_used_cons_idx;
+	rte_prefetch0(&desc[used_idx]);
+
 	for (i = 0; i < num; i++) {
 		used_idx = vq->vq_used_cons_idx;
-		/**
-		 * desc_is_used has a load-acquire or rte_io_rmb inside
+		/* desc_is_used has a load-acquire or rte_io_rmb inside
 		 * and wait for used desc in virtqueue.
 		 */
 		if (!desc_is_used(&desc[used_idx], vq))
@@ -823,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
 	}
 }
 
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+		struct rte_mbuf **cookie, uint16_t nb_pkts)
 {
-	int32_t error = 0;
-	/*
-	 * Requeue the discarded mbuf. This should always be
-	 * successful since it was just dequeued.
-	 */
-	error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
-	if (unlikely(error)) {
-		PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
-		rte_pktmbuf_free(m);
+	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+	struct zxdh_vq_desc_extra *dxp;
+	uint16_t flags = vq->cached_flags;
+	int32_t i;
+	uint16_t idx;
+
+	idx = vq->vq_avail_idx;
+	for (i = 0; i < nb_pkts; i++) {
+		dxp = &vq->vq_descx[idx];
+		dxp->cookie = (void *)cookie[i];
+		start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+		zxdh_queue_store_flags_packed(&start_dp[idx], flags);
+		idx++;
+	}
+	vq->vq_avail_idx += nb_pkts;
+	vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+	/* free_cnt may include mrg descs */
+	struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+	uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+	struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+	uint16_t  unwrap_cnt, left_cnt;
+
+	if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+		left_cnt = free_cnt;
+		unwrap_cnt = 0;
+		if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+			unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+			left_cnt = free_cnt - unwrap_cnt;
+			refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+			vq->vq_avail_idx = 0;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		}
+		if (left_cnt)
+			refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+		rte_io_wmb();
+	} else {
+		dev->data->rx_mbuf_alloc_failed += free_cnt;
 	}
 }
 
@@ -852,7 +889,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t len = 0;
 	uint32_t seg_num = 0;
 	uint32_t seg_res = 0;
-	uint32_t error = 0;
 	uint16_t hdr_size = 0;
 	uint16_t nb_rx = 0;
 	uint16_t i;
@@ -873,7 +909,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rx_pkts[nb_rx] = rxm;
 		prev = rxm;
 		len = lens[i];
-		header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+		header = (struct zxdh_net_hdr_ul *)((char *)
+					rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
 
 		seg_num  = header->type_hdr.num_buffers;
 
@@ -886,7 +923,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rxvq->stats.invalid_hdr_len_err++;
 			continue;
 		}
-		rxm->data_off += hdr_size;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
 		rxm->nb_segs = seg_num;
 		rxm->ol_flags = 0;
 		rcvd_pkt_len = len - hdr_size;
@@ -902,18 +939,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			len = lens[i];
 			rxm = rcv_pkts[i];
 			rxm->data_len = len;
+			rxm->data_off = RTE_PKTMBUF_HEADROOM;
 			rcvd_pkt_len += len;
 			prev->next = rxm;
 			prev = rxm;
 			rxm->next = NULL;
-			seg_res -= 1;
+			seg_res--;
 		}
 
 		if (!seg_res) {
 			if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
 				PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
 					rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
-				zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+				rte_pktmbuf_free(rx_pkts[nb_rx]);
 				rxvq->stats.errors++;
 				rxvq->stats.truncated_err++;
 				continue;
@@ -942,14 +980,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			prev->next = rxm;
 			prev = rxm;
 			rxm->next = NULL;
-			extra_idx += 1;
+			extra_idx++;
 		}
 		seg_res -= rcv_cnt;
 		if (!seg_res) {
 			if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
 				PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
 					rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
-				zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+				rte_pktmbuf_free(rx_pkts[nb_rx]);
 				rxvq->stats.errors++;
 				rxvq->stats.truncated_err++;
 				continue;
@@ -961,26 +999,88 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxvq->stats.packets += nb_rx;
 
 refill:
-	/* Allocate new mbuf for the used descriptor */
-	if (likely(!zxdh_queue_full(vq))) {
-		struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
-		/* free_cnt may include mrg descs */
-		uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
-		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
-			error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
-			if (unlikely(error)) {
-				for (i = 0; i < free_cnt; i++)
-					rte_pktmbuf_free(new_pkts[i]);
-			}
+	if (vq->vq_free_cnt > 0) {
+		struct rte_eth_dev *dev = hw->eth_dev;
+		refill_que_descs(vq, dev);
+		zxdh_queue_notify(vq);
+	}
 
-			if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
-				zxdh_queue_notify(vq);
-		} else {
-			struct rte_eth_dev *dev = hw->eth_dev;
+	return nb_rx;
+}
 
-			dev->data->rx_mbuf_alloc_failed += free_cnt;
-		}
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+		struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+	uint16_t hdr_size = 0;
+	struct zxdh_net_hdr_ul *header;
+
+	header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+	rxm->ol_flags = 0;
+	rxm->vlan_tci = 0;
+	rxm->vlan_tci_outer = 0;
+
+	hdr_size = header->type_hdr.pd_len << 1;
+	if (unlikely(header->type_hdr.num_buffers != 1)) {
+		PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+			hdr_size, header->type_hdr.num_buffers);
+		rte_pktmbuf_free(rxm);
+		rxvq->stats.invalid_hdr_len_err++;
+		return -1;
+	}
+	zxdh_rx_update_mbuf(hw, rxm, header);
+
+	rxm->nb_segs = 1;
+	rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+	rxm->data_len = len - hdr_size;
+	rxm->port = hw->port_id;
+
+	if (rxm->data_len != rxm->pkt_len) {
+		PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d  bufaddr %p.",
+					rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+		rte_pktmbuf_free(rxm);
+		rxvq->stats.truncated_err++;
+		rxvq->stats.errors++;
+		return -1;
+	}
+	return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_rx *rxvq = rx_queue;
+	struct zxdh_virtqueue *vq = rxvq->vq;
+	struct zxdh_hw *hw = vq->hw;
+	uint32_t lens[ZXDH_MBUF_BURST_SZ];
+	uint16_t nb_rx = 0;
+	uint16_t num;
+	uint16_t i;
+
+	num = nb_pkts;
+	if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+		num = ZXDH_MBUF_BURST_SZ;
+	num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+	if (num == 0) {
+		rxvq->stats.idle++;
+		goto refill;
+	}
+
+	for (i = 0; i < num; i++) {
+		struct rte_mbuf *rxm = rcv_pkts[i];
+		uint16_t len = lens[i];
+
+		if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
+			continue;
+		rcv_pkts[nb_rx] = rxm;
+		zxdh_update_packet_stats(&rxvq->stats, rxm);
+		nb_rx++;
+	}
+	rxvq->stats.packets += nb_rx;
+
+refill:
+	if (vq->vq_free_cnt > 0) {
+		struct rte_eth_dev *dev = hw->eth_dev;
+		refill_que_descs(vq, dev);
+		zxdh_queue_notify(vq);
 	}
 	return nb_rx;
 }
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..dba9567414 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,29 +36,22 @@ struct zxdh_virtnet_stats {
 	uint64_t bytes;
 	uint64_t errors;
 	uint64_t idle;
-	uint64_t full;
-	uint64_t norefill;
-	uint64_t multicast;
-	uint64_t broadcast;
 	uint64_t truncated_err;
 	uint64_t offload_cfg_err;
 	uint64_t invalid_hdr_len_err;
 	uint64_t no_segs_err;
+	uint64_t no_free_tx_desc_err;
 	uint64_t size_bins[8];
 };
 
 struct __rte_cache_aligned zxdh_virtnet_rx {
 	struct zxdh_virtqueue         *vq;
-
-	uint64_t                  mbuf_initializer; /* value to init mbufs. */
 	struct rte_mempool       *mpool;            /* mempool for mbuf allocation */
-	uint16_t                  queue_id;         /* DPDK queue index. */
-	uint16_t                  port_id;          /* Device port identifier. */
 	struct zxdh_virtnet_stats      stats;
 	const struct rte_memzone *mz;               /* mem zone to populate RX ring. */
-
-	/* dummy mbuf, for wraparound when processing RX ring. */
-	struct rte_mbuf           fake_mbuf;
+	uint64_t offloads;
+	uint16_t                  queue_id;         /* DPDK queue index. */
+	uint16_t                  port_id;          /* Device port identifier. */
 };
 
 struct __rte_cache_aligned zxdh_virtnet_tx {
@@ -75,5 +68,6 @@ struct __rte_cache_aligned zxdh_virtnet_tx {
 uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
 
 #endif  /* ZXDH_RXTX_H */
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 39105 bytes --]

^ permalink raw reply related

* [PATCH v6 2/4] net/zxdh: optimize queue structure to improve performance
From: Junlong Wang @ 2026-06-17  8:28 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260617082828.1058127-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 16846 bytes --]

1. Reorganize structure fields for better cache locality.
2. Remove RX software ring (sw_ring) to reduce memory allocation and
   copy.
3. Remove zxdh_mb(), use native rte_mb().
4. Optimize the zxdh_queue_notify() function, remove unnecessary feature
   check.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c |  33 +--------
 drivers/net/zxdh/zxdh_pci.c    |   2 +-
 drivers/net/zxdh/zxdh_queue.c  |  11 ++-
 drivers/net/zxdh/zxdh_queue.h  | 120 ++++++++++++++++-----------------
 drivers/net/zxdh/zxdh_rxtx.c   |  22 +++---
 5 files changed, 77 insertions(+), 111 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 80ff19b3ea..a383619419 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	struct zxdh_virtnet_tx *txvq = NULL;
 	struct zxdh_virtqueue *vq = NULL;
 	size_t sz_hdr_mz = 0;
-	void *sw_ring = NULL;
 	int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
 	int32_t numa_node = dev->device->numa_node;
 	uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	vq->vq_queue_index = vtpci_phy_qidx;
 	vq->vq_nentries = vq_size;
 
-	vq->vq_packed.used_wrap_counter = 1;
-	vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
-	vq->vq_packed.event_flags_shadow = 0;
+	vq->used_wrap_counter = 1;
+	vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
 	if (queue_type == ZXDH_VTNET_RQ)
-		vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+		vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
 
 	/*
 	 * Reserve a memzone for vring elements
@@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	}
 
 	if (queue_type == ZXDH_VTNET_RQ) {
-		size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
-		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
-		if (!sw_ring) {
-			PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
-			ret = -ENOMEM;
-			goto fail_q_alloc;
-		}
-
-		vq->sw_ring = sw_ring;
 		rxvq = &vq->rxq;
 		rxvq->vq = vq;
 		rxvq->port_id = dev->data->port_id;
@@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 		txvq->zxdh_net_hdr_mem = hdr_mz->iova;
 	}
 
-	vq->offset = offsetof(struct rte_mbuf, buf_iova);
 	if (queue_type == ZXDH_VTNET_TQ) {
 		struct zxdh_tx_region *txr = hdr_mz->addr;
-		uint32_t i;
-
 		memset(txr, 0, vq_size * sizeof(*txr));
-		for (i = 0; i < vq_size; i++) {
-			/* first indirect descriptor is always the tx header */
-			struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
-			zxdh_vring_desc_init_indirect_packed(start_dp,
-					RTE_DIM(txr[i].tx_packed_indir));
-			start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
-					offsetof(struct zxdh_tx_region, tx_hdr);
-			/* length will be updated to actual pi hdr size when xmit pkt */
-			start_dp->len = 0;
-		}
 	}
 	if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
 		PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	}
 	return 0;
 fail_q_alloc:
-	rte_free(sw_ring);
 	rte_memzone_free(hdr_mz);
 	rte_memzone_free(mz);
 	rte_free(vq);
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
 
 	notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
 	if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
-			(vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+			(vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
 		notify_data |= RTE_BIT32(31);
 
 	PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 {
 	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
 	struct zxdh_vq_desc_extra *dxp;
-	uint16_t flags = vq->vq_packed.cached_flags;
+	uint16_t flags = vq->cached_flags;
 	int32_t i;
 	uint16_t idx;
 
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 		idx = vq->vq_avail_idx;
 		dxp = &vq->vq_descx[idx];
 		dxp->cookie = (void *)cookie[i];
-		dxp->ndescs = 1;
 		/* rx pkt fill in data_off */
 		start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 		zxdh_queue_store_flags_packed(&start_dp[idx], flags);
 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
 			vq->vq_avail_idx -= vq->vq_nentries;
-			vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
-			flags = vq->vq_packed.cached_flags;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+			flags = vq->cached_flags;
 		}
 	}
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
 	int32_t cnt = 0;
 
 	i = vq->vq_used_cons_idx;
-	while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+	while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
 		dxp = &vq->vq_descx[descs[i].id];
 		if (dxp->cookie != NULL) {
 			rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
 		vq->vq_used_cons_idx++;
 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
 			vq->vq_used_cons_idx -= vq->vq_nentries;
-			vq->vq_packed.used_wrap_counter ^= 1;
+			vq->used_wrap_counter ^= 1;
 		}
 		i = vq->vq_used_cons_idx;
 	}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 711ea291d0..b079272162 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
 
 #include <rte_common.h>
 #include <rte_atomic.h>
+#include <rte_io.h>
 
 #include "zxdh_ethdev.h"
 #include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
 };
 
 struct zxdh_vring_packed {
-	uint32_t num;
 	struct zxdh_vring_packed_desc *desc;
 	struct zxdh_vring_packed_desc_event *driver;
 	struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
 	uint16_t next;
 };
 
+struct zxdh_vring {
+	uint32_t num;
+	struct zxdh_vring_desc  *desc;
+	struct zxdh_vring_avail *avail;
+	struct zxdh_vring_used  *used;
+};
+
 struct zxdh_virtqueue {
+	union {
+		struct {
+			struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+		} vq_split;
+		struct __rte_packed_begin {
+			struct zxdh_vring_packed ring;
+		} __rte_packed_end vq_packed;
+	};
 	struct zxdh_hw  *hw; /* < zxdh_hw structure pointer. */
 
-	struct {
-		/* vring keeping descs and events */
-		struct zxdh_vring_packed ring;
-		uint8_t used_wrap_counter;
-		uint8_t rsv;
-		uint16_t cached_flags; /* < cached flags for descs */
-		uint16_t event_flags_shadow;
-		uint16_t rsv1;
-	} vq_packed;
-
-	uint16_t vq_used_cons_idx; /* < last consumed descriptor */
-	uint16_t vq_nentries;  /* < vring desc numbers */
-	uint16_t vq_free_cnt;  /* < num of desc available */
-	uint16_t vq_avail_idx; /* < sync until needed */
-	uint16_t vq_free_thresh; /* < free threshold */
-	uint16_t rsv2;
-
-	void *vq_ring_virt_mem;  /* < linear address of vring */
-	uint32_t vq_ring_size;
+	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+	uint16_t vq_avail_idx; /**< sync until needed */
+	uint16_t vq_nentries;  /**< vring desc numbers */
+	uint16_t vq_free_cnt;  /**< num of desc available */
+
+	uint16_t cached_flags; /**< cached flags for descs */
+	uint8_t used_wrap_counter;
+	uint8_t rsv;
+	uint16_t vq_free_thresh; /**< free threshold */
+	uint16_t next_qidx;
+
+	void *notify_addr;
 
 	union {
 		struct zxdh_virtnet_rx rxq;
 		struct zxdh_virtnet_tx txq;
 	};
 
-	/*
-	 * physical address of vring, or virtual address
-	 */
-	rte_iova_t vq_ring_mem;
+	uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+	uint16_t event_flags_shadow;
+	uint32_t vq_ring_size;
 
-	/*
+	/**
 	 * Head of the free chain in the descriptor table. If
 	 * there are no free descriptors, this will be set to
 	 * VQ_RING_DESC_CHAIN_END.
-	 */
+	 **/
 	uint16_t  vq_desc_head_idx;
 	uint16_t  vq_desc_tail_idx;
-	uint16_t  vq_queue_index;   /* < PCI queue index */
-	uint16_t  offset; /* < relative offset to obtain addr in mbuf */
-	uint16_t *notify_addr;
-	struct rte_mbuf **sw_ring;  /* < RX software ring. */
+	uint32_t rsv_8B;
+
+	void *vq_ring_virt_mem;  /**< linear address of vring*/
+	/* physical address of vring, or virtual address for virtio_user. */
+	rte_iova_t vq_ring_mem;
+
 	struct zxdh_vq_desc_extra vq_descx[];
 };
 
@@ -296,10 +305,9 @@ static inline void
 zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
 		unsigned long align, uint32_t num)
 {
-	vr->num    = num;
 	vr->desc   = (struct zxdh_vring_packed_desc *)p;
 	vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
-				 vr->num * sizeof(struct zxdh_vring_packed_desc));
+				 num * sizeof(struct zxdh_vring_packed_desc));
 	vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
 				 sizeof(struct zxdh_vring_packed_desc_event)), align);
 }
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
 static inline void
 zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
-		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
 	}
 }
 
 static inline void
 zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
-		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
+		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
 	}
 }
 
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
-	if (weak_barriers)
-		rte_atomic_thread_fence(rte_memory_order_seq_cst);
-	else
-		rte_mb();
-}
-
 static inline
 int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
 {
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
 	rte_io_rmb();
 	used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
 	avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-	return avail == used && used == vq->vq_packed.used_wrap_counter;
+	return avail == used && used == vq->used_wrap_counter;
 }
 
 static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
 	dp->flags = flags;
 }
 
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
-	uint16_t flags;
-	uint16_t used, avail;
-
-	flags = desc->flags;
-	rte_io_rmb();
-	used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
-	avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-	return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
 static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
 {
-	ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+	/* Bit[0:15]: vq queue index
+	 * Bit[16:30]: avail index
+	 * Bit[31]: avail wrap counter
+	 */
+	uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+		ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+		((uint32_t)vq->vq_avail_idx << 16) |
+		vq->vq_queue_index;
+	rte_write32(notify_data, vq->notify_addr);
 }
 
 static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
 {
 	uint16_t flags = 0;
 
-	zxdh_mb(1);
+	rte_mb();
 	flags = vq->vq_packed.ring.device->desc_event_flags;
 
 	return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
 	/* desc_is_used has a load-acquire or rte_io_rmb inside
 	 * and wait for used desc in virtqueue.
 	 */
-	while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
 		id = desc[used_idx].id;
 		do {
 			curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
 			num -= dxp->ndescs;
 			if (used_idx >= size) {
 				used_idx -= size;
-				vq->vq_packed.used_wrap_counter ^= 1;
+				vq->used_wrap_counter ^= 1;
 			}
 			if (dxp->cookie != NULL) {
 				rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	struct zxdh_virtqueue *vq = txvq->vq;
 	uint16_t id = vq->vq_avail_idx;
 	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
-	uint16_t flags = vq->vq_packed.cached_flags;
+	uint16_t flags = vq->cached_flags;
 	struct zxdh_net_hdr_dl *hdr = NULL;
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
 	struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	dp->id   = id;
 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
 		vq->vq_avail_idx -= vq->vq_nentries;
-		vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 	vq->vq_free_cnt--;
 	zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 
 	dxp->ndescs = needed;
 	dxp->cookie = cookie;
-	head_flags |= vq->vq_packed.cached_flags;
+	head_flags |= vq->cached_flags;
 
 	start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
 	start_dp[idx].len  = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 	idx++;
 	if (idx >= vq->vq_nentries) {
 		idx -= vq->vq_nentries;
-		vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 
 	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 		if (likely(idx != head_idx)) {
 			uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
 
-			flags |= vq->vq_packed.cached_flags;
+			flags |= vq->cached_flags;
 			start_dp[idx].flags = flags;
 		}
 
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
-			vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 		}
 	} while ((cookie = cookie->next) != NULL);
 
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
 			free_cnt += dxp->ndescs;
 			if (used_idx >= size) {
 				used_idx -= size;
-				vq->vq_packed.used_wrap_counter ^= 1;
+				vq->used_wrap_counter ^= 1;
 			}
 			if (dxp->cookie != NULL) {
 				rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 		 * desc_is_used has a load-acquire or rte_io_rmb inside
 		 * and wait for used desc in virtqueue.
 		 */
-		if (!zxdh_desc_used(&desc[used_idx], vq))
+		if (!desc_is_used(&desc[used_idx], vq))
 			return i;
 		len[i] = desc[used_idx].len;
 		id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 		vq->vq_used_cons_idx++;
 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
 			vq->vq_used_cons_idx -= vq->vq_nentries;
-			vq->vq_packed.used_wrap_counter ^= 1;
+			vq->used_wrap_counter ^= 1;
 		}
 	}
 	return i;
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 38856 bytes --]

^ permalink raw reply related

* [PATCH v6 1/4] net/zxdh: fix queue enable intr issues
From: Junlong Wang @ 2026-06-17  8:28 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang, stable
In-Reply-To: <20260617082828.1058127-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 1196 bytes --]

Fix incorrect condition check in zxdh_queue_enable_intr.
Change "==" to "!=", consistent with zxdh_queue_disable_intr logic,
to properly enable interrupts when event_flags_shadow is not
already set to ENABLE state.

Fixes: 7677f3871ef3 ("net/zxdh: setup Rx/Tx queues and interrupt")
Cc: stable@dpdk.org

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_queue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..711ea291d0 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -340,8 +340,8 @@ zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
 static inline void
 zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
 		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
 	}
 }
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 2002 bytes --]

^ permalink raw reply related

* [PATCH v6 0/4] net/zxdh: optimize Rx/Tx path performance
From: Junlong Wang @ 2026-06-17  8:28 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260606063226.491848-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 3830 bytes --]

v6:
  - Remove unnecessary error checking code in submit_to_backend_simple() and
    pkt_padding(). Since the max dl_net_hdr_len is always less than
    RTE_PKTMBUF_HEADROOM, rte_pktmbuf_prepend() cannot fail in the
    simple path (single-segment mbufs).

v5:
  - Reorganize patch series, placing interrupt fix as the first patch
    and fix condition check to properly enable interrupts.
  - Fix zxdh_recv_single_pkts() not compacting rcv_pkts[] on failure,
    which could cause use-after-free and mbuf leak.
  - Fix tx_bunch() and tx1() missing store barrier before setting AVAIL flag,
    preventing data race on weakly-ordered architectures.
  - Fix submit_to_backend_simple() writing descriptors for packets that
    failed pkt_padding(), causing mbuf leak.

v4:
  - fix some AI review issues.
  - fix queue enable intr bug.

v3:
  - remove unnecessary NULL check in zxdh_init_queue.
  - Split Ring: Bit[31] is unused and reserved, zxdh_queue_notify(): removing the
    zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) check;
  - remove unnecessary double-free in zxdh_recv_single_pkts();
  - used rte_pktmbuf_mtod();
  - remove rxq_get_vq(q) macro, use q->vq and apply it consistently;
  - Refactoring scatter and mtu check logic in zxdh_dev_mtu_set();
  - set txdp->id = avail_idx + i in tx_bunch/tx1.
  - add comment documenting zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
    the head slot and stores cookies per descriptor via dep[idx].cookie.
  - add one-line comment noting tx_bunch() is the simple path that handles single-segment packets.
  - remove unnecessary extra initialization and the uint32_t cast.

v2:
  - zxdh_rxtx.c, pkt_padding(): modified the return value of pkt_padding();
  - zxdh_rxtx.c, zxdh_recv_single_pkts(): modified so that when zxdh_init_mbuf()
    fails the loop does "continue" and frees mbufs;
  - zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
    in the refill_que_descs();
  - zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
  - zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;

  - zxdh_rxtx.c, zxdh_xmit_pkts_simple(), zxdh_recv_single_pkts(): add stats.bytes count;
  - zxdh_rxtx.c, zxdh_init_mbuf(): remove rte_pktmbuf_dump(stdout, rxm, 40);
  - zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
  - Splitting into separate patches, structure reorganization and sw_ring removal、
    RX recv optimize、Tx xmit optimize、Tx;

v1:
  This patch optimizes the ZXDH PMD's receive and transmit path for better
  performance through several improvements:

- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
  zxdh_recv_single_pkts) for single-segment packet scenarios.
- Remove RX software ring (sw_ring) to reduce memory allocation and
  copy.
- Optimize descriptor management with prefetching and simplified
  cleanup.
- Reorganize structure fields for better cache locality.

  These changes reduce CPU cycles and memory bandwidth consumption,
  resulting in improved packet processing throughput.

Junlong Wang (4):
  net/zxdh: fix queue enable intr issues
  net/zxdh: optimize queue structure to improve performance
  net/zxdh: optimize Rx recv pkts performance
  net/zxdh: optimize Tx xmit pkts performance

 drivers/net/zxdh/zxdh_ethdev.c     |  81 ++---
 drivers/net/zxdh/zxdh_ethdev_ops.c |  23 +-
 drivers/net/zxdh/zxdh_ethdev_ops.h |   4 +
 drivers/net/zxdh/zxdh_pci.c        |   2 +-
 drivers/net/zxdh/zxdh_queue.c      |  11 +-
 drivers/net/zxdh/zxdh_queue.h      | 122 ++++---
 drivers/net/zxdh/zxdh_rxtx.c       | 529 ++++++++++++++++++++++-------
 drivers/net/zxdh/zxdh_rxtx.h       |  27 +-
 8 files changed, 539 insertions(+), 260 deletions(-)

-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 6916 bytes --]

^ permalink raw reply

* [PATCH v3] app/testpmd: add VLAN priority insert support
From: Xingui Yang @ 2026-06-17  8:52 UTC (permalink / raw)
  To: dev
  Cc: stephen, david.marchand, aman.deep.singh, fengchengwen,
	yangshuaisong, lihuisong, liuyonglong, kangfenglong
In-Reply-To: <20260616131001.2955655-1-yangxingui@huawei.com/>

The tx_vlan set and tx_qinq set commands now accept full 16-bit VLAN TCI
(Tag Control Information) instead of only 12-bit VLAN ID. This allows
users to set 802.1p priority and CFI/DEI bits for hardware VLAN insertion.

---
v3:
- Remove TX path validation to accept full 16-bit TCI values
- Rename parameter from vlan_id to vlan_tci in code and documentation
- Rename struct fields tx_vlan_id to tx_vlan_tci for consistency
- Rename token variables cmd_tx_vlan_set_vlanid to cmd_tx_vlan_set_vlantci
- Update cmdline.c structure fields, TOKEN definitions, and help strings
- Add documentation with TCI bit layout and calculation examples

Suggested-by: Stephen Hemminger <stephen@networkplumber.org>
Suggested-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Xingui Yang <yangxingui@huawei.com>
---
 app/test-pmd/5tswap.h                       |  2 +-
 app/test-pmd/cmdline.c                      | 42 ++++++++++-----------
 app/test-pmd/config.c                       | 22 ++++-------
 app/test-pmd/flowgen.c                      |  4 +-
 app/test-pmd/macfwd.h                       |  4 +-
 app/test-pmd/macswap.h                      |  2 +-
 app/test-pmd/macswap_neon.h                 |  2 +-
 app/test-pmd/macswap_sse.h                  |  2 +-
 app/test-pmd/testpmd.h                      |  8 ++--
 app/test-pmd/txonly.c                       |  4 +-
 doc/guides/rel_notes/release_26_07.rst      | 15 ++++++++
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 33 +++++++++++++---
 12 files changed, 85 insertions(+), 55 deletions(-)

diff --git a/app/test-pmd/5tswap.h b/app/test-pmd/5tswap.h
index 345c08b4d0..1909d4e2fa 100644
--- a/app/test-pmd/5tswap.h
+++ b/app/test-pmd/5tswap.h
@@ -85,7 +85,7 @@ do_5tswap(struct rte_mbuf *pkts_burst[], uint16_t nb_rx,
 	txp = &ports[fs->tx_port];
 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
 	vlan_qinq_set(pkts_burst, nb_rx, ol_flags,
-		      txp->tx_vlan_id, txp->tx_vlan_id_outer);
+		      txp->tx_vlan_tci, txp->tx_vlan_tci_outer);
 	for (i = 0; i < nb_rx; i++) {
 		if (likely(i < nb_rx - 1))
 			rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i+1],
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 3c39e27aa8..233d3b8ee2 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -447,9 +447,9 @@ static void cmd_help_long_parsed(void *parsed_result,
 			"rx_vxlan_port rm (udp_port) (port_id)\n"
 			"    Remove an UDP port for VXLAN packet filter on a port\n\n"
 
-			"tx_vlan set (port_id) vlan_id[, vlan_id_outer]\n"
-			"    Set hardware insertion of VLAN IDs (single or double VLAN "
-			"depends on the number of VLAN IDs) in packets sent on a port.\n\n"
+			"tx_vlan set (port_id) vlan_tci[, vlan_tci_outer]\n"
+			"    Set hardware insertion of VLAN TCI (single or double VLAN "
+			"depends on the number of VLAN TCIs) in packets sent on a port.\n\n"
 
 			"tx_vlan set pvid port_id vlan_id (on|off)\n"
 			"    Set port based TX VLAN insertion.\n\n"
@@ -4931,7 +4931,7 @@ struct cmd_tx_vlan_set_result {
 	cmdline_fixed_string_t tx_vlan;
 	cmdline_fixed_string_t set;
 	portid_t port_id;
-	uint16_t vlan_id;
+	uint16_t vlan_tci;
 };
 
 static void
@@ -4949,7 +4949,7 @@ cmd_tx_vlan_set_parsed(void *parsed_result,
 		return;
 	}
 
-	tx_vlan_set(res->port_id, res->vlan_id);
+	tx_vlan_set(res->port_id, res->vlan_tci);
 
 	cmd_reconfig_device_queue(res->port_id, 1, 1);
 }
@@ -4963,21 +4963,21 @@ static cmdline_parse_token_string_t cmd_tx_vlan_set_set =
 static cmdline_parse_token_num_t cmd_tx_vlan_set_portid =
 	TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_result,
 			      port_id, RTE_UINT16);
-static cmdline_parse_token_num_t cmd_tx_vlan_set_vlanid =
+static cmdline_parse_token_num_t cmd_tx_vlan_set_vlantci =
 	TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_result,
-			      vlan_id, RTE_UINT16);
+			      vlan_tci, RTE_UINT16);
 
 static cmdline_parse_inst_t cmd_tx_vlan_set = {
 	.f = cmd_tx_vlan_set_parsed,
 	.data = NULL,
-	.help_str = "tx_vlan set <port_id> <vlan_id>: "
+	.help_str = "tx_vlan set <port_id> <vlan_tci>: "
 		"Enable hardware insertion of a single VLAN header "
-		"with a given TAG Identifier in packets sent on a port",
+		"with a given TCI in packets sent on a port",
 	.tokens = {
 		(void *)&cmd_tx_vlan_set_tx_vlan,
 		(void *)&cmd_tx_vlan_set_set,
 		(void *)&cmd_tx_vlan_set_portid,
-		(void *)&cmd_tx_vlan_set_vlanid,
+		(void *)&cmd_tx_vlan_set_vlantci,
 		NULL,
 	},
 };
@@ -4987,8 +4987,8 @@ struct cmd_tx_vlan_set_qinq_result {
 	cmdline_fixed_string_t tx_vlan;
 	cmdline_fixed_string_t set;
 	portid_t port_id;
-	uint16_t vlan_id;
-	uint16_t vlan_id_outer;
+	uint16_t vlan_tci;
+	uint16_t vlan_tci_outer;
 };
 
 static void
@@ -5006,7 +5006,7 @@ cmd_tx_vlan_set_qinq_parsed(void *parsed_result,
 		return;
 	}
 
-	tx_qinq_set(res->port_id, res->vlan_id, res->vlan_id_outer);
+	tx_qinq_set(res->port_id, res->vlan_tci, res->vlan_tci_outer);
 
 	cmd_reconfig_device_queue(res->port_id, 1, 1);
 }
@@ -5020,25 +5020,25 @@ static cmdline_parse_token_string_t cmd_tx_vlan_set_qinq_set =
 static cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_portid =
 	TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
 		port_id, RTE_UINT16);
-static cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlanid =
+static cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlantci =
 	TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
-		vlan_id, RTE_UINT16);
-static cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlanid_outer =
+		vlan_tci, RTE_UINT16);
+static cmdline_parse_token_num_t cmd_tx_vlan_set_qinq_vlantci_outer =
 	TOKEN_NUM_INITIALIZER(struct cmd_tx_vlan_set_qinq_result,
-		vlan_id_outer, RTE_UINT16);
+		vlan_tci_outer, RTE_UINT16);
 
 static cmdline_parse_inst_t cmd_tx_vlan_set_qinq = {
 	.f = cmd_tx_vlan_set_qinq_parsed,
 	.data = NULL,
-	.help_str = "tx_vlan set <port_id> <vlan_id> <outer_vlan_id>: "
+	.help_str = "tx_vlan set <port_id> <vlan_tci> <vlan_tci_outer>: "
 		"Enable hardware insertion of double VLAN header "
-		"with given TAG Identifiers in packets sent on a port",
+		"with given TCIs in packets sent on a port",
 	.tokens = {
 		(void *)&cmd_tx_vlan_set_qinq_tx_vlan,
 		(void *)&cmd_tx_vlan_set_qinq_set,
 		(void *)&cmd_tx_vlan_set_qinq_portid,
-		(void *)&cmd_tx_vlan_set_qinq_vlanid,
-		(void *)&cmd_tx_vlan_set_qinq_vlanid_outer,
+		(void *)&cmd_tx_vlan_set_qinq_vlantci,
+		(void *)&cmd_tx_vlan_set_qinq_vlantci_outer,
 		NULL,
 	},
 };
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 9d457ca88e..3df7412ef6 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -6918,14 +6918,11 @@ vlan_tpid_set(portid_t port_id, enum rte_vlan_type vlan_type, uint16_t tp_id)
 }
 
 void
-tx_vlan_set(portid_t port_id, uint16_t vlan_id)
+tx_vlan_set(portid_t port_id, uint16_t vlan_tci)
 {
 	struct rte_eth_dev_info dev_info;
 	int ret;
 
-	if (vlan_id_is_invalid(vlan_id))
-		return;
-
 	if (ports[port_id].dev_conf.txmode.offloads &
 	    RTE_ETH_TX_OFFLOAD_QINQ_INSERT) {
 		fprintf(stderr, "Error, as QinQ has been enabled.\n");
@@ -6945,20 +6942,15 @@ tx_vlan_set(portid_t port_id, uint16_t vlan_id)
 
 	tx_vlan_reset(port_id);
 	ports[port_id].dev_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
-	ports[port_id].tx_vlan_id = vlan_id;
+	ports[port_id].tx_vlan_tci = vlan_tci;
 }
 
 void
-tx_qinq_set(portid_t port_id, uint16_t vlan_id, uint16_t vlan_id_outer)
+tx_qinq_set(portid_t port_id, uint16_t vlan_tci, uint16_t vlan_tci_outer)
 {
 	struct rte_eth_dev_info dev_info;
 	int ret;
 
-	if (vlan_id_is_invalid(vlan_id))
-		return;
-	if (vlan_id_is_invalid(vlan_id_outer))
-		return;
-
 	ret = eth_dev_info_get_print_err(port_id, &dev_info);
 	if (ret != 0)
 		return;
@@ -6973,8 +6965,8 @@ tx_qinq_set(portid_t port_id, uint16_t vlan_id, uint16_t vlan_id_outer)
 	tx_vlan_reset(port_id);
 	ports[port_id].dev_conf.txmode.offloads |= (RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
 						    RTE_ETH_TX_OFFLOAD_QINQ_INSERT);
-	ports[port_id].tx_vlan_id = vlan_id;
-	ports[port_id].tx_vlan_id_outer = vlan_id_outer;
+	ports[port_id].tx_vlan_tci = vlan_tci;
+	ports[port_id].tx_vlan_tci_outer = vlan_tci_outer;
 }
 
 void
@@ -6983,8 +6975,8 @@ tx_vlan_reset(portid_t port_id)
 	ports[port_id].dev_conf.txmode.offloads &=
 				~(RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
 				  RTE_ETH_TX_OFFLOAD_QINQ_INSERT);
-	ports[port_id].tx_vlan_id = 0;
-	ports[port_id].tx_vlan_id_outer = 0;
+	ports[port_id].tx_vlan_tci = 0;
+	ports[port_id].tx_vlan_tci_outer = 0;
 }
 
 void
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index 53b5f24f11..8dac7b9209 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -84,8 +84,8 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 	rte_pktmbuf_free_bulk(pkts_burst, nb_rx);
 
 	mbp = current_fwd_lcore()->mbp;
-	vlan_tci = ports[fs->tx_port].tx_vlan_id;
-	vlan_tci_outer = ports[fs->tx_port].tx_vlan_id_outer;
+	vlan_tci = ports[fs->tx_port].tx_vlan_tci;
+	vlan_tci_outer = ports[fs->tx_port].tx_vlan_tci_outer;
 
 	tx_offloads = ports[fs->tx_port].dev_conf.txmode.offloads;
 	if (tx_offloads	& RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
diff --git a/app/test-pmd/macfwd.h b/app/test-pmd/macfwd.h
index ae2346e589..5209644bb7 100644
--- a/app/test-pmd/macfwd.h
+++ b/app/test-pmd/macfwd.h
@@ -37,8 +37,8 @@ do_macfwd(struct rte_mbuf *pkts_burst[], uint16_t nb_rx,
 		mb->ol_flags |= ol_flags;
 		mb->l2_len = sizeof(struct rte_ether_hdr);
 		mb->l3_len = sizeof(struct rte_ipv4_hdr);
-		mb->vlan_tci = txp->tx_vlan_id;
-		mb->vlan_tci_outer = txp->tx_vlan_id_outer;
+		mb->vlan_tci = txp->tx_vlan_tci;
+		mb->vlan_tci_outer = txp->tx_vlan_tci_outer;
 	}
 }
 
diff --git a/app/test-pmd/macswap.h b/app/test-pmd/macswap.h
index 29c252bb8f..fe15934d96 100644
--- a/app/test-pmd/macswap.h
+++ b/app/test-pmd/macswap.h
@@ -19,7 +19,7 @@ do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
 
 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
 	vlan_qinq_set(pkts, nb, ol_flags,
-			txp->tx_vlan_id, txp->tx_vlan_id_outer);
+			txp->tx_vlan_tci, txp->tx_vlan_tci_outer);
 
 	for (i = 0; i < nb; i++) {
 		if (likely(i < nb - 1))
diff --git a/app/test-pmd/macswap_neon.h b/app/test-pmd/macswap_neon.h
index df6c260cd4..195c7d1640 100644
--- a/app/test-pmd/macswap_neon.h
+++ b/app/test-pmd/macswap_neon.h
@@ -32,7 +32,7 @@ do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
 
 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
 	vlan_qinq_set(pkts, nb, ol_flags,
-			txp->tx_vlan_id, txp->tx_vlan_id_outer);
+			txp->tx_vlan_tci, txp->tx_vlan_tci_outer);
 
 	i = 0;
 	r = nb;
diff --git a/app/test-pmd/macswap_sse.h b/app/test-pmd/macswap_sse.h
index 1f547388b7..341c9ba681 100644
--- a/app/test-pmd/macswap_sse.h
+++ b/app/test-pmd/macswap_sse.h
@@ -29,7 +29,7 @@ do_macswap(struct rte_mbuf *pkts[], uint16_t nb,
 
 	ol_flags = ol_flags_init(txp->dev_conf.txmode.offloads);
 	vlan_qinq_set(pkts, nb, ol_flags,
-			txp->tx_vlan_id, txp->tx_vlan_id_outer);
+			txp->tx_vlan_tci, txp->tx_vlan_tci_outer);
 
 	i = 0;
 	r = nb;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 3d4b36d668..bf88758118 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -346,8 +346,8 @@ struct rte_port {
 	uint16_t		parse_tunnel:1; /**< Parse internal headers */
 	uint16_t                tso_segsz;  /**< Segmentation offload MSS for non-tunneled packets. */
 	uint16_t                tunnel_tso_segsz; /**< Segmentation offload MSS for tunneled pkts. */
-	uint16_t                tx_vlan_id;/**< The tag ID */
-	uint16_t                tx_vlan_id_outer;/**< The outer tag ID */
+	uint16_t                tx_vlan_tci;/**< The TCI */
+	uint16_t                tx_vlan_tci_outer;/**< The outer TCI */
 	volatile uint16_t        port_status;    /**< port started or not */
 	uint8_t                 need_setup;     /**< port just attached */
 	uint8_t                 need_reconfig;  /**< need reconfiguring port or not */
@@ -1142,8 +1142,8 @@ int rx_vft_set(portid_t port_id, uint16_t vlan_id, int on);
 void vlan_extend_set(portid_t port_id, int on);
 void vlan_tpid_set(portid_t port_id, enum rte_vlan_type vlan_type,
 		   uint16_t tp_id);
-void tx_vlan_set(portid_t port_id, uint16_t vlan_id);
-void tx_qinq_set(portid_t port_id, uint16_t vlan_id, uint16_t vlan_id_outer);
+void tx_vlan_set(portid_t port_id, uint16_t vlan_tci);
+void tx_qinq_set(portid_t port_id, uint16_t vlan_tci, uint16_t vlan_tci_outer);
 void tx_vlan_reset(portid_t port_id);
 void tx_vlan_pvid_set(portid_t port_id, uint16_t vlan_id, int on);
 
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 64893fa205..a4acb85d29 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -325,8 +325,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
 	mbp = current_fwd_lcore()->mbp;
 	txp = &ports[fs->tx_port];
 	tx_offloads = txp->dev_conf.txmode.offloads;
-	vlan_tci = txp->tx_vlan_id;
-	vlan_tci_outer = txp->tx_vlan_id_outer;
+	vlan_tci = txp->tx_vlan_tci;
+	vlan_tci_outer = txp->tx_vlan_tci_outer;
 	if (tx_offloads	& RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
 		ol_flags = RTE_MBUF_F_TX_VLAN;
 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_QINQ_INSERT)
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 5d7aa8d1bf..03e24030bc 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -150,6 +150,21 @@ New Features
   * Added ``eof`` devarg to use link state to signal end of receive file input.
   * Added unit test suite.
 
+* **Updated testpmd application.**
+
+  Added support for setting VLAN priority and CFI/DEI bits in ``tx_vlan set``
+  and ``tx_qinq set`` commands. The ``vlan_tci`` parameter now accepts the
+  full 16-bit VLAN Tag Control Information (TCI) format:
+
+  Priority (bits 13-15)
+     802.1p Class of Service value (0-7).
+
+  CFI/DEI (bit 12)
+     Canonical Format Indicator / Drop Eligible Indicator.
+
+  VLAN ID (bits 0-11)
+     VLAN identifier (0-4095).
+
 * **Added AI review helpers.**
 
   Added AGENTS.md file for AI review
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index f0f2b0758b..364d348372 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -1118,17 +1118,40 @@ Remove an UDP port for VXLAN packet filter on a port::
 tx_vlan set
 ~~~~~~~~~~~
 
-Set hardware insertion of VLAN IDs in packets sent on a port::
+Set hardware insertion of VLAN TCI (Tag Control Information) in packets sent on a port::
 
-   testpmd> tx_vlan set (port_id) vlan_id[, vlan_id_outer]
+   testpmd> tx_vlan set (port_id) vlan_tci[, vlan_tci_outer]
+
+The ``vlan_tci`` parameter accepts the full 16-bit VLAN Tag Control Information (TCI):
+
+Bits 0-11
+   VLAN ID (0-4095).
+
+Bit 12
+   CFI (Canonical Format Indicator) / DEI (Drop Eligible Indicator).
+
+Bits 13-15
+   Priority (0-7, 802.1p Class of Service).
 
 For example, set a single VLAN ID (5) insertion on port 0::
 
-   tx_vlan set 0 5
+   testpmd> tx_vlan set 0 5
+
+Or, set a VLAN with priority 3 and VLAN ID 100 on port 0::
+
+   testpmd> tx_vlan set 0 0x6064
+
+Calculation: ``(priority << 13) | vlan_id``.
+Priority 3 in bits 13-15: ``(3 << 13) = 0x6000``.
+VLAN ID 100 in bits 0-11: ``100 = 0x0064``.
+Combined TCI: ``0x6000 | 0x0064 = 0x6064``.
+
+Or, set double VLAN with priority (inner: priority 2, ID 10; outer: priority 5, ID 20)::
 
-Or, set double VLAN ID (inner: 2, outer: 3) insertion on port 1::
+   testpmd> tx_vlan set 1 0x400A 0xA014
 
-   tx_vlan set 1 2 3
+Inner TCI calculation: ``(2 << 13) | 10 = 0x400A``.
+Outer TCI calculation: ``(5 << 13) | 20 = 0xA014``.
 
 
 tx_vlan set pvid
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v2] app/testpmd: add VLAN priority insert support
From: yangxingui @ 2026-06-17  8:42 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, david.marchand, aman.deep.singh, fengchengwen, yangshuaisong,
	lihuisong, liuyonglong, kangfenglong
In-Reply-To: <20260616072328.1dcb8cf8@phoenix.local>



On 2026/6/16 22:23, Stephen Hemminger wrote:
> On Tue, 16 Jun 2026 21:10:01 +0800
> Xingui Yang <yangxingui@huawei.com> wrote:
> 
>> The tx_vlan set and tx_qinq set commands only accepted VLAN ID in range
>> [0, 4095]. This prevented users from setting 802.1p priority and CFI
>> bits when using hardware VLAN insertion.
>>
>> Since mbuf vlan_tci field already supports full 16-bit VLAN Tag Control
>> Information (TCI), relax the validation for TX paths to allow priority
>> and CFI bits. The vlan_id parameter now accepts:
>>    - Bits 0-11:  VLAN ID (0-4095)
>>    - Bit 12:    CFI (Canonical Format Indicator)
>>    - Bits 13-15: Priority (0-7, 802.1p CoS)
>>
>> Suggested-by: Stephen Hemminger <stephen@networkplumber.org>
>> Suggested-by: fengchengwen <fengchengwen@huawei.com>
>> Signed-off-by: Xingui Yang <yangxingui@huawei.com>
>> ---
>> v2:
>> - Removed --enable-vlan-priority option and global variable as suggested
>>    by Stephen Hemminger. The feature is now always enabled for TX paths
>> - RX VLAN filter continues to enforce strict VLAN ID validation as
>>    suggested by fengchengwen
>> - Added documentation updates for testpmd_funcs.rst and release notes
>>
>>   app/test-pmd/config.c                       | 13 ++++++++-----
>>   doc/guides/rel_notes/release_26_07.rst      |  7 +++++++
>>   doc/guides/testpmd_app_ug/testpmd_funcs.rst | 17 ++++++++++++++---
>>   3 files changed, 29 insertions(+), 8 deletions(-)
>>
>> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
>> index 9d457ca88e..38758f9c05 100644
>> --- a/app/test-pmd/config.c
>> +++ b/app/test-pmd/config.c
>> @@ -1241,8 +1241,11 @@ void print_valid_ports(void)
>>   }
>>   
>>   static int
>> -vlan_id_is_invalid(uint16_t vlan_id)
>> +vlan_id_is_invalid(uint16_t vlan_id, bool is_tx)
>>   {
>> +	if (is_tx)
>> +		return 0;
>> +
>>   	if (vlan_id < 4096)
>>   		return 0;
>>   	fprintf(stderr, "Invalid vlan_id %d (must be < 4096)\n", vlan_id);
>> @@ -6876,7 +6879,7 @@ rx_vft_set(portid_t port_id, uint16_t vlan_id, int on)
>>   
>>   	if (port_id_is_invalid(port_id, ENABLED_WARN))
>>   		return 1;
>> -	if (vlan_id_is_invalid(vlan_id))
>> +	if (vlan_id_is_invalid(vlan_id, false))
>>   		return 1;
>>   	diag = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
>>   	if (diag == 0)
>> @@ -6923,7 +6926,7 @@ tx_vlan_set(portid_t port_id, uint16_t vlan_id)
>>   	struct rte_eth_dev_info dev_info;
>>   	int ret;
>>   
>> -	if (vlan_id_is_invalid(vlan_id))
>> +	if (vlan_id_is_invalid(vlan_id, true))
>>   		return;
> 
> Why have the is_tx flag if it is always used as constant?
> Just remove the whole vlan_id_is_invalid() branch test in the transmit path.
> Maybe add a comment that any VLAN is allowed on transmit?
> 
> Or make a new function. Since VLAN of 0xffff is reserved. Though you might want
> to allow it since testpmd is for testing even invalid packets.

Hi, Stephen,

Agreed, and I've also taken the opportunity to improve consistency by 
renaming vlan_id to vlan_tci. I'll update this in the next version.

Thanks,
Xingui



^ permalink raw reply

* [PATCH v2 3/3] test/fib6: extended drift test cases
From: Maxime Leroy @ 2026-06-17  8:24 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, Maxime Leroy
In-Reply-To: <20260617082400.142129-1-maxime@leroys.fr>

Four additional test cases exercise scenarios touched by the
byte-boundary tbl8 reservation accounting:

  - test_drift_compression: parent + compressed child, DEL parent
    forces decompression, re-ADD ancestor re-compresses.

  - test_drift_multilevel: /28 + /48 + /96 chain with mixed
    compressed and non-compressed links, then DEL of the middle
    prefix.

  - test_drift_stress: pseudo-random ADD/DEL sequence checking that
    no operation returns -ENOSPC under a leaked rsvd_tbl8s.

  - test_drift_tight_pool: pool sized to exactly the legitimate
    envelope, re-ADD after decompression must succeed.

Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
 app/test/test_fib6.c | 269 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 265 insertions(+), 4 deletions(-)

diff --git a/app/test/test_fib6.c b/app/test/test_fib6.c
index c4283f3f2d..ad68645428 100644
--- a/app/test/test_fib6.c
+++ b/app/test/test_fib6.c
@@ -26,6 +26,10 @@ static int32_t test_lookup(void);
 static int32_t test_invalid_rcu(void);
 static int32_t test_fib_rcu_sync_rw(void);
 static int32_t test_drift(void);
+static int32_t test_drift_compression(void);
+static int32_t test_drift_multilevel(void);
+static int32_t test_drift_stress(void);
+static int32_t test_drift_tight_pool(void);
 
 #define MAX_ROUTES	(1 << 16)
 /** Maximum number of tbl8 for 2-byte entries */
@@ -601,10 +605,9 @@ test_fib_rcu_sync_rw(void)
 }
 
 /*
- * Reproducer for the rsvd_tbl8s drift bug. depth_diff used to maintain
- * rsvd_tbl8s is computed from the current RIB state, so it is not
- * invariant between the ADD of a prefix and its later DEL when a
- * covering parent prefix is removed in between.
+ * Reproducer for the rsvd_tbl8s drift bug. The tbl8 reservation
+ * accounting must remain balanced even when a covering parent prefix
+ * is removed between an ADD and its later matching DEL.
  *
  * Layout: one /28 parent (fcde::/28) and three /48 siblings under it
  * (fcde:0:6000::/48, fcde:1:6000::/48, fcde:2:6000::/48). The second
@@ -672,6 +675,260 @@ test_drift(void)
 	return TEST_SUCCESS;
 }
 
+/*
+ * Exercise compression (same nh as parent), forced decompression on
+ * DEL parent, then re-compression after re-adding the same ancestor.
+ * The tbl8 reservation accounting must remain balanced even though
+ * the child is physically decompressed/recompressed in the dataplane.
+ *
+ * Layout: parent fcde::/28 and child fcde:0:6000::/48, both nh=1.
+ *
+ *   ADD /28 (no ancestor)                    rsvd_tbl8s += 1
+ *   ADD /48 (compressed under /28)           rsvd_tbl8s += 2
+ *   DEL /28 (decompresses /48)               rsvd unchanged (/48 keeps)
+ *   re-ADD /28 (re-compresses /48)           rsvd unchanged
+ *   DEL /48                                  rsvd_tbl8s -= 2
+ *   DEL /28                                  rsvd_tbl8s -= 1
+ */
+static int32_t
+test_drift_compression(void)
+{
+	struct rte_fib6_conf config = { 0 };
+	struct rte_fib6 *fib;
+	struct rte_ipv6_addr parent = RTE_IPV6(0xfcde, 0, 0, 0, 0, 0, 0, 0);
+	struct rte_ipv6_addr child = RTE_IPV6(0xfcde, 0, 0x6000, 0, 0, 0, 0, 0);
+	int ret;
+
+	config.max_routes = 1024;
+	config.rib_ext_sz = 0;
+	config.default_nh = 0;
+	config.type = RTE_FIB6_TRIE;
+	config.trie.nh_sz = RTE_FIB6_TRIE_2B;
+	config.trie.num_tbl8 = 256;
+
+	fib = rte_fib6_create(__func__, SOCKET_ID_ANY, &config);
+	RTE_TEST_ASSERT(fib != NULL, "Failed to create FIB\n");
+
+	/* Compressed: child shares the parent's nh, modify_dp is skipped */
+	ret = rte_fib6_add(fib, &parent, 28, 1);
+	RTE_TEST_ASSERT(ret == 0, "ADD /28 failed\n");
+	ret = rte_fib6_add(fib, &child, 48, 1);
+	RTE_TEST_ASSERT(ret == 0, "ADD /48 (compressed) failed\n");
+
+	/* DEL parent forces decompression: child must be materialized */
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "DEL /28 (decompression) failed\n");
+
+	/* Re-add parent with same nh: child becomes compressed again */
+	ret = rte_fib6_add(fib, &parent, 28, 1);
+	RTE_TEST_ASSERT(ret == 0, "Re-ADD /28 failed\n");
+
+	ret = rte_fib6_delete(fib, &child, 48);
+	RTE_TEST_ASSERT(ret == 0, "DEL /48 failed\n");
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "DEL /28 final failed\n");
+
+	rte_fib6_free(fib);
+	return TEST_SUCCESS;
+}
+
+/*
+ * Three-level nesting with compressed and non-compressed paths, then
+ * DEL of the middle prefix. The byte-boundary supernet accounting
+ * must remain balanced through the chain.
+ *
+ * Layout: grand fcde::/28 nh=1, mid fcde:0:6000::/48 nh=1 (compressed
+ * under grand), leaf fcde:0:6000:0:0:4000::/96 nh=2 (not compressed).
+ *
+ *   ADD /28 (no ancestor)                    rsvd_tbl8s += 1
+ *   ADD /48 (compressed under /28)           rsvd_tbl8s += 2
+ *   ADD /96 (not compressed under /48)       rsvd_tbl8s += 6
+ *   DEL /48 (leaf /96 still covers 32, 40)   rsvd_tbl8s -= 0
+ *   DEL /28 (leaf /96 still covers level 24) rsvd_tbl8s -= 0
+ *   DEL /96 (last route gone, all freed)     rsvd_tbl8s -= 9
+ *
+ * Boundaries get refunded only on the DEL that makes them empty;
+ * intermediate DELs that leave a covering descendant are refund-free.
+ */
+static int32_t
+test_drift_multilevel(void)
+{
+	struct rte_fib6_conf config = { 0 };
+	struct rte_fib6 *fib;
+	struct rte_ipv6_addr grand = RTE_IPV6(0xfcde, 0, 0, 0, 0, 0, 0, 0);
+	struct rte_ipv6_addr mid =   RTE_IPV6(0xfcde, 0, 0x6000, 0, 0, 0, 0, 0);
+	struct rte_ipv6_addr leaf =  RTE_IPV6(0xfcde, 0, 0x6000, 0, 0, 0x4000, 0, 0);
+	int ret;
+
+	config.max_routes = 1024;
+	config.rib_ext_sz = 0;
+	config.default_nh = 0;
+	config.type = RTE_FIB6_TRIE;
+	config.trie.nh_sz = RTE_FIB6_TRIE_2B;
+	config.trie.num_tbl8 = 256;
+
+	fib = rte_fib6_create(__func__, SOCKET_ID_ANY, &config);
+	RTE_TEST_ASSERT(fib != NULL, "Failed to create FIB\n");
+
+	ret = rte_fib6_add(fib, &grand, 28, 1);
+	RTE_TEST_ASSERT(ret == 0, "ADD /28 failed\n");
+	ret = rte_fib6_add(fib, &mid, 48, 1);  /* compressed under /28 */
+	RTE_TEST_ASSERT(ret == 0, "ADD /48 failed\n");
+	ret = rte_fib6_add(fib, &leaf, 96, 2); /* non-compressed under /48 */
+	RTE_TEST_ASSERT(ret == 0, "ADD /96 failed\n");
+
+	/* DEL the middle prefix: byte-boundary accounting must stay
+	 * coherent so the subsequent operations succeed.
+	 */
+	ret = rte_fib6_delete(fib, &mid, 48);
+	RTE_TEST_ASSERT(ret == 0, "DEL /48 failed\n");
+
+	ret = rte_fib6_delete(fib, &grand, 28);
+	RTE_TEST_ASSERT(ret == 0, "DEL /28 failed\n");
+	ret = rte_fib6_delete(fib, &leaf, 96);
+	RTE_TEST_ASSERT(ret == 0, "DEL /96 failed\n");
+
+	rte_fib6_free(fib);
+	return TEST_SUCCESS;
+}
+
+/*
+ * Pseudo-random ADD/DEL sequence over 8 prefixes with varying depths
+ * and next-hops. A hand-rolled LCG (not rte_rand) makes the sequence
+ * reproducible across runs and DPDK versions. After all prefixes are
+ * removed, a final ADD/DEL pair must succeed - it would fail under a
+ * leaked rsvd_tbl8s.
+ *
+ * depths[1] and depths[6] both use /36 on purpose: ips[1] and ips[6]
+ * are distinct prefixes, so this exercises two parallel /36 ADD/DEL
+ * paths that share byte boundaries 24 and 32.
+ */
+static int32_t
+test_drift_stress(void)
+{
+	uint8_t depths[8] = { 28, 36, 40, 48, 64, 80, 36, 128 };
+	struct rte_fib6_conf config = { 0 };
+	struct rte_ipv6_addr ips[8] = {
+		RTE_IPV6(0xfcde, 0, 0, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x1, 0, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x2, 0, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x2, 0x4000, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x2, 0x4000, 0x1000, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x2, 0x4000, 0x1000, 0x1, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x3, 0, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 0x3, 0, 0, 0, 0, 0, 0x1),
+	};
+	uint8_t live[8] = { 0 };
+	struct rte_fib6 *fib;
+	uint32_t seed = 0x4242;
+	unsigned int i, idx;
+	int ret;
+
+	config.max_routes = 64;
+	config.rib_ext_sz = 0;
+	config.default_nh = 0;
+	config.type = RTE_FIB6_TRIE;
+	config.trie.nh_sz = RTE_FIB6_TRIE_2B;
+	config.trie.num_tbl8 = 256;
+
+	fib = rte_fib6_create(__func__, SOCKET_ID_ANY, &config);
+	RTE_TEST_ASSERT(fib != NULL, "Failed to create FIB\n");
+
+	for (i = 0; i < 2000; i++) {
+		seed = seed * 1103515245u + 12345u;
+		idx = (seed >> 8) & 7;
+		if (live[idx]) {
+			ret = rte_fib6_delete(fib, &ips[idx], depths[idx]);
+			RTE_TEST_ASSERT(ret == 0,
+				"DEL idx %u (depth /%u) failed (ret=%d)\n",
+				idx, depths[idx], ret);
+			live[idx] = 0;
+		} else {
+			uint64_t nh = ((seed >> 16) & 0xff) + 1;
+			ret = rte_fib6_add(fib, &ips[idx], depths[idx], nh);
+			RTE_TEST_ASSERT(ret == 0,
+				"ADD idx %u (depth /%u nh=%" PRIu64 ") failed (ret=%d)\n",
+				idx, depths[idx], nh, ret);
+			live[idx] = 1;
+		}
+	}
+
+	/* Drain everything */
+	for (i = 0; i < RTE_DIM(live); i++) {
+		if (live[i]) {
+			ret = rte_fib6_delete(fib, &ips[i], depths[i]);
+			RTE_TEST_ASSERT(ret == 0,
+				"final drain DEL idx %u failed (ret=%d)\n",
+				i, ret);
+		}
+	}
+
+	/* If rsvd_tbl8s had leaked, this fresh ADD would fail */
+	ret = rte_fib6_add(fib, &ips[0], depths[0], 0xff);
+	RTE_TEST_ASSERT(ret == 0,
+		"post-drain ADD failed (rsvd leaked?) (ret=%d)\n", ret);
+	ret = rte_fib6_delete(fib, &ips[0], depths[0]);
+	RTE_TEST_ASSERT(ret == 0, "post-drain DEL failed\n");
+
+	rte_fib6_free(fib);
+	return TEST_SUCCESS;
+}
+
+/* Tight-pool re-compression scenario. Pool sized to exactly the
+ * highest legitimate envelope: an ADD that becomes a closer ancestor
+ * of an existing descendant must succeed because the byte-boundary
+ * supernet accounting reports the same envelope post-operation.
+ *
+ *   num_tbl8 = 3
+ *   ADD /28 nh=1            rsvd = 1
+ *   ADD /48 nh=1 (compr.)   rsvd = 3 (/48 reserves 2 new boundaries)
+ *   DEL /28                 rsvd unchanged (/48 still holds them)
+ *   RE-ADD /28 nh=1         rsvd unchanged (already reserved)
+ *                           (pre-fix: pre-check rejects)
+ */
+static int32_t
+test_drift_tight_pool(void)
+{
+	struct rte_fib6_conf config = { 0 };
+	struct rte_fib6 *fib;
+	struct rte_ipv6_addr parent = RTE_IPV6(0xfcde, 0, 0, 0, 0, 0, 0, 0);
+	struct rte_ipv6_addr child = RTE_IPV6(0xfcde, 0, 0x6000, 0, 0, 0, 0, 0);
+	int ret;
+
+	config.max_routes = 16;
+	config.rib_ext_sz = 0;
+	config.default_nh = 0;
+	config.type = RTE_FIB6_TRIE;
+	config.trie.nh_sz = RTE_FIB6_TRIE_2B;
+	config.trie.num_tbl8 = 3;
+
+	fib = rte_fib6_create(__func__, SOCKET_ID_ANY, &config);
+	RTE_TEST_ASSERT(fib != NULL, "Failed to create FIB\n");
+
+	ret = rte_fib6_add(fib, &parent, 28, 1);
+	RTE_TEST_ASSERT(ret == 0, "ADD /28 failed (ret=%d)\n", ret);
+	ret = rte_fib6_add(fib, &child, 48, 1);
+	RTE_TEST_ASSERT(ret == 0, "ADD /48 failed (ret=%d)\n", ret);
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "DEL /28 failed (ret=%d)\n", ret);
+
+	/* Re-add /28: byte boundary 24 is already occupied by the /48,
+	 * so the re-added /28 introduces no new reservation. The
+	 * envelope stays at 3 and still fits the pool of 3.
+	 */
+	ret = rte_fib6_add(fib, &parent, 28, 1);
+	RTE_TEST_ASSERT(ret == 0,
+		"Re-ADD /28 spuriously failed (ret=%d)\n", ret);
+
+	ret = rte_fib6_delete(fib, &child, 48);
+	RTE_TEST_ASSERT(ret == 0, "DEL /48 failed (ret=%d)\n", ret);
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "Final DEL /28 failed (ret=%d)\n", ret);
+
+	rte_fib6_free(fib);
+	return TEST_SUCCESS;
+}
+
 static struct unit_test_suite fib6_fast_tests = {
 	.suite_name = "fib6 autotest",
 	.setup = NULL,
@@ -685,6 +942,10 @@ static struct unit_test_suite fib6_fast_tests = {
 	TEST_CASE(test_invalid_rcu),
 	TEST_CASE(test_fib_rcu_sync_rw),
 	TEST_CASE(test_drift),
+	TEST_CASE(test_drift_compression),
+	TEST_CASE(test_drift_multilevel),
+	TEST_CASE(test_drift_stress),
+	TEST_CASE(test_drift_tight_pool),
 	TEST_CASES_END()
 	}
 };
-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 2/3] test/fib6: add reproducer for tbl8 reservation drift
From: Maxime Leroy @ 2026-06-17  8:23 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, Maxime Leroy
In-Reply-To: <20260617082400.142129-1-maxime@leroys.fr>

test_drift covers the asymmetric ADD parent / ADD children / DEL
parent / DEL children sequence that wraps rsvd_tbl8s past zero in a
single iteration. After the wrap the next /25+ ADD is rejected with
-ENOSPC even though the tbl8 pool is empty. With the preceding fix
in place the final ADD succeeds.

Signed-off-by: Maxime Leroy <maxime@leroys.fr>
---
 app/test/test_fib6.c | 74 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/app/test/test_fib6.c b/app/test/test_fib6.c
index fffb590dbf..c4283f3f2d 100644
--- a/app/test/test_fib6.c
+++ b/app/test/test_fib6.c
@@ -25,6 +25,7 @@ static int32_t test_get_invalid(void);
 static int32_t test_lookup(void);
 static int32_t test_invalid_rcu(void);
 static int32_t test_fib_rcu_sync_rw(void);
+static int32_t test_drift(void);
 
 #define MAX_ROUTES	(1 << 16)
 /** Maximum number of tbl8 for 2-byte entries */
@@ -599,6 +600,78 @@ test_fib_rcu_sync_rw(void)
 	return status == 0 ? TEST_SUCCESS : TEST_FAILED;
 }
 
+/*
+ * Reproducer for the rsvd_tbl8s drift bug. depth_diff used to maintain
+ * rsvd_tbl8s is computed from the current RIB state, so it is not
+ * invariant between the ADD of a prefix and its later DEL when a
+ * covering parent prefix is removed in between.
+ *
+ * Layout: one /28 parent (fcde::/28) and three /48 siblings under it
+ * (fcde:0:6000::/48, fcde:1:6000::/48, fcde:2:6000::/48). The second
+ * hextet's high 12 bits are zero, so the three /48 IPs all fall inside
+ * the /28.
+ *
+ * One asymmetric sequence is enough to wrap the counter:
+ *   ADD /28                                  rsvd_tbl8s += 1
+ *   ADD /48 child_0,1,2 (with /28 parent)    rsvd_tbl8s += 2 each (+6)
+ *   DEL /28 (sibling /48 found)              rsvd_tbl8s -= 0
+ *   DEL /48 child_0,1,2 (no parent left)     rsvd_tbl8s -= 3 each (-9)
+ */
+static int32_t
+test_drift(void)
+{
+	struct rte_fib6_conf config = { 0 };
+	struct rte_fib6 *fib;
+	struct rte_ipv6_addr parent =
+		RTE_IPV6(0xfcde, 0, 0, 0, 0, 0, 0, 0);
+	struct rte_ipv6_addr child[3] = {
+		RTE_IPV6(0xfcde, 0, 0x6000, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 1, 0x6000, 0, 0, 0, 0, 0),
+		RTE_IPV6(0xfcde, 2, 0x6000, 0, 0, 0, 0, 0),
+	};
+	unsigned int c;
+	int ret;
+
+	config.max_routes = 1024;
+	config.rib_ext_sz = 0;
+	config.default_nh = 0;
+	config.type = RTE_FIB6_TRIE;
+	config.trie.nh_sz = RTE_FIB6_TRIE_2B;
+	config.trie.num_tbl8 = 256;
+
+	fib = rte_fib6_create(__func__, SOCKET_ID_ANY, &config);
+	RTE_TEST_ASSERT(fib != NULL, "Failed to create FIB\n");
+
+	ret = rte_fib6_add(fib, &parent, 28, 0xa);
+	RTE_TEST_ASSERT(ret == 0, "ADD /28 failed (ret=%d)\n", ret);
+
+	for (c = 0; c < 3; c++) {
+		ret = rte_fib6_add(fib, &child[c], 48, 0xb + c);
+		RTE_TEST_ASSERT(ret == 0,
+			"ADD /48 child %u failed (ret=%d)\n", c, ret);
+	}
+
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "DEL /28 failed (ret=%d)\n", ret);
+
+	for (c = 0; c < 3; c++) {
+		ret = rte_fib6_delete(fib, &child[c], 48);
+		RTE_TEST_ASSERT(ret == 0,
+			"DEL /48 child %u failed (ret=%d)\n", c, ret);
+	}
+
+	/* Pre-fix: -ENOSPC. Post-fix: succeeds. */
+	ret = rte_fib6_add(fib, &parent, 28, 0xe);
+	RTE_TEST_ASSERT(ret == 0,
+		"Fresh ADD /28 spuriously failed (ret=%d)\n", ret);
+
+	ret = rte_fib6_delete(fib, &parent, 28);
+	RTE_TEST_ASSERT(ret == 0, "Final DEL /28 failed (ret=%d)\n", ret);
+
+	rte_fib6_free(fib);
+	return TEST_SUCCESS;
+}
+
 static struct unit_test_suite fib6_fast_tests = {
 	.suite_name = "fib6 autotest",
 	.setup = NULL,
@@ -611,6 +684,7 @@ static struct unit_test_suite fib6_fast_tests = {
 	TEST_CASE(test_lookup),
 	TEST_CASE(test_invalid_rcu),
 	TEST_CASE(test_fib_rcu_sync_rw),
+	TEST_CASE(test_drift),
 	TEST_CASES_END()
 	}
 };
-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 1/3] fib6: fix tbl8 reservation drift in trie
From: Maxime Leroy @ 2026-06-17  8:23 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, Maxime Leroy, stable
In-Reply-To: <20260617082400.142129-1-maxime@leroys.fr>

trie_modify() maintained rsvd_tbl8s incrementally: it added a
depth_diff at ADD and subtracted one at DEL. Each depth_diff was an
independent estimate derived from the covering parent returned by a
lookup at the time of the operation. The parent seen at an ADD could
differ from the one seen at the matching DEL (a covering parent added
or removed in between), so the amount subtracted did not match the
amount added. rsvd_tbl8s was not tied to any function of the current
route set, so nothing corrected the accumulating error; it eventually
wrapped to UINT32_MAX, rejecting all subsequent /25+ ADDs with -ENOSPC.

Define rsvd_tbl8s as the number of tbl8 levels the current route set
needs: one per byte boundary (24, 32, ..., 120) whose supernet holds
at least one longer prefix. count_empty_levels(node) returns how many
of those levels the node occupies on its own in the committed RIB:
zero if it still has more specifics, otherwise CEIL(depth, 8) -
CEIL(parent_depth, 8) in tbl8 units (has_children + parent depth). On
ADD that is the levels the node is first to occupy; on DEL the levels
it is last to vacate. The per-node increment and decrement need not
match (deleting a prefix that still has a more specific frees nothing,
and that level is released later by whichever prefix leaves last), yet
rsvd_tbl8s stays equal to the number of occupied levels at every step,
so it tracks the RIB exactly and cannot drift.

Add the internal RIB helpers rte_rib6_node_has_children() and
rte_rib6_get_parent() used by the count.

Fixes: c3e12e0f0354 ("fib: add dataplane algorithm for IPv6")
Cc: stable@dpdk.org

Signed-off-by: Maxime Leroy <maxime@leroys.fr>
Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
---
 lib/fib/trie.c          | 83 +++++++++++++++++++++--------------------
 lib/rib/rib6_internal.h | 22 +++++++++++
 lib/rib/rte_rib6.c      | 15 ++++++++
 3 files changed, 80 insertions(+), 40 deletions(-)
 create mode 100644 lib/rib/rib6_internal.h

diff --git a/lib/fib/trie.c b/lib/fib/trie.c
index 99272f45bd..187360d1c8 100644
--- a/lib/fib/trie.c
+++ b/lib/fib/trie.c
@@ -13,6 +13,7 @@
 
 #include <rte_rib6.h>
 #include <rte_fib6.h>
+#include <rib6_internal.h>
 #include "fib_log.h"
 #include "trie.h"
 
@@ -534,19 +535,47 @@ modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
 	return 0;
 }
 
+/*
+ * Count number of TBL8s that can be freed after deleting a prefix or allocated
+ * after adding a prefix.
+ */
+static uint8_t
+count_empty_levels(const struct rte_rib6_node *node)
+{
+	struct rte_rib6_node *parent;
+	uint8_t depth, parent_depth = 24;
+
+	/* more specifics present */
+	if (rte_rib6_node_has_children(node))
+		return 0;
+
+	rte_rib6_get_depth(node, &depth);
+	depth = RTE_MAX(depth, 24);
+
+	/* The parent's depth is less than the target node's depth, and a
+	 * tbl8 path already exists up to RTE_ALIGN_CEIL(parent depth, 8).
+	 */
+	parent = rte_rib6_get_parent(node);
+	if (parent != NULL) {
+		rte_rib6_get_depth(parent, &parent_depth);
+		parent_depth = RTE_MAX(parent_depth, 24);
+	}
+
+	return (RTE_ALIGN_CEIL(depth, 8) - RTE_ALIGN_CEIL(parent_depth, 8)) >> 3;
+}
+
 int
 trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	uint8_t depth, uint64_t next_hop, int op)
 {
 	struct rte_trie_tbl *dp;
 	struct rte_rib6 *rib;
-	struct rte_rib6_node *tmp = NULL;
 	struct rte_rib6_node *node;
 	struct rte_rib6_node *parent;
-	struct rte_ipv6_addr ip_masked, tmp_ip;
+	struct rte_ipv6_addr ip_masked;
 	int ret = 0;
 	uint64_t par_nh, node_nh;
-	uint8_t tmp_depth, depth_diff = 0, parent_depth = 24;
+	uint8_t new_levels;
 
 	if ((fib == NULL) || (ip == NULL) || (depth > RTE_IPV6_MAX_DEPTH))
 		return -EINVAL;
@@ -559,37 +588,6 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	ip_masked = *ip;
 	rte_ipv6_addr_mask(&ip_masked, depth);
 
-	if (depth > 24) {
-		tmp = rte_rib6_get_nxt(rib, &ip_masked,
-			RTE_ALIGN_FLOOR(depth, 8), NULL,
-			RTE_RIB6_GET_NXT_ALL);
-		if (tmp && op == RTE_FIB6_DEL) {
-			/* in case of delete operation, skip the prefix we are going to delete */
-			rte_rib6_get_ip(tmp, &tmp_ip);
-			rte_rib6_get_depth(tmp, &tmp_depth);
-			if (rte_ipv6_addr_eq(&ip_masked, &tmp_ip) && depth == tmp_depth)
-				tmp = rte_rib6_get_nxt(rib, &ip_masked,
-					RTE_ALIGN_FLOOR(depth, 8), tmp, RTE_RIB6_GET_NXT_ALL);
-		}
-
-		if (tmp == NULL) {
-			tmp = rte_rib6_lookup(rib, ip);
-			/**
-			 * in case of delete operation, lookup returns the prefix
-			 * we are going to delete. Find the parent.
-			 */
-			if (tmp && op == RTE_FIB6_DEL)
-				tmp = rte_rib6_lookup_parent(tmp);
-
-			if (tmp != NULL) {
-				rte_rib6_get_depth(tmp, &tmp_depth);
-				parent_depth = RTE_MAX(tmp_depth, 24);
-			}
-			depth_diff = RTE_ALIGN_CEIL(depth, 8) -
-				RTE_ALIGN_CEIL(parent_depth, 8);
-			depth_diff = depth_diff >> 3;
-		}
-	}
 	node = rte_rib6_lookup_exact(rib, &ip_masked, depth);
 	switch (op) {
 	case RTE_FIB6_ADD:
@@ -604,12 +602,16 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			return ret;
 		}
 
-		if ((depth > 24) && (dp->rsvd_tbl8s + depth_diff > dp->number_tbl8s))
-			return -ENOSPC;
-
 		node = rte_rib6_insert(rib, &ip_masked, depth);
 		if (node == NULL)
 			return -rte_errno;
+
+		new_levels = count_empty_levels(node);
+		if (dp->rsvd_tbl8s + new_levels > dp->number_tbl8s) {
+			rte_rib6_remove(rib, &ip_masked, depth);
+			return -ENOSPC;
+		}
+
 		rte_rib6_set_nh(node, next_hop);
 		parent = rte_rib6_lookup_parent(node);
 		if (parent != NULL) {
@@ -623,7 +625,7 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			return ret;
 		}
 successfully_added:
-		dp->rsvd_tbl8s += depth_diff;
+		dp->rsvd_tbl8s += new_levels;
 		return 0;
 	case RTE_FIB6_DEL:
 		if (node == NULL)
@@ -641,9 +643,10 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 
 		if (ret != 0)
 			return ret;
-		rte_rib6_remove(rib, ip, depth);
 
-		dp->rsvd_tbl8s -= depth_diff;
+		dp->rsvd_tbl8s -= count_empty_levels(node);
+		rte_rib6_remove(rib, &ip_masked, depth);
+
 		return 0;
 	default:
 		break;
diff --git a/lib/rib/rib6_internal.h b/lib/rib/rib6_internal.h
new file mode 100644
index 0000000000..0f8b5955bb
--- /dev/null
+++ b/lib/rib/rib6_internal.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2026 Maxime Leroy, Free Mobile
+ */
+
+#ifndef _RIB6_INTERNAL_H_
+#define _RIB6_INTERNAL_H_
+
+#include <stdbool.h>
+
+#include <rte_compat.h>
+
+struct rte_rib6_node;
+
+__rte_internal
+bool
+rte_rib6_node_has_children(const struct rte_rib6_node *node);
+
+__rte_internal
+struct rte_rib6_node *
+rte_rib6_get_parent(const struct rte_rib6_node *node);
+
+#endif /* _RIB6_INTERNAL_H_ */
diff --git a/lib/rib/rte_rib6.c b/lib/rib/rte_rib6.c
index ec8ff68e87..918ddbdfd3 100644
--- a/lib/rib/rte_rib6.c
+++ b/lib/rib/rte_rib6.c
@@ -19,6 +19,7 @@
 #include <rte_rib6.h>
 
 #include "rib_log.h"
+#include "rib6_internal.h"
 
 #define RTE_RIB_VALID_NODE	1
 /* Maximum length of a RIB6 name. */
@@ -424,6 +425,20 @@ rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
 	return 0;
 }
 
+RTE_EXPORT_INTERNAL_SYMBOL(rte_rib6_node_has_children)
+bool
+rte_rib6_node_has_children(const struct rte_rib6_node *node)
+{
+	return node->left != NULL || node->right != NULL;
+}
+
+RTE_EXPORT_INTERNAL_SYMBOL(rte_rib6_get_parent)
+struct rte_rib6_node *
+rte_rib6_get_parent(const struct rte_rib6_node *node)
+{
+	return node->parent;
+}
+
 RTE_EXPORT_SYMBOL(rte_rib6_get_ext)
 void *
 rte_rib6_get_ext(struct rte_rib6_node *node)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v2 0/3] fib6: fix tbl8 reservation drift
From: Maxime Leroy @ 2026-06-17  8:23 UTC (permalink / raw)
  To: Vladimir Medvedkin; +Cc: dev, Maxime Leroy
In-Reply-To: <20260522145855.1748406-1-maxime@leroys.fr>

trie_modify() maintained rsvd_tbl8s by computing a depth_diff from the
current RIB topology at both ADD and DEL. The two values diverge when
the RIB changes between an ADD and its later DEL (a covering parent
added or removed), so rsvd_tbl8s eventually wraps to UINT32_MAX and
rejects all subsequent /25+ ADDs with -ENOSPC. A zebra-kill /
reconverge cycle on a live BGP router reproduces it.

The fix computes the reservation from the RIB node shape:
count_empty_levels() returns the number of byte boundaries between the
prefix and its covering parent that no other prefix occupies
(has_children + parent depth), in O(1). It is the count of tbl8 levels
the current route set needs, so ADD/DEL accounting stays consistent and
cannot drift.

Patch 1 is the minimal self-contained fix (Fixes: + Cc: stable).
Patches 2-3 add the reproducer and extended regression tests.

Validated on a live BGP router (grout + FRR, 128 IPv6 prefixes):
RSVD_TBL8 returned to its pre-cycle value (70) after a zebra-kill /
reconverge cycle.

Maxime Leroy (3):
  fib6: fix tbl8 reservation drift in trie
  test/fib6: add reproducer for tbl8 reservation drift
  test/fib6: extended drift test cases

 app/test/test_fib6.c    | 335 ++++++++++++++++++++++++++++++++++++++++
 lib/fib/trie.c          |  83 +++++-----
 lib/rib/rib6_internal.h |  22 +++
 lib/rib/rte_rib6.c      |  15 ++
 4 files changed, 415 insertions(+), 40 deletions(-)
 create mode 100644 lib/rib/rib6_internal.h

---
v2:
* Compute the empty-level count directly from the RIB node
  (rte_rib6_node_has_children + rte_rib6_get_parent, O(1)) instead of
  the v1 multi-level supernet scan over byte boundaries.
* Drop v1 patches 4-5 (valid_descendants counter + single-descent
  helper): no longer needed, the node-based count is already O(1), so
  rte_rib6 needs no new per-node accounting field.

v1:
* Keep rsvd_tbl8s; recompute it via topology-stable empty-supernet
  count (dir24_8 pattern at 13 levels) instead of RIB-derived
  depth_diff.
* Drop RFC patch 3/3 (no longer needed).
* Add extended regression tests.
* Add patches 4-5: RIB valid_descendants + single-descent helper
  (optional perf optimization; not for stable).
* Production-validated on a live BGP router.

--
2.43.0

^ permalink raw reply

* [PATCH v8 21/21] net/txgbe: fix temperature track for AML NIC
From: Zaiyu Wang @ 2026-06-17  8:13 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang, stable, Jiawen Wu
In-Reply-To: <20260617081309.19124-1-zaiyuwang@trustnetic.com>

Previously, temperature tracking for the amlite NIC was handled by
firmware together with the hardware setup. However, the firmware-based
PHY configuration has proven to be unstable.

Re-add the temperature tracking function directly in the driver and
invoke it periodically to ensure the PHY remains calibrated. According
to the hardware recommendation, the tracking sequence should be run at
least every 100 ms to keep temperature drift within 5 °C. Considering
the software and hardware overhead, a 2-second interval is used as a
practical trade-off that still meets stability requirements while
minimizing performance impact.

The periodic tracking is implemented using a timer in the driver, and
the sequence itself is the same as the one originally performed during
link setup.

Fixes: fb6eb170dfa2 ("net/txgbe: add basic link configuration for Amber-Lite")
Cc: stable@dpdk.org

Signed-off-by: Zaiyu Wang <zaiyuwang@trustnetic.com>
---
 drivers/net/txgbe/txgbe_ethdev.c | 44 +++++++++++++++++++++++++++++++-
 drivers/net/txgbe/txgbe_ethdev.h |  1 +
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index f2c3a35900..2ed9a8c179 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -2011,8 +2011,10 @@ txgbe_dev_start(struct rte_eth_dev *dev)
 	txgbe_filter_restore(dev);
 
 	hw->bp_event_interval = 100 * 1000;
-	if (hw->mac.type == txgbe_mac_aml || hw->mac.type == txgbe_mac_aml40)
+	if (hw->mac.type == txgbe_mac_aml || hw->mac.type == txgbe_mac_aml40) {
 		rte_eal_alarm_set(hw->bp_event_interval, txgbe_dev_e56_check_bp_event, dev);
+		rte_eal_alarm_set(1000 * 1000 * 2, txgbe_dev_check_aml_temp_event, dev);
+	}
 
 	if (tm_conf->root && !tm_conf->committed)
 		PMD_DRV_LOG(WARNING,
@@ -2060,6 +2062,7 @@ txgbe_dev_stop(struct rte_eth_dev *dev)
 
 	if (hw->mac.type == txgbe_mac_aml || hw->mac.type == txgbe_mac_aml40) {
 		rte_eal_alarm_cancel(txgbe_dev_e56_check_bp_event, dev);
+		rte_eal_alarm_cancel(txgbe_dev_check_aml_temp_event, dev);
 		rte_eal_alarm_cancel(txgbe_dev_setup_link_alarm_handler_aml, hw);
 	}
 
@@ -2932,6 +2935,45 @@ txgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev, size_t *no_of_elements)
 	return NULL;
 }
 
+void txgbe_dev_check_aml_temp_event(void *param)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+	uint32_t link_speed = 0, val = 0;
+	s32 status = 0;
+	int temp;
+
+	if (hw == NULL)
+		return;
+
+	status = txgbe_e56_get_temp(hw, &temp);
+	if (status)
+		temp = DEFAULT_TEMP;
+
+	if (!(temp - hw->temperature > 4 ||
+		hw->temperature - temp > 4))
+		goto out;
+
+	hw->temperature = temp;
+	val = rd32(hw, TXGBE_PORT);
+	if (val & TXGBE_AMLITE_LED_LINK_40G)
+		link_speed = TXGBE_LINK_SPEED_40GB_FULL;
+	else if (val & TXGBE_AMLITE_LED_LINK_25G)
+		link_speed = TXGBE_LINK_SPEED_25GB_FULL;
+	else
+		link_speed = TXGBE_LINK_SPEED_10GB_FULL;
+
+	rte_spinlock_lock(&hw->phy_lock);
+	if (hw->mac.type == txgbe_mac_aml)
+		txgbe_temp_track_seq(hw, link_speed);
+	else if (hw->mac.type == txgbe_mac_aml40)
+		txgbe_temp_track_seq_40g(hw, link_speed);
+	rte_spinlock_unlock(&hw->phy_lock);
+
+out:
+	rte_eal_alarm_set(1000 * 1000 * 2, txgbe_dev_check_aml_temp_event, dev);
+}
+
 void txgbe_dev_e56_check_bp_event(void *param)
 {
 	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index 309db3bfe9..c32c61d8bf 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -747,5 +747,6 @@ void txgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev,
 		uint16_t queue, bool on);
 void txgbe_config_vlan_strip_on_all_queues(struct rte_eth_dev *dev,
 						  int mask);
+void txgbe_dev_check_aml_temp_event(void *param);
 void txgbe_dev_e56_check_bp_event(void *param);
 #endif /* _TXGBE_ETHDEV_H_ */
-- 
2.21.0.windows.1


^ permalink raw reply related

* [PATCH v8 20/21] net/txgbe: fix to enable Tx desc check
From: Zaiyu Wang @ 2026-06-17  8:13 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang, stable, Jiawen Wu
In-Reply-To: <20260617081309.19124-1-zaiyuwang@trustnetic.com>

The security library is now enabled by default and cannot be disabled if
the driver is to be used, so Tdm_desc_chk (the Tx descriptor check) could
never be enabled. Remove this restriction and enable the check for each
corresponding queue.

Fixes: 0eabdfcd4af4 ("net/txgbe: enable Tx descriptor error interrupt")
Cc: stable@dpdk.org

Signed-off-by: Zaiyu Wang <zaiyuwang@trustnetic.com>
---
 drivers/net/txgbe/txgbe_rxtx.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index 0a9fc3ddfd..f51c6193a9 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -4761,6 +4761,12 @@ txgbe_dev_tx_init(struct rte_eth_dev *dev)
 		wr32(hw, TXGBE_TXRP(txq->reg_idx), 0);
 		wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
 
+#ifdef RTE_LIB_SECURITY
+		if (!txq->using_ipsec)
+#endif
+			wr32m(hw, TXGBE_TDM_DESC_CHK(txq->reg_idx / 32),
+			      RTE_BIT32(txq->reg_idx % 32), RTE_BIT32(txq->reg_idx % 32));
+
 		if (txq->headwb_mem) {
 			uint32_t txdctl;
 
@@ -4778,11 +4784,6 @@ txgbe_dev_tx_init(struct rte_eth_dev *dev)
 		}
 	}
 
-#ifndef RTE_LIB_SECURITY
-	for (i = 0; i < 4; i++)
-		wr32(hw, TXGBE_TDM_DESC_CHK(i), 0xFFFFFFFF);
-#endif
-
 	/* Device configured with multiple TX queues. */
 	txgbe_dev_mq_tx_configure(dev);
 }
-- 
2.21.0.windows.1


^ permalink raw reply related

* [PATCH v8 19/21] net/txgbe: fix to reset Tx write-back pointer
From: Zaiyu Wang @ 2026-06-17  8:13 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang, stable, Jiawen Wu
In-Reply-To: <20260617081309.19124-1-zaiyuwang@trustnetic.com>

The write-back pointer was not reset when the Tx queue was reset, which
leads to incorrect Tx descriptor free logic. Move the resetting of the
pointer into txq->ops->reset(txq).

Fixes: 8ada71d0bb7f ("net/txgbe: add Tx head write-back mode for Amber-Lite")
Cc: stable@dpdk.org

Signed-off-by: Zaiyu Wang <zaiyuwang@trustnetic.com>
---
 drivers/net/txgbe/txgbe_rxtx.c            | 45 +++++++++++++----------
 drivers/net/txgbe/txgbe_rxtx.h            |  1 +
 drivers/net/txgbe/txgbe_rxtx_vec_common.h |  7 ++++
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index 3525b708f8..0a9fc3ddfd 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -2313,6 +2313,12 @@ txgbe_reset_tx_queue(struct txgbe_tx_queue *txq)
 	txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
 	txq->tx_tail = 0;
 
+	/* Zero out headwb_mem memory */
+	if (txq->headwb_mem) {
+		for (i = 0; i < txq->headwb_size; i++)
+			txq->headwb_mem[i] = 0;
+	}
+
 	/*
 	 * Always allow 1 descriptor to be un-allocated to avoid
 	 * a H/W race condition
@@ -2412,7 +2418,7 @@ txgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
 	return tx_offload_capa;
 }
 
-static int
+static void
 txgbe_setup_headwb_resources(struct rte_eth_dev *dev,
 					void *tx_queue,
 					unsigned int socket_id)
@@ -2420,33 +2426,33 @@ txgbe_setup_headwb_resources(struct rte_eth_dev *dev,
 	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
 	const struct rte_memzone *headwb;
 	struct txgbe_tx_queue *txq = tx_queue;
-	u8 i, headwb_size = 0;
+	u8 headwb_size = 0;
 
-	if (hw->mac.type != txgbe_mac_aml && hw->mac.type != txgbe_mac_aml40) {
-		txq->headwb_mem = NULL;
-		return 0;
-	}
+	if (hw->mac.type != txgbe_mac_aml && hw->mac.type != txgbe_mac_aml40)
+		goto out;
+
+	if (!hw->devarg.tx_headwb)
+		goto out;
 
-	headwb_size = hw->devarg.tx_headwb_size;
+	headwb_size = txq->headwb_size;
 	headwb = rte_eth_dma_zone_reserve(dev, "tx_headwb_mem", txq->queue_id,
 			sizeof(u32) * headwb_size,
 			TXGBE_ALIGN, socket_id);
 
 	if (headwb == NULL) {
-		DEBUGOUT("Fail to setup headwb resources: no mem");
-		txgbe_tx_queue_release(txq);
-		return -ENOMEM;
+		PMD_DRV_LOG(INFO,
+			    "Failed to allocate headwb memory for Tx queue %u, change to SP mode",
+			    txq->queue_id);
+		goto out;
 	}
 
 	txq->headwb = headwb;
 	txq->headwb_dma = TMZ_PADDR(headwb);
 	txq->headwb_mem = (uint32_t *)TMZ_VADDR(headwb);
+	return;
 
-	/* Zero out headwb_mem memory */
-	for (i = 0; i < headwb_size; i++)
-		txq->headwb_mem[i] = 0;
-
-	return 0;
+out:
+	txq->headwb_mem = NULL;
 }
 
 int __rte_cold
@@ -2542,6 +2548,7 @@ txgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	txq->offloads = offloads;
 	txq->ops = &def_txq_ops;
 	txq->tx_deferred_start = tx_conf->tx_deferred_start;
+	txq->headwb_size = hw->devarg.tx_headwb_size;
 #ifdef RTE_LIB_SECURITY
 	txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
 			RTE_ETH_TX_OFFLOAD_SECURITY);
@@ -2577,8 +2584,7 @@ txgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
 	/* set up scalar TX function as appropriate */
 	txgbe_set_tx_function(dev, txq);
 
-	if (hw->devarg.tx_headwb)
-		err = txgbe_setup_headwb_resources(dev, txq, socket_id);
+	txgbe_setup_headwb_resources(dev, txq, socket_id);
 
 	txq->ops->reset(txq);
 	txq->desc_error = 0;
@@ -4755,15 +4761,14 @@ txgbe_dev_tx_init(struct rte_eth_dev *dev)
 		wr32(hw, TXGBE_TXRP(txq->reg_idx), 0);
 		wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
 
-		if ((hw->mac.type == txgbe_mac_aml || hw->mac.type == txgbe_mac_aml40) &&
-		     hw->devarg.tx_headwb) {
+		if (txq->headwb_mem) {
 			uint32_t txdctl;
 
 			wr32(hw, TXGBE_PX_TR_HEAD_ADDRL(txq->reg_idx),
 				(uint32_t)(txq->headwb_dma & BIT_MASK32));
 			wr32(hw, TXGBE_PX_TR_HEAD_ADDRH(txq->reg_idx),
 				(uint32_t)(txq->headwb_dma >> 32));
-			if (hw->devarg.tx_headwb_size == 16)
+			if (txq->headwb_size == 16)
 				txdctl = TXGBE_PX_TR_CFG_HEAD_WB |
 					 TXGBE_PX_TR_CFG_HEAD_WB_64BYTE;
 			else
diff --git a/drivers/net/txgbe/txgbe_rxtx.h b/drivers/net/txgbe/txgbe_rxtx.h
index 43c818cfbf..5d2e33a8d4 100644
--- a/drivers/net/txgbe/txgbe_rxtx.h
+++ b/drivers/net/txgbe/txgbe_rxtx.h
@@ -416,6 +416,7 @@ struct txgbe_tx_queue {
 	uint64_t	    desc_error;
 	bool		    resetting;
 	const struct rte_memzone *headwb;
+	uint16_t             headwb_size;
 	uint64_t             headwb_dma;
 	volatile uint32_t    *headwb_mem;
 };
diff --git a/drivers/net/txgbe/txgbe_rxtx_vec_common.h b/drivers/net/txgbe/txgbe_rxtx_vec_common.h
index 77d7ff785b..6e561aff30 100644
--- a/drivers/net/txgbe/txgbe_rxtx_vec_common.h
+++ b/drivers/net/txgbe/txgbe_rxtx_vec_common.h
@@ -252,6 +252,13 @@ _txgbe_reset_tx_queue_vec(struct txgbe_tx_queue *txq)
 	txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
 
 	txq->tx_tail = 0;
+
+	/* Zero out headwb_mem memory */
+	if (txq->headwb_mem) {
+		for (i = 0; i < txq->headwb_size; i++)
+			txq->headwb_mem[i] = 0;
+	}
+
 	/*
 	 * Always allow 1 descriptor to be un-allocated to avoid
 	 * a H/W race condition
-- 
2.21.0.windows.1


^ permalink raw reply related

* [PATCH v8 18/21] net/txgbe: fix get EEPROM operation
From: Zaiyu Wang @ 2026-06-17  8:13 UTC (permalink / raw)
  To: dev; +Cc: Zaiyu Wang, stable, Jiawen Wu
In-Reply-To: <20260617081309.19124-1-zaiyuwang@trustnetic.com>

The original I2C access flow in the module information retrieval
process was flawed. Correct the implementation to properly fetch
module info.

Fixes: abf042d32b39 ("net/txgbe: add Amber-Lite 25G/40G NICs")
Cc: stable@dpdk.org

Signed-off-by: Zaiyu Wang <zaiyuwang@trustnetic.com>
---
 drivers/net/txgbe/base/txgbe_phy.h |  1 +
 drivers/net/txgbe/txgbe_ethdev.c   | 81 +++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/drivers/net/txgbe/base/txgbe_phy.h b/drivers/net/txgbe/base/txgbe_phy.h
index 31bdceb35b..a5df015a4d 100644
--- a/drivers/net/txgbe/base/txgbe_phy.h
+++ b/drivers/net/txgbe/base/txgbe_phy.h
@@ -245,6 +245,7 @@
 /* EEPROM (dev_addr = 0xA0) */
 #define TXGBE_I2C_EEPROM_DEV_ADDR	0xA0
 #define TXGBE_SFF_IDENTIFIER		0x00
+#define TXGBE_SFF_8636_STATUS_OFFSET	0x02
 #define TXGBE_SFF_IDENTIFIER_SFP	0x03
 #define TXGBE_SFF_VENDOR_OUI_BYTE0	0x25
 #define TXGBE_SFF_VENDOR_OUI_BYTE1	0x26
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index c0471e3d0f..f2c3a35900 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -5462,23 +5462,92 @@ txgbe_get_module_eeprom(struct rte_eth_dev *dev,
 	uint8_t databyte = 0xFF;
 	uint8_t *data = info->data;
 	uint32_t i = 0;
+	bool is_sfp = false;
+	uint32_t value;
+	uint8_t identifier = 0;
+	uint16_t offset;
+	uint8_t page = 0;
+	bool is_flat_mem = false;
+
+	if (hw->mac.type == txgbe_mac_aml40) {
+		value = rd32(hw, TXGBE_GPIOEXT);
+		if (value & TXGBE_SFP1_MOD_PRST_LS)
+			return -EIO;
+	}
+
+	if (hw->mac.type == txgbe_mac_aml) {
+		value = rd32(hw, TXGBE_GPIOEXT);
+		if (value & TXGBE_SFP1_MOD_ABS_LS)
+			return -EIO;
+	}
 
 	if (info->length == 0)
 		return -EINVAL;
 
-	for (i = info->offset; i < info->offset + info->length; i++) {
-		if (i < RTE_ETH_MODULE_SFF_8079_LEN)
-			status = hw->phy.read_i2c_eeprom(hw, i, &databyte);
-		else
-			status = hw->phy.read_i2c_sff8472(hw, i, &databyte);
+	status = hw->mac.acquire_swfw_sync(hw, TXGBE_MNGSEM_SWPHY);
+	if (status)
+		return -EBUSY;
+
+	status = hw->phy.read_i2c_eeprom(hw,
+					     TXGBE_SFF_IDENTIFIER,
+					     &identifier);
+	if (status != 0)
+		goto ERROR_IO;
 
+	if (identifier == TXGBE_SFF_IDENTIFIER_SFP) {
+		is_sfp = true;
+	} else {
+		uint8_t rdata = 0;
+
+		status = hw->phy.read_i2c_sff8636(hw, 0,
+						  TXGBE_SFF_8636_STATUS_OFFSET,
+						  &rdata);
 		if (status != 0)
-			return -EIO;
+			goto ERROR_IO;
 
+		if (rdata & 0x4)
+			is_flat_mem = true;
+	}
+
+	memset(data, 0, info->length);
+
+	for (i = info->offset; i < info->offset + info->length; i++) {
+		databyte = 0;
+
+		if (is_sfp) {
+			if (i < RTE_ETH_MODULE_SFF_8079_LEN)
+				status = hw->phy.read_i2c_eeprom(hw, i,
+					       &databyte);
+			else
+				status = hw->phy.read_i2c_sff8472(hw, i,
+					       &databyte);
+
+			if (status != 0)
+				goto ERROR_IO;
+		} else {
+			offset = i;
+			page = 0;
+			while (offset >= RTE_ETH_MODULE_SFF_8436_LEN) {
+				offset -= RTE_ETH_MODULE_SFF_8436_LEN / 2;
+				page++;
+			}
+			if (page == 0 || !is_flat_mem) {
+				status = hw->phy.read_i2c_sff8636(hw, page, offset,
+					       &databyte);
+				if (status != 0)
+					goto ERROR_IO;
+			}
+		}
 		data[i - info->offset] = databyte;
 	}
 
+	hw->mac.release_swfw_sync(hw, TXGBE_MNGSEM_SWPHY);
 	return 0;
+
+ERROR_IO:
+	PMD_DRV_LOG(ERR, "I2C IO ERROR.");
+	hw->mac.release_swfw_sync(hw, TXGBE_MNGSEM_SWPHY);
+	return -EIO;
 }
 
 bool
-- 
2.21.0.windows.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox