DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 04/20] common/cnxk: update NIX irq handler
From: Rahul Bhansali @ 2026-06-15 16:24 UTC (permalink / raw)
  To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: jerinj, Rahul Bhansali
In-Reply-To: <20260615162446.578336-1-rbhansali@marvell.com>

Move queue context dump and register print before interrupt
clear in NIX irq handler.

Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
Changes in v3: No change.
Changes in v2: No change.

 drivers/common/cnxk/roc_nix_irq.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix_irq.c b/drivers/common/cnxk/roc_nix_irq.c
index 2b731302cd..6874435a4e 100644
--- a/drivers/common/cnxk/roc_nix_irq.c
+++ b/drivers/common/cnxk/roc_nix_irq.c
@@ -168,7 +168,7 @@ nix_lf_q_irq_get_and_clear(struct nix *nix, uint16_t q, uint32_t off,
 	reg = roc_atomic64_add_nosync(wdata, (int64_t *)(nix->base + off));

 	if (reg & BIT_ULL(42) /* OP_ERR */) {
-		plt_err("Failed execute irq get off=0x%x", off);
+		plt_err("Failed execute irq get queue=%d off=0x%x", q, off);
 		return 0;
 	}
 	qint = reg & 0xff;
@@ -262,6 +262,10 @@ nix_lf_q_irq(void *param)
 	plt_err("Queue_intr=0x%" PRIx64 " qintx=%d pf=%d, vf=%d", intr, qintx,
 		dev->pf, dev->vf);

+	/* Dump registers to std out */
+	roc_nix_lf_reg_dump(nix_priv_to_roc_nix(nix), NULL);
+	roc_nix_queues_ctx_dump(nix_priv_to_roc_nix(nix), NULL);
+
 	/* Handle RQ interrupts */
 	for (q = 0; q < nix->nb_rx_queues; q++) {
 		rq = q % nix->qints;
@@ -323,10 +327,6 @@ nix_lf_q_irq(void *param)
 	/* Clear interrupt */
 	plt_write64(intr, nix->base + NIX_LF_QINTX_INT(qintx));

-	/* Dump registers to std out */
-	roc_nix_lf_reg_dump(nix_priv_to_roc_nix(nix), NULL);
-	roc_nix_queues_ctx_dump(nix_priv_to_roc_nix(nix), NULL);
-
 	/* Call reset callback */
 	if (intr_cb && dev->ops->q_err_cb)
 		dev->ops->q_err_cb(nix_priv_to_roc_nix(nix), NULL);
--
2.34.1


^ permalink raw reply related

* [PATCH v3 03/20] common/cnxk: additional NIX SQ ctx fields prints
From: Rahul Bhansali @ 2026-06-15 16:24 UTC (permalink / raw)
  To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: jerinj, Rahul Bhansali
In-Reply-To: <20260615162446.578336-1-rbhansali@marvell.com>

Additional debug prints for CN20k NIX SQ ctx dump

Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
Changes in v3: No change.
Changes in v2: fix ubuntu clang stdatomic compile failure.

 drivers/common/cnxk/roc_nix_debug.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/common/cnxk/roc_nix_debug.c b/drivers/common/cnxk/roc_nix_debug.c
index 11994bf131..d4b2b86916 100644
--- a/drivers/common/cnxk/roc_nix_debug.c
+++ b/drivers/common/cnxk/roc_nix_debug.c
@@ -540,6 +540,8 @@ nix_cn10k_lf_sq_dump(__io struct nix_cn10k_sq_ctx_s *ctx, uint32_t *sqb_aura_p,
 static inline void
 nix_lf_sq_dump(__io struct nix_cn20k_sq_ctx_s *ctx, uint32_t *sqb_aura_p, FILE *file)
 {
+	int64_t *sq_cnt_ptr = NULL;
+
 	nix_dump(file, "W0: sqe_way_mask \t\t%d\nW0: cq \t\t\t\t%d",
 		 ctx->sqe_way_mask, ctx->cq);
 	nix_dump(file, "W0: sdp_mcast \t\t\t%d\nW0: substream \t\t\t0x%03x",
@@ -561,6 +563,7 @@ nix_lf_sq_dump(__io struct nix_cn20k_sq_ctx_s *ctx, uint32_t *sqb_aura_p, FILE *
 	nix_dump(file, "W2: smq_rr_count[ub:lb] \t\t%x:%x\n", ctx->smq_rr_count_ub,
 		 ctx->smq_rr_count_lb);

+	nix_dump(file, "W3: update_sq_count\t\t%d\n", ctx->update_sq_count);
 	nix_dump(file, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d",
 		 ctx->smq_next_sq_vld, ctx->smq_pend);
 	nix_dump(file, "W3: smenq_next_sqb_vld  \t%d\nW3: head_offset\t\t\t%d",
@@ -588,6 +591,12 @@ nix_lf_sq_dump(__io struct nix_cn20k_sq_ctx_s *ctx, uint32_t *sqb_aura_p, FILE *
 		 ctx->vfi_lso_sizem1);
 	nix_dump(file, "W9: vfi_lso_total\t\t%d", ctx->vfi_lso_total);

+	nix_dump(file, "W10: sq_count_iova \t\t0x%" PRIx64 "", (uint64_t)ctx->sq_count_iova);
+	sq_cnt_ptr = (int64_t *)(uintptr_t)(ctx->sq_count_iova << 3);
+	if (sq_cnt_ptr && ctx->update_sq_count)
+		nix_dump(file, "sq_count value \t\t0x%" PRIx64 "",
+			 plt_atomic_load_explicit((uint64_t __plt_atomic *)sq_cnt_ptr,
+						  plt_memory_order_relaxed));
 	nix_dump(file, "W10: scm_lso_rem \t\t0x%" PRIx64 "", (uint64_t)ctx->scm_lso_rem);
 	nix_dump(file, "W11: octs \t\t\t0x%" PRIx64 "", (uint64_t)ctx->octs);
 	nix_dump(file, "W12: pkts \t\t\t0x%" PRIx64 "", (uint64_t)ctx->pkts);
--
2.34.1


^ permalink raw reply related

* [PATCH v3 02/20] common/cnxk: add API of SA valid for cn20k platform
From: Rahul Bhansali @ 2026-06-15 16:24 UTC (permalink / raw)
  To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: jerinj, Rahul Bhansali
In-Reply-To: <20260615162446.578336-1-rbhansali@marvell.com>

Add API to get SA valid configuration for cn20k platform.

Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
Changes in v3: No change.
Changes in v2: No change.

 drivers/common/cnxk/cnxk_security.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/common/cnxk/cnxk_security.c b/drivers/common/cnxk/cnxk_security.c
index 6b51055100..6f46ad3276 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -606,6 +606,20 @@ cnxk_ot_ipsec_outb_sa_valid(struct roc_ot_ipsec_outb_sa *sa)
 	return !!sa->w2.s.valid;
 }

+RTE_EXPORT_INTERNAL_SYMBOL(cnxk_ow_ipsec_inb_sa_valid)
+bool
+cnxk_ow_ipsec_inb_sa_valid(struct roc_ow_ipsec_inb_sa *sa)
+{
+	return !!sa->w2.s.valid;
+}
+
+RTE_EXPORT_INTERNAL_SYMBOL(cnxk_ow_ipsec_outb_sa_valid)
+bool
+cnxk_ow_ipsec_outb_sa_valid(struct roc_ow_ipsec_outb_sa *sa)
+{
+	return !!sa->w2.s.valid;
+}
+
 RTE_EXPORT_INTERNAL_SYMBOL(cnxk_ipsec_ivlen_get)
 uint8_t
 cnxk_ipsec_ivlen_get(enum rte_crypto_cipher_algorithm c_algo,
--
2.34.1


^ permalink raw reply related

* [PATCH v3 01/20] net/cnxk: update mbuf next field for multi segment
From: Rahul Bhansali @ 2026-06-15 16:24 UTC (permalink / raw)
  To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra
  Cc: jerinj, Rahul Bhansali
In-Reply-To: <20260611073311.3129711-1-rbhansali@marvell.com>

As per the requirement of rte_mbuf_raw_reset_bulk(), the mbuf's
'next' and 'nb_segs' fields are required to be reset.
This resets these fields for multi-segment mbufs on the cn9k platform.

Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
Changes in v3: No change.
Changes in v2: No change.

 drivers/net/cnxk/cn9k_rx.h |  8 --------
 drivers/net/cnxk/cn9k_tx.h | 42 ++++++++++++++++++--------------------
 2 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 79b56fe160..5ccdc5dee1 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -410,8 +410,6 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 		 * Hence, flag argument is not required.
 		 */
 		nix_cqe_xtract_mseg(rx, mbuf, val, 0);
-	else
-		mbuf->next = NULL;
 }

 static inline uint16_t
@@ -826,12 +824,6 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
 						(cq0 + CQE_SZ(3) + 8), mbuf3,
 					    mbuf_initializer, flags);
-		} else {
-			/* Update that no more segments */
-			mbuf0->next = NULL;
-			mbuf1->next = NULL;
-			mbuf2->next = NULL;
-			mbuf3->next = NULL;
 		}

 		/* Store the mbufs to rx_pkts */
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 32665d2050..0ec448e36c 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -665,14 +665,14 @@ cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_m
 #else
 	RTE_SET_USED(cookie);
 #endif
-#ifdef RTE_ENABLE_ASSERT
-	m->next = NULL;
-	m->nb_segs = 1;
-#endif
-	m = m_next;
-	if (!m)
+	if (likely(!m_next))
 		goto done;

+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) {
+		m->next = NULL;
+		m->nb_segs = 1;
+	}
+	m = m_next;
 	/* Fill mbuf segments */
 	do {
 		m_next = m->next;
@@ -704,12 +704,13 @@ cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_m
 			sg_u = sg->u;
 			slist++;
 		}
-#ifdef RTE_ENABLE_ASSERT
-		m->next = NULL;
-#endif
+		if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+			m->next = NULL;
 		m = m_next;
 	} while (nb_segs);

+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+		rte_io_wmb();
 done:
 	sg->u = sg_u;
 	sg->segs = i;
@@ -720,9 +721,6 @@ cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_m
 	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
 	send_hdr->w0.sizem1 = segdw - 1;

-#ifdef RTE_ENABLE_ASSERT
-	rte_io_wmb();
-#endif
 	return segdw;
 }

@@ -950,10 +948,10 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
 	RTE_SET_USED(cookie);
 #endif

-#ifdef RTE_ENABLE_ASSERT
-	m->next = NULL;
-	m->nb_segs = 1;
-#endif
+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)) {
+		m->next = NULL;
+		m->nb_segs = 1;
+	}
 	m = m_next;
 	/* Fill mbuf segments */
 	do {
@@ -984,9 +982,8 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
 			sg_u = sg->u;
 			slist++;
 		}
-#ifdef RTE_ENABLE_ASSERT
-		m->next = NULL;
-#endif
+		if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+			m->next = NULL;
 		m = m_next;
 	} while (nb_segs);

@@ -1002,9 +999,6 @@ cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
 		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
 	send_hdr->w0.sizem1 = segdw - 1;

-#ifdef RTE_ENABLE_ASSERT
-	rte_io_wmb();
-#endif
 	return segdw;
 }

@@ -1089,6 +1083,10 @@ cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
 		}
 	}

+	/* Multi segment mbufs */
+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+		rte_io_wmb();
+
 	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
 		/* Fit consecutive packets in same LMTLINE. */
 		if ((segdw[j] + segdw[j + 1]) <= 8) {
--
2.34.1


^ permalink raw reply related

* RE: [PATCH v5 0/3] eal/topology: introduce topology-aware lcore grouping
From: Varghese, Vipin @ 2026-06-15 14:34 UTC (permalink / raw)
  To: Varghese, Vipin, Stephen Hemminger
  Cc: dev@dpdk.org, Tummala, Sivaprasad, konstantin.ananyev@huawei.com,
	wathsala.vithanage@arm.com, bruce.richardson@intel.com,
	viktorin@cesnet.cz, mb@smartsharesystems.com
In-Reply-To: <DS0PR12MB66086B0C37890ED5D4B5AD5F82092@DS0PR12MB6608.namprd12.prod.outlook.com>

AMD General

Hi All,

Quick update: implementing CPU_ALLOC instead of rte_cpu_set is turning out to eat a lot of memory.

I am working on logic to use CPU_ALLOC when CPU_SIZE > rte_cpu_set capacity, or to find smarter ways to avoid pre-allocating extra memory.

Note: for accommodating CPU alloc we need to do it for all valid l1, l2, l3 and l4. On 384 single socket machine it exhausts memory (heap) like anything.

> -----Original Message-----
> From: Varghese, Vipin <Vipin.Varghese@amd.com>
> Sent: Thursday, May 28, 2026 12:48 PM
> To: Stephen Hemminger <stephen@networkplumber.org>
> Cc: dev@dpdk.org; Tummala, Sivaprasad <Sivaprasad.Tummala@amd.com>;
> konstantin.ananyev@huawei.com; wathsala.vithanage@arm.com;
> bruce.richardson@intel.com; viktorin@cesnet.cz; mb@smartsharesystems.com
> Subject: RE: [PATCH v5 0/3] eal/topology: introduce topology-aware lcore grouping
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> AMD General
>
> Sharing v6 with fixes and cache-id fetch shortly.
>
> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Wednesday, April 15, 2026 1:52 AM
> > To: Varghese, Vipin <Vipin.Varghese@amd.com>
> > Cc: dev@dpdk.org; Tummala, Sivaprasad <Sivaprasad.Tummala@amd.com>;
> > konstantin.ananyev@huawei.com; wathsala.vithanage@arm.com;
> > bruce.richardson@intel.com; viktorin@cesnet.cz;
> > mb@smartsharesystems.com
> > Subject: Re: [PATCH v5 0/3] eal/topology: introduce topology-aware
> > lcore grouping
> >
> > Caution: This message originated from an External Source. Use proper
> > caution when opening attachments, clicking links, or responding.
> >
> >
> > On Wed, 15 Apr 2026 01:08:18 +0530
> > Vipin Varghese <vipin.varghese@amd.com> wrote:
> >
> > > This series introduces a topology library that groups DPDK lcores
> > > based on CPU cache hierarchy and NUMA topology. The goal is to
> > > provide a stable and explicit API that allows applications to select
> > > lcores with better locality and cache sharing characteristics.
> > >
> > > The series includes:
> > >   - EAL support for topology discovery using hwloc and domain-based lcore
> > >     grouping (L1/L2/L3/L4/NUMA)
> > >   - Topology-aware test cases validating API behavior and edge conditions
> > >   - Programmer’s guide describing the topology library and APIs
> > >
> > > The API is marked experimental and does not change existing lcore
> > > behavior unless explicitly used by the application.
> > >
> > > Changes in v5:
> > >   - Addressed review comments from v4
> > >   - Fixed ARM cross-compilation issues
> > >   - Cleaned up domain iteration and error handling
> > >   - Updated tests to cover domain edge cases
> > >   - Documentation refinements and API usage clarification
> > >
> > > Changes in v4:
> > >   - Corrected domain selection semantics
> > >   - Updated example usage
> > >   - Fixed naming and typo issues
> > >
> > > Changes in v3:
> > >   - Fixed macro naming (USE_NO_TOPOLOGY)
> > >   - Minor cleanups based on early feedback
> > >
> > > Tested on:
> > >   - AMD EPYC (Milan, Genoa, Siena, Turin, Turin-Dense, Sorano)
> > >   - Intel Xeon (SPR-SP, GNR-SP)
> > >   - ARM Ampere
> > >   - NVIDIA Grace Superchip
> > >
> > > Dependencies:
> > >   - hwloc-dev (tested with 2.10.0)
> > >
> > > Patch breakdown:
> > >   1/3 eal/topology: add topology grouping for lcores
> > >   2/3 app: add topology-aware test cases
> > >   3/3 doc: add topology library documentation
> > >
> > > Future Work:
> > >  - integrate into examples
> > >   -- helloworld: ready
> > >   -- pkt-distributor: in-progress
> > >   -- l2fwd: ready
> > >   -- l3fwd: to start
> > >   -- eventdevpipeline: PoC ready
> > >  - integrate topology test
> > >   -- crypto: yet to start
> > >   -- compression: yet to start
> > >   -- dma: PoC ready
> > >  - add new features for
> > >   -- PQoS: yet to start
> > >   -- Data Injection: PoC with BRDCM Thor-2 ready
> > >
> > > Tested OS: Linux only, need help with BSD and Windows
> > >
> > > Tested with and without hwloc-dev library for
> > >  - Ampere, aarch64, Neoverse-N1, NUMA-2, 256 CPU threads
> > >  - Grace superchip, aarch64, Neoverse-V2, NUMA-2, 144 CPU threads
> > >  - Intel GNR-SP, 6767P, NUMA-2, 256 Threads
> > >  - AMD EPYC Siena, 8534P, NUMA-1, 128 Threads
> > >  - AMD EPYC Sorano, 8635P, NUMA-1, 168 Threads
> > >
> > > Signed-off-by: Vipin Varghese <vipin.varghese@amd.com> ``
> > >
> > > Vipin Varghese (3):
> > >   eal/topology: add Topology grouping for lcores
> > >   app: add topology aware test case
> > >   doc: add new section topology
> > >
> > >  app/test/meson.build                   |   1 +
> > >  app/test/test_ring_perf.c              | 416 +++++++++++++-
> > >  app/test/test_stack_perf.c             | 409 ++++++++++++++
> > >  app/test/test_topology.c               | 676 ++++++++++++++++++++++
> > >  config/meson.build                     |  18 +
> > >  doc/api/doxy-api-index.md              |   1 +
> > >  doc/guides/prog_guide/index.rst        |   3 +-
> > >  doc/guides/prog_guide/topology_lib.rst | 155 +++++
> > >  lib/eal/common/eal_private.h           |  74 +++
> > >  lib/eal/common/eal_topology.c          | 747 +++++++++++++++++++++++++
> > >  lib/eal/common/meson.build             |   1 +
> > >  lib/eal/freebsd/eal.c                  |  10 +-
> > >  lib/eal/include/meson.build            |   1 +
> > >  lib/eal/include/rte_topology.h         | 255 +++++++++
> > >  lib/eal/linux/eal.c                    |   7 +
> > >  lib/eal/meson.build                    |   4 +
> > >  16 files changed, 2773 insertions(+), 5 deletions(-)  create mode
> > > 100644 app/test/test_topology.c  create mode 100644
> > > doc/guides/prog_guide/topology_lib.rst
> > >  create mode 100644 lib/eal/common/eal_topology.c  create mode
> > > 100644 lib/eal/include/rte_topology.h
> > >
> >
> > AI review reported lots of possible correctness problems:
> >
> >
> > # DPDK Patch Review - bundle-1843-topology-v5.mbox
> >
> > ## Overview
> >
> > This patch series introduces topology awareness to DPDK, allowing
> > applications to select logical cores based on CPU cache and I/O
> > topology. The series adds a new `rte_topology` library with APIs to
> > query and enumerate logical cores within topology domains (L1/L2/L3/L4/NUMA).
> >
> > ---
> >
> > ## Correctness Bugs (HIGHEST PRIORITY - report at >=50% confidence)
> >
> > ### Patch 1/3: lib/eal/common/eal_topology.c
> >
> > #### **Error: Resource leak on `eal_topology_map_layer()` failure**
> >
> > In `rte_eal_topology_init()`, if any call to
> > `eal_topology_map_layer()` fails (returns < 0), the code calls
> > `rte_eal_topology_release()` which frees previously allocated layers.
> > However, the `hwloc_topology_t` handle `topo_cnfg.topology` is **NOT**
> destroyed before returning. This leaks the hwloc topology handle.
> >
> > **Location:** `lib/eal/common/eal_topology.c:685-712`
> >
> > ```c
> > for (int i = 0; i < 5; i++) {
> >     *layers[i].count = hwloc_get_nbobjs_by_depth(topo_cnfg.topology,
> > layers[i].depth);
> >     if (eal_topology_map_layer(topo_cnfg.topology, layers[i].depth, layers[i].count,
> >         layers[i].ptr, layers[i].total_cores, layers[i].name) < 0) {
> >         rte_eal_topology_release();  /* frees layer memory */
> >         return -1;  /* BUG: topo_cnfg.topology NOT destroyed here */
> >     }
> > }
> >
> > hwloc_topology_destroy(topo_cnfg.topology);  /* only reached on success */
> > topo_cnfg.topology = NULL;
> > ```
> >
> > **Fix:** Destroy the topology before returning on error:
> >
> > ```c
> > if (eal_topology_map_layer(...) < 0) {
> >     hwloc_topology_destroy(topo_cnfg.topology);
> >     topo_cnfg.topology = NULL;
> >     rte_eal_topology_release();
> >     return -1;
> > }
> > ```
> >
> > ---
> >
> > #### **Error: Potential use-after-free in `eal_topology_map_layer()`
> > on partial allocation failure**
> >
> > In `eal_topology_map_layer()`, the code allocates `dm->cores` for each
> > domain. If a later allocation in the same loop iteration fails (e.g.,
> > for domain `j+1`), the function returns -1 immediately **without
> > freeing `dm->cores` already allocated for earlier domains**. The
> > caller (`rte_eal_topology_init()`) then calls
> > `rte_eal_topology_release()`, which expects `layer_ptr[j]` to be
> > non-NULL but `layer_ptr[j]->cores` may be uninitialized or garbage if
> > the allocation failed before that point. This can cause a use-after-free or double-
> free when `rte_eal_topology_release()` calls `rte_free(d->map[i]->cores)`.
> >
> > **Location:** `lib/eal/common/eal_topology.c:537-557`
> >
> > ```c
> > for (uint16_t j = 0; j < *layer_cnt; j++) {
> >     hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, j);
> >     int cpu_count = hwloc_bitmap_weight(obj->cpuset);
> >     if (cpu_count == -1)
> >         continue;
> >
> >     struct core_domain_mapping *dm =
> >         rte_zmalloc(NULL, sizeof(struct core_domain_mapping), 0);
> >     if (!dm)
> >         return -1;  /* BUG: leaks layer_ptr array allocated by caller
> > */
> >
> >     (*layer_ptr)[j] = dm;
> >     CPU_ZERO(&dm->core_set);
> >     dm->core_count = 0;
> >
> >     dm->cores = rte_malloc(NULL, sizeof(uint16_t) * cpu_count, 0);
> >     if (!dm->cores)
> >         return -1;  /* BUG: leaks dm (just allocated) and previous
> > entries */ } ```
> >
> > **Fix:** On allocation failure, free all previously allocated entries before returning:
> >
> > ```c
> > if (!dm) {
> >     /* Free all previously allocated entries */
> >     for (uint16_t k = 0; k < j; k++) {
> >         if ((*layer_ptr)[k]) {
> >             rte_free((*layer_ptr)[k]->cores);
> >             rte_free((*layer_ptr)[k]);
> >         }
> >     }
> >     rte_free(*layer_ptr);
> >     return -1;
> > }
> >
> > /* Same cleanup for dm->cores allocation failure */ if (!dm->cores) {
> >     rte_free(dm);
> >     for (uint16_t k = 0; k < j; k++) {
> >         if ((*layer_ptr)[k]) {
> >             rte_free((*layer_ptr)[k]->cores);
> >             rte_free((*layer_ptr)[k]);
> >         }
> >     }
> >     rte_free(*layer_ptr);
> >     return -1;
> > }
> > ```
> >
> > ---
> >
> > #### **Error: NULL pointer dereference in `eal_topology_map_layer()`
> > when `hwloc_get_obj_by_depth()` returns NULL**
> >
> > In the second loop of `eal_topology_map_layer()` (lines 560-620), the
> > code calls `hwloc_get_obj_by_depth(topology, depth, j)` and checks `if
> > (!obj || hwloc_bitmap_iszero(obj->cpuset))` to skip NULL objects.
> > However, in the **inner
> > loop** (lines 579-618), the code calls
> > `hwloc_get_obj_by_depth(topology, depth, k)` and assigns it to
> > `obj_core` but does **NOT** check if `obj_core` is NULL before calling
> > `hwloc_bitmap_weight(obj_core->cpuset)` on line 581. If
> > `hwloc_get_obj_by_depth()` returns NULL for domain `k`, this will dereference a
> NULL pointer.
> >
> > **Location:** `lib/eal/common/eal_topology.c:579-582`
> >
> > ```c
> > for (uint16_t k = 0; k < *layer_cnt; k++) {
> >     hwloc_obj_t obj_core = hwloc_get_obj_by_depth(topology, depth, k);
> >     int cpu_count_core = hwloc_bitmap_weight(obj_core->cpuset);  /*
> > NULL deref if obj_core == NULL */
> >     if (cpu_count_core == -1)
> >         continue;
> > ```
> >
> > **Fix:** Check `obj_core` before dereferencing:
> >
> > ```c
> > hwloc_obj_t obj_core = hwloc_get_obj_by_depth(topology, depth, k); if (!obj_core)
> >     continue;
> > int cpu_count_core = hwloc_bitmap_weight(obj_core->cpuset);
> > if (cpu_count_core == -1)
> >     continue;
> > ```
> >
> > ---
> >
> > #### **Error: Incorrect second argument to
> > `rte_topo_get_nth_lcore_from_domain()` in `get_same_l1_domains()`
> > (test_ring_perf.c and test_stack_perf.c)**
> >
> > In both `app/test/test_ring_perf.c:290` and
> > `app/test/test_stack_perf.c:258`, the function `get_same_l1_domains()` calls:
> >
> > ```c
> > id2 = rte_topo_get_nth_lcore_from_domain(domain, 0, 0,
> > RTE_TOPO_DOMAIN_L1); ```
> >
> > The second argument (`lcore_pos`) is `0`, which is the same as for
> > `id1`. This will assign **the same lcore** to both `id1` and `id2`,
> > causing the subsequent check `if
> > (id1 == id2) return 3;` to always trigger. This is a logic error: the
> > intent is clearly to get two **different** lcores from the same domain.
> >
> > **Location:** `app/test/test_ring_perf.c:287-290` and
> > `app/test/test_stack_perf.c:255-258`
> >
> > **Fix:** Use position `1` for the second lcore:
> >
> > ```c
> > id1 = rte_topo_get_nth_lcore_from_domain(domain, 0, 0,
> > RTE_TOPO_DOMAIN_L1);
> > id2 = rte_topo_get_nth_lcore_from_domain(domain, 1, 0,
> > RTE_TOPO_DOMAIN_L1); ```
> >
> > ---
> >
> > #### **Error: Iteration condition in `test_main_lcore_in_domain()`
> > uses wrong domain type for lookup**
> >
> > In `app/test/test_topology.c:211`, the loop iterates over
> > `domain_count` for `domain_types[d]`, but the call to
> > `rte_topo_is_main_lcore_in_domain()` uses `RTE_TOPO_DOMAIN_NUMA`
> > instead of `domain_types[d]`. This means the test only checks the NUMA
> > domain regardless of which domain type `d` selects (L1/L2/L3/L4).
> >
> > **Location:** `app/test/test_topology.c:206-216`
> >
> > ```c
> > for (unsigned int d = 0; d < RTE_DIM(domain_types); d++) {
> >     bool main_lcore_found = false;
> >     unsigned int domain_count = rte_topo_get_domain_count(domain_types[d]);
> >     for (unsigned int dmn_idx = 0; dmn_idx < domain_count; dmn_idx++) {
> >         main_lcore_found =
> > rte_topo_is_main_lcore_in_domain(RTE_TOPO_DOMAIN_NUMA,  /* BUG:
> should
> > be domain_types[d] */
> >             dmn_idx);
> >         if (main_lcore_found)
> >             break;
> >     }
> > ```
> >
> > **Fix:**
> >
> > ```c
> > main_lcore_found = rte_topo_is_main_lcore_in_domain(domain_types[d],
> > dmn_idx); ```
> >
> > ---
> >
> > #### **Error: Infinite loop risk in
> > `rte_topo_get_nth_lcore_from_domain()` when `ptr-
> > >core_count` is 0**
> >
> > In `lib/eal/common/eal_topology.c:296-318`, the function enters a
> > `while (1)` loop that increments `new_lcore_pos`. If `ptr->core_count`
> > is 0 (which the code checks earlier but does not return immediately),
> > the loop will wrap `new_lcore_pos` back to
> > 0 indefinitely, never breaking. While the function returns
> > `RTE_MAX_LCORE` if `ptr-
> > >core_count == 0` before the loop, the logic flow is unclear and the
> > >loop body does
> > not have a clear termination condition if the core count is 0.
> >
> > **Location:** `lib/eal/common/eal_topology.c:283-318`
> >
> > **Fix:** Add a sanity check inside the loop to prevent infinite iteration:
> >
> > ```c
> > unsigned int iterations = 0;
> > while (1) {
> >     if (iterations++ > ptr->core_count * 2)  /* safety limit */
> >         return RTE_MAX_LCORE;
> >     /* ... rest of loop ... */
> > }
> > ```
> >
> > However, the real issue is that the code already returns
> > `RTE_MAX_LCORE` if `ptr->core_count == 0` on line 287, so this is more
> > of a defensive-programming note. The function should be refactored for clarity.
> >
> > ---
> >
> > #### **Error: Missing NULL check after `get_domain_lcore_mapping()` in
> > `rte_topo_get_next_lcore()`**
> >
> > In `rte_topo_get_next_lcore()`, the code calls
> > `get_domain_lcore_mapping(flag, lcore_domain)` and checks if `ptr` is
> > NULL on line 350. However, if `ptr` is NULL, the function returns
> > `RTE_MAX_LCORE`. This is correct, but the subsequent logic on line 381
> > calls `rte_topo_is_main_lcore_in_domain(flag, lcore_domain)`, which
> > internally may call `get_domain_lcore_mapping()` again. If that call
> > also returns NULL (which it will if the domain is invalid), the
> > function `rte_topo_is_main_lcore_in_domain()` will return `false`,
> > which is safe. However, the logic is fragile and should explicitly handle the NULL
> case to avoid relying on transitive safety.
> >
> > **Location:** `lib/eal/common/eal_topology.c:381`
> >
> > **Recommendation:** The code is technically safe but could be clearer.
> > No change required, but consider restructuring for maintainability.
> >
> > ---
> >
> > ### Patch 2/3: app/test Topology Tests
> >
> > #### **Error: Macro
> `RTE_TOPO_FOREACH_WORKER_LCORE_IN_DOMAIN`
> > declares variable in macro expansion (shadowing risk)**
> >
> > In `lib/eal/include/rte_topology.h:243-248`, the macro
> > `RTE_TOPO_FOREACH_WORKER_LCORE_IN_DOMAIN` declares a local
> variable
> > `main_lcore` inside the macro expansion:
> >
> > ```c
> > #define RTE_TOPO_FOREACH_WORKER_LCORE_IN_DOMAIN(lcore,
> > domain_indx, flag)  \
> >     lcore = rte_topo_get_nth_lcore_from_domain(domain, 0, 0, flag);  \
> >     uint16_t main_lcore = rte_get_main_lcore();  \
> >     for (lcore = (lcore != main_lcore) ? \
> >         lcore : rte_topo_get_next_lcore(lcore, 1, 0, flag);  \
> >         lcore < RTE_MAX_LCORE;  \
> >         lcore = rte_topo_get_next_lcore(lcore, 1, 0, flag)) ```
> >
> > This can cause a compiler error or shadowing if the caller already has
> > a variable named `main_lcore` in scope. Additionally, the macro uses
> > `domain` (line 244) but the parameter is `domain_indx`, which is a typo and will
> cause a compilation error.
> >
> > **Location:** `lib/eal/include/rte_topology.h:243-248`
> >
> > **Fix:** Wrap in a `do { } while (0)` and use a uniquely-named
> > variable, or document that the macro must not be used if `main_lcore`
> > is already declared. Also fix the
> > typo:
> >
> > ```c
> > #define RTE_TOPO_FOREACH_WORKER_LCORE_IN_DOMAIN(lcore,
> > domain_indx, flag)  \
> >     lcore = rte_topo_get_nth_lcore_from_domain(domain_indx, 0, 0, flag);  \
> >     uint16_t __topo_main_lcore = rte_get_main_lcore();  \
> >     for (lcore = (lcore != __topo_main_lcore) ? \
> >         lcore : rte_topo_get_next_lcore(lcore, 1, 0, flag);  \
> >         lcore < RTE_MAX_LCORE;  \
> >         lcore = rte_topo_get_next_lcore(lcore, 1, 0, flag)) ```
> >
> > ---
> >
> > ### Summary of Correctness Bugs
> >
> > 1. **hwloc topology leak** on `eal_topology_map_layer()` failure 2.
> > **Resource
> > leak** in `eal_topology_map_layer()` on partial allocation failure 3.
> > **NULL pointer
> > dereference** in `eal_topology_map_layer()` inner loop 4. **Logic
> > error** in `get_same_l1_domains()` (same lcore assigned to `id1` and
> > `id2`) 5. **Wrong domain type** in `test_main_lcore_in_domain()` (uses
> > `RTE_TOPO_DOMAIN_NUMA` instead of `domain_types[d]`) 6. **Macro typo**
> > in `RTE_TOPO_FOREACH_WORKER_LCORE_IN_DOMAIN` (uses `domain`
> instead of
> > `domain_indx`) 7. **Potential infinite loop** in
> > `rte_topo_get_nth_lcore_from_domain()` if `ptr->core_count == 0`
> > (mitigated by early

^ permalink raw reply

* Question regarding duplicate fragment handling in DPDK IP reassembly library
From: Samyak Jain @ 2026-06-15 12:39 UTC (permalink / raw)
  To: dev@dpdk.org; +Cc: Vikash kumar, Ankur Bharadwaj

[-- Attachment #1: Type: text/plain, Size: 731 bytes --]

Hi DPDK Community,

I am using DPDK 25.11 and evaluating the IP reassembly library
(librte_ip_frag).

During testing, I observed that duplicate fragments appear to cause reassembly failure and the fragment context gets invalidated.

I would like to know:

1. Is duplicate fragment handling intentionally unsupported in
   rte_ipv4_frag_reassemble_packet() / rte_ipv6_frag_reassemble_packet()?

2. Has there been any upstream discussion or patch to support
   duplicate fragments while still rejecting conflicting
   fragments?

3. Are there any recommended approaches for applications that need
   Linux-like duplicate fragment tolerance?

Any guidance would be appreciated.

Thanks & Regards,
Samyak Jain

[-- Attachment #2: Type: text/html, Size: 4975 bytes --]

^ permalink raw reply

* Re: [RFC] devtools: add tool calling support to review-patch.py
From: David Marchand @ 2026-06-15 11:34 UTC (permalink / raw)
  To: Aaron Conole; +Cc: dev, Stephen Hemminger
In-Reply-To: <20260609182652.1053422-1-aconole@redhat.com>

On Tue, 9 Jun 2026 at 20:26, Aaron Conole <aconole@redhat.com> wrote:
>
> Add an iterative tool-use loop to review-patch.py for the Anthropic
> and OpenAI providers. The reviewer can now look up additional context
> from the DPDK source tree when the patch alone is insufficient,
> rather than having to guess at surrounding code, API contracts, or
> function signatures.
>
> Tool calling is enabled by default with a limit of 10 rounds. Pass
> '--tool-rounds 0' to disable it and restore the previous single-shot
> behavior.  The round limit prevents runaway cost on large patches;
> when it is reached, the model is forced to deliver a final judgement.
>
> Initial tool set:
>   - grep           Searches for regex across the file system with
>                    optional path restrictions and case-insensitive
>                    matches.
>   - file_read      Line range read of a specific path.
>
> Both tools are limited to the repository root to prevent path
> traversal.  Path outputs are relative to the repo root.
>
> The system prompt is extended when tool calling is active to
> encourage the model to use tools only when genuinely needed,
> keeping unnecessary round trips and token costs under control
> and to a minimum.
>
> Internally, _common.py gains send_request_raw() (returning the
> raw response dict) so the tool-calling loops can inspect
> stop_reason / finish_reason before extracting text.
>
> Signed-off-by: Aaron Conole <aconole@redhat.com>

- I got a strange comment that a (valid) sha1 (from a Fixes: tag) was
unknown to the grep tool.


- Are those tools returning results on the current working directory?
If so, the result may differ depending on whether you applied the series or not.

Did you consider wrapping around "git grep $pattern origin/main" /
"git show origin/main:$file" ?
(determining the correct git reference may be hard..)


-- 
David Marchand


^ permalink raw reply

* [PATCH v1 1/1] net/i40e: allow discontiguous queue lists in hash
From: Anatoly Burakov @ 2026-06-15 11:01 UTC (permalink / raw)
  To: dev, Bruce Richardson

Due to recent refactors and code unification, there are now the following
properties of RSS queue list that can be checked by common infrastructure:

- Monotonicity (i.e. queue indices always increase, never decrease)
- No duplication (i.e. can't have the same index specified twice)
- Contiguousness (i.e. can't have holes in the queue list)

The latter is an optional feature that can be enabled with a flag. However,
previous hash code only enforced contiguousness for queue *regions* but not
queue *lists*, whereas after the refactor, all queue lists were required to
be contiguous. This is an unnecessary restriction, and it breaks backwards
compatibility.

Fix it by only specifying contiguousness requirement for the VLAN branch
where we are actually looking for a queue *region* not queue *list*.

Fixes: 0185303c2e24 ("net/i40e: refactor RSS flow parameter checks")

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 drivers/net/intel/i40e/i40e_hash.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/intel/i40e/i40e_hash.c b/drivers/net/intel/i40e/i40e_hash.c
index 3c1302469c..8b80d0a91c 100644
--- a/drivers/net/intel/i40e/i40e_hash.c
+++ b/drivers/net/intel/i40e/i40e_hash.c
@@ -1238,7 +1238,6 @@ i40e_hash_parse(struct rte_eth_dev *dev,
 		},
 		.max_actions = 1,
 		.driver_ctx = dev->data->dev_private,
-		.rss_queues_contig = true,
 		/* each pattern type will add specific check function */
 	};
 	const struct rte_flow_action_rss *rss_act;
@@ -1265,6 +1264,8 @@ i40e_hash_parse(struct rte_eth_dev *dev,
 	/* VLAN path */
 	if (is_vlan) {
 		ac_param.check = i40e_hash_validate_queue_region;
+		/* queue regions must be contiguous */
+		ac_param.rss_queues_contig = true;
 		ret = ci_flow_check_actions(actions, &ac_param, &parsed_actions, error);
 		if (ret)
 			return ret;
-- 
2.47.3


^ permalink raw reply related

* Re: [PATCH 05/15] doc: improve clarity and consistency in DMA sample app guide
From: fengchengwen @ 2026-06-15  9:50 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: Kevin Laatz, Bruce Richardson
In-Reply-To: <20260611212119.1026721-6-stephen@networkplumber.org>

Acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 6/12/2026 5:18 AM, Stephen Hemminger wrote:
> Enhanced the DMA sample application documentation:
> - Simplified MAC address modification description using bullet points
> - Improved grammar and readability throughout
> - Standardized terminology (DMAdev, Tx/Rx port formatting)
> - Fixed article usage and clarified technical explanations
> - Enhanced sentence structure for better flow
> - Corrected minor grammatical issues and typos
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>


^ permalink raw reply

* Re: [PATCH] app/testpmd: add VLAN priority insert support
From: fengchengwen @ 2026-06-15  9:46 UTC (permalink / raw)
  To: Xingui Yang, dev
  Cc: stephen, david.marchand, aman.deep.singh, yangshuaisong,
	lihuisong, liuyonglong, kangfenglong
In-Reply-To: <20260612081411.2798403-1-yangxingui@huawei.com>

On 6/12/2026 4:14 PM, Xingui Yang wrote:
> The tx_vlan set command currently only accepts a VLAN ID in range
> [0, 4095].  This patch adds support for an extended format that includes
> 802.1p priority and CFI bits, allowing users to set the VLAN priority
> tag when inserting VLAN headers in TX packets.
> 
> The extended format is:
>   bit 0-11:  VLAN ID (0-4095)
>   bit 12:    CFI (Canonical Format Indicator)
>   bit 13-15: Priority (0-7, 802.1p CoS)
> 
> This is consistent with the VLAN tag structure used by
> rte_eth_dev_set_vlan_pvid() where the PVID field encodes VLAN ID, CFI
> and priority in the same format.
> 
> A new command line option --enable-vlan-priority is added to enable this
> feature. By default, the feature is disabled to maintain backward
> compatibility with existing users. When enabled, the
> vlan_id_is_invalid() function allows any 16-bit value to pass, while the
> full 16-bit value (including CFI and priority bits) is passed to the
> driver for hardware VLAN insertion.
> 
> Signed-off-by: Xingui Yang <yangxingui@huawei.com>
> ---
>  app/test-pmd/config.c     | 24 +++++++++++++++---------
>  app/test-pmd/parameters.c |  6 ++++++
>  app/test-pmd/testpmd.c    |  5 +++++
>  app/test-pmd/testpmd.h    |  2 ++
>  4 files changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 36b9b023e2..80cde109e6 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -1241,12 +1241,18 @@ void print_valid_ports(void)
>  }
>  
>  static int
> -vlan_id_is_invalid(uint16_t vlan_id)
> +vlan_id_is_invalid(uint16_t vlan_id, int vlan_priority_ena)
>  {
> -	if (vlan_id < 4096)
> -		return 0;
> -	fprintf(stderr, "Invalid vlan_id %d (must be < 4096)\n", vlan_id);
> -	return 1;
> +	if (!vlan_priority_ena && vlan_id >= 4096) {
> +		fprintf(stderr, "Invalid vlan_id %d (must be < 4096)\n", vlan_id);
> +		return 1;
> +	}
> +
> +	/*
> +	 * When vlan_priority_ena is enabled, allow any 16-bit value
> +	 * to pass priority and CFI bits to the driver.
> +	 */
> +	return 0;
>  }
>  
>  static uint32_t
> @@ -6876,7 +6882,7 @@ rx_vft_set(portid_t port_id, uint16_t vlan_id, int on)
>  
>  	if (port_id_is_invalid(port_id, ENABLED_WARN))
>  		return 1;
> -	if (vlan_id_is_invalid(vlan_id))
> +	if (vlan_id_is_invalid(vlan_id, vlan_priority_insert_ena))

Just use vlan_id_is_invalid(vlan_id, false) here, because Rx does not need to implement this.

>  		return 1;
>  	diag = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
>  	if (diag == 0)
> @@ -6923,7 +6929,7 @@ tx_vlan_set(portid_t port_id, uint16_t vlan_id)
>  	struct rte_eth_dev_info dev_info;
>  	int ret;
>  
> -	if (vlan_id_is_invalid(vlan_id))
> +	if (vlan_id_is_invalid(vlan_id, vlan_priority_insert_ena))
>  		return;
>  
>  	if (ports[port_id].dev_conf.txmode.offloads &
> @@ -6954,9 +6960,9 @@ tx_qinq_set(portid_t port_id, uint16_t vlan_id, uint16_t vlan_id_outer)
>  	struct rte_eth_dev_info dev_info;
>  	int ret;
>  
> -	if (vlan_id_is_invalid(vlan_id))
> +	if (vlan_id_is_invalid(vlan_id, vlan_priority_insert_ena))
>  		return;
> -	if (vlan_id_is_invalid(vlan_id_outer))
> +	if (vlan_id_is_invalid(vlan_id_outer, vlan_priority_insert_ena))
>  		return;
>  
>  	ret = eth_dev_info_get_print_err(port_id, &dev_info);
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index 8c3b1244e7..3f37498d3b 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -117,6 +117,8 @@ enum {
>  	TESTPMD_OPT_ENABLE_HW_VLAN_EXTEND_NUM,
>  #define TESTPMD_OPT_ENABLE_HW_QINQ_STRIP "enable-hw-qinq-strip"
>  	TESTPMD_OPT_ENABLE_HW_QINQ_STRIP_NUM,
> +#define TESTPMD_OPT_ENABLE_VLAN_PRIORITY "enable-vlan-priority"
> +	TESTPMD_OPT_ENABLE_VLAN_PRIORITY_NUM,

How about TESTPMD_OPT_ENABLE_VLAN_INSERT_PRI "enable-vlan-insert-pri"

>  #define TESTPMD_OPT_ENABLE_DROP_EN "enable-drop-en"
>  	TESTPMD_OPT_ENABLE_DROP_EN_NUM,
>  #define TESTPMD_OPT_DISABLE_RSS "disable-rss"
> @@ -461,6 +463,7 @@ usage(char* progname)
>  	printf("  --enable-hw-vlan-strip: enable hardware vlan strip.\n");
>  	printf("  --enable-hw-vlan-extend: enable hardware vlan extend.\n");
>  	printf("  --enable-hw-qinq-strip: enable hardware qinq strip.\n");
> +	printf("  --enable-vlan-priority: enable vlan priority insert.\n");
>  	printf("  --enable-drop-en: enable per queue packet drop.\n");
>  	printf("  --disable-rss: disable rss.\n");
>  	printf("  --enable-rss: Force rss even for single-queue operation.\n");
> @@ -1259,6 +1262,9 @@ launch_args_parse(int argc, char** argv)
>  		case TESTPMD_OPT_ENABLE_HW_QINQ_STRIP_NUM:
>  			rx_offloads |= RTE_ETH_RX_OFFLOAD_QINQ_STRIP;
>  			break;
> +		case TESTPMD_OPT_ENABLE_VLAN_PRIORITY_NUM:
> +			vlan_priority_insert_ena = 1;

How about tx_insert_vlan_pri_en

> +			break;
>  		case TESTPMD_OPT_ENABLE_DROP_EN_NUM:
>  			rx_drop_en = 1;
>  			break;
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 457bb6d3fe..0239ec59de 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -307,6 +307,11 @@ uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
>  /* current configuration is in DCB or not,0 means it is not in DCB mode */
>  uint8_t dcb_config = 0;
>  
> +/*
> + * Configurable value of vlan priority insert enable.
> + */
> +uint8_t vlan_priority_insert_ena;
> +
>  /*
>   * Configurable number of RX/TX queues.
>   */
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index 04fdc2db42..104a6e73be 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -618,6 +618,8 @@ extern uint64_t noisy_lkup_num_reads_writes;
>  
>  extern uint8_t dcb_config;
>  
> +extern uint8_t vlan_priority_insert_ena;
> +
>  extern uint32_t mbuf_data_size_n;
>  extern uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT];
>  /**< Mbuf data space size. */

We also need to update the testpmd documentation.

Thanks


^ permalink raw reply

* Re: [PATCH] net/iavf: fix scalar Rx path zero-length segment
From: Bruce Richardson @ 2026-06-15  9:33 UTC (permalink / raw)
  To: Loftus, Ciara; +Cc: dev@dpdk.org, stable@dpdk.org, Doherty, Declan
In-Reply-To: <IA4PR11MB92788A2FA65444A5BDC62C0E8EE62@IA4PR11MB9278.namprd11.prod.outlook.com>

On Mon, Jun 15, 2026 at 10:17:41AM +0100, Loftus, Ciara wrote:
> > Subject: Re: [PATCH] net/iavf: fix scalar Rx path zero-length segment
> > 
> > On Fri, Jun 12, 2026 at 02:35:31PM +0000, Ciara Loftus wrote:
> > > When hardware CRC stripping is active, a frame whose on-wire size is an
> > > exact multiple of the Rx buffer size can cause the NIC to fill the final
> > > data descriptor and place the four CRC bytes into a separate trailing
> > > descriptor. After hardware stripping, that descriptor carries zero bytes
> > > of payload.
> > >
> > > The existing CRC cleanup code only handles a zero-length trailing segment
> > > when software CRC stripping is enabled. When hardware stripping is
> > > active, the zero-length mbuf is silently chained to the reassembled
> > > packet. Forwarding such a packet causes a zero-length Tx descriptor,
> > > triggering a Malicious Driver Detection event on the PF and resetting
> > > the VF.
> > >
> > > Fix by adding logic to detect a zero-length final segment when hardware
> > > CRC stripping is active, and freeing it.
> > >
> > > Fixes: a2b29a7733ef ("net/avf: enable basic Rx Tx")
> > > Fixes: b8b4c54ef9b0 ("net/iavf: support flexible Rx descriptor in normal
> > path")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > > ---
> > >  drivers/net/intel/iavf/iavf_rxtx.c | 16 ++++++++++++++++
> > >  1 file changed, 16 insertions(+)
> > >
> > > diff --git a/drivers/net/intel/iavf/iavf_rxtx.c
> > b/drivers/net/intel/iavf/iavf_rxtx.c
> > > index a57af7faed..86ebb2618d 100644
> > > --- a/drivers/net/intel/iavf/iavf_rxtx.c
> > > +++ b/drivers/net/intel/iavf/iavf_rxtx.c
> > > @@ -1716,6 +1716,14 @@ iavf_recv_scattered_pkts_flex_rxd(void
> > *rx_queue, struct rte_mbuf **rx_pkts,
> > >  				rxm->data_len = (uint16_t)(rx_packet_len -
> > >
> > 	RTE_ETHER_CRC_LEN);
> > >  			}
> > > +		} else if (unlikely(rx_packet_len == 0)) {
> > > +			/*
> > > +			 * NIC split CRC bytes into a trailing segment which is
> > > +			 * now empty after hardware CRC stripping. Free it.
> > > +			 */
> > > +			rte_pktmbuf_free_seg(rxm);
> > > +			first_seg->nb_segs--;
> > > +			last_seg->next = NULL;
> > >  		}
> > >
> > 
> > The vector paths also handle scattered packets (via reassembly). Do they
> > need a fix for this? What about the other drivers that work on the PF, such
> > as ice/i40e?
> 
> The vector paths use the common ci_rx_reassemble_packets which already
> handles the zero-length trailing segment case correctly. When
> crc_len == 0 and the last segment has data_len == 0, the empty segment
> is freed.
> 
> The ice scalar path had the same issue but it was patched in 2022:
> https://git.dpdk.org/dpdk/commit/?id=90ba4442058a14763e57ca96d03ab1e6044e3e5c
> I cannot reproduce the behaviour on i40e hardware (either PF or VF) so I
> don't think it needs to be patched as the HW seems to behave
> differently.
> 

Thanks for clarifying.

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

As an aside for future work: we should consider if we can also convert the
scalar Rx paths for our drivers to match that of the vector, where we do a
simplified receive per descriptor, pretending that each descriptor is its
own packet, and then use the reassemble_packets call to put any scattered
packets back together. I wonder if that would lead to better scalar Rx
performance.

/Bruce

^ permalink raw reply

* Re: [PATCH 8/9] ethdev: keep fast-path ops valid after port stop
From: David Marchand @ 2026-06-15  9:26 UTC (permalink / raw)
  To: Maxime Leroy
  Cc: hemant.agrawal, sachin.saxena, dev, stable, Thomas Monjalon,
	Andrew Rybchenko, Morten Brørup, Sunil Kumar Kori
In-Reply-To: <20260611154926.392670-9-maxime@leroys.fr>

On Thu, 11 Jun 2026 at 17:51, Maxime Leroy <maxime@leroys.fr> wrote:
>
> eth_dev_fp_ops_reset() restores a port's fast-path ops on stop/release
> via a compound literal, so every field it omits is zeroed to NULL. It
> sets only rx_pkt_burst/tx_pkt_burst (and the rxq/txq data), leaving
> rx_queue_count, tx_queue_count, rx/tx_descriptor_status, tx_pkt_prepare
> and the recycle callbacks NULL.
>
> In non-debug builds these ops are reached through an unguarded indirect
> call (the NULL check exists only under RTE_ETHDEV_DEBUG_RX/TX). So a
> thread calling e.g. rte_eth_rx_queue_count() on a port being stopped
> dereferences NULL and crashes, while the same race on rte_eth_rx_burst()
> is harmless because the burst ops are reset to dummies. A poll-mode
> worker re-checking rx_queue_count before arming the Rx interrupt and
> sleeping hits exactly this.
>
> Reset these ops to the same dummies eth_dev_set_dummy_fops() installs,
> so a stopped port behaves like a freshly allocated one: every fast-path
> op is a safe no-op, none is NULL.
>
> Fixes: 066f3d9cc21c ("ethdev: remove callback checks from fast path")
> Cc: stable@dpdk.org
> Signed-off-by: Maxime Leroy <maxime@leroys.fr>
> ---
>  lib/ethdev/ethdev_private.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
> index 72a0723846..75ea3eedff 100644
> --- a/lib/ethdev/ethdev_private.c
> +++ b/lib/ethdev/ethdev_private.c
> @@ -263,6 +263,13 @@ eth_dev_fp_ops_reset(struct rte_eth_fp_ops *fpo)
>         *fpo = (struct rte_eth_fp_ops) {
>                 .rx_pkt_burst = dummy_eth_rx_burst,
>                 .tx_pkt_burst = dummy_eth_tx_burst,
> +               .tx_pkt_prepare = rte_eth_tx_pkt_prepare_dummy,
> +               .rx_queue_count = rte_eth_queue_count_dummy,
> +               .tx_queue_count = rte_eth_queue_count_dummy,
> +               .rx_descriptor_status = rte_eth_descriptor_status_dummy,
> +               .tx_descriptor_status = rte_eth_descriptor_status_dummy,
> +               .recycle_tx_mbufs_reuse = rte_eth_recycle_tx_mbufs_reuse_dummy,
> +               .recycle_rx_descriptors_refill = rte_eth_recycle_rx_descriptors_refill_dummy,
>                 .rxq = {
>                         .data = (void **)&dummy_queues_array[port_id],
>                         .clbk = dummy_data,

Could we replace eth_dev_set_dummy_fops() with a call to
eth_dev_fp_ops_reset() in rte_eth_dev_allocate?
I don't like keeping two separate helpers.


-- 
David Marchand


^ permalink raw reply

* [DPDK/core Bug 1955] [dpdk26.07-rc1] power_intel_uncore: start dpdk-l3fwd-power failed on Ubuntu26.04
From: bugzilla @ 2026-06-15  9:25 UTC (permalink / raw)
  To: dev

http://bugs.dpdk.org/show_bug.cgi?id=1955

            Bug ID: 1955
           Summary: [dpdk26.07-rc1] power_intel_uncore: start
                    dpdk-l3fwd-power failed on Ubuntu26.04
           Product: DPDK
           Version: 26.07
          Hardware: All
                OS: All
            Status: UNCONFIRMED
          Severity: normal
          Priority: Normal
         Component: core
          Assignee: dev@dpdk.org
          Reporter: daxuex.gao@intel.com
  Target Milestone: ---

Environment
===========
DPDK version: dpdk-26.07-rc1 (c429b06df56788795f8)
OS: Ubuntu 26.04 LTS/Linux 7.0.0-14-generic
Compiler: gcc version 15.2.0 (Ubuntu 15.2.0-16ubuntu1)
NIC hardware: Ethernet Controller E810-C for SFP 1593 [8086:1593 (rev 01)]
NIC firmware: 
  driver: vfio-pci
  kdriver: ice-2.6.4
  fw: 5.00 0x80021c11 1.4002.0
  ddp: ICE OS Package version 1.3.59.0

Test Setup
Steps to reproduce
==================
1.Build dpdk 
CC=gcc meson -Dlibdir=lib  --default-library=static x86_64-native-linuxapp-gcc
ninja -C x86_64-native-linuxapp-gcc
meson configure -Dexamples=l3fwd-power x86_64-native-linuxapp-gcc
ninja -C x86_64-native-linuxapp-gcc

2.Start dpdk-l3fwd-power
usertools/dpdk-devbind.py --force --bind=vfio-pci 0000:38:00.0 0000:38:00.1
/root/dpdk/x86_64-native-linuxapp-gcc/examples/dpdk-l3fwd-power  -l 1-2 -n 1 --
-p 0x1 -P --config="(0,0,2)" -u

Results: 
========
# /root/dpdk/x86_64-native-linuxapp-gcc/examples/dpdk-l3fwd-power  -l 1-2 -n 1
-- -p 0x1 -P --config="(0,0,2)" -u
EAL: Detected CPU lcores: 112
EAL: Detected NUMA nodes: 8
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
Promiscuous mode selected
/root/dpdk/x86_64-native-linuxapp-gcc/examples/dpdk-l3fwd-power [EAL options]
-- -p PORTMASK -P  [--config (port,queue,lcore)[,(port,queue,lcore]] 
[--high-perf-cores CORELIST  [--perf-config
(port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]] 
[--max-pkt-len PKTLEN]
  -p PORTMASK: hexadecimal bitmask of ports to configure
  -P: enable promiscuous mode
  -u: set min/max frequency for uncore to minimum value
  -U: set min/max frequency for uncore to maximum value
  -i (frequency index): set min/max frequency for uncore to specified frequency
index
  --config (port,queue,lcore): rx queues configuration
  --eth-link-speed: force link speed
  --cpu-resume-latency LATENCY: set CPU resume latency to control C-state
selection, 0 : just allow to enter C0-state
  --high-perf-cores CORELIST: list of high performance cores
  --perf-config: similar as config, cores specified as indices for bins
containing high or regular performance cores
  --no-numa: optional, disable numa awareness
  --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)
  --parse-ptype: parse packet type by software
  --legacy: use legacy interrupt-based scaling
 --telemetry: enable telemetry mode, to update empty polls, full polls, and
core busyness to telemetry
 --interrupt-only: enable interrupt-only mode
 --pmd-mgmt MODE: enable PMD power management mode. Currently supported modes:
baseline, monitor, pause, scale
  --max-empty-polls MAX_EMPTY_POLLS: number of empty polls to wait before
entering sleep state
  --pause-duration DURATION: set the duration, in microseconds, of the pause
callback
  --scale-freq-min FREQ_MIN: set minimum frequency for scaling mode for all
application lcores (FREQ_MIN must be in kHz, in increments of 100MHz)
  --scale-freq-max FREQ_MAX: set maximum frequency for scaling mode for all
application lcores (FREQ_MAX must be in kHz, in increments of 100MHz)
EAL: Error - exiting with code: 1
Invalid L3FWD parameters


Expected Result:
Startup successful 

Regression
Is this issue a regression: (Y/N)Y

-- 
You are receiving this mail because:
You are the assignee for the bug.

^ permalink raw reply

* Re: [PATCH v4 0/9] power: centralize lcore ID validation
From: fengchengwen @ 2026-06-15  9:22 UTC (permalink / raw)
  To: Huisong Li, thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, yangxingui, zhanjie9
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

LGTM
Series-acked-by: Chengwen Feng <fengchengwen@huawei.com>

On 6/15/2026 3:30 PM, Huisong Li wrote:
> This series centralizes the lcore ID verification in the power cpufreq
> framework, replacing the per-driver range checks with a common validation.
> 
> Background
> ----------
> Currently, various cpufreq drivers implement their own lcore ID checks,
> which are limited to simple range validation against RTE_MAX_LCORE and
> involve significant code duplication across 12+ functions per driver.
> The checks are duplicated across all drivers — any change requires
> updating 5+ drivers identically. Moreover, these checks do not verify
> whether the lcore is actually managed by the application. So it is better
> to verify lcore ID in cpufreq core.
> 
> For cpufreq-related APIs, although service cores do not typically invoke
> these APIs, they may operate in polling modes where power management is
> required. To maintain compatibility with applications using service cores,
> the validation logic now explicitly accepts both ROLE_RTE and ROLE_SERVICE.
> 
> The usage of power QoS APIs is similar to that of cpufreq. They can also
> accept ROLE_RTE and ROLE_SERVICE.
> 
> For PMD power management APIs, the lcore must be ROLE_RTE because these
> are used together with the data plane of ethdev PMD. Hence,
> rte_lcore_is_enabled() is used for validation.
> 
> Key Changes:
> ------------
> Patch 1: Adds a common macro (RTE_POWER_VALID_LCOREID_OR_ERR_RET)
>          that accepts both roles.
> Patch 2: Adds the validation to the cpufreq framework layer.
> Patches 3-7: Remove the now-redundant per-driver RTE_MAX_LCORE checks.
> Patch 8: Update power QoS to use the new validation, allowing
>          service cores to configure QoS parameters.
> Patch 9: Add lcore validation to PMD management functions.
> 
> Changes:
> --------
> v4: remove the patch that adds the helper function rte_lcore_is_eal_managed.
> 
> v3:
>  - update release note.
>  - add __rte_experimental for new helper function.
>  - restructure this patch set to facilitate review.
> 
> v2:
>  - allow the service cores to set power API.
> 
> ----
> 
> Huisong Li (9):
>   power: add a common macro to verify lcore ID
>   power/cpufreq: add the lcore ID verification to framework
>   power/acpi: remove redundant lcore ID checks
>   power/amd_pstate: remove redundant lcore ID checks
>   power/cppc: remove redundant lcore ID checks
>   power/intel_pstate: remove redundant lcore ID checks
>   power/kvm_vm: remove redundant lcore ID checks
>   power: allow the service core to config power QoS
>   power: add lcore ID check for PMD mgmt
> 
>  doc/guides/rel_notes/release_26_07.rst        |  7 ++
>  drivers/power/acpi/acpi_cpufreq.c             | 65 -------------------
>  drivers/power/amd_pstate/amd_pstate_cpufreq.c | 65 -------------------
>  drivers/power/cppc/cppc_cpufreq.c             | 65 -------------------
>  .../power/intel_pstate/intel_pstate_cpufreq.c | 65 -------------------
>  drivers/power/kvm_vm/guest_channel.c          | 22 -------
>  drivers/power/kvm_vm/kvm_vm.c                 | 10 ---
>  lib/power/power_common.h                      |  8 +++
>  lib/power/rte_power_cpufreq.c                 | 13 ++++
>  lib/power/rte_power_pmd_mgmt.c                | 21 +++---
>  lib/power/rte_power_qos.c                     | 10 +--
>  11 files changed, 41 insertions(+), 310 deletions(-)
> 


^ permalink raw reply

* RE: [PATCH] net/iavf: fix scalar Rx path zero-length segment
From: Loftus, Ciara @ 2026-06-15  9:17 UTC (permalink / raw)
  To: Richardson, Bruce; +Cc: dev@dpdk.org, stable@dpdk.org, Doherty, Declan
In-Reply-To: <aiwo_4aTWi8kBIkG@bricha3-mobl1.ger.corp.intel.com>

> Subject: Re: [PATCH] net/iavf: fix scalar Rx path zero-length segment
> 
> On Fri, Jun 12, 2026 at 02:35:31PM +0000, Ciara Loftus wrote:
> > When hardware CRC stripping is active, a frame whose on-wire size is an
> > exact multiple of the Rx buffer size can cause the NIC to fill the final
> > data descriptor and place the four CRC bytes into a separate trailing
> > descriptor. After hardware stripping, that descriptor carries zero bytes
> > of payload.
> >
> > The existing CRC cleanup code only handles a zero-length trailing segment
> > when software CRC stripping is enabled. When hardware stripping is
> > active, the zero-length mbuf is silently chained to the reassembled
> > packet. Forwarding such a packet causes a zero-length Tx descriptor,
> > triggering a Malicious Driver Detection event on the PF and resetting
> > the VF.
> >
> > Fix by adding logic to detect a zero-length final segment when hardware
> > CRC stripping is active, and freeing it.
> >
> > Fixes: a2b29a7733ef ("net/avf: enable basic Rx Tx")
> > Fixes: b8b4c54ef9b0 ("net/iavf: support flexible Rx descriptor in normal
> path")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Declan Doherty <declan.doherty@intel.com>
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > ---
> >  drivers/net/intel/iavf/iavf_rxtx.c | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> >
> > diff --git a/drivers/net/intel/iavf/iavf_rxtx.c
> b/drivers/net/intel/iavf/iavf_rxtx.c
> > index a57af7faed..86ebb2618d 100644
> > --- a/drivers/net/intel/iavf/iavf_rxtx.c
> > +++ b/drivers/net/intel/iavf/iavf_rxtx.c
> > @@ -1716,6 +1716,14 @@ iavf_recv_scattered_pkts_flex_rxd(void
> *rx_queue, struct rte_mbuf **rx_pkts,
> >  				rxm->data_len = (uint16_t)(rx_packet_len -
> >
> 	RTE_ETHER_CRC_LEN);
> >  			}
> > +		} else if (unlikely(rx_packet_len == 0)) {
> > +			/*
> > +			 * NIC split CRC bytes into a trailing segment which is
> > +			 * now empty after hardware CRC stripping. Free it.
> > +			 */
> > +			rte_pktmbuf_free_seg(rxm);
> > +			first_seg->nb_segs--;
> > +			last_seg->next = NULL;
> >  		}
> >
> 
> The vector paths also handle scattered packets (via reassembly). Do they
> need a fix for this? What about the other drivers that work on the PF, such
> as ice/i40e?

The vector paths use the common ci_rx_reassemble_packets which already
handles the zero-length trailing segment case correctly. When
crc_len == 0 and the last segment has data_len == 0, the empty segment
is freed.

The ice scalar path had the same issue but it was patched in 2022:
https://git.dpdk.org/dpdk/commit/?id=90ba4442058a14763e57ca96d03ab1e6044e3e5c
I cannot reproduce the behaviour on i40e hardware (either PF or VF) so I
don't think it needs to be patched as the HW seems to behave
differently.

> 
> /Bruce
> 
> >  		first_seg->port = rxq->port_id;
> > @@ -1884,6 +1892,14 @@ iavf_recv_scattered_pkts(void *rx_queue, struct
> rte_mbuf **rx_pkts,
> >  			} else
> >  				rxm->data_len = (uint16_t)(rx_packet_len -
> >
> 	RTE_ETHER_CRC_LEN);
> > +		} else if (unlikely(rx_packet_len == 0)) {
> > +			/*
> > +			 * NIC split CRC bytes into a trailing segment which is
> > +			 * now empty after hardware CRC stripping. Free it.
> > +			 */
> > +			rte_pktmbuf_free_seg(rxm);
> > +			first_seg->nb_segs--;
> > +			last_seg->next = NULL;
> >  		}
> >
> >  		first_seg->port = rxq->port_id;
> > --
> > 2.43.0
> >

^ permalink raw reply

* Re: [PATCH v2] app/testpmd: add padding mode to txonly engine
From: yangxingui @ 2026-06-15  9:12 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, david.marchand, aman.deep.singh, fengchengwen, yangshuaisong,
	lihuisong, liuyonglong, kangfenglong
In-Reply-To: <20260612091343.14344ef7@phoenix.local>



On 2026/6/13 0:13, Stephen Hemminger wrote:
> On Fri, 12 Jun 2026 17:12:17 +0800
> Xingui Yang <yangxingui@huawei.com> wrote:
> 
>> Add a new padding mode to the txonly forwarding engine, which allows
>> sending packets with configurable small sizes without standard L2/L3
>> headers. This is useful for testing NIC padding logic.
>>
>> When padding mode is enabled via --tx-pkt-pad-mode flag:
>> - l2_len and l3_len are set to 0 instead of standard header lengths
>> - Packet data is filled with a static pattern instead of
>>    Ethernet/IP/UDP headers
>> - Minimum packet length validation is bypassed to allow small
>>    packet sizes (e.g., set txpkts 14)
>>
>> Signed-off-by: Xingui Yang <yangxingui@huawei.com>
>> Signed-off-by: Huisong Li <lihuisong@huawei.com>
>> ---
>> v2: Fix compilation exception of unterminated-string-initialization
>> ---
> 
> What about something like this (*not tested*) patch.

Hi Stephen,

Thank you for the valuable suggestion! Your approach is cleaner and
more elegant for generating runt frames with truncated headers.

However, our use case requires sending packets smaller than the
Ethernet header size (e.g., `set txpkts 9` or `set txpkts 2`).
These ultra-small packets are needed to test NIC padding logic where
no standard L2/L3 headers are present at all.

We could combine both approaches:
- Allow packet lengths down to 1 byte (remove the 14-byte minimum)
- Dynamically compute l2_len/l3_len based on actual packet length
- Use a fill pattern for packets too small to hold Ethernet header
- Keep the checksum offload handling from your patch

This way testpmd can support the full range:
- Normal packets (>= 14 + 20 + 8 bytes): full headers with checksum
- Runt frames (14-42 bytes): truncated headers, checksums disabled
- Ultra-small packets (<14 bytes): fill pattern only, l2_len/l3_len = 0

Would this combined approach be acceptable? We can submit a v3 patch
incorporating your suggestions plus support for ultra-small packets.

Thanks,
Xingui Yang

^ permalink raw reply

* Re: [PATCH v2] eal: add destructor to unregister tailq on unload
From: David Marchand @ 2026-06-15  7:57 UTC (permalink / raw)
  To: Stephen Hemminger, fengchengwen
  Cc: dev, stable, Bruce Richardson, Neil Horman
In-Reply-To: <20260610085749.7bb0a4f3@phoenix.local>

On Wed, 10 Jun 2026 at 17:58, Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Wed, 10 Jun 2026 09:19:42 +0800
> fengchengwen <fengchengwen@huawei.com> wrote:
>
> > >
> > > +RTE_EXPORT_SYMBOL(rte_eal_tailq_unregister)
> >
> > this should be with EXPERIMENTAL
>
> Not possible, this is part of the EAL_REGISTER_TAILQ macro and usage
> is under the covers. So if anything was marked experimental it would
> fail code that did not allow experimental

Indeed.


> > > +void
> > > +rte_eal_tailq_unregister(struct rte_tailq_elem *t)
> > > +{
> > > +   TAILQ_REMOVE(&rte_tailq_elem_head, t, next);
> >
> > We first need to make sure it exists in the tailq, just like the TAILQ_FOREACH in rte_eal_tailq_local_register()
>
> Ok cheap scan since not in critical path.

I had excluded this point when looking at the v2 patch, considering
that abort() is called in the constructor on failure, and destructors
are not called after abort().

Is your concern that rte_eal_tailq_unregister could be called
directly by the application with a random pointer?
A check would be safer on paper, but it seems strange to protect here.

Am I missing another case?


-- 
David Marchand


^ permalink raw reply

* [PATCH v4 9/9] power: add lcore ID check for PMD mgmt
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

The pmd_mgmt lib is mainly used together with the data plane of ethdev
PMD. The core in data plane is ROLE_RTE. So use rte_lcore_is_enabled
to verify it.

Fixes: 426511683762 ("power: add get/set min/max scaling frequencies API")
Cc: stable@dpdk.org

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 lib/power/rte_power_pmd_mgmt.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index a4d53aac2a..a5fc1c3a94 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -511,7 +511,8 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
-	if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
+	if (queue_id >= RTE_MAX_QUEUES_PER_PORT ||
+	    !rte_lcore_is_enabled(lcore_id)) {
 		ret = -EINVAL;
 		goto end;
 	}
@@ -627,7 +628,7 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
-	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
+	if (!rte_lcore_is_enabled(lcore_id) || queue_id >= RTE_MAX_QUEUES_PER_PORT)
 		return -EINVAL;
 
 	/* check if the queue is stopped */
@@ -729,8 +730,8 @@ RTE_EXPORT_SYMBOL(rte_power_pmd_mgmt_set_scaling_freq_min)
 int
 rte_power_pmd_mgmt_set_scaling_freq_min(unsigned int lcore, unsigned int min)
 {
-	if (lcore >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
+	if (!rte_lcore_is_enabled(lcore)) {
+		POWER_LOG(ERR, "lcore id %u is not enabled", lcore);
 		return -EINVAL;
 	}
 
@@ -747,8 +748,8 @@ RTE_EXPORT_SYMBOL(rte_power_pmd_mgmt_set_scaling_freq_max)
 int
 rte_power_pmd_mgmt_set_scaling_freq_max(unsigned int lcore, unsigned int max)
 {
-	if (lcore >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
+	if (!rte_lcore_is_enabled(lcore)) {
+		POWER_LOG(ERR, "lcore id %u is not enabled", lcore);
 		return -EINVAL;
 	}
 
@@ -769,8 +770,8 @@ RTE_EXPORT_SYMBOL(rte_power_pmd_mgmt_get_scaling_freq_min)
 int
 rte_power_pmd_mgmt_get_scaling_freq_min(unsigned int lcore)
 {
-	if (lcore >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
+	if (!rte_lcore_is_enabled(lcore)) {
+		POWER_LOG(ERR, "lcore id %u is not enabled", lcore);
 		return -EINVAL;
 	}
 
@@ -784,8 +785,8 @@ RTE_EXPORT_SYMBOL(rte_power_pmd_mgmt_get_scaling_freq_max)
 int
 rte_power_pmd_mgmt_get_scaling_freq_max(unsigned int lcore)
 {
-	if (lcore >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
+	if (!rte_lcore_is_enabled(lcore)) {
+		POWER_LOG(ERR, "lcore id %u is not enabled", lcore);
 		return -EINVAL;
 	}
 
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 8/9] power: allow the service core to config power QoS
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

The lcore ID verification in the power QoS API used to use
rte_lcore_is_enabled(), which only accepts lcores with the
ROLE_RTE role. But a service core thread (ROLE_SERVICE) can
also use the power QoS API.

So use RTE_POWER_VALID_LCOREID_OR_ERR_RET to verify the
lcore ID. This change makes the power QoS API accept both
ROLE_RTE and ROLE_SERVICE lcores.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 doc/guides/rel_notes/release_26_07.rst |  5 ++++-
 lib/power/rte_power_qos.c              | 10 ++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index aeabb908ec..5eb3974ddb 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -158,7 +158,10 @@ New Features
 * **Added a common macro to verify lcore ID in power core.**
 
   Added the ``RTE_POWER_VALID_LCOREID_OR_ERR_RET`` macro to verify lcore ID
-  in power core.
+  in power core. The power QoS library also updated its lcore validation to
+  use this macro, so service cores (``ROLE_SERVICE``) are now permitted.
+
+
 
 Removed Items
 -------------
diff --git a/lib/power/rte_power_qos.c b/lib/power/rte_power_qos.c
index f991230532..d8d8d36a76 100644
--- a/lib/power/rte_power_qos.c
+++ b/lib/power/rte_power_qos.c
@@ -27,10 +27,7 @@ rte_power_qos_set_cpu_resume_latency(uint16_t lcore_id, int latency)
 	FILE *f;
 	int ret;
 
-	if (!rte_lcore_is_enabled(lcore_id)) {
-		POWER_LOG(ERR, "lcore id %u is not enabled", lcore_id);
-		return -EINVAL;
-	}
+	RTE_POWER_VALID_LCOREID_OR_ERR_RET(lcore_id, -EINVAL);
 	ret = power_get_lcore_mapped_cpu_id(lcore_id, &cpu_id);
 	if (ret != 0)
 		return ret;
@@ -82,10 +79,7 @@ rte_power_qos_get_cpu_resume_latency(uint16_t lcore_id)
 	FILE *f;
 	int ret;
 
-	if (!rte_lcore_is_enabled(lcore_id)) {
-		POWER_LOG(ERR, "lcore id %u is not enabled", lcore_id);
-		return -EINVAL;
-	}
+	RTE_POWER_VALID_LCOREID_OR_ERR_RET(lcore_id, -EINVAL);
 	ret = power_get_lcore_mapped_cpu_id(lcore_id, &cpu_id);
 	if (ret != 0)
 		return ret;
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 7/9] power/kvm_vm: remove redundant lcore ID checks
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Now that the cpufreq framework validates the lcore ID using
RTE_POWER_VALID_LCOREID_OR_ERR_RET() before dispatching to any driver,
each individual cpufreq driver no longer needs its own range
check against RTE_MAX_LCORE.

Remove the duplicated lcore ID checks from the kvm_vm cpufreq
driver and its guest_channel helper.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 drivers/power/kvm_vm/guest_channel.c | 22 ----------------------
 drivers/power/kvm_vm/kvm_vm.c        | 10 ----------
 2 files changed, 32 deletions(-)

diff --git a/drivers/power/kvm_vm/guest_channel.c b/drivers/power/kvm_vm/guest_channel.c
index 42bfcedb56..dc8fe05fef 100644
--- a/drivers/power/kvm_vm/guest_channel.c
+++ b/drivers/power/kvm_vm/guest_channel.c
@@ -61,11 +61,6 @@ guest_channel_host_connect(const char *path, unsigned int lcore_id)
 	char fd_path[PATH_MAX];
 	int fd = -1;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		GUEST_CHANNEL_LOG(ERR, "Channel(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return -1;
-	}
 	/* check if path is already open */
 	if (global_fds[lcore_id] != -1) {
 		GUEST_CHANNEL_LOG(ERR, "Channel(%u) is already open with fd %d",
@@ -127,12 +122,6 @@ guest_channel_send_msg(struct rte_power_channel_packet *pkt,
 	int ret, buffer_len = sizeof(*pkt);
 	void *buffer = pkt;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		GUEST_CHANNEL_LOG(ERR, "Channel(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return -1;
-	}
-
 	if (global_fds[lcore_id] < 0) {
 		GUEST_CHANNEL_LOG(ERR, "Channel is not connected");
 		return -1;
@@ -169,12 +158,6 @@ int power_guest_channel_read_msg(void *pkt,
 	if (pkt_len == 0 || pkt == NULL)
 		return -1;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		GUEST_CHANNEL_LOG(ERR, "Channel(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return -1;
-	}
-
 	if (global_fds[lcore_id] < 0) {
 		GUEST_CHANNEL_LOG(ERR, "Channel is not connected");
 		return -1;
@@ -225,11 +208,6 @@ int rte_power_guest_channel_receive_msg(void *pkt,
 void
 guest_channel_host_disconnect(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		GUEST_CHANNEL_LOG(ERR, "Channel(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return;
-	}
 	if (global_fds[lcore_id] < 0)
 		return;
 	close(global_fds[lcore_id]);
diff --git a/drivers/power/kvm_vm/kvm_vm.c b/drivers/power/kvm_vm/kvm_vm.c
index 5754a441cd..e8b454bb55 100644
--- a/drivers/power/kvm_vm/kvm_vm.c
+++ b/drivers/power/kvm_vm/kvm_vm.c
@@ -24,11 +24,6 @@ power_kvm_vm_check_supported(void)
 int
 power_kvm_vm_init(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Core(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return -1;
-	}
 	pkt[lcore_id].command = RTE_POWER_CPU_POWER;
 	pkt[lcore_id].resource_id = lcore_id;
 	return guest_channel_host_connect(FD_PATH, lcore_id);
@@ -73,11 +68,6 @@ send_msg(unsigned int lcore_id, uint32_t scale_direction)
 {
 	int ret;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Core(%u) is out of range 0...%d",
-				lcore_id, RTE_MAX_LCORE-1);
-		return -1;
-	}
 	pkt[lcore_id].unit = scale_direction;
 	ret = guest_channel_send_msg(&pkt[lcore_id], lcore_id);
 	if (ret == 0)
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 6/9] power/intel_pstate: remove redundant lcore ID checks
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Now that the cpufreq framework validates the lcore ID using
RTE_POWER_VALID_LCOREID_OR_ERR_RET() before dispatching to any driver,
each individual cpufreq driver no longer needs its own range
check against RTE_MAX_LCORE.

Remove the duplicated lcore ID checks from the intel_pstate
cpufreq driver ops.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 .../power/intel_pstate/intel_pstate_cpufreq.c | 65 -------------------
 1 file changed, 65 deletions(-)

diff --git a/drivers/power/intel_pstate/intel_pstate_cpufreq.c b/drivers/power/intel_pstate/intel_pstate_cpufreq.c
index 22a1b4465a..dfbb5635a1 100644
--- a/drivers/power/intel_pstate/intel_pstate_cpufreq.c
+++ b/drivers/power/intel_pstate/intel_pstate_cpufreq.c
@@ -540,12 +540,6 @@ power_pstate_cpufreq_init(unsigned int lcore_id)
 		return -1;
 	}
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceed %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_IDLE;
 	/* The power in use state works as a guard variable between
@@ -622,11 +616,6 @@ power_pstate_cpufreq_exit(unsigned int lcore_id)
 	struct pstate_power_info *pi;
 	uint32_t exp_state;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
 	pi = &lcore_power_info[lcore_id];
 
 	exp_state = POWER_USED;
@@ -680,11 +669,6 @@ power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return 0;
-	}
-
 	if (freqs == NULL) {
 		POWER_LOG(ERR, "NULL buffer supplied");
 		return 0;
@@ -703,11 +687,6 @@ power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 uint32_t
 power_pstate_cpufreq_get_freq(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return RTE_POWER_INVALID_FREQ_INDEX;
-	}
-
 	return lcore_power_info[lcore_id].curr_idx;
 }
 
@@ -715,11 +694,6 @@ power_pstate_cpufreq_get_freq(unsigned int lcore_id)
 int
 power_pstate_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
 }
 
@@ -728,11 +702,6 @@ power_pstate_cpufreq_freq_up(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx == 0 ||
 	    (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
@@ -747,11 +716,6 @@ power_pstate_cpufreq_freq_down(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx + 1 == pi->nb_freqs)
 		return 0;
@@ -763,11 +727,6 @@ power_pstate_cpufreq_freq_down(unsigned int lcore_id)
 int
 power_pstate_cpufreq_freq_max(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	/* Frequencies in the array are from high to low. */
 	if (lcore_power_info[lcore_id].turbo_available) {
 		if (lcore_power_info[lcore_id].turbo_enable)
@@ -788,11 +747,6 @@ power_pstate_cpufreq_freq_min(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	/* Frequencies in the array are from high to low. */
@@ -805,11 +759,6 @@ power_pstate_turbo_status(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	return pi->turbo_enable;
@@ -820,11 +769,6 @@ power_pstate_enable_turbo(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	if (pi->turbo_available)
@@ -846,11 +790,6 @@ power_pstate_disable_turbo(unsigned int lcore_id)
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	pi->turbo_enable = 0;
@@ -874,10 +813,6 @@ int power_pstate_get_capabilities(unsigned int lcore_id,
 {
 	struct pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
 	if (caps == NULL) {
 		POWER_LOG(ERR, "Invalid argument");
 		return -1;
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 5/9] power/cppc: remove redundant lcore ID checks
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Now that the cpufreq framework validates the lcore ID using
RTE_POWER_VALID_LCOREID_OR_ERR_RET() before dispatching to any driver,
each individual cpufreq driver no longer needs its own range
check against RTE_MAX_LCORE.

Remove the duplicated lcore ID checks from the cppc cpufreq
driver ops.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 drivers/power/cppc/cppc_cpufreq.c | 65 -------------------------------
 1 file changed, 65 deletions(-)

diff --git a/drivers/power/cppc/cppc_cpufreq.c b/drivers/power/cppc/cppc_cpufreq.c
index 9ae25bad27..aed44c1212 100644
--- a/drivers/power/cppc/cppc_cpufreq.c
+++ b/drivers/power/cppc/cppc_cpufreq.c
@@ -337,12 +337,6 @@ power_cppc_cpufreq_init(unsigned int lcore_id)
 		return -1;
 	}
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_IDLE;
 	/* The power in use state works as a guard variable between
@@ -420,11 +414,6 @@ power_cppc_cpufreq_exit(unsigned int lcore_id)
 	struct cppc_power_info *pi;
 	uint32_t exp_state;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_USED;
 	/* The power in use state works as a guard variable between
@@ -470,11 +459,6 @@ power_cppc_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return 0;
-	}
-
 	if (freqs == NULL) {
 		POWER_LOG(ERR, "NULL buffer supplied");
 		return 0;
@@ -493,22 +477,12 @@ power_cppc_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 uint32_t
 power_cppc_cpufreq_get_freq(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return RTE_POWER_INVALID_FREQ_INDEX;
-	}
-
 	return lcore_power_info[lcore_id].curr_idx;
 }
 
 int
 power_cppc_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
 }
 
@@ -517,11 +491,6 @@ power_cppc_cpufreq_freq_down(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx + 1 == pi->nb_freqs)
 		return 0;
@@ -535,11 +504,6 @@ power_cppc_cpufreq_freq_up(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx == 0 || (pi->curr_idx == 1 &&
 		pi->turbo_available && !pi->turbo_enable))
@@ -552,11 +516,6 @@ power_cppc_cpufreq_freq_up(unsigned int lcore_id)
 int
 power_cppc_cpufreq_freq_max(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	/* Frequencies in the array are from high to low. */
 	if (lcore_power_info[lcore_id].turbo_available) {
 		if (lcore_power_info[lcore_id].turbo_enable)
@@ -576,11 +535,6 @@ power_cppc_cpufreq_freq_min(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	/* Frequencies in the array are from high to low. */
@@ -592,11 +546,6 @@ power_cppc_turbo_status(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	return pi->turbo_enable;
@@ -607,11 +556,6 @@ power_cppc_enable_turbo(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	if (pi->turbo_available)
@@ -643,11 +587,6 @@ power_cppc_disable_turbo(unsigned int lcore_id)
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	pi->turbo_enable = 0;
@@ -671,10 +610,6 @@ power_cppc_get_capabilities(unsigned int lcore_id,
 {
 	struct cppc_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
 	if (caps == NULL) {
 		POWER_LOG(ERR, "Invalid argument");
 		return -1;
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 4/9] power/amd_pstate: remove redundant lcore ID checks
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Now that the cpufreq framework validates the lcore ID using
RTE_POWER_VALID_LCOREID_OR_ERR_RET() before dispatching to any driver,
each individual cpufreq driver no longer needs its own range
check against RTE_MAX_LCORE.

Remove the duplicated lcore ID checks from the amd_pstate
cpufreq driver ops.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 drivers/power/amd_pstate/amd_pstate_cpufreq.c | 65 -------------------
 1 file changed, 65 deletions(-)

diff --git a/drivers/power/amd_pstate/amd_pstate_cpufreq.c b/drivers/power/amd_pstate/amd_pstate_cpufreq.c
index bc67981d71..af9c1309f3 100644
--- a/drivers/power/amd_pstate/amd_pstate_cpufreq.c
+++ b/drivers/power/amd_pstate/amd_pstate_cpufreq.c
@@ -351,12 +351,6 @@ power_amd_pstate_cpufreq_init(unsigned int lcore_id)
 		return -1;
 	}
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_IDLE;
 	/* The power in use state works as a guard variable between
@@ -434,11 +428,6 @@ power_amd_pstate_cpufreq_exit(unsigned int lcore_id)
 	struct amd_pstate_power_info *pi;
 	uint32_t exp_state;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_USED;
 	/* The power in use state works as a guard variable between
@@ -484,11 +473,6 @@ power_amd_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return 0;
-	}
-
 	if (freqs == NULL) {
 		POWER_LOG(ERR, "NULL buffer supplied");
 		return 0;
@@ -507,22 +491,12 @@ power_amd_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t
 uint32_t
 power_amd_pstate_cpufreq_get_freq(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return RTE_POWER_INVALID_FREQ_INDEX;
-	}
-
 	return lcore_power_info[lcore_id].curr_idx;
 }
 
 int
 power_amd_pstate_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
 }
 
@@ -531,11 +505,6 @@ power_amd_pstate_cpufreq_freq_down(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx + 1 == pi->nb_freqs)
 		return 0;
@@ -549,11 +518,6 @@ power_amd_pstate_cpufreq_freq_up(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx == 0 || (pi->curr_idx == pi->nom_idx &&
 		pi->turbo_available && !pi->turbo_enable))
@@ -566,11 +530,6 @@ power_amd_pstate_cpufreq_freq_up(unsigned int lcore_id)
 int
 power_amd_pstate_cpufreq_freq_max(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	/* Frequencies in the array are from high to low. */
 	if (lcore_power_info[lcore_id].turbo_available) {
 		if (lcore_power_info[lcore_id].turbo_enable)
@@ -591,11 +550,6 @@ power_amd_pstate_cpufreq_freq_min(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	/* Frequencies in the array are from high to low. */
@@ -607,11 +561,6 @@ power_amd_pstate_turbo_status(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	return pi->turbo_enable;
@@ -622,11 +571,6 @@ power_amd_pstate_enable_turbo(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	if (pi->turbo_available)
@@ -658,11 +602,6 @@ power_amd_pstate_disable_turbo(unsigned int lcore_id)
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	pi->turbo_enable = 0;
@@ -686,10 +625,6 @@ power_amd_pstate_get_capabilities(unsigned int lcore_id,
 {
 	struct amd_pstate_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
 	if (caps == NULL) {
 		POWER_LOG(ERR, "Invalid argument");
 		return -1;
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 1/9] power: add a common macro to verify lcore ID
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Many places in the power library verify the lcore ID, so it is
worthwhile to add a common macro for this in the power core.

According to the application in l3fwd-power, the lcore must be at
least within the RTE_MAX_LCORE range and be a core of the ROLE_RTE
role. But a service on a ROLE_SERVICE core can also use and
require the power management feature.

So this common macro requires that the lcore role be either ROLE_RTE
or ROLE_SERVICE.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 doc/guides/rel_notes/release_26_07.rst | 4 ++++
 lib/power/power_common.h               | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 5d7aa8d1bf..aeabb908ec 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -155,6 +155,10 @@ New Features
   Added AGENTS.md file for AI review
   and supporting scripts to review patches and documentation.
 
+* **Added a common macro to verify lcore ID in power core.**
+
+  Added the ``RTE_POWER_VALID_LCOREID_OR_ERR_RET`` macro to verify lcore ID
+  in power core.
 
 Removed Items
 -------------
diff --git a/lib/power/power_common.h b/lib/power/power_common.h
index e2d5b68a17..370c5246c6 100644
--- a/lib/power/power_common.h
+++ b/lib/power/power_common.h
@@ -25,6 +25,14 @@ extern int rte_power_logtype;
 
 #define POWER_CONVERT_TO_DECIMAL 10
 
+#define RTE_POWER_VALID_LCOREID_OR_ERR_RET(lcore_id, retval) do {   \
+	if (rte_eal_lcore_role(lcore_id) != ROLE_RTE &&             \
+	    rte_eal_lcore_role(lcore_id) != ROLE_SERVICE) {         \
+		POWER_LOG(ERR, "lcore id %u is invalid", lcore_id); \
+		return retval;                                      \
+	}                                                           \
+} while (0)
+
 /* check if scaling driver matches one we want */
 __rte_internal
 int cpufreq_check_scaling_driver(const char *driver);
-- 
2.33.0


^ permalink raw reply related

* [PATCH v4 3/9] power/acpi: remove redundant lcore ID checks
From: Huisong Li @ 2026-06-15  7:30 UTC (permalink / raw)
  To: thomas, anatoly.burakov, sivaprasad.tummala
  Cc: dev, stephen, fengchengwen, yangxingui, zhanjie9, lihuisong
In-Reply-To: <20260615073050.1996063-1-lihuisong@huawei.com>

Now that the cpufreq framework validates the lcore ID using
RTE_POWER_VALID_LCOREID_OR_ERR_RET() before dispatching to any driver,
each individual cpufreq driver no longer needs its own range
check against RTE_MAX_LCORE.

Remove the duplicated lcore ID checks from the acpi cpufreq
driver ops.

Signed-off-by: Huisong Li <lihuisong@huawei.com>
---
 drivers/power/acpi/acpi_cpufreq.c | 65 -------------------------------
 1 file changed, 65 deletions(-)

diff --git a/drivers/power/acpi/acpi_cpufreq.c b/drivers/power/acpi/acpi_cpufreq.c
index 875c66336d..af85a8cdec 100644
--- a/drivers/power/acpi/acpi_cpufreq.c
+++ b/drivers/power/acpi/acpi_cpufreq.c
@@ -234,12 +234,6 @@ power_acpi_cpufreq_init(unsigned int lcore_id)
 		return -1;
 	}
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_IDLE;
 	/* The power in use state works as a guard variable between
@@ -311,11 +305,6 @@ power_acpi_cpufreq_exit(unsigned int lcore_id)
 	struct acpi_power_info *pi;
 	uint32_t exp_state;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Lcore id %u can not exceeds %u",
-				lcore_id, RTE_MAX_LCORE - 1U);
-		return -1;
-	}
 	pi = &lcore_power_info[lcore_id];
 	exp_state = POWER_USED;
 	/* The power in use state works as a guard variable between
@@ -365,11 +354,6 @@ power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return 0;
-	}
-
 	if (freqs == NULL) {
 		POWER_LOG(ERR, "NULL buffer supplied");
 		return 0;
@@ -388,22 +372,12 @@ power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
 uint32_t
 power_acpi_cpufreq_get_freq(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return RTE_POWER_INVALID_FREQ_INDEX;
-	}
-
 	return lcore_power_info[lcore_id].curr_idx;
 }
 
 int
 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	return set_freq_internal(&(lcore_power_info[lcore_id]), index);
 }
 
@@ -412,11 +386,6 @@ power_acpi_cpufreq_freq_down(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx + 1 == pi->nb_freqs)
 		return 0;
@@ -430,11 +399,6 @@ power_acpi_cpufreq_freq_up(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 	if (pi->curr_idx == 0 ||
 	    (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
@@ -447,11 +411,6 @@ power_acpi_cpufreq_freq_up(unsigned int lcore_id)
 int
 power_acpi_cpufreq_freq_max(unsigned int lcore_id)
 {
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	/* Frequencies in the array are from high to low. */
 	if (lcore_power_info[lcore_id].turbo_available) {
 		if (lcore_power_info[lcore_id].turbo_enable)
@@ -471,11 +430,6 @@ power_acpi_cpufreq_freq_min(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	/* Frequencies in the array are from high to low. */
@@ -488,11 +442,6 @@ power_acpi_turbo_status(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	return pi->turbo_enable;
@@ -504,11 +453,6 @@ power_acpi_enable_turbo(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	if (pi->turbo_available)
@@ -537,11 +481,6 @@ power_acpi_disable_turbo(unsigned int lcore_id)
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
-
 	pi = &lcore_power_info[lcore_id];
 
 	 pi->turbo_enable = 0;
@@ -564,10 +503,6 @@ int power_acpi_get_capabilities(unsigned int lcore_id,
 {
 	struct acpi_power_info *pi;
 
-	if (lcore_id >= RTE_MAX_LCORE) {
-		POWER_LOG(ERR, "Invalid lcore ID");
-		return -1;
-	}
 	if (caps == NULL) {
 		POWER_LOG(ERR, "Invalid argument");
 		return -1;
-- 
2.33.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox