DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH v4 09/23] net/sxe2: support IPsec inline protocol offload
From: Stephen Hemminger @ 2026-06-19 20:54 UTC (permalink / raw)
  To: liujie5; +Cc: dev
In-Reply-To: <20260619080812.1543972-1-liujie5@linkdatatechnology.com>

On Fri, 19 Jun 2026 16:08:12 +0800
liujie5@linkdatatechnology.com wrote:

> diff --git a/drivers/net/sxe2/sxe2_cmd_chnl.c b/drivers/net/sxe2/sxe2_cmd_chnl.c
> index 19323ffcc4..7711e8e57d 100644
> --- a/drivers/net/sxe2/sxe2_cmd_chnl.c
> +++ b/drivers/net/sxe2/sxe2_cmd_chnl.c
> @@ -877,3 +877,200 @@ int32_t sxe2_drv_tm_commit(struct sxe2_adapter *adapter)
>  l_end:
...

> +int32_t sxe2_drv_ipsec_txsa_delete(struct sxe2_adapter *adapter,
> +					   uint16_t sa_id)
> +{
> +	struct sxe2_drv_ipsec_txsa_del_req req = { 0 };
> +	struct sxe2_drv_cmd_params cmd             = { 0 };
> +	struct sxe2_common_device *cdev = adapter->cdev;
> +	int32_t ret                                 = -1;
> +
> +	req.sa_idx = rte_cpu_to_le_16(sa_id);
> +	sxe2_drv_cmd_params_fill(adapter, &cmd, SXE2_DRV_CMD_IPSEC_TXSA_DEL,
> +				 &req, sizeof(req),
> +				 NULL, 0);
> +	ret = sxe2_drv_cmd_exec(cdev, &cmd);
> +	if (ret)
> +		PMD_DEV_LOG_ERR(adapter, DRV,
> +				"Failed to delete tx sa, sa id: %u, ret: %d.",
> +				sa_id, ret);
> +
> +	return ret;
> +}
> +

git am doesn't like extra blank lines at end of file.
Applying: net/sxe2: support IPsec inline protocol offload
/home/shemminger/DPDK/main/.git/worktrees/sxe2/rebase-apply/patch:236: new blank line at EOF.
+
warning: 1 line adds whitespace errors.


^ permalink raw reply

* [PATCH v2] graph: add optional profiling stats
From: Morten Brørup @ 2026-06-19 20:25 UTC (permalink / raw)
  To: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
  Cc: Morten Brørup

Add graph node profiling stats, configurable at build time by enabling
RTE_GRAPH_PROFILE in rte_config.h.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v2:
* Fix indentation.
---
 config/rte_config.h                 |  1 +
 lib/graph/graph_debug.c             | 29 ++++++++++++++++++++++++++++-
 lib/graph/node.c                    |  2 ++
 lib/graph/rte_graph_worker_common.h | 23 ++++++++++++++++++++---
 4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 0447cdf2ad..1942c1b1ec 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -106,6 +106,7 @@
 /* rte_graph defines */
 #define RTE_GRAPH_BURST_SIZE 256
 #define RTE_LIBRTE_GRAPH_STATS 1
+/* RTE_GRAPH_PROFILE is not set */
 
 /****** driver defines ********/
 
diff --git a/lib/graph/graph_debug.c b/lib/graph/graph_debug.c
index e3b8cccdc1..b1028f88ed 100644
--- a/lib/graph/graph_debug.c
+++ b/lib/graph/graph_debug.c
@@ -92,7 +92,34 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g, bool all)
 			fprintf(f, "       total_sched_fail=%" PRId64 "\n",
 				n->dispatch.total_sched_fail);
 		}
-		fprintf(f, "       total_calls=%" PRId64 "\n", n->total_calls);
+		fprintf(f, "       total_calls=%" PRIu64 "\n", n->total_calls);
+		fprintf(f, "       total_cycles=%" PRIu64 "\n", n->total_cycles);
+#ifdef RTE_GRAPH_PROFILE
+		uint64_t calls_2_or_more = n->total_calls -
+				(n->usage_stats[0].calls + n->usage_stats[1].calls);
+		double avg_objs_2_or_more = calls_2_or_more == 0 ? (double)2 :
+				(double)(n->total_objs - n->usage_stats[1].calls) /
+				(double)calls_2_or_more;
+		fprintf(f, "       calls_0=%" PRIu64 ", _1=%" PRIu64 ", _%.1f=%" PRIu64 "\n",
+				n->usage_stats[0].calls,
+				n->usage_stats[1].calls,
+				avg_objs_2_or_more,
+				calls_2_or_more);
+		fprintf(f, "       cycles_0=%" PRIu64 ", _1=%" PRIu64 ", _%.1f=%" PRIu64 "\n",
+				n->usage_stats[0].cycles,
+				n->usage_stats[1].cycles,
+				avg_objs_2_or_more,
+				n->total_cycles -
+				(n->usage_stats[0].cycles + n->usage_stats[1].cycles));
+		fprintf(f, "       cycles_per_call_1=%.1f, _%.1f=%.1f\n",
+				n->usage_stats[1].calls == 0 ? (double)0 :
+				(double)n->usage_stats[1].cycles / (double)n->usage_stats[1].calls,
+				avg_objs_2_or_more,
+				calls_2_or_more == 0 ? (double)0 :
+				(double)(n->total_cycles -
+				(n->usage_stats[0].cycles + n->usage_stats[1].cycles)) /
+				(double)calls_2_or_more);
+#endif
 		for (i = 0; i < n->nb_edges; i++)
 			fprintf(f, "          edge[%d] <%s>\n", i,
 				n->nodes[i]->name);
diff --git a/lib/graph/node.c b/lib/graph/node.c
index 1fce3e6632..19b38881ae 100644
--- a/lib/graph/node.c
+++ b/lib/graph/node.c
@@ -110,10 +110,12 @@ __rte_node_register(const struct rte_node_register *reg)
 	rte_edge_t i;
 	size_t sz;
 
+#ifndef RTE_GRAPH_PROFILE
 	/* Limit Node specific metadata to one cacheline on 64B CL machine */
 	RTE_BUILD_BUG_ON((offsetof(struct rte_node, nodes) -
 			  offsetof(struct rte_node, ctx)) !=
 			 RTE_CACHE_LINE_MIN_SIZE);
+#endif
 
 	graph_spinlock_lock();
 
diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
index 4ab53a533e..43ce23765b 100644
--- a/lib/graph/rte_graph_worker_common.h
+++ b/lib/graph/rte_graph_worker_common.h
@@ -144,12 +144,22 @@ struct __rte_cache_aligned rte_node {
 			rte_node_process_t process; /**< Process function. */
 			uint64_t process_u64;
 		};
+		/** Fast path area cache line 3. */
+#ifdef RTE_GRAPH_PROFILE
+		struct {
+			uint64_t calls;
+			uint64_t cycles;
+		} usage_stats[2];	/**< Usage when this node processed 0 or 1 objects. */
+		/** Fast path area cache line 4. */
+#endif
 		alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
 	};
 };
 
+#ifndef RTE_GRAPH_PROFILE
 static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx)
 	== RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes");
+#endif
 
 /**
  * @internal
@@ -197,7 +207,7 @@ void __rte_node_stream_alloc_size(struct rte_graph *graph,
 static __rte_always_inline void
 __rte_node_process(struct rte_graph *graph, struct rte_node *node)
 {
-	uint64_t start;
+	uint64_t cycles;
 	uint16_t rc;
 	void **objs;
 
@@ -206,11 +216,18 @@ __rte_node_process(struct rte_graph *graph, struct rte_node *node)
 	rte_prefetch0(objs);
 
 	if (rte_graph_has_stats_feature()) {
-		start = rte_rdtsc();
+		cycles = -rte_rdtsc();
 		rc = node->process(graph, node, objs, node->idx);
-		node->total_cycles += rte_rdtsc() - start;
+		cycles += rte_rdtsc();
+		node->total_cycles += cycles;
 		node->total_calls++;
 		node->total_objs += rc;
+#ifdef RTE_GRAPH_PROFILE
+		if (rc <= 1) {
+			node->usage_stats[rc].calls++;
+			node->usage_stats[rc].cycles += cycles;
+		}
+#endif
 	} else {
 		node->process(graph, node, objs, node->idx);
 	}
-- 
2.43.0


^ permalink raw reply related

* [PATCH] graph: add optional profiling stats
From: Morten Brørup @ 2026-06-19 20:20 UTC (permalink / raw)
  To: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
  Cc: Morten Brørup

Add graph node profiling stats, configurable at build time by enabling
RTE_GRAPH_PROFILE in rte_config.h.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
 config/rte_config.h                 |  1 +
 lib/graph/graph_debug.c             | 29 ++++++++++++++++++++++++++++-
 lib/graph/node.c                    |  2 ++
 lib/graph/rte_graph_worker_common.h | 23 ++++++++++++++++++++---
 4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 0447cdf2ad..1942c1b1ec 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -106,6 +106,7 @@
 /* rte_graph defines */
 #define RTE_GRAPH_BURST_SIZE 256
 #define RTE_LIBRTE_GRAPH_STATS 1
+/* RTE_GRAPH_PROFILE is not set */
 
 /****** driver defines ********/
 
diff --git a/lib/graph/graph_debug.c b/lib/graph/graph_debug.c
index e3b8cccdc1..883e37707c 100644
--- a/lib/graph/graph_debug.c
+++ b/lib/graph/graph_debug.c
@@ -92,7 +92,34 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g, bool all)
 			fprintf(f, "       total_sched_fail=%" PRId64 "\n",
 				n->dispatch.total_sched_fail);
 		}
-		fprintf(f, "       total_calls=%" PRId64 "\n", n->total_calls);
+		fprintf(f, "       total_calls=%" PRIu64 "\n", n->total_calls);
+		fprintf(f, "       total_cycles=%" PRIu64 "\n", n->total_cycles);
+#ifdef RTE_GRAPH_PROFILE
+		uint64_t calls_2_or_more = n->total_calls -
+				(n->usage_stats[0].calls + n->usage_stats[1].calls);
+		double avg_objs_2_or_more = calls_2_or_more == 0 ? (double)2 :
+				(double)(n->total_objs - n->usage_stats[1].calls) /
+				(double)calls_2_or_more;
+		fprintf(f, "       calls_0=%" PRIu64 ", _1=%" PRIu64 ", _%.1f=%" PRIu64 "\n",
+				n->usage_stats[0].calls,
+				n->usage_stats[1].calls,
+				avg_objs_2_or_more,
+				calls_2_or_more);
+		fprintf(f, "       cycles_0=%" PRIu64 ", _1=%" PRIu64 ", _%.1f=%" PRIu64 "\n",
+				n->usage_stats[0].cycles,
+				n->usage_stats[1].cycles,
+				avg_objs_2_or_more,
+				n->total_cycles -
+				(n->usage_stats[0].cycles + n->usage_stats[1].cycles));
+		fprintf(f, "       cycles_per_call_1=%.1f, _%.1f=%.1f\n",
+				n->usage_stats[1].calls == 0 ? (double)0 :
+				(double)n->usage_stats[1].cycles / (double)n->usage_stats[1].calls,
+				avg_objs_2_or_more,
+				calls_2_or_more == 0 ? (double)0 :
+				(double)(n->total_cycles -
+				(n->usage_stats[0].cycles + n->usage_stats[1].cycles)) /
+                (double)calls_2_or_more);
+#endif
 		for (i = 0; i < n->nb_edges; i++)
 			fprintf(f, "          edge[%d] <%s>\n", i,
 				n->nodes[i]->name);
diff --git a/lib/graph/node.c b/lib/graph/node.c
index 1fce3e6632..19b38881ae 100644
--- a/lib/graph/node.c
+++ b/lib/graph/node.c
@@ -110,10 +110,12 @@ __rte_node_register(const struct rte_node_register *reg)
 	rte_edge_t i;
 	size_t sz;
 
+#ifndef RTE_GRAPH_PROFILE
 	/* Limit Node specific metadata to one cacheline on 64B CL machine */
 	RTE_BUILD_BUG_ON((offsetof(struct rte_node, nodes) -
 			  offsetof(struct rte_node, ctx)) !=
 			 RTE_CACHE_LINE_MIN_SIZE);
+#endif
 
 	graph_spinlock_lock();
 
diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
index 4ab53a533e..43ce23765b 100644
--- a/lib/graph/rte_graph_worker_common.h
+++ b/lib/graph/rte_graph_worker_common.h
@@ -144,12 +144,22 @@ struct __rte_cache_aligned rte_node {
 			rte_node_process_t process; /**< Process function. */
 			uint64_t process_u64;
 		};
+		/** Fast path area cache line 3. */
+#ifdef RTE_GRAPH_PROFILE
+		struct {
+			uint64_t calls;
+			uint64_t cycles;
+		} usage_stats[2];	/**< Usage when this node processed 0 or 1 objects. */
+		/** Fast path area cache line 4. */
+#endif
 		alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
 	};
 };
 
+#ifndef RTE_GRAPH_PROFILE
 static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx)
 	== RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes");
+#endif
 
 /**
  * @internal
@@ -197,7 +207,7 @@ void __rte_node_stream_alloc_size(struct rte_graph *graph,
 static __rte_always_inline void
 __rte_node_process(struct rte_graph *graph, struct rte_node *node)
 {
-	uint64_t start;
+	uint64_t cycles;
 	uint16_t rc;
 	void **objs;
 
@@ -206,11 +216,18 @@ __rte_node_process(struct rte_graph *graph, struct rte_node *node)
 	rte_prefetch0(objs);
 
 	if (rte_graph_has_stats_feature()) {
-		start = rte_rdtsc();
+		cycles = -rte_rdtsc();
 		rc = node->process(graph, node, objs, node->idx);
-		node->total_cycles += rte_rdtsc() - start;
+		cycles += rte_rdtsc();
+		node->total_cycles += cycles;
 		node->total_calls++;
 		node->total_objs += rc;
+#ifdef RTE_GRAPH_PROFILE
+		if (rc <= 1) {
+			node->usage_stats[rc].calls++;
+			node->usage_stats[rc].cycles += cycles;
+		}
+#endif
 	} else {
 		node->process(graph, node, objs, node->idx);
 	}
-- 
2.43.0


^ permalink raw reply related

* RE: [PATCH v1 0/5] prefix lcore role enum values
From: Morten Brørup @ 2026-06-19 20:11 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Thomas Monjalon, Huisong Li, andrew.rybchenko, dev, zhanjie9
In-Reply-To: <20260619083934.510bd2d4@phoenix.local>

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Friday, 19 June 2026 17.40
> 
> On Fri, 19 Jun 2026 09:54:51 +0200
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > > > The problem with this patch is that it now causes build failures
> > > > with abidiff.
> > >
> > > It is probably a bug of an old version of abidiff.
> > > I recommend updating.
> >
> > With the #define's the ABI has not changed. It's probably too
> > indirect for abidiff to understand.
> > If we absolutely want to please abidiff, we could keep the existing
> > enums and #define RTE_LCORE_ROLE_RTE ROLE_RTE for now.
> > But I'm in favor of what was done already.
> 
> The build failures are on GitHub, not in my local builds.
> https://github.com/ovsrobot/dpdk/actions/runs/27789889172/job/82235965090
> 
> It makes looking at the patchwork dashboard difficult; all patches show up
> with a red mark.

So maybe we can choose the path of pleasing abidiff...
Keep the existing enums, and #define the new RTE_LCORE_ prefixed variants, and use those in the code.

Later, in an ABI-breaking release, we can swap them.
Or maybe we just wait until an ABI-breaking release to fix this.


^ permalink raw reply

* [PATCH 10/10] net/enetc4: add cacheable BD ring support with SW cache maintenance
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

On non-cache-coherent platforms such as i.MX95, the BD ring memory
may be mapped as cacheable (normal memory) while the ENETC hardware
DMA engine writes and reads descriptors without CPU cache snooping.
SW must therefore perform explicit cache maintenance to keep CPU
caches and DDR coherent.

TX path (enetc_xmit_pkts_cacheable):
  - Flush each segment's payload cache lines to PoC (dcbf) before
    the BD is handed to HW, so HW DMA reads the correct data.
  - After all BDs for a burst are written, flush the BD cache lines
    (dcbf, one per 64-byte group of 4 BDs) so HW can read the
    updated descriptors.

RX refill (enetc_refill_rx_ring):
  - After writing each full 4-BD cache-line group, dcbf that group
    so HW sees the buffer addresses and cleared lstatus fields.
  - Flush any partial trailing group before updating the ring tail.

RX receive (enetc_recv_pkts_cacheable via enetc_clean_rx_ring_cacheable):
  - Before reading BD status, dccivac the current BD cache line so
    stale CPU-cached BD data is discarded and fresh HW-written
    content is fetched from DDR.
  - After a BD is consumed, dccivac each payload cache line so the
    CPU reads the DMA'd packet data, not stale cached bytes.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc.h         |  21 +++
 drivers/net/enetc/enetc4_ethdev.c |  40 +++--
 drivers/net/enetc/enetc_rxtx.c    | 274 ++++++++++++++++++++++++++++++
 3 files changed, 320 insertions(+), 15 deletions(-)

diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 99b1e91..9f98480 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -96,6 +96,7 @@ struct enetc_bdr {
 	uint64_t ierrors;
 	uint8_t rx_deferred_start;
 	uint8_t tx_deferred_start;
+	uint64_t bd_base_p;
 };
 
 struct enetc_eth_hw {
@@ -312,8 +313,28 @@ uint16_t enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 uint16_t enetc_recv_pkts_nc(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
+uint16_t enetc_xmit_pkts_cacheable(void *txq, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+uint16_t enetc_recv_pkts_cacheable(void *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
 
 int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt);
+
+/*
+ * Cache-maintenance constants for cacheable BD ring mode.
+ *
+ * BD = 16 bytes, cache line = 64 bytes => 4 BDs per cache line.
+ * Every dcbf in enetc_refill_rx_ring() flushes a full 64-byte cache line.
+ * To ensure each dcbf covers only fully-written BDs the caller
+ * must pass a count rounded DOWN to a multiple of ENETC_BD_PER_CL so that
+ * the last partial group is left in cache to be completed and flushed in
+ * the next call.
+ */
+#define ENETC_BD_PER_CL		(RTE_CACHE_LINE_SIZE / sizeof(union enetc_rx_bd))
+#define ENETC_BD_PER_CL_MASK	(ENETC_BD_PER_CL - 1)
+/* Round n DOWN to the nearest multiple of ENETC_BD_PER_CL. */
+#define ENETC_BD_ALIGN_DOWN(n)	((n) & ~(unsigned int)ENETC_BD_PER_CL_MASK)
+
 void enetc4_dev_hw_init(struct rte_eth_dev *eth_dev);
 void enetc_print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr);
 
diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index d54051f..04dc306 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -281,12 +281,14 @@ enetc4_alloc_txbdr(struct enetc_bdr *txr, uint16_t nb_desc)
 	int size;
 
 	size = nb_desc * sizeof(struct enetc_swbd);
-	txr->q_swbd = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Zero q_swbd so buffer_addr is NULL for all uninitialized slots. */
+	txr->q_swbd = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (txr->q_swbd == NULL)
 		return -ENOMEM;
 
-	size = nb_desc * sizeof(struct enetc_bdr);
-	txr->bd_base = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Allocate the TX BD ring: each BD is struct enetc_tx_bd (16 bytes). */
+	size = nb_desc * sizeof(struct enetc_tx_bd);
+	txr->bd_base = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (txr->bd_base == NULL) {
 		rte_free(txr->q_swbd);
 		txr->q_swbd = NULL;
@@ -441,12 +443,14 @@ enetc4_alloc_rxbdr(struct enetc_bdr *rxr, uint16_t nb_desc)
 	int size;
 
 	size = nb_desc * sizeof(struct enetc_swbd);
-	rxr->q_swbd = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Zero q_swbd so buffer_addr is NULL for all uninitialized slots. */
+	rxr->q_swbd = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (rxr->q_swbd == NULL)
 		return -ENOMEM;
 
-	size = nb_desc * sizeof(struct enetc_bdr);
-	rxr->bd_base = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Allocate the RX BD ring: each BD is union enetc_rx_bd (16 bytes). */
+	size = nb_desc * sizeof(union enetc_rx_bd);
+	rxr->bd_base = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (rxr->bd_base == NULL) {
 		rte_free(rxr->q_swbd);
 		rxr->q_swbd = NULL;
@@ -481,7 +485,7 @@ enetc4_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
 	rx_ring->mb_pool = mb_pool;
 	rx_ring->rcir = (void *)((size_t)hw->reg +
 			ENETC_BDR(RX, idx, ENETC_RBCIR));
-	enetc_refill_rx_ring(rx_ring, (enetc_bd_unused(rx_ring)));
+	enetc_refill_rx_ring(rx_ring, ENETC_BD_ALIGN_DOWN(enetc_bd_unused(rx_ring)));
 	buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rx_ring->mb_pool) -
 		   RTE_PKTMBUF_HEADROOM);
 	enetc4_rxbdr_wr(hw, idx, ENETC_RBBSR, buf_size);
@@ -743,12 +747,17 @@ enetc4_dev_configure(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
-	max_len = dev->data->dev_conf.rxmode.mtu + RTE_ETHER_HDR_LEN +
-		  RTE_ETHER_CRC_LEN;
-	enetc4_port_wr(enetc_hw, ENETC4_PM_MAXFRM(0), ENETC_SET_MAXFRM(max_len));
+	/* Port-level register writes are PF-only; skip for VF devices */
+	if (hw->device_id != ENETC4_DEV_ID_VF) {
+		max_len = dev->data->dev_conf.rxmode.mtu + RTE_ETHER_HDR_LEN +
+			  RTE_ETHER_CRC_LEN;
+		enetc4_port_wr(enetc_hw, ENETC4_PM_MAXFRM(0),
+			       ENETC_SET_MAXFRM(max_len));
 
-	val = ENETC4_MAC_MAXFRM_SIZE | SDU_TYPE_MPDU;
-	enetc4_port_wr(enetc_hw, ENETC4_PTCTMSDUR(0), val | SDU_TYPE_MPDU);
+		val = ENETC4_MAC_MAXFRM_SIZE | SDU_TYPE_MPDU;
+		enetc4_port_wr(enetc_hw, ENETC4_PTCTMSDUR(0),
+			       val | SDU_TYPE_MPDU);
+	}
 
 	/* Rx offloads which are enabled by default */
 	if (dev_rx_offloads_sup & ~rx_offloads) {
@@ -770,7 +779,8 @@ enetc4_dev_configure(struct rte_eth_dev *dev)
 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
 		checksum &= ~L4_CKSUM;
 
-	enetc4_port_wr(enetc_hw, ENETC4_PARCSCR, checksum);
+	if (hw->device_id != ENETC4_DEV_ID_VF)
+		enetc4_port_wr(enetc_hw, ENETC4_PARCSCR, checksum);
 
 	/* Enable interrupts */
 	if (hw->device_id == ENETC4_DEV_ID_VF) {
@@ -1033,8 +1043,8 @@ enetc4_dev_hw_init(struct rte_eth_dev *eth_dev)
 		ENETC_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 	struct rte_pci_device *pci_dev = RTE_CLASS_TO_BUS_DEVICE(eth_dev, *pci_dev);
 
-	eth_dev->rx_pkt_burst = &enetc_recv_pkts_nc;
-	eth_dev->tx_pkt_burst = &enetc_xmit_pkts_nc;
+	eth_dev->rx_pkt_burst = &enetc_recv_pkts_cacheable;
+	eth_dev->tx_pkt_burst = &enetc_xmit_pkts_cacheable;
 
 	/* Retrieving and storing the HW base address of device */
 	hw->hw.reg = (void *)pci_dev->mem_resource[0].addr;
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index a37c835..c737b22 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -26,6 +26,7 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
 	struct enetc_swbd *tx_swbd, *tx_swbd_base;
 	int i, hwci, bd_count;
 	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
+	struct enetc_tx_bd *txbd;
 
 	/* we don't need barriers here, we just want a relatively current value
 	 * from HW.
@@ -51,6 +52,13 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
 		/* It seems calling rte_pktmbuf_free is wasting a lot of cycles,
 		 * make a list and call _free when it's done.
 		 */
+		/* Clear flags on the reclaimed BD so that dcbf in the
+		 * cacheable TX path never flushes a stale flags_F to memory
+		 * before the new BD fields are fully written.
+		 */
+		txbd = ENETC_TXBD(*tx_ring, i);
+		txbd->flags = 0;
+
 		if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
 			rte_pktmbuf_free_bulk(m, tx_frm_cnt);
 			tx_frm_cnt = 0;
@@ -217,6 +225,7 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 {
 	struct enetc_swbd *rx_swbd;
 	union enetc_rx_bd *rxbd;
+	union enetc_rx_bd *grp_start_rxbd;
 	int i, j, k = ENETC_RXBD_BUNDLE;
 	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
 	struct rte_mempool *mb_pool;
@@ -225,6 +234,7 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 	mb_pool = rx_ring->mb_pool;
 	rx_swbd = &rx_ring->q_swbd[i];
 	rxbd = ENETC_RXBD(*rx_ring, i);
+	grp_start_rxbd = rxbd;
 	for (j = 0; j < buff_cnt; j++) {
 		/* bulk alloc for the next up to 8 BDs */
 		if (k == ENETC_RXBD_BUNDLE) {
@@ -246,12 +256,29 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 		i++;
 		k++;
 		if (unlikely(i == rx_ring->bd_count)) {
+			/*
+			 * Ring wrap: flush the current partial or full group
+			 * before resetting the pointer to index 0.
+			 */
+			dcbf((void *)grp_start_rxbd);
 			i = 0;
 			rxbd = ENETC_RXBD(*rx_ring, i);
 			rx_swbd = &rx_ring->q_swbd[i];
+			grp_start_rxbd = rxbd;
+		} else if ((i & ENETC_BD_PER_CL_MASK) == 0) {
+			/*
+			 * Completed a full 4-BD group (one cache line).
+			 * Flush it to PoC so HW sees the updated descriptors.
+			 */
+			dcbf((void *)grp_start_rxbd);
+			grp_start_rxbd = rxbd;
 		}
 	}
 
+	/* Flush any remaining partial group at the end of the fill. */
+	if (j && (i & ENETC_BD_PER_CL_MASK) != 0)
+		dcbf((void *)grp_start_rxbd);
+
 	if (likely(j)) {
 		rx_ring->next_to_alloc = i;
 		rx_ring->next_to_use = i;
@@ -597,3 +624,250 @@ enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 
 	return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
 }
+
+/* --- Cacheable BD ring TX path with SW cache maintenance (dcbf) --- */
+
+uint16_t
+enetc_xmit_pkts_cacheable(void *tx_queue,
+		struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	int i, start, bds_to_use;
+	struct enetc_tx_bd *txbd;
+	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
+	unsigned int j;
+	uint8_t *data;
+	struct rte_mbuf *seg;
+	uint16_t seg_len, segs_per_pkt;
+	bool is_first_seg;
+	int first_bd_idx, bd_count;
+
+	i = tx_ring->next_to_use;
+	bds_to_use = enetc_bd_unused(tx_ring);
+	bd_count = tx_ring->bd_count;
+	start = 0;
+
+	/*
+	 * Remember the first BD index of this batch so we can flush the
+	 * BD cache lines to PoC after all descriptors are written.
+	 */
+	first_bd_idx = i;
+
+	while (start < nb_pkts) {
+		seg = tx_pkts[start];
+		segs_per_pkt = seg->nb_segs;
+
+		if (bds_to_use < segs_per_pkt)
+			break;
+
+		is_first_seg = true;
+		while (seg) {
+			tx_ring->q_swbd[i].buffer_addr = NULL;
+			seg_len = rte_pktmbuf_data_len(seg);
+			data = rte_pktmbuf_mtod(seg, void *);
+
+			/*
+			 * Flush packet data cache lines to PoC so HW DMA
+			 * reads the correct payload from memory.
+			 */
+			for (j = 0; j < seg_len; j += RTE_CACHE_LINE_SIZE)
+				dcbf(data + j);
+
+			/*
+			 * Cover the last byte of an unaligned buffer to
+			 * ensure the full payload is clean to the Point of
+			 * Coherency.
+			 */
+			dcbf(data + (seg_len - 1));
+			txbd = ENETC_TXBD(*tx_ring, i);
+			txbd->flags = 0;
+			if (is_first_seg) {
+				tx_ring->q_swbd[i].buffer_addr = seg;
+				txbd->frm_len = rte_pktmbuf_pkt_len(seg);
+				if (seg->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
+					enetc4_tx_offload_checksum(seg, txbd);
+				is_first_seg = false;
+			}
+
+			txbd->buf_len = rte_cpu_to_le_16(seg_len);
+			txbd->addr = rte_cpu_to_le_64(rte_mbuf_data_iova(seg));
+			seg = seg->next;
+			i++;
+			bds_to_use--;
+
+			if (unlikely(i == bd_count))
+				i = 0;
+		}
+
+		/*
+		 * Set the frame-last flag on the final BD of this packet.
+		 * This is the last write to the BD group; the cache flush
+		 * below will push all BDs to memory afterwards.
+		 */
+		txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_F);
+		start++;
+	}
+
+	/*
+	 * Flush TX BDs to PoC so HW (non-cache-coherent i.MX95) can read
+	 * the descriptors from memory.  TX BDs are 16 B each; 4 BDs share
+	 * one 64-byte cache line.  Walk from the cache-line-aligned start
+	 * of first_bd_idx to just past the last written BD, one dcbf per
+	 * cache line.
+	 *
+	 * The flush must happen AFTER all BD fields (including flags_F) are
+	 * written, so HW never sees a partial descriptor.
+	 */
+	if (likely(start > 0)) {
+		int n = first_bd_idx & ~ENETC_BD_PER_CL_MASK;
+		int written = (i - n + bd_count) % bd_count;
+
+		if (written == 0)
+			written = bd_count;
+		written = (written + ENETC_BD_PER_CL_MASK) & ~ENETC_BD_PER_CL_MASK;
+
+		while (written > 0) {
+			dcbf((void *)ENETC_TXBD(*tx_ring, n));
+			n = (n + ENETC_BD_PER_CL) % bd_count;
+			written -= ENETC_BD_PER_CL;
+		}
+	}
+
+	enetc_clean_tx_ring(tx_ring);
+	tx_ring->next_to_use = i;
+	enetc_wr_reg(tx_ring->tcir, i);
+
+	return start;
+}
+
+/* --- Cacheable BD ring RX path with SW cache maintenance (dccivac) --- */
+
+static int
+enetc_clean_rx_ring_cacheable(struct enetc_bdr *rx_ring,
+		struct rte_mbuf **rx_pkts,
+		int work_limit)
+{
+	int rx_frm_cnt = 0;
+	int cleaned_cnt, i;
+	struct enetc_swbd *rx_swbd;
+	union enetc_rx_bd *rxbd, rxbd_temp;
+	struct rte_mbuf *first_seg = NULL, *cur_seg = NULL;
+	uint32_t bd_status;
+	uint8_t *data;
+	uint32_t j;
+	struct rte_mbuf *seg;
+	uint16_t data_len;
+
+	i = rx_ring->next_to_clean;
+	rxbd = ENETC_RXBD(*rx_ring, i);
+	cleaned_cnt = enetc_bd_unused(rx_ring);
+	rx_swbd = &rx_ring->q_swbd[i];
+
+	/*
+	 * On i.MX95 the BD ring is in cacheable hugepage memory but the
+	 * platform is non-cache-coherent.  HW writes RX BDs to DDR
+	 * without snooping the CPU cache, so stale cached copies of BD
+	 * status fields must be discarded before the CPU reads them.
+	 *
+	 * Ideal instruction: DC IVAC (invalidate only, no writeback).
+	 * ARM64 constraint: DC IVAC requires EL1 privilege; executing it
+	 * from EL0 (DPDK userspace) raises a fault.  The only EL0-safe
+	 * cache maintenance instruction that invalidates is DC CIVAC
+	 * (clean + invalidate, dccivac).
+	 *
+	 * Safety of using dccivac here:
+	 * enetc_refill_rx_ring() issues dcbf() on every BD group before
+	 * returning ownership to HW.  After dcbf the CPU cache lines are
+	 * marked clean (no dirty data).  When dccivac runs, the "clean"
+	 * phase finds nothing dirty to write back, so it behaves as a
+	 * pure invalidate - exactly what we need.
+	 *
+	 * Granularity: BD = 16 B, cache line = 64 B, so one dccivac
+	 * covers exactly 4 BDs.  Invalidate at each 4-BD boundary.
+	 */
+	dccivac((void *)ENETC_RXBD(*rx_ring,
+			(i & ~(int)ENETC_BD_PER_CL_MASK)));
+
+	while (likely(rx_frm_cnt < work_limit)) {
+#ifdef RTE_ARCH_32
+		rte_memcpy(&rxbd_temp, rxbd, 16);
+#else
+		__uint128_t *dst128 = (__uint128_t *)&rxbd_temp;
+		const __uint128_t *src128 = (const __uint128_t *)rxbd;
+		*dst128 = *src128;
+#endif
+		bd_status = rte_le_to_cpu_32(rxbd_temp.r.lstatus);
+
+		if (!(bd_status & ENETC_RXBD_LSTATUS_R))
+			break;
+		if (rxbd_temp.r.error)
+			rx_ring->ierrors++;
+
+		seg = rx_swbd->buffer_addr;
+		data_len = rte_le_to_cpu_16(rxbd_temp.r.buf_len);
+		seg->data_len = data_len;
+		if (!first_seg) {
+			first_seg = seg;
+			cur_seg = seg;
+			first_seg->pkt_len = data_len;
+			enetc_dev_rx_parse(first_seg,
+					   rxbd_temp.r.parse_summary);
+			first_seg->hash.rss = rxbd_temp.r.rss_hash;
+		} else {
+			first_seg->pkt_len += data_len;
+			first_seg->nb_segs++;
+			cur_seg->next = seg;
+			cur_seg = seg;
+		}
+
+		/*
+		 * Invalidate packet data cache lines so the CPU reads the
+		 * payload that HW DMA'd into memory, not stale cached bytes.
+		 */
+		data = rte_pktmbuf_mtod(seg, void *);
+		for (j = 0; j < data_len; j += RTE_CACHE_LINE_SIZE)
+			dccivac(data + j);
+		/* Cover the last byte of an unaligned buffer. */
+		dccivac(data + (data_len - 1));
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			seg->next = NULL;
+			first_seg->pkt_len -= rx_ring->crc_len;
+			rx_pkts[rx_frm_cnt] = first_seg;
+			rx_frm_cnt++;
+			first_seg = NULL;
+		}
+
+		cleaned_cnt++;
+		rx_swbd++;
+		i++;
+		if (unlikely(i == rx_ring->bd_count)) {
+			i = 0;
+			rx_swbd = &rx_ring->q_swbd[i];
+		}
+		rxbd = ENETC_RXBD(*rx_ring, i);
+
+		/*
+		 * Crossed a 4-BD (cache-line) boundary: invalidate the new
+		 * group so the next four status reads fetch fresh DDR data
+		 * written by HW.
+		 */
+		if ((i & ENETC_BD_PER_CL_MASK) == 0 &&
+		    likely(rx_frm_cnt < work_limit))
+			dccivac((void *)rxbd);
+	}
+
+	rx_ring->next_to_clean = i;
+	enetc_refill_rx_ring(rx_ring, ENETC_BD_ALIGN_DOWN(cleaned_cnt));
+
+	return rx_frm_cnt;
+}
+
+uint16_t
+enetc_recv_pkts_cacheable(void *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;
+
+	return enetc_clean_rx_ring_cacheable(rx_ring, rx_pkts, nb_pkts);
+}
-- 
2.25.1


^ permalink raw reply related

* [PATCH 09/10] net/enetc: set user configurable priority to TX rings
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Vanshika Shukla
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

From: Vanshika Shukla <vanshika.shukla@nxp.com>

Add devarg 'enetc4_txq_prior' to allow per-queue TX ring priority
configuration. The value is a '|'-separated list of TBMR priority
bits, one per TX queue (e.g. 'enetc4_txq_prior=1|2|3').

Store the parsed priorities in hw->txq_prior and apply them in
enetc4_tx_queue_setup() when enabling the ring.

Signed-off-by: Vanshika Shukla <vanshika.shukla@nxp.com>
---
 drivers/net/enetc/enetc.h         |  1 +
 drivers/net/enetc/enetc4_ethdev.c | 71 ++++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 2cdb3c7..99b1e91 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -111,6 +111,7 @@ struct enetc_eth_hw {
 	uint32_t max_tx_queues;
 	uint32_t vsi_timeout; /* VSI-PSI message wait timeout (iterations) */
 	uint32_t vsi_delay;   /* VSI-PSI message wait delay (us) */
+	uint32_t *txq_prior;  /* per-queue TX priority (TBMR priority bits) */
 };
 
 /*
diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index 154fc09..d54051f 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -3,6 +3,7 @@
  */
 
 #include <stdbool.h>
+#include <rte_kvargs.h>
 #include <rte_random.h>
 #include <dpaax_iova_table.h>
 
@@ -10,6 +11,65 @@
 #include "enetc_logs.h"
 #include "enetc.h"
 
+#define ENETC4_TXQ_PRIORITIES	"enetc4_txq_prior"
+
+static int
+parse_txq_prior(const char *key __rte_unused, const char *value, void *opaque)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)opaque;
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	char *input_str = strdup(value);
+	char *str;
+	uint32_t i = 0;
+
+	hw->txq_prior = rte_zmalloc(NULL,
+				    hw->max_tx_queues * sizeof(uint32_t), 0);
+	if (!hw->txq_prior) {
+		free(input_str);
+		return -1;
+	}
+
+	str = strtok(input_str, "|");
+	while (str != NULL && i < hw->max_tx_queues) {
+		hw->txq_prior[i++] = (uint32_t)atoi(str);
+		str = strtok(NULL, "|");
+	}
+
+	free(input_str);
+	return 0;
+}
+
+static int
+enetc4_get_devargs(struct rte_eth_dev *dev, const char *key)
+{
+	struct rte_devargs *devargs = dev->device->devargs;
+	struct rte_kvargs *kvlist;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+
+	if (!strcmp(key, ENETC4_TXQ_PRIORITIES)) {
+		if (rte_kvargs_process(kvlist, key,
+				       parse_txq_prior, (void *)dev) < 0) {
+			rte_kvargs_free(kvlist);
+			return 0;
+		}
+	}
+
+	rte_kvargs_free(kvlist);
+	return 0;
+}
+
 /* Supported Rx offloads */
 static uint64_t dev_rx_offloads_sup =
 	RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
@@ -310,9 +370,14 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 	data->tx_queues[queue_idx] = tx_ring;
 	tx_ring->tx_deferred_start = tx_conf->tx_deferred_start;
 	if (!tx_conf->tx_deferred_start) {
+		uint32_t tx_en = ENETC_TBMR_EN;
+
+		/* apply TX queue priority if configured */
+		if (priv->hw.txq_prior)
+			tx_en |= priv->hw.txq_prior[tx_ring->index];
 		/* enable ring */
 		enetc4_txbdr_wr(&priv->hw.hw, tx_ring->index,
-			       ENETC_TBMR, ENETC_TBMR_EN);
+			       ENETC_TBMR, tx_en);
 		dev->data->tx_queue_state[tx_ring->index] =
 			       RTE_ETH_QUEUE_STATE_STARTED;
 	} else {
@@ -1009,6 +1074,8 @@ enetc4_dev_init(struct rte_eth_dev *eth_dev)
 	hw->max_tx_queues = si_cap & ENETC_SICAPR0_BDR_MASK;
 	hw->max_rx_queues = (si_cap >> 16) & ENETC_SICAPR0_BDR_MASK;
 
+	enetc4_get_devargs(eth_dev, ENETC4_TXQ_PRIORITIES);
+
 	ENETC_PMD_DEBUG("Max RX queues = %d Max TX queues = %d",
 			hw->max_rx_queues, hw->max_tx_queues);
 	error = enetc4_mac_init(hw, eth_dev);
@@ -1065,4 +1132,6 @@ static struct rte_pci_driver rte_enetc4_pmd = {
 RTE_PMD_REGISTER_PCI(net_enetc4, rte_enetc4_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4, pci_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4, "* vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(net_enetc4,
+			      ENETC4_TXQ_PRIORITIES "=<string>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH 08/10] net/enetc: add devargs to control VSI-PSI timeout and delay
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Add two new devargs for ENETC4 VF:
- enetc4_vsi_timeout: VSI-PSI message wait timeout (iteration count)
- enetc4_vsi_delay: VSI-PSI message wait delay in microseconds

Store the values in struct enetc_eth_hw and use them in
enetc4_msg_vsi_send() instead of the hardcoded defaults.
Fall back to ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE /
ENETC4_DEF_VSI_WAIT_DELAY_UPDATE when a devarg is not set or is 0.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc.h     |  2 ++
 drivers/net/enetc/enetc4_vf.c | 54 ++++++++++++++++++++++++-----------
 2 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 439d2d6..2cdb3c7 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -109,6 +109,8 @@ struct enetc_eth_hw {
 	uint32_t num_rss;
 	uint32_t max_rx_queues;
 	uint32_t max_tx_queues;
+	uint32_t vsi_timeout; /* VSI-PSI message wait timeout (iterations) */
+	uint32_t vsi_delay;   /* VSI-PSI message wait delay (us) */
 };
 
 /*
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index 44c0dc0..79a08b3 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -10,6 +10,8 @@
 #include "enetc.h"
 
 #define ENETC4_VSI_DISABLE		"enetc4_vsi_disable"
+#define ENETC4_VSI_TIMEOUT		"enetc4_vsi_timeout"
+#define ENETC4_VSI_DELAY		"enetc4_vsi_delay"
 
 #define ENETC_CRC_TABLE_SIZE		256
 #define ENETC_POLY			0x1021
@@ -262,10 +264,13 @@ enetc4_process_psi_msg(struct rte_eth_dev *eth_dev, struct enetc_hw *enetc_hw)
 }
 
 static int
-enetc4_msg_vsi_send(struct enetc_hw *enetc_hw, struct enetc_msg_swbd *msg)
+enetc4_msg_vsi_send(struct enetc_eth_hw *hw, struct enetc_msg_swbd *msg)
 {
-	int timeout = ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE;
-	int delay_us = ENETC4_DEF_VSI_WAIT_DELAY_UPDATE;
+	struct enetc_hw *enetc_hw = &hw->hw;
+	int timeout = hw->vsi_timeout ? (int)hw->vsi_timeout :
+					ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE;
+	int delay_us = hw->vsi_delay ? (int)hw->vsi_delay :
+				       ENETC4_DEF_VSI_WAIT_DELAY_UPDATE;
 	uint8_t class_id = 0;
 	int err = 0;
 	int vsimsgsr;
@@ -382,7 +387,7 @@ enetc4_vf_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *addr)
 					ENETC_CMD_ID_SET_PRIMARY_MAC, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -426,7 +431,6 @@ static int
 enetc4_vf_promisc_send_message(struct rte_eth_dev *dev, bool promisc_en)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -466,7 +470,7 @@ enetc4_vf_promisc_send_message(struct rte_eth_dev *dev, bool promisc_en)
 				ENETC_CMD_ID_SET_MAC_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -483,7 +487,6 @@ static int
 enetc4_vf_allmulti_send_message(struct rte_eth_dev *dev, bool mc_promisc)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -524,7 +527,7 @@ enetc4_vf_allmulti_send_message(struct rte_eth_dev *dev, bool mc_promisc)
 				ENETC_CMD_ID_SET_MAC_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -630,7 +633,7 @@ enetc4_vf_get_link_status(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *r
 			ENETC_CMD_ID_GET_LINK_STATUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -676,7 +679,7 @@ enetc4_vf_get_link_speed(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *re
 			ENETC_CMD_ID_GET_LINK_SPEED, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -819,7 +822,6 @@ static int
 enetc4_vf_vlan_promisc(struct rte_eth_dev *dev, bool promisc_en)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_vlan_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -858,7 +860,7 @@ enetc4_vf_vlan_promisc(struct rte_eth_dev *dev, bool promisc_en)
 				ENETC_CMD_ID_SET_VLAN_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -921,7 +923,7 @@ enetc4_vf_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *addr,
 			ENETC_MSG_ADD_EXACT_MAC_ENTRIES, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -1021,7 +1023,7 @@ static int enetc4_vf_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id,
 	}
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -1104,7 +1106,6 @@ static int
 enetc4_vf_link_register_notif(struct rte_eth_dev *dev, bool enable)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_swbd *msg;
 	struct rte_eth_link link;
 	uint32_t msg_size;
@@ -1138,7 +1139,7 @@ enetc4_vf_link_register_notif(struct rte_eth_dev *dev, bool enable)
 			cmd, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err)
 		ENETC_PMD_ERR("VSI msg error for link status notification");
 
@@ -1322,12 +1323,29 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 		kvlist = rte_kvargs_parse(eth_dev->device->devargs->args,
 					  NULL);
 		if (kvlist) {
+			const char *val;
+
 			if (rte_kvargs_count(kvlist, ENETC4_VSI_DISABLE) != 0) {
 				ENETC_PMD_NOTICE("VSI messaging disabled by devarg");
 				eth_dev->dev_ops = &enetc4_vf_ops_no_vsi_m;
 			} else {
 				eth_dev->dev_ops = &enetc4_vf_ops;
 			}
+
+			/* parse optional VSI-PSI timeout devarg */
+			val = rte_kvargs_get(kvlist, ENETC4_VSI_TIMEOUT);
+			if (val) {
+				hw->vsi_timeout = (uint32_t)strtoul(val, NULL, 0);
+				ENETC_PMD_NOTICE("VSI timeout set to %u", hw->vsi_timeout);
+			}
+
+			/* parse optional VSI-PSI delay devarg */
+			val = rte_kvargs_get(kvlist, ENETC4_VSI_DELAY);
+			if (val) {
+				hw->vsi_delay = (uint32_t)strtoul(val, NULL, 0);
+				ENETC_PMD_NOTICE("VSI delay set to %u us", hw->vsi_delay);
+			}
+
 			rte_kvargs_free(kvlist);
 		} else {
 			eth_dev->dev_ops = &enetc4_vf_ops;
@@ -1443,5 +1461,7 @@ RTE_PMD_REGISTER_PCI(net_enetc4_vf, rte_enetc4_vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4_vf, pci_vf_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4_vf, "* igb_uio | uio_pci_generic");
 RTE_PMD_REGISTER_PARAM_STRING(net_enetc4_vf,
-			      ENETC4_VSI_DISABLE "=<any>");
+			      ENETC4_VSI_DISABLE "=<any> "
+			      ENETC4_VSI_TIMEOUT "=<uint> "
+			      ENETC4_VSI_DELAY "=<uint>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_vf_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH 06/10] net/enetc: support scatter-gather
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Vanshika Shukla
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

From: Vanshika Shukla <vanshika.shukla@nxp.com>

Add scatter-gather support for ENETC4 PMD:
- Add ENETC_RXBD_LSTATUS_R/F bits for RX BD status
- Add ENETC4_MAX_SEGS (63) for max segments per TX packet
- Update enetc4_vf_dev_infos_get to fill nb_seg_max, offloads,
  max queues and packet length
- Extend enetc_xmit_pkts_nc to handle multi-segment mbufs
- Extend enetc_clean_rx_ring_nc to chain scatter-gather segments
  using LSTATUS_R/F bits

Signed-off-by: Vanshika Shukla <vanshika.shukla@nxp.com>
---
 drivers/net/enetc/base/enetc_hw.h |   2 +
 drivers/net/enetc/enetc.h         |   4 +-
 drivers/net/enetc/enetc4_vf.c     |  46 ++++++++---
 drivers/net/enetc/enetc_rxtx.c    | 124 +++++++++++++++++++-----------
 4 files changed, 119 insertions(+), 57 deletions(-)

diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index f79c950..6e96562 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -230,6 +230,8 @@ enum enetc_bdr_type {TX, RX};
 			(0x0005 | ENETC_PKT_TYPE_IPV4)
 #define ENETC_PKT_TYPE_IPV6_ESP \
 			(0x0005 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_RXBD_LSTATUS_R	BIT(30)
+#define ENETC_RXBD_LSTATUS_F	BIT(31)
 
 /* PCI device info */
 struct enetc_hw {
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 4d99b5b..439d2d6 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019,2024 NXP
+ * Copyright 2018-2019,2024-2026 NXP
  */
 
 #ifndef _ENETC_H_
@@ -28,6 +28,8 @@
 #define MIN_BD_COUNT   32
 /* BD ALIGN */
 #define BD_ALIGN       8
+/* Max segments per ENETC4 TX packet (scatter-gather) */
+#define ENETC4_MAX_SEGS	63
 
 /* minimum frame size supported */
 #define ENETC_MAC_MINFRM_SIZE	68
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index bec7128..9dc4e1d 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2024 NXP
+ * Copyright 2024-2026 NXP
  */
 
 #include <stdbool.h>
@@ -18,8 +18,19 @@ uint16_t enetc_crc_table[ENETC_CRC_TABLE_SIZE];
 bool enetc_crc_gen;
 
 /* Supported Rx offloads */
-static uint64_t dev_vf_rx_offloads_sup =
-	RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
+static uint64_t dev_rx_offloads_sup =
+	RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
+	RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
+	RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
+	RTE_ETH_RX_OFFLOAD_SCATTER;
+
+/* Supported Tx offloads */
+static uint64_t dev_tx_offloads_sup =
+	RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+	RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+	RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
 
 static void
 enetc_gen_crc_table(void)
@@ -61,21 +72,38 @@ static int
 enetc4_vf_dev_infos_get(struct rte_eth_dev *dev,
 			struct rte_eth_dev_info *dev_info)
 {
-	int ret = 0;
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
 	PMD_INIT_FUNC_TRACE();
 
-	ret = enetc4_dev_infos_get(dev, dev_info);
-	if (ret)
-		return ret;
-
+	dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
+		.nb_max = MAX_BD_COUNT,
+		.nb_min = MIN_BD_COUNT,
+		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
+	};
+	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
+		.nb_max = MAX_BD_COUNT,
+		.nb_min = MIN_BD_COUNT,
+		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
+	};
+	dev_info->max_rx_queues = hw->max_rx_queues;
+	dev_info->max_tx_queues = hw->max_tx_queues;
+	dev_info->max_rx_pktlen = ENETC4_MAC_MAXFRM_SIZE;
 	dev_info->max_mtu = dev_info->max_rx_pktlen - (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN);
 	dev_info->max_mac_addrs = ENETC4_MAC_ENTRIES;
-	dev_info->rx_offload_capa |= dev_vf_rx_offloads_sup;
+	dev_info->rx_offload_capa = dev_rx_offloads_sup;
+	dev_info->tx_offload_capa = dev_tx_offloads_sup;
+	dev_info->flow_type_rss_offloads = ENETC_RSS_OFFLOAD_ALL;
 
 	return 0;
 }
 
+
 int
 enetc4_vf_dev_stop(struct rte_eth_dev *dev __rte_unused)
 {
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index c87349f..a37c835 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -149,54 +149,64 @@ enetc_xmit_pkts_nc(void *tx_queue,
 		struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts)
 {
-	struct enetc_swbd *tx_swbd;
-	int i, start, bds_to_use;
-	struct enetc_tx_bd *txbd;
 	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
-	unsigned int buflen, j;
+	int i, start, bds_to_use, bd_count;
+	struct enetc_tx_bd *txbd;
+	struct rte_mbuf *seg;
+	uint16_t seg_len, segs_per_pkt;
+	bool is_first_seg;
+	unsigned int j;
 	uint8_t *data;
 
 	i = tx_ring->next_to_use;
-
 	bds_to_use = enetc_bd_unused(tx_ring);
-	if (bds_to_use < nb_pkts)
-		nb_pkts = bds_to_use;
-
+	bd_count = tx_ring->bd_count;
 	start = 0;
-	while (nb_pkts--) {
-		tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
 
-		buflen = rte_pktmbuf_pkt_len(tx_ring->q_swbd[i].buffer_addr);
-		data = rte_pktmbuf_mtod(tx_ring->q_swbd[i].buffer_addr, void *);
-		for (j = 0; j <= buflen; j += RTE_CACHE_LINE_SIZE)
-			dcbf(data + j);
+	while (start < nb_pkts) {
+		seg = tx_pkts[start];
+		segs_per_pkt = seg->nb_segs;
 
-		txbd = ENETC_TXBD(*tx_ring, i);
-		txbd->flags = 0;
-		if (tx_ring->q_swbd[i].buffer_addr->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
-			enetc4_tx_offload_checksum(tx_ring->q_swbd[i].buffer_addr, txbd);
+		if (bds_to_use < segs_per_pkt)
+			break;
 
-		tx_swbd = &tx_ring->q_swbd[i];
-		txbd->frm_len = buflen;
-		txbd->buf_len = txbd->frm_len;
-		txbd->addr = (uint64_t)(uintptr_t)
-		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
-				 tx_swbd->buffer_addr->data_off);
+		is_first_seg = true;
+		while (seg) {
+			tx_ring->q_swbd[i].buffer_addr = NULL;
+			seg_len = rte_pktmbuf_data_len(seg);
+			data = rte_pktmbuf_mtod(seg, void *);
+
+			/* Flush payload to PoC so HW DMA reads the correct data. */
+			for (j = 0; j < seg_len; j += RTE_CACHE_LINE_SIZE)
+				dcbf(data + j);
+			/* Cover the last byte of an unaligned buffer. */
+			dcbf(data + (seg_len - 1));
+
+			txbd = ENETC_TXBD(*tx_ring, i);
+			txbd->flags = 0;
+			if (is_first_seg) {
+				tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
+				txbd->frm_len = rte_pktmbuf_pkt_len(seg);
+				if (seg->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
+					enetc4_tx_offload_checksum(seg, txbd);
+				is_first_seg = false;
+			}
+
+			txbd->buf_len = rte_cpu_to_le_16(seg_len);
+			txbd->addr = rte_cpu_to_le_64(rte_mbuf_data_iova(seg));
+			seg = seg->next;
+			i++;
+			bds_to_use--;
+			if (unlikely(i == bd_count))
+				i = 0;
+		}
+
+		/* Set the frame-last flag on the final BD of this packet. */
 		txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_F);
-		i++;
 		start++;
-		if (unlikely(i == tx_ring->bd_count))
-			i = 0;
 	}
 
-	/* we're only cleaning up the Tx ring here, on the assumption that
-	 * software is slower than hardware and hardware completed sending
-	 * older frames out by now.
-	 * We're also cleaning up the ring before kicking off Tx for the new
-	 * batch to minimize chances of contention on the Tx ring
-	 */
 	enetc_clean_tx_ring(tx_ring);
-
 	tx_ring->next_to_use = i;
 	enetc_wr_reg(tx_ring->tcir, i);
 	return start;
@@ -501,38 +511,59 @@ enetc_clean_rx_ring_nc(struct enetc_bdr *rx_ring,
 	int cleaned_cnt, i;
 	struct enetc_swbd *rx_swbd;
 	union enetc_rx_bd *rxbd, rxbd_temp;
+	struct rte_mbuf *first_seg = NULL, *cur_seg = NULL;
 	uint32_t bd_status;
 	uint8_t *data;
 	uint32_t j;
+	struct rte_mbuf *seg;
+	uint16_t data_len;
 
 	/* next descriptor to process */
 	i = rx_ring->next_to_clean;
-	/* next descriptor to process */
 	rxbd = ENETC_RXBD(*rx_ring, i);
-
 	cleaned_cnt = enetc_bd_unused(rx_ring);
 	rx_swbd = &rx_ring->q_swbd[i];
 
 	while (likely(rx_frm_cnt < work_limit)) {
 		rxbd_temp = *rxbd;
 		bd_status = rte_le_to_cpu_32(rxbd_temp.r.lstatus);
-		if (!bd_status)
+		/* LSTATUS_R indicates this BD has been written by HW */
+		if (!(bd_status & ENETC_RXBD_LSTATUS_R))
 			break;
 		if (rxbd_temp.r.error)
 			rx_ring->ierrors++;
 
-		rx_swbd->buffer_addr->pkt_len = rxbd_temp.r.buf_len -
-						rx_ring->crc_len;
-		rx_swbd->buffer_addr->data_len = rx_swbd->buffer_addr->pkt_len;
-		rx_swbd->buffer_addr->hash.rss = rxbd_temp.r.rss_hash;
-		enetc_dev_rx_parse(rx_swbd->buffer_addr,
-				   rxbd_temp.r.parse_summary);
+		seg = rx_swbd->buffer_addr;
+		data_len = rte_le_to_cpu_16(rxbd_temp.r.buf_len);
+		seg->data_len = data_len;
+
+		if (!first_seg) {
+			first_seg = seg;
+			cur_seg = seg;
+			first_seg->pkt_len = data_len;
+			enetc_dev_rx_parse(first_seg, rxbd_temp.r.parse_summary);
+			first_seg->hash.rss = rxbd_temp.r.rss_hash;
+		} else {
+			first_seg->pkt_len += data_len;
+			first_seg->nb_segs++;
+			cur_seg->next = seg;
+			cur_seg = seg;
+		}
 
-		data = rte_pktmbuf_mtod(rx_swbd->buffer_addr, void *);
-		for (j = 0; j <= rx_swbd->buffer_addr->pkt_len; j += RTE_CACHE_LINE_SIZE)
+		/* Invalidate packet data cache lines so CPU reads HW-written data. */
+		data = rte_pktmbuf_mtod(seg, void *);
+		for (j = 0; j < data_len; j += RTE_CACHE_LINE_SIZE)
 			dccivac(data + j);
+		dccivac(data + (data_len - 1));
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			seg->next = NULL;
+			first_seg->pkt_len -= rx_ring->crc_len;
+			rx_pkts[rx_frm_cnt] = first_seg;
+			rx_frm_cnt++;
+			first_seg = NULL;
+		}
 
-		rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
 		cleaned_cnt++;
 		rx_swbd++;
 		i++;
@@ -541,7 +572,6 @@ enetc_clean_rx_ring_nc(struct enetc_bdr *rx_ring,
 			rx_swbd = &rx_ring->q_swbd[i];
 		}
 		rxbd = ENETC_RXBD(*rx_ring, i);
-		rx_frm_cnt++;
 	}
 
 	rx_ring->next_to_clean = i;
-- 
2.25.1


^ permalink raw reply related

* [PATCH 07/10] net/enetc: add option to disable VSI messaging
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Add devarg 'enetc4_vsi_disable' to allow disabling features
dependent on VSI-PSI messaging. This is useful for testing DPDK
with a PF driver that does not support VSI-PSI messages.

When the devarg is present, the driver installs a reduced ops table
(enetc4_vf_ops_no_vsi_m) that replaces link_update with a no-op stub
and omits the MAC/VLAN filter ops that require VSI-PSI messages.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc4_vf.c | 61 +++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index 9dc4e1d..44c0dc0 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -3,11 +3,14 @@
  */
 
 #include <stdbool.h>
+#include <rte_kvargs.h>
 #include <rte_random.h>
 #include <dpaax_iova_table.h>
 #include "enetc_logs.h"
 #include "enetc.h"
 
+#define ENETC4_VSI_DISABLE		"enetc4_vsi_disable"
+
 #define ENETC_CRC_TABLE_SIZE		256
 #define ENETC_POLY			0x1021
 #define ENETC_CRC_INIT			0xffff
@@ -687,6 +690,13 @@ enetc4_vf_get_link_speed(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *re
 	return err;
 }
 
+static int
+enetc4_vf_link_update_dummy(struct rte_eth_dev *dev __rte_unused,
+			    int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
 static int
 enetc4_vf_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
 {
@@ -1148,6 +1158,27 @@ static const struct rte_pci_id pci_vf_id_enetc4_map[] = {
 };
 
 /* Features supported by this driver */
+/* ops table used when VSI messaging is disabled */
+static const struct eth_dev_ops enetc4_vf_ops_no_vsi_m = {
+	.dev_configure        = enetc4_dev_configure,
+	.dev_start            = enetc4_vf_dev_start,
+	.dev_stop             = enetc4_vf_dev_stop,
+	.dev_close            = enetc4_dev_close,
+	.stats_get            = enetc4_vf_stats_get,
+	.dev_infos_get        = enetc4_vf_dev_infos_get,
+	.mtu_set              = enetc4_vf_mtu_set,
+	.link_update	      = enetc4_vf_link_update_dummy,
+	.rx_queue_setup       = enetc4_rx_queue_setup,
+	.rx_queue_start       = enetc4_rx_queue_start,
+	.rx_queue_stop        = enetc4_rx_queue_stop,
+	.rx_queue_release     = enetc4_rx_queue_release,
+	.tx_queue_setup       = enetc4_tx_queue_setup,
+	.tx_queue_start       = enetc4_tx_queue_start,
+	.tx_queue_stop        = enetc4_tx_queue_stop,
+	.tx_queue_release     = enetc4_tx_queue_release,
+	.dev_supported_ptypes_get = enetc4_supported_ptypes_get,
+};
+
 static const struct eth_dev_ops enetc4_vf_ops = {
 	.dev_configure        = enetc4_dev_configure,
 	.dev_start            = enetc4_vf_dev_start,
@@ -1283,7 +1314,28 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 	struct enetc_hw *enetc_hw = &hw->hw;
 
 	PMD_INIT_FUNC_TRACE();
-	eth_dev->dev_ops = &enetc4_vf_ops;
+
+	/* check if VSI messaging should be disabled via devarg */
+	if (eth_dev->device->devargs) {
+		struct rte_kvargs *kvlist;
+
+		kvlist = rte_kvargs_parse(eth_dev->device->devargs->args,
+					  NULL);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ENETC4_VSI_DISABLE) != 0) {
+				ENETC_PMD_NOTICE("VSI messaging disabled by devarg");
+				eth_dev->dev_ops = &enetc4_vf_ops_no_vsi_m;
+			} else {
+				eth_dev->dev_ops = &enetc4_vf_ops;
+			}
+			rte_kvargs_free(kvlist);
+		} else {
+			eth_dev->dev_ops = &enetc4_vf_ops;
+		}
+	} else {
+		eth_dev->dev_ops = &enetc4_vf_ops;
+	}
+
 	enetc4_dev_hw_init(eth_dev);
 
 	si_cap = enetc_rd(enetc_hw, ENETC_SICAPR0);
@@ -1304,8 +1356,9 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 	ENETC_PMD_DEBUG("port_id %d vendorID=0x%x deviceID=0x%x",
 			eth_dev->data->port_id, pci_dev->id.vendor_id,
 			pci_dev->id.device_id);
-	/* update link */
-	enetc4_vf_link_update(eth_dev, 0);
+	/* update link if VSI messaging is enabled */
+	if (eth_dev->dev_ops == &enetc4_vf_ops)
+		enetc4_vf_link_update(eth_dev, 0);
 
 	return 0;
 }
@@ -1389,4 +1442,6 @@ static struct rte_pci_driver rte_enetc4_vf_pmd = {
 RTE_PMD_REGISTER_PCI(net_enetc4_vf, rte_enetc4_vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4_vf, pci_vf_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4_vf, "* igb_uio | uio_pci_generic");
+RTE_PMD_REGISTER_PARAM_STRING(net_enetc4_vf,
+			      ENETC4_VSI_DISABLE "=<any>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_vf_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH 05/10] net/enetc: update random MAC generation code
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Use rte_eth_random_addr() instead of manual rte_rand() based MAC
generation. Also handle VF path by writing to ENETC_SIPMAR0/1 instead
of ENETC_PSIPMAR0/1 when running as a VF.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc_ethdev.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/enetc/enetc_ethdev.c b/drivers/net/enetc/enetc_ethdev.c
index 407179f..427da87 100644
--- a/drivers/net/enetc/enetc_ethdev.c
+++ b/drivers/net/enetc/enetc_ethdev.c
@@ -196,20 +196,18 @@ enetc_hardware_init(struct enetc_eth_hw *hw)
 	}
 
 	if ((high_mac | low_mac) == 0) {
-		char *first_byte;
-
 		ENETC_PMD_NOTICE("MAC is not available for this SI, "
 				"set random MAC");
-		mac = (uint32_t *)hw->mac.addr;
-		*mac = (uint32_t)rte_rand();
-		first_byte = (char *)mac;
-		*first_byte &= 0xfe;	/* clear multicast bit */
-		*first_byte |= 0x02;	/* set local assignment bit (IEEE802) */
-
-		enetc_port_wr(enetc_hw, ENETC_PSIPMAR0(0), *mac);
-		mac++;
-		*mac = (uint16_t)rte_rand();
-		enetc_port_wr(enetc_hw, ENETC_PSIPMAR1(0), *mac);
+		rte_eth_random_addr(hw->mac.addr);
+		high_mac = *(uint32_t *)hw->mac.addr;
+		low_mac = *(uint16_t *)(hw->mac.addr + 4);
+		if (hw->device_id == ENETC_DEV_ID_VF) {
+			enetc_wr(enetc_hw, ENETC_SIPMAR0, high_mac);
+			enetc_wr(enetc_hw, ENETC_SIPMAR1, low_mac);
+		} else {
+			enetc_port_wr(enetc_hw, ENETC_PSIPMAR0(0), high_mac);
+			enetc_port_wr(enetc_hw, ENETC_PSIPMAR1(0), low_mac);
+		}
 		enetc_print_ethaddr("New address: ",
 			      (const struct rte_ether_addr *)hw->mac.addr);
 	}
-- 
2.25.1


^ permalink raw reply related

* [PATCH 04/10] net/enetc: support ESP packet type in packet parsing
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Add ESP (Encapsulating Security Payload) packet type definitions and
handling to the RX packet parsing path. Also update the supported
ptypes array to advertise ESP tunnel type support.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/base/enetc_hw.h |  4 ++++
 drivers/net/enetc/enetc_ethdev.c  |  4 +++-
 drivers/net/enetc/enetc_rxtx.c    | 10 ++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index 19efadd..f79c950 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -226,6 +226,10 @@ enum enetc_bdr_type {TX, RX};
 			(0x0003 | ENETC_PKT_TYPE_IPV4)
 #define ENETC_PKT_TYPE_IPV6_ICMP \
 			(0x0003 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_PKT_TYPE_IPV4_ESP \
+			(0x0005 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_ESP \
+			(0x0005 | ENETC_PKT_TYPE_IPV6)
 
 /* PCI device info */
 struct enetc_hw {
diff --git a/drivers/net/enetc/enetc_ethdev.c b/drivers/net/enetc/enetc_ethdev.c
index f41f3c1..407179f 100644
--- a/drivers/net/enetc/enetc_ethdev.c
+++ b/drivers/net/enetc/enetc_ethdev.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #include <stdbool.h>
@@ -95,6 +95,8 @@ enetc_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused,
 		RTE_PTYPE_L4_UDP,
 		RTE_PTYPE_L4_SCTP,
 		RTE_PTYPE_L4_ICMP,
+		RTE_PTYPE_TUNNEL_ESP,
+		RTE_PTYPE_UNKNOWN,
 	};
 
 	*no_of_elements = RTE_DIM(ptypes);
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index b44e6f3..c87349f 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -370,6 +370,16 @@ enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
 				 RTE_PTYPE_L3_IPV6 |
 				 RTE_PTYPE_L4_UDP;
 		return;
+	case ENETC_PKT_TYPE_IPV4_ESP:
+		m->packet_type = RTE_PTYPE_L2_ETHER |
+				 RTE_PTYPE_L3_IPV4 |
+				 RTE_PTYPE_TUNNEL_ESP;
+		return;
+	case ENETC_PKT_TYPE_IPV6_ESP:
+		m->packet_type = RTE_PTYPE_L2_ETHER |
+				 RTE_PTYPE_L3_IPV6 |
+				 RTE_PTYPE_TUNNEL_ESP;
+		return;
 	case ENETC_PKT_TYPE_IPV4_SCTP:
 		m->packet_type = RTE_PTYPE_L2_ETHER |
 				 RTE_PTYPE_L3_IPV4 |
-- 
2.25.1


^ permalink raw reply related

* [PATCH 03/10] net/enetc: fix queue initialization
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, stable
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Hardware can misbehave if the user tries to reset the consumer and
producer indexes without resetting the ring.

This patch adds the ring reset step (clearing the enable bit in
TBMR/RBMR) before resetting the indexes.

Fixes: 6c9c5aadc0e0 ("net/enetc: support ENETC4 queue API")
Cc: stable@dpdk.org

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc4_ethdev.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index 78eba70..154fc09 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2024 NXP
+ * Copyright 2024-2026 NXP
  */
 
 #include <stdbool.h>
@@ -279,6 +279,7 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 		     const struct rte_eth_txconf *tx_conf)
 {
 	int err;
+	uint32_t tx_data;
 	struct enetc_bdr *tx_ring;
 	struct rte_eth_dev_data *data = dev->data;
 	struct enetc_eth_adapter *priv =
@@ -301,6 +302,10 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 		goto fail;
 
 	tx_ring->ndev = dev;
+	/* reset queue */
+	tx_data = enetc4_txbdr_rd(&priv->hw.hw, tx_ring->index, ENETC_TBMR);
+	tx_data &= ~ENETC_TBMR_EN;
+	enetc4_txbdr_wr(&priv->hw.hw, tx_ring->index, ENETC_TBMR, tx_data);
 	enetc4_setup_txbdr(&priv->hw.hw, tx_ring);
 	data->tx_queues[queue_idx] = tx_ring;
 	tx_ring->tx_deferred_start = tx_conf->tx_deferred_start;
@@ -427,6 +432,7 @@ enetc4_rx_queue_setup(struct rte_eth_dev *dev,
 		     struct rte_mempool *mb_pool)
 {
 	int err = 0;
+	uint32_t rx_enable;
 	struct enetc_bdr *rx_ring;
 	struct rte_eth_dev_data *data =  dev->data;
 	struct enetc_eth_adapter *adapter =
@@ -450,6 +456,10 @@ enetc4_rx_queue_setup(struct rte_eth_dev *dev,
 		goto fail;
 
 	rx_ring->ndev = dev;
+	/* reset queue */
+	rx_enable = enetc4_rxbdr_rd(&adapter->hw.hw, rx_ring->index, ENETC_RBMR);
+	rx_enable &= ~ENETC_RBMR_EN;
+	enetc4_rxbdr_wr(&adapter->hw.hw, rx_ring->index, ENETC_RBMR, rx_enable);
 	enetc4_setup_rxbdr(&adapter->hw.hw, rx_ring, mb_pool);
 	data->rx_queues[rx_queue_id] = rx_ring;
 	rx_ring->rx_deferred_start = rx_conf->rx_deferred_start;
-- 
2.25.1


^ permalink raw reply related

* [PATCH 02/10] net/enetc: fix TX BDs flag overwrite issue
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, stable
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

Zero the flags field before setting offload bits and set the
frame-last flag (F) after all descriptor fields are written.
This prevents stale flag bits from a previous packet corrupting
the current descriptor.

Fixes: 72f491f1e53c ("net/enetc: optimize ENETC4 data path")
Cc: stable@dpdk.org

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc_rxtx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index a2b8153..b44e6f3 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #include <stdbool.h>
@@ -172,7 +172,7 @@ enetc_xmit_pkts_nc(void *tx_queue,
 			dcbf(data + j);
 
 		txbd = ENETC_TXBD(*tx_ring, i);
-		txbd->flags = rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_F);
+		txbd->flags = 0;
 		if (tx_ring->q_swbd[i].buffer_addr->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
 			enetc4_tx_offload_checksum(tx_ring->q_swbd[i].buffer_addr, txbd);
 
@@ -182,6 +182,7 @@ enetc_xmit_pkts_nc(void *tx_queue,
 		txbd->addr = (uint64_t)(uintptr_t)
 		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
 				 tx_swbd->buffer_addr->data_off);
+		txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_F);
 		i++;
 		start++;
 		if (unlikely(i == tx_ring->bd_count))
-- 
2.25.1


^ permalink raw reply related

* [PATCH 01/10] net/enetc: fix TX BD structure
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, stable
In-Reply-To: <20260619184427.522518-1-g.singh@nxp.com>

The flags field in struct enetc_tx_bd was declared as uint16_t but
ENETC4 TX BDs only use an 8-bit flags byte. Fix the type to uint8_t
to match the hardware descriptor layout.

Fixes: 696fa399d797 ("net/enetc: add PMD with basic operations")
Cc: stable@dpdk.org

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/base/enetc_hw.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index 173d677..19efadd 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #ifndef _ENETC_HW_H_
@@ -198,8 +198,7 @@ enum enetc_bdr_type {TX, RX};
 
 #define ENETC_TX_ADDR(txq, addr) ((void *)((txq)->enetc_txbdr + (addr)))
 
-#define ENETC_TXBD_FLAGS_IE		BIT(13)
-#define ENETC_TXBD_FLAGS_F		BIT(15)
+#define ENETC_TXBD_FLAGS_F		BIT(7)
 
 /* ENETC Parsed values (Little Endian) */
 #define ENETC_PARSE_ERROR		0x8000
@@ -262,7 +261,7 @@ struct enetc_tx_bd {
 			uint8_t l3t:1;
 			uint8_t resv:5;
 			uint8_t l4t:3;
-			uint16_t flags;
+			uint8_t flags;
 		};/* default layout */
 		uint32_t txstart;
 		uint32_t lstatus;
-- 
2.25.1


^ permalink raw reply related

* [PATCH 00/10] NXP ENETC driver related changes
From: Gagandeep Singh @ 2026-06-19 18:44 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal

ENETC driver related changes series

Gagandeep Singh (8):
  net/enetc: fix TX BD structure
  net/enetc: fix TX BDs flag overwrite issue
  net/enetc: fix queue initialization
  net/enetc: support ESP packet type in packet parsing
  net/enetc: update random MAC generation code
  net/enetc: add option to disable VSI messaging
  net/enetc: add devargs to control VSI-PSI timeout and delay
  net/enetc4: add cacheable BD ring support with SW cache maintenance

Vanshika Shukla (2):
  net/enetc: support scatter-gather
  net/enetc: set user configurable priority to TX rings

 drivers/net/enetc/base/enetc_hw.h |  13 +-
 drivers/net/enetc/enetc.h         |  28 +-
 drivers/net/enetc/enetc4_ethdev.c | 123 +++++++--
 drivers/net/enetc/enetc4_vf.c     | 159 ++++++++++--
 drivers/net/enetc/enetc_ethdev.c  |  26 +-
 drivers/net/enetc/enetc_rxtx.c    | 411 ++++++++++++++++++++++++++----
 6 files changed, 649 insertions(+), 111 deletions(-)

-- 
2.25.1


^ permalink raw reply

* Re: [PATCH v4 00/23] et/sxe2: added Linkdata sxe2 ethernet driver
From: Stephen Hemminger @ 2026-06-19 17:31 UTC (permalink / raw)
  To: liujie5; +Cc: dev
In-Reply-To: <20260619080156.1539964-1-liujie5@linkdatatechnology.com>

On Fri, 19 Jun 2026 16:01:56 +0800
liujie5@linkdatatechnology.com wrote:

> From: Jie Liu <liujie5@linkdatatechnology.com>
> 
> This patch set implements core functionality for the SXE2 PMD,
> including basic driver framework, data path setup, and advanced
> offload features (VLAN, RSS,TM, PTP etc.).
> 
> V19:
>  - remove software statistics devargs
> 
> Jie Liu (23):
>   net/sxe2: remove software statistics devargs
>   net/sxe2: support AVX512 vectorized path for Rx and Tx
>   net/sxe2: add AVX2 vector data path for Rx and Tx
>   net/sxe2: add supported packet types get callback
>   net/sxe2: add link update callback
>   net/sxe2: support L2 filtering and MAC config
>   drivers: support RSS feature
>   net/sxe2: support TM hierarchy and shaping
>   net/sxe2: support IPsec inline protocol offload
>   net/sxe2: support statistics and multi-process
>   drivers: interrupt handling
>   net/sxe2: add NEON vec Rx/Tx burst functions
>   drivers: add support for VF representors
>   net/sxe2: add support for custom UDP tunnel ports
>   net/sxe2: support firmware version reading
>   net/sxe2: implement get monitor address
>   common/sxe2: add shared SFP module definitions
>   net/sxe2: support SFP module info and EEPROM access
>   net/sxe2: implement private dump info
>   net/sxe2: add mbuf validation in Tx debug mode
>   common/sxe2: add callback for memory event handling
>   net/sxe2: add private devargs parsing
>   net/sxe2: update sxe2 feature matrix docs
> 
>  doc/guides/nics/features/sxe2.ini          |   56 +
>  doc/guides/nics/sxe2.rst                   |  164 ++
>  drivers/common/sxe2/sxe2_common.c          |  156 ++
>  drivers/common/sxe2/sxe2_common.h          |    4 +
>  drivers/common/sxe2/sxe2_flow_public.h     |  633 +++++++
>  drivers/common/sxe2/sxe2_ioctl_chnl.c      |  178 +-
>  drivers/common/sxe2/sxe2_ioctl_chnl_func.h |   18 +
>  drivers/common/sxe2/sxe2_msg.h             |  118 ++
>  drivers/net/sxe2/meson.build               |   52 +
>  drivers/net/sxe2/sxe2_cmd_chnl.c           | 1587 +++++++++++++++-
>  drivers/net/sxe2/sxe2_cmd_chnl.h           |  139 ++
>  drivers/net/sxe2/sxe2_drv_cmd.h            |  523 +++++-
>  drivers/net/sxe2/sxe2_dump.c               |  302 +++
>  drivers/net/sxe2/sxe2_dump.h               |   12 +
>  drivers/net/sxe2/sxe2_ethdev.c             | 1513 ++++++++++++++-
>  drivers/net/sxe2/sxe2_ethdev.h             |  112 +-
>  drivers/net/sxe2/sxe2_ethdev_repr.c        |  609 ++++++
>  drivers/net/sxe2/sxe2_ethdev_repr.h        |   32 +
>  drivers/net/sxe2/sxe2_filter.c             |  895 +++++++++
>  drivers/net/sxe2/sxe2_filter.h             |  100 +
>  drivers/net/sxe2/sxe2_flow.c               | 1394 ++++++++++++++
>  drivers/net/sxe2/sxe2_flow.h               |   30 +
>  drivers/net/sxe2/sxe2_flow_define.h        |  144 ++
>  drivers/net/sxe2/sxe2_flow_parse_action.c  | 1182 ++++++++++++
>  drivers/net/sxe2/sxe2_flow_parse_action.h  |   23 +
>  drivers/net/sxe2/sxe2_flow_parse_engine.c  |  106 ++
>  drivers/net/sxe2/sxe2_flow_parse_engine.h  |   13 +
>  drivers/net/sxe2/sxe2_flow_parse_pattern.c | 1935 +++++++++++++++++++
>  drivers/net/sxe2/sxe2_flow_parse_pattern.h |   46 +
>  drivers/net/sxe2/sxe2_ipsec.c              | 1565 ++++++++++++++++
>  drivers/net/sxe2/sxe2_ipsec.h              |  254 +++
>  drivers/net/sxe2/sxe2_irq.c                | 1026 ++++++++++
>  drivers/net/sxe2/sxe2_irq.h                |   25 +
>  drivers/net/sxe2/sxe2_mac.c                |  530 ++++++
>  drivers/net/sxe2/sxe2_mac.h                |   84 +
>  drivers/net/sxe2/sxe2_mp.c                 |  414 ++++
>  drivers/net/sxe2/sxe2_mp.h                 |   67 +
>  drivers/net/sxe2/sxe2_queue.c              |   17 +-
>  drivers/net/sxe2/sxe2_queue.h              |   15 +-
>  drivers/net/sxe2/sxe2_rss.c                |  584 ++++++
>  drivers/net/sxe2/sxe2_rss.h                |   81 +
>  drivers/net/sxe2/sxe2_rx.c                 |   93 +-
>  drivers/net/sxe2/sxe2_rx.h                 |    2 +
>  drivers/net/sxe2/sxe2_security.c           |  335 ++++
>  drivers/net/sxe2/sxe2_security.h           |   77 +
>  drivers/net/sxe2/sxe2_stats.c              |  586 ++++++
>  drivers/net/sxe2/sxe2_stats.h              |   39 +
>  drivers/net/sxe2/sxe2_switchdev.c          |  332 ++++
>  drivers/net/sxe2/sxe2_switchdev.h          |   33 +
>  drivers/net/sxe2/sxe2_tm.c                 | 1151 ++++++++++++
>  drivers/net/sxe2/sxe2_tm.h                 |   76 +
>  drivers/net/sxe2/sxe2_tx.c                 |    7 +
>  drivers/net/sxe2/sxe2_txrx.c               | 1968 +++++++++++++++++++-
>  drivers/net/sxe2/sxe2_txrx.h               |    8 +
>  drivers/net/sxe2/sxe2_txrx_check_mbuf.c    |  595 ++++++
>  drivers/net/sxe2/sxe2_txrx_check_mbuf.h    |   38 +
>  drivers/net/sxe2/sxe2_txrx_poll.c          |  281 ++-
>  drivers/net/sxe2/sxe2_txrx_vec.c           |   46 +-
>  drivers/net/sxe2/sxe2_txrx_vec.h           |   38 +-
>  drivers/net/sxe2/sxe2_txrx_vec_avx2.c      |  748 ++++++++
>  drivers/net/sxe2/sxe2_txrx_vec_avx512.c    |  868 +++++++++
>  drivers/net/sxe2/sxe2_txrx_vec_common.h    |   53 +-
>  drivers/net/sxe2/sxe2_txrx_vec_neon.c      |  691 +++++++
>  drivers/net/sxe2/sxe2_txrx_vec_sse.c       |   29 +-
>  drivers/net/sxe2/sxe2_vsi.c                |  146 ++
>  drivers/net/sxe2/sxe2_vsi.h                |   12 +-
>  drivers/net/sxe2/sxe2vf_regs.h             |   85 +
>  67 files changed, 24809 insertions(+), 266 deletions(-)
>  create mode 100644 drivers/common/sxe2/sxe2_flow_public.h
>  create mode 100644 drivers/common/sxe2/sxe2_msg.h
>  create mode 100644 drivers/net/sxe2/sxe2_dump.c
>  create mode 100644 drivers/net/sxe2/sxe2_dump.h
>  create mode 100644 drivers/net/sxe2/sxe2_ethdev_repr.c
>  create mode 100644 drivers/net/sxe2/sxe2_ethdev_repr.h
>  create mode 100644 drivers/net/sxe2/sxe2_filter.c
>  create mode 100644 drivers/net/sxe2/sxe2_filter.h
>  create mode 100644 drivers/net/sxe2/sxe2_flow.c
>  create mode 100644 drivers/net/sxe2/sxe2_flow.h
>  create mode 100644 drivers/net/sxe2/sxe2_flow_define.h
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_action.c
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_action.h
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_engine.c
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_engine.h
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_pattern.c
>  create mode 100644 drivers/net/sxe2/sxe2_flow_parse_pattern.h
>  create mode 100644 drivers/net/sxe2/sxe2_ipsec.c
>  create mode 100644 drivers/net/sxe2/sxe2_ipsec.h
>  create mode 100644 drivers/net/sxe2/sxe2_irq.c
>  create mode 100644 drivers/net/sxe2/sxe2_mac.c
>  create mode 100644 drivers/net/sxe2/sxe2_mac.h
>  create mode 100644 drivers/net/sxe2/sxe2_mp.c
>  create mode 100644 drivers/net/sxe2/sxe2_mp.h
>  create mode 100644 drivers/net/sxe2/sxe2_rss.c
>  create mode 100644 drivers/net/sxe2/sxe2_rss.h
>  create mode 100644 drivers/net/sxe2/sxe2_security.c
>  create mode 100644 drivers/net/sxe2/sxe2_security.h
>  create mode 100644 drivers/net/sxe2/sxe2_stats.c
>  create mode 100644 drivers/net/sxe2/sxe2_stats.h
>  create mode 100644 drivers/net/sxe2/sxe2_switchdev.c
>  create mode 100644 drivers/net/sxe2/sxe2_switchdev.h
>  create mode 100644 drivers/net/sxe2/sxe2_tm.c
>  create mode 100644 drivers/net/sxe2/sxe2_tm.h
>  create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.c
>  create mode 100644 drivers/net/sxe2/sxe2_txrx_check_mbuf.h
>  create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_avx2.c
>  create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_avx512.c
>  create mode 100644 drivers/net/sxe2/sxe2_txrx_vec_neon.c
>  create mode 100644 drivers/net/sxe2/sxe2vf_regs.h
> 

This is looking much better; there are a few minor things that you probably
want to address before I merge it.

The (overly verbose) AI feedback is...

[PATCH v4 00/23] sxe2 driver feature additions

This is in good shape. Substantive structural progress on essentially
everything I raised against v3.

Verified across the assembled tree:

- All 23 commits build cleanly end-to-end. git bisect now works. This is
  the first revision of the series where that's been true.
- No LLM citation placeholders remain in commit messages. The v3 19/20
  message with "[citation:1][citation:3][citation:5]" markers and the
  "approximately X%" placeholder are both gone.
- The atomic-sw-stats fix is properly placed. 01/23 is a clean standalone
  cleanup commit that removes RTE_ATOMIC qualifiers from
  sxe2_rxq_sw_stats, replaces the atomic load/store/fetch_add calls with
  plain operations, removes the if(sw_stats_en) gating, removes the now-
  unused #include <rte_stdatomic.h>, and renames high_performance_mode to
  no_sched_mode to match the devargs string. Verified zero atomic
  operations on sw_stats remain in the assembled tree.
- drv-sw-stats devarg removed entirely (defined, parsed, but unused in
  v3 - now gone).
- All surviving devargs are documented in doc/guides/nics/sxe2.rst with
  substantive explanations covering what each parameter does, valid
  values, defaults, and trade-offs.
- The v3 19/20 patch is split into 21/23 (memseg-walk callback
  infrastructure, common/sxe2 only) and 22/23 (devargs parsing,
  net/sxe2). Both commit messages now describe one thing each.
- The 469-entry runtime ptype-table initialiser is now a file-scope
  `static const alignas(RTE_CACHE_LINE_SIZE) uint32_t
  sxe2_ptype_tbl[]` with C99 designated initialisers.
- Patch 02/23 (AVX512) scope is tightened - dropped from 13 files to 6,
  and the files it touches are all AVX512-related now.
- The v3 03/20 patch is split into 04/23 ("supported packet types") and
  05/23 ("link update callback"), addressing the scope-drift complaint.

Three remaining items, none blocking:

[PATCH v4 04/23] subject still does not match content

The commit message says the patch adds `dev_supported_ptypes_get`, and
the patch adds that callback - but it also creates the entire 1793-line
drivers/net/sxe2/sxe2_txrx.c with the Tx/Rx framework, packet-type
constant table, classification helpers, etc. The ptype callback is a
small piece of what this patch does. Either rename the subject to
something like "net/sxe2: add Rx/Tx framework and packet types callback"
(more honest) or split the txrx framework into a separate prior commit
with ptype-callback registration as a small follow-up.

[PATCH v4 04/23] ptype table refactor is incomplete

The static const table is correct, but adapter->ptype_tbl is still
declared in struct sxe2_adapter and sxe2_init_ptype_tbl() now just
memcpy's the const table into the per-adapter copy at init. The vec
paths in sxe2_txrx_vec_avx2.c, _avx512.c, _sse.c and the poll path all
read through rxq->vsi->adapter->ptype_tbl[] rather than the file-scope
const. To finish: remove the adapter field, remove sxe2_init_ptype_tbl,
and have all readers reference sxe2_ptype_tbl directly. The inner-loop
saves one indirection per packet, and per-port memory drops by
SXE2_MAX_PTYPE_NUM * 4 bytes.

[PATCH v4 22/23] flow-duplicate-pattern still defaults to 1

This devarg now has good documentation, but the documentation
clarifies the design objection rather than resolving it: a boolean
that toggles "duplicate rte_flow rules are rejected with EEXIST" vs
"duplicate rte_flow rules are accepted" is a per-boot toggle for
standard-API contract semantics. Standard APIs shouldn't behave
differently based on a vendor devarg. Pick one policy (rejecting
duplicates with EEXIST is what every other PMD does), apply it
unconditionally, and remove the devarg. The
switch_pattern_dup_allow rule metadata can stay if hardware needs it
internally - just don't expose the policy as a boot-time knob.

The other surviving devargs are acceptable as posted:
- no-sched-mode: kernel-coexistence rationale documented, defensible.
- rx-low-latency: ITR throttling threshold, well-documented trade-off,
  precedent in other PMDs.
- function-flow-direct: DPDK/kernel flow-table coexistence policy with
  no rte_flow analogue. The documentation explains this clearly.
- fnav-stat-type: hardware counter-mode selection. The cleaner long-
  term shape would be separate xstats names, but the current form is
  documented and reasonable for now.
- sched-layer-mode: hardware-imposed TM hierarchy cap. Should ideally
  be exposed via rte_tm_capabilities_get and selected at hierarchy
  build time rather than via devarg; worth raising as a future rte_tm
  enhancement.

Minor cosmetic:

In sxe2_parse_no_sched_mode() (22/23) the local variable is still
named `high_performance_mode`. The struct field rename in 01/23
didn't propagate to this parser local. Cosmetic.

Once 22/23 drops flow-duplicate-pattern and 04/23's subject is
either renamed or split, I'd consider this ready.


^ permalink raw reply

* Re: [PATCH v3 00/18] net/dpaa: bug fixes for bus, net and fmlib drivers
From: Stephen Hemminger @ 2026-06-19 17:28 UTC (permalink / raw)
  To: Hemant Agrawal; +Cc: david.marchand, dev
In-Reply-To: <20260619103901.2274740-1-hemant.agrawal@nxp.com>

On Fri, 19 Jun 2026 16:08:43 +0530
Hemant Agrawal <hemant.agrawal@nxp.com> wrote:

> This series contains bug fixes for the DPAA PMD (bus/dpaa, net/dpaa,
> net/dpaa/fmlib and dma/dpaa).
> 
> v3 changes (AI code review feedback):
> - P05: Clarify commit message: p_dev == NULL is equivalent to h_scheme == NULL
>   since p_dev = (t_device *)h_scheme; consistent with all sibling functions
> - P16: Add comment explaining the intentional loop continuation; clarify
>   commit message about the loop design
> - P17: Add DPAA_DP_LOG(WARNING) before silent return on l3_len == 0 to
>   aid debugging of corrupt/uninitialized mbufs
> 
> v2 changes:
> - P05: Fix commit message API name
> - P08: Guard DPAA_PUSH_QUEUES_NUMBER env-var for LS1043A (errata)
> - P09: Document dpaa_finish() removal
> - P10: Fix wrong Fixes: tag
> - P11: Split into two patches with correct Fixes: tags
> - P13: Also fix rx_buf_diallocate -> rx_buf_deallocate
> 
> All patches are bug fixes tagged with Fixes: and Cc: stable@dpdk.org.
> 
> Gagandeep Singh (3):
>   bus/dpaa: fix device probe issue
>   net/dpaa: fix device remove
>   net/dpaa: fix invalid check on interrupt unregister
> 
> Hemant Agrawal (11):
>   bus/dpaa: fix error handling of qman_create_fq
>   bus/dpaa: fix fqid endianness
>   bus/dpaa: fix error handling in qman_query
>   net/dpaa: fix modify cgr to use index
>   bus/dpaa: fix fd leak for ccsr mmap
>   net/dpaa: fix xstat name for tx undersized counter
>   net/dpaa: fix xstat string typos in BMI stats table
>   net/dpaa: remove duplicate ptype entries
>   net/dpaa: fix wrong buffer in xstats get by id
>   net/dpaa: fix null l3_len check in checksum offload
>   net/dpaa: fix mbuf leak in SG fd creation
> 
> Jun Yang (1):
>   bus/dpaa: fix BMI RX stats register offset
> 
> Prashant Gupta (1):
>   net/dpaa/fmlib: add null check in scheme delete
> 
> Vanshika Shukla (2):
>   net/dpaa: fix port_handle leak in fm_prev_cleanup
>   dma/dpaa: fix out-of-bounds access in SG descriptor enqueue
> 
>  drivers/bus/dpaa/base/qbman/bman_driver.c |  3 ++-
>  drivers/bus/dpaa/base/qbman/qman.c        | 11 ++++++---
>  drivers/bus/dpaa/base/qbman/qman_driver.c |  6 ++---
>  drivers/bus/dpaa/dpaa_bus.c               | 17 ++++++-------
>  drivers/bus/dpaa/include/fman.h           |  6 ++---
>  drivers/dma/dpaa/dpaa_qdma.c              |  7 +++++-
>  drivers/net/dpaa/dpaa_ethdev.c            | 30 +++++++++++------------
>  drivers/net/dpaa/dpaa_flow.c              |  4 +++
>  drivers/net/dpaa/dpaa_rxtx.c              |  5 ++++
>  drivers/net/dpaa/fmlib/fm_lib.c           |  3 +++
>  10 files changed, 56 insertions(+), 36 deletions(-)
> 

Applied to next-net with some minor changes to the commit messages to fix capitalization complaints from check-git-log.

^ permalink raw reply

* Re: [PATCH v2 0/4] net/bond: fixes and cleanup
From: Stephen Hemminger @ 2026-06-19 17:22 UTC (permalink / raw)
  To: dev
In-Reply-To: <20260529000157.235931-1-stephen@networkplumber.org>

On Thu, 28 May 2026 16:59:12 -0700
Stephen Hemminger <stephen@networkplumber.org> wrote:

> Automated analysis of the bonding found a few minor things.
> The bug fix is in patch 3, for the crash when a secondary process does Rx/Tx.
> 
> The cleanups are in handling of 8023ad mode setting
> and the logging macros.
> 
> v2 - feedback about the mode setting and log messages
> 
> Stephen Hemminger (4):
>   net/bonding: make 8023ad enable function void
>   net/bonding: check mode before setting dedicated queues
>   net/bonding: prevent crash on Rx/Tx from secondary process
>   net/bonding: remove redundant function names from log
> 
>  drivers/net/bonding/eth_bond_8023ad_private.h | 17 +----
>  drivers/net/bonding/rte_eth_bond_8023ad.c     | 16 ++--
>  drivers/net/bonding/rte_eth_bond_api.c        |  4 +-
>  drivers/net/bonding/rte_eth_bond_pmd.c        | 73 ++++++++++++++-----
>  4 files changed, 67 insertions(+), 43 deletions(-)
> 

Applied to next-net; took Bruce's suggestion to split the first patch.

^ permalink raw reply

* Re: [PATCH 2/6] ip_frag: discard datagrams with overlapping fragments
From: Stephen Hemminger @ 2026-06-19 17:01 UTC (permalink / raw)
  To: Morten Brørup; +Cc: dev, stable, Konstantin Ananyev
In-Reply-To: <98CBD80474FA8B44BF855DF32C47DC35F6592A@smartserver.smartshare.dk>

On Fri, 19 Jun 2026 15:12:21 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > +		/*
> > +		 * Overlap with an existing fragment. Per RFC 8200 section
> > 4.5
> > +		 * (and RFC 5722) the datagram must be discarded; the same
> > is
> > +		 * applied to IPv4. Free all collected fragments, drop this
> > one,
> > +		 * and invalidate the entry.
> > +		 */
> > +		if (ofs < fp->frags[i].ofs + fp->frags[i].len &&
> > +				fp->frags[i].ofs < ofs + len) {  
> 
> This only catches fragments that are smaller than existing fragments, i.e. fit within one of the existing fragments.
> It should be:
> if ((ofs >= fp->frags[i].ofs &&
> 		ofs < fp->frags[i].ofs + fp->frags[i].len) ||
> 		(ofs + len >= fp->frags[i].ofs &&
> 		ofs + len < fp->frags[i].ofs + fp->frags[i].len)) {
> 
> > +			ip_frag_free(fp, dr);

The code here is comparing an incoming fragment N against existing fragment E,
using half-open ranges [start, end).

The test in the patch is symmetric in N and E.
       ofs < e.ofs + e.len && e.ofs < ofs + len

The one you propose tests that either endpoint of N lands inside E.

Take a fixed stored fragment E = [200, 400) and run several incoming fragments through both.
 N0 = ofs, N1 = ofs+len.

N inside E: N = [250, 300)

E:        |=========|        (200..400)
N:           |===|           (250..300)

Patch: 250 < 400 && 200 < 300 → T && T → overlap. 
Proposed: (250≥200 && 250<400) → T → overlap. 
Both agree.

N encloses E: N = [100, 500)

E:        |=========|        (200..400)
N:      |=============|      (100..500)

Patch: 100 < 400 && 200 < 500 → T && T → overlap.
Proposed: (100≥200 && …) → F, (500≥200 && 500<400) → T && F → F, so F || F → no overlap, MISSED.

This is the case the proposed version drops. Neither endpoint of N (100 or 500) sits inside [200,400),
because N straddles E completely, so the proposed endpoint-in-E check fails even though the ranges clearly overlap.
The patch version catches it because the interval test doesn't care which range is larger.

N partial on the left: N = [100, 300)

E:        |=========|        (200..400)
N:      |======|             (100..300)

Patch: 100 < 400 && 200 < 300 → T → overlap.
Proposed: (300≥200 && 300<400) → T → overlap. 
Agree.

N partial on the right: N = [300, 500) — symmetric to the above, both catch it.

So on the four genuine-overlap geometries, the patch catches all four and your suggestion misses the enclosing one.
That is not acceptable, since the enclosing overlap is a legitimate attack shape (a big fragment overwriting a smaller stored one).

There is another issue.
The >= on the exclusive end produces a false positive on fragments that merely abut, which is the normal case.
Take E already stored as [1400, 2800) and an in-order-but-late fragment N = [0, 1400) arriving after it (ordinary out-of-order delivery):

N:      |======|             (0..1400)
E:             |======|      (1400..2800)

These share no bytes; byte 1400 belongs only to E. 
Patch: 0 < 2800 && 1400 < 1400 → T && F → no overlap, correct. 
Proposed: (1400≥1400 && 1400<2800) → T && T → overlap, wrong. 
This test would discard a perfectly valid datagram whenever a left-abutting fragment arrives after its neighbor.
Adjacent fragments abutting is what fragmentation produces by design, so this would fire constantly under reordering.

Bottom line: the patch was correct as far as I can tell.

^ permalink raw reply

* [PATCH v2] net/bnxt: avoid link flap on flow control set
From: Mohammad Shuab Siddique @ 2026-06-19 16:31 UTC (permalink / raw)
  To: dev; +Cc: kishore.padmanabha, stable, Chenna Arnoori,
	Mohammad Shuab Siddique
In-Reply-To: <20260616232300.359253-1-mohammad-shuab.siddique@broadcom.com>

From: Chenna Arnoori <chenna.arnoori@broadcom.com>

When OVS-DPDK reconciles port state, it calls flow_ctrl_get followed
by flow_ctrl_set if the returned configuration differs from its
database. The driver was reporting autoneg=1 whenever the firmware
had auto_pause set (including the AUTONEG_PAUSE bit 0x4), even
though no pause autoneg was actually requested. This mismatch
caused OVS to repeatedly call flow_ctrl_set, which triggered a
full link reconfig with PHY reset, flapping the link every time
any interface change occurred on the system.

Two problems were fixed. First, flow_ctrl_set was clearing all
autoneg bits instead of only the flow-control autoneg bit,
which also wiped the speed autoneg state. Second, the pause
set path was abandoning its own HWRM request and calling the
full link config function, which built a separate request
without the pause fields that set_pause_common would have
computed.

Port the kernel bnxt_en driver's approach: add a
set_link_common helper that merges link and speed fields into
an existing HWRM request, and rework set_pause to build a
single combined request with both pause and link fields when
a full reconfig is needed. When only pause changes without an
auto-to-force transition, a pause-only PHY config is sent
without a full link reprogram.

Fixes: 8aaf473bbed6 ("net/bnxt: add flow control operations")
Cc: stable@dpdk.org
Signed-off-by: Chenna Arnoori <chenna.arnoori@broadcom.com>
Signed-off-by: Mohammad Shuab Siddique <mohammad-shuab.siddique@broadcom.com>
---
 doc/guides/rel_notes/release_26_07.rst |  25 ++++
 drivers/net/bnxt/bnxt.h                |  10 ++
 drivers/net/bnxt/bnxt_ethdev.c         |  12 +-
 drivers/net/bnxt/bnxt_hwrm.c           | 151 +++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h           |   1 +
 5 files changed, 197 insertions(+), 2 deletions(-)

diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index b5285af5fe..ee80a37055 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -209,6 +209,31 @@ ABI Changes
 * No ABI change that would break compatibility with 25.11.
 
 
+Bug Fixes and Other Changes
+---------------------------
+
+.. This section should contain bug fixes added to the relevant
+   stable branch. Sample format:
+
+   * **code/area: Fixed issue in <component>.**
+
+     Fixed a specific issue with the following impact, caused by the following
+     action, and the resolution.
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =======================================================
+
+* **net/bnxt: Fixed link flapping on flow control configuration.**
+
+  Fixed an issue where setting flow control parameters via
+  ``rte_eth_dev_flow_ctrl_set()`` triggered an unconditional PHY reset,
+  causing repeated link flaps in environments such as OVS-DPDK that
+  periodically reconcile port state. The fix ports the kernel bnxt_en
+  approach of building a combined pause+link HWRM request rather than
+  delegating to the full link reconfiguration path.
+
+
 Known Issues
 ------------
 
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 7515f0564f..0ea18fb134 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -88,6 +88,8 @@
 #define HWRM_VERSION_1_9_1		0x10901
 #define HWRM_VERSION_1_9_2		0x10903
 #define HWRM_VERSION_1_10_2_13		0x10a020d
+/* Minimum spec version that supports AUTONEG_PAUSE bit in auto_pause field */
+#define HWRM_SPEC_CODE_AUTONEG_PAUSE	0x10201
 
 #define BNXT_MAX_MTU		9574
 #define BNXT_NUM_VLANS		2
@@ -333,6 +335,14 @@ struct bnxt_link_info {
 	uint8_t                 active_lanes;
 	uint8_t			option_flags;
 	uint16_t                pmd_speed_lanes;
+	/* Bitmask tracking which autoneg modes are active */
+	uint8_t			autoneg;
+#define BNXT_AUTONEG_SPEED		1 /* speed autoneg enabled */
+#define BNXT_AUTONEG_FLOW_CTRL		2 /* pause/flow-ctrl autoneg enabled */
+	/* True after autoneg->forced FC transition; cleared once set_pause
+	 * sends the combined pause+link HWRM request successfully.
+	 */
+	bool			link_reconfig_needed;
 };
 
 #define BNXT_COS_QUEUE_COUNT	8
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 1b8cf3a52a..467b551bd9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2565,7 +2565,7 @@ static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
 		return rc;
 
 	memset(fc_conf, 0, sizeof(*fc_conf));
-	if (bp->link_info->auto_pause)
+	if (bp->link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
 		fc_conf->autoneg = 1;
 	switch (bp->link_info->pause) {
 	case 0:
@@ -2601,6 +2601,14 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
 		return -ENOTSUP;
 	}
 
+	if (fc_conf->autoneg) {
+		bp->link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+	} else {
+		if (bp->link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
+			bp->link_info->link_reconfig_needed = true;
+		bp->link_info->autoneg &= ~BNXT_AUTONEG_FLOW_CTRL;
+	}
+
 	switch (fc_conf->mode) {
 	case RTE_ETH_FC_NONE:
 		bp->link_info->auto_pause = 0;
@@ -2642,7 +2650,7 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
 		}
 		break;
 	}
-	return bnxt_set_hwrm_link_config(bp, true);
+	return bnxt_hwrm_set_pause(bp);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 0c82935de9..a0983183c0 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -1876,6 +1876,22 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp,
 	link_info->auto_pause = resp->auto_pause;
 	link_info->force_pause = resp->force_pause;
 	link_info->auto_mode = resp->auto_mode;
+	link_info->autoneg = 0;
+
+	if (link_info->auto_mode != HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE) {
+		link_info->autoneg = BNXT_AUTONEG_SPEED;
+		if (bp->hwrm_spec_code >= HWRM_SPEC_CODE_AUTONEG_PAUSE) {
+			if (link_info->auto_pause &
+			    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_AUTONEG_PAUSE)
+				link_info->autoneg |=
+					BNXT_AUTONEG_FLOW_CTRL;
+		} else {
+			link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+		}
+	} else {
+		link_info->autoneg = 0;
+	}
+
 	link_info->phy_type = resp->phy_type;
 	link_info->media_type = resp->media_type;
 
@@ -4048,6 +4064,141 @@ static int bnxt_hwrm_port_phy_cfg_v2(struct bnxt *bp, struct bnxt_link_info *con
 	return rc;
 }
 
+static void
+bnxt_hwrm_set_pause_common(struct bnxt *bp,
+			   struct hwrm_port_phy_cfg_input *req)
+{
+	struct bnxt_link_info *link_info = bp->link_info;
+
+	if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
+		if (bp->hwrm_spec_code >= HWRM_SPEC_CODE_AUTONEG_PAUSE)
+			req->auto_pause =
+			    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_AUTONEG_PAUSE;
+		if (link_info->auto_pause &
+		    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_RX)
+			req->auto_pause |=
+			    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_RX;
+		if (link_info->auto_pause &
+		    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_TX)
+			req->auto_pause |=
+			    HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_TX;
+		req->enables |=
+			rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_PAUSE);
+	} else {
+		if (link_info->force_pause &
+		    HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_RX)
+			req->force_pause |=
+			    HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_RX;
+		if (link_info->force_pause &
+		    HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_TX)
+			req->force_pause |=
+			    HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_TX;
+		req->enables |=
+			rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_ENABLES_FORCE_PAUSE);
+		if (bp->hwrm_spec_code >= HWRM_SPEC_CODE_AUTONEG_PAUSE) {
+			req->auto_pause = req->force_pause;
+			req->enables |=
+				rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_PAUSE);
+		}
+	}
+}
+
+static void
+bnxt_hwrm_set_link_common(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req)
+{
+	struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
+	uint16_t autoneg, speed;
+
+	autoneg = bnxt_check_eth_link_autoneg(dev_conf->link_speeds);
+
+	if (BNXT_CHIP_P5(bp) &&
+	    dev_conf->link_speeds & RTE_ETH_LINK_SPEED_40G)
+		autoneg = 0;
+
+	if (autoneg == 1 && BNXT_CHIP_P5(bp) &&
+	    bp->link_info->auto_mode == 0 &&
+	    bp->link_info->force_pam4_link_speed ==
+	    HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_200GB)
+		autoneg = 0;
+
+	speed = bnxt_parse_eth_link_speed(bp, dev_conf->link_speeds,
+					  bp->link_info);
+	req->flags |=
+		rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESET_PHY);
+
+	if (autoneg == 1 &&
+	    (bp->link_info->support_auto_speeds ||
+	     bp->link_info->support_pam4_auto_speeds)) {
+		uint16_t spd_mask;
+		uint32_t en;
+
+		req->flags |=
+			rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESTART_AUTONEG);
+		spd_mask = bnxt_parse_eth_link_speed_mask(bp,
+							  dev_conf->link_speeds);
+		req->auto_link_speed_mask = rte_cpu_to_le_16(spd_mask);
+		req->auto_link_pam4_speed_mask =
+			rte_cpu_to_le_16(bp->link_info->auto_pam4_link_speed_mask);
+		en = HWRM_PORT_PHY_CFG_IN_EN_AUTO_LINK_SPEED_MASK |
+		     HWRM_PORT_PHY_CFG_IN_EN_AUTO_PAM4_LINK_SPD_MASK |
+		     HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_MODE;
+		req->enables |= rte_cpu_to_le_32(en);
+		req->auto_mode =
+			HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK;
+	} else {
+		uint32_t en;
+
+		req->flags |=
+			rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE);
+		if (speed) {
+			if (bp->link_info->link_signal_mode) {
+				req->force_pam4_link_speed =
+					rte_cpu_to_le_16(speed);
+				en = HWRM_PORT_PHY_CFG_IN_EN_FORCE_PAM4_LINK_SPEED;
+				req->enables |= rte_cpu_to_le_32(en);
+			} else {
+				req->force_link_speed =
+					rte_cpu_to_le_16(speed);
+			}
+		}
+	}
+	req->auto_duplex =
+		bnxt_parse_eth_link_duplex(dev_conf->link_speeds);
+	req->enables |=
+		rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_DUPLEX);
+}
+
+int bnxt_hwrm_set_pause(struct bnxt *bp)
+{
+	struct hwrm_port_phy_cfg_input req = {0};
+	struct hwrm_port_phy_cfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_link_info *link_info = bp->link_info;
+	int rc;
+
+	HWRM_PREP(&req, HWRM_PORT_PHY_CFG, BNXT_USE_CHIMP_MB);
+
+	bnxt_hwrm_set_pause_common(bp, &req);
+
+	if ((link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) ||
+	    link_info->link_reconfig_needed)
+		bnxt_hwrm_set_link_common(bp, &req);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	if (!rc) {
+		if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)) {
+			link_info->pause = link_info->force_pause;
+			link_info->auto_pause = 0;
+		}
+		link_info->link_reconfig_needed = false;
+	}
+
+	HWRM_UNLOCK();
+	return rc;
+}
+
 static int bnxt_set_hwrm_link_config_v2(struct bnxt *bp, bool link_up)
 {
 	struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index fc56223ab4..3034803023 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -261,6 +261,7 @@ void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index);
 int bnxt_alloc_hwrm_resources(struct bnxt *bp);
 int bnxt_get_hwrm_link_config(struct bnxt *bp, struct rte_eth_link *link);
 int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up);
+int bnxt_hwrm_set_pause(struct bnxt *bp);
 int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu);
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp);
 int bnxt_hwrm_func_reserve_vf_resc(struct bnxt *bp, bool test);
-- 
2.47.3


^ permalink raw reply related

* Re: [PATCH v1 0/5] prefix lcore role enum values
From: Stephen Hemminger @ 2026-06-19 15:39 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Thomas Monjalon, Huisong Li, andrew.rybchenko, dev, zhanjie9
In-Reply-To: <98CBD80474FA8B44BF855DF32C47DC35F65929@smartserver.smartshare.dk>

On Fri, 19 Jun 2026 09:54:51 +0200
Morten Brørup <mb@smartsharesystems.com> wrote:

> > > The problem with this patch it causes build failures now with abi  
> > diff.
> > 
> > It is probably a bug of an old version of abidiff.
> > I recommend updating.  
> 
> With the #define's the ABI has not changed. It's probably too indirect for abidiff to understand.
> If we absolutely want to please abidiff, we could keep the existing enums and #define RTE_LCORE_ROLE_RTE ROLE_RTE for now.
> But I'm in favor of what was done already.

The build failures are on GitHub, not in my local builds.
https://github.com/ovsrobot/dpdk/actions/runs/27789889172/job/82235965090

It makes looking at the patchwork dashboard difficult; all patches show up with a red mark.

^ permalink raw reply

* [v2] crypto/qat: require IPsec MB for HMAC precomputes
From: Emma Finn @ 2026-06-19 13:53 UTC (permalink / raw)
  To: Kai Ji; +Cc: dev, Emma Finn
In-Reply-To: <20260611085237.1459286-1-emma.finn@intel.com>

The IPsec MB library (v1.4.0+) is now required for HMAC precomputes, as
OpenSSL 3.0 deprecated the SHA*_Transform APIs. OpenSSL remains optional
for DOCSIS BPI cipher fallback via the EVP API.

On x86: IPsec MB required, OpenSSL optional (DOCSIS fallback)
On ARM: IPsec MB required, OpenSSL required (DOCSIS support)

Signed-off-by: Emma Finn <emma.finn@intel.com>
---
v2:
* Fix resource leak in ossl_legacy_provider_load()
* Added release note
---
 doc/guides/cryptodevs/qat.rst          |  28 +-
 doc/guides/rel_notes/release_26_07.rst |   8 +
 drivers/common/qat/meson.build         |  56 ++--
 drivers/crypto/qat/qat_sym_session.c   | 440 +++----------------------
 4 files changed, 100 insertions(+), 432 deletions(-)

diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index 0c2b85444e..4e60e8343c 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -352,15 +352,25 @@ To use this feature the user must set the devarg on process start as a device ad
  -a 03:01.1,qat_sym_cipher_crc_enable=1
 
 
-Running QAT PMD with Intel IPsec MB library for symmetric precomputes function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The QAT PMD uses Intel IPsec MB library for partial hash calculation
-in symmetric precomputes function by default,
-the minimum required version of IPsec MB library is v1.4.
-If this version of IPsec is not met, it will fallback to use OpenSSL.
-ARM will always default to using OpenSSL
-as ARM IPsec MB does not support the necessary algorithms.
+Running QAT PMD with Intel IPsec MB library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The QAT PMD requires the IPsec MB library for HMAC partial hash calculation
+in the symmetric precomputes function. OpenSSL 3.0 deprecated the low-level SHA*_Transform APIs
+that were previously used for HMAC precomputes.
+
+**On x86 platforms:**
+
+* Intel IPsec MB library (v1.4.0+) is required for HMAC precomputes.
+* OpenSSL (3.0+) is optional for DOCSIS BPI cipher fallback.
+
+**On ARM platforms:**
+
+* ARM IPsec MB library from ``gitlab.arm.com/arm-reference-solutions/ipsec-mb``
+  is required for HMAC precomputes.
+* OpenSSL (3.0+) is required for DOCSIS BPI cipher algorithms. ARM IPsec MB does not
+  implement CFB-one-byte cipher modes needed for DOCSIS. Without OpenSSL, DOCSIS
+  algorithms will not be available on ARM.
 
 
 Device and driver naming
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 34809a4850..809ec19d66 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -155,6 +155,14 @@ New Features
   Added AGENTS.md file for AI review
   and supporting scripts to review patches and documentation.
 
+* **Updated QAT PMD dependency requirements.**
+
+  The QAT crypto PMD now requires the IPsec MB library (v1.4.0+) for HMAC precomputes
+  on all platforms. OpenSSL 3.0+ is now optional and used only for DOCSIS BPI cipher
+  fallback. Previously, QAT could be built with only OpenSSL on x86.
+
+  On ARM, both IPsec MB and OpenSSL are required for full functionality.
+
 
 Removed Items
 -------------
diff --git a/drivers/common/qat/meson.build b/drivers/common/qat/meson.build
index 31e06f4376..969ff9e8ee 100644
--- a/drivers/common/qat/meson.build
+++ b/drivers/common/qat/meson.build
@@ -27,47 +27,61 @@ if disable_drivers.contains(qat_compress_path)
             'Explicitly disabled via build config')
 endif
 
-libcrypto = dependency('libcrypto', required: false, method: 'pkg-config')
+# IPsec MB is REQUIRED for HMAC precomputes (no OpenSSL 3.0 alternative)
+# OpenSSL is OPTIONAL for DOCSIS BPI cipher fallback
+IMB_required_ver = '1.4.0'
 
-if arch_subdir == 'arm'
-    if libcrypto.found()
-        ext_deps += libcrypto
-        dpdk_conf.set('RTE_QAT_OPENSSL', true)
+# Only check IPsec MB if qat_crypto wasn't already disabled
+if qat_crypto
+    if arch_subdir == 'arm'
+        IMB_header = '#include<ipsec-mb.h>'
     else
-        qat_crypto = false
-        dpdk_drvs_disabled += qat_crypto_path
-        set_variable('drv_' + qat_crypto_path.underscorify() + '_disable_reason',
-        'missing dependency for Arm, libcrypto')
+        IMB_header = '#include<intel-ipsec-mb.h>'
     endif
-else
-    IMB_required_ver = '1.4.0'
-    IMB_header = '#include<intel-ipsec-mb.h>'
+
+    # Check for IPsec MB library (required)
     libipsecmb = cc.find_library('IPSec_MB', required: false)
     if libipsecmb.found() and cc.links(
             'int main(void) {return 0;}', dependencies: libipsecmb)
-        # version comes with quotes, so we split based on " and take the middle
         imb_ver = cc.get_define('IMB_VERSION_STR',
             prefix : IMB_header).split('"')[1]
 
         if (imb_ver.version_compare('>=' + IMB_required_ver))
             ext_deps += libipsecmb
-        elif libcrypto.found()
-            ext_deps += libcrypto
-            dpdk_conf.set('RTE_QAT_OPENSSL', true)
+            dpdk_conf.set('RTE_QAT_IPSECMB', true)
         else
             qat_crypto = false
             dpdk_drvs_disabled += qat_crypto_path
             set_variable('drv_' + qat_crypto_path.underscorify() + '_disable_reason',
-                'missing dependency, libipsecmb or libcrypto')
+                'IPSec_MB version >= @0@ is required, found version @1@'.format(
+                    IMB_required_ver, imb_ver))
         endif
-    elif libcrypto.found()
-        ext_deps += libcrypto
-        dpdk_conf.set('RTE_QAT_OPENSSL', true)
     else
         qat_crypto = false
         dpdk_drvs_disabled += qat_crypto_path
         set_variable('drv_' + qat_crypto_path.underscorify() + '_disable_reason',
-            'missing dependency, libipsecmb or libcrypto')
+            'missing required dependency, libIPSec_MB >= @0@'.format(IMB_required_ver))
+    endif
+endif
+
+# Check for OpenSSL (optional, for DOCSIS BPI cipher fallback)
+openssl_required_ver = '3.0.0'
+if qat_crypto
+    libcrypto = dependency('libcrypto', required: false, method: 'pkg-config', version: '>= ' + openssl_required_ver)
+    if libcrypto.found()
+        ext_deps += libcrypto
+        dpdk_conf.set('RTE_QAT_OPENSSL', true)
+        if arch_subdir == 'arm'
+            message('QAT: Using OpenSSL @0@ for DOCSIS on ARM'.format(libcrypto.version()))
+        else
+            message('QAT: OpenSSL @0@ available for DOCSIS fallback'.format(libcrypto.version()))
+        endif
+    else
+        if arch_subdir == 'arm'
+            warning('QAT: OpenSSL >= @0@ not found - DOCSIS algorithms will not be available on ARM'.format(openssl_required_ver))
+        else
+            message('QAT: OpenSSL >= @0@ not found - DOCSIS will use IPsec MB only'.format(openssl_required_ver))
+        endif
     endif
 endif
 
diff --git a/drivers/crypto/qat/qat_sym_session.c b/drivers/crypto/qat/qat_sym_session.c
index ff01db4372..81f6bacff9 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -2,19 +2,18 @@
  * Copyright(c) 2015-2022 Intel Corporation
  */
 
-#define OPENSSL_API_COMPAT 0x10100000L
-
-#ifdef RTE_QAT_OPENSSL
-#include <openssl/sha.h>	/* Needed to calculate pre-compute values */
-#include <openssl/aes.h>	/* Needed to calculate pre-compute values */
-#include <openssl/md5.h>	/* Needed to calculate pre-compute values */
-#include <openssl/evp.h>	/* Needed for bpi runt block processing */
-#endif
-
-#ifndef RTE_QAT_OPENSSL
-#ifndef RTE_ARCH_ARM
+/* IPsec MB is required for HMAC precomputes (OpenSSL 3.0 removed Transform APIs)
+ * OpenSSL is optional for DOCSIS BPI cipher fallback
+ */
+#ifdef RTE_ARCH_ARM
+#include <ipsec-mb.h>
+#else
 #include <intel-ipsec-mb.h>
 #endif
+
+#ifdef RTE_QAT_OPENSSL
+#define OPENSSL_API_COMPAT 0x30000000L
+#include <openssl/evp.h>	/* For DOCSIS BPI cipher fallback */
 #endif
 
 #include <rte_memcpy.h>
@@ -38,9 +37,8 @@
 static OSSL_PROVIDER * legacy_lib;
 static OSSL_PROVIDER *default_lib;
 
-/* Some cryptographic algorithms such as MD and DES are now considered legacy
- * and not enabled by default in OpenSSL 3.0. Load up lagacy provider as MD5
- * DES are needed in QAT pre-computes and secure session creation.
+/* DES is considered legacy and not enabled by default in OpenSSL 3.0.
+ * Load legacy provider for DES-DOCSISBPI cipher fallback support.
  */
 static int ossl_legacy_provider_load(void)
 {
@@ -52,7 +50,8 @@ static int ossl_legacy_provider_load(void)
 	default_lib = OSSL_PROVIDER_load(NULL, "default");
 	if (default_lib == NULL) {
 		OSSL_PROVIDER_unload(legacy_lib);
-		return  -EINVAL;
+		legacy_lib = NULL;
+		return -EINVAL;
 	}
 
 	return 0;
@@ -60,8 +59,14 @@ static int ossl_legacy_provider_load(void)
 
 static void ossl_legacy_provider_unload(void)
 {
-	OSSL_PROVIDER_unload(legacy_lib);
-	OSSL_PROVIDER_unload(default_lib);
+	if (legacy_lib != NULL) {
+		OSSL_PROVIDER_unload(legacy_lib);
+		legacy_lib = NULL;
+	}
+	if (default_lib != NULL) {
+		OSSL_PROVIDER_unload(default_lib);
+		default_lib = NULL;
+	}
 }
 #endif
 #endif
@@ -635,7 +640,9 @@ qat_sym_session_configure(struct rte_cryptodev *dev,
 
 #ifdef RTE_QAT_OPENSSL
 #if (OPENSSL_VERSION_NUMBER >= 0x30000000L)
-	ossl_legacy_provider_load();
+	ret = ossl_legacy_provider_load();
+	if (ret != 0)
+		return ret;
 #endif
 #endif
 	ret = qat_sym_session_set_parameters(dev, xform,
@@ -644,7 +651,11 @@ qat_sym_session_configure(struct rte_cryptodev *dev,
 	if (ret != 0) {
 		QAT_LOG(ERR,
 		    "Crypto QAT PMD: failed to configure session parameters");
-
+#ifdef RTE_QAT_OPENSSL
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L)
+		ossl_legacy_provider_unload();
+#endif
+#endif
 		return ret;
 	}
 
@@ -1412,339 +1423,9 @@ static int qat_hash_get_block_size(enum icp_qat_hw_auth_algo qat_hash_alg)
 #define HMAC_OPAD_VALUE	0x5c
 #define HASH_XCBC_PRECOMP_KEY_NUM 3
 
-#ifdef RTE_QAT_OPENSSL
-static int partial_hash_sha1(uint8_t *data_in, uint8_t *data_out)
-{
-	SHA_CTX ctx;
-
-	if (!SHA1_Init(&ctx))
-		return -EFAULT;
-	SHA1_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, SHA_DIGEST_LENGTH);
-	return 0;
-}
-
-static int partial_hash_sha224(uint8_t *data_in, uint8_t *data_out)
-{
-	SHA256_CTX ctx;
-
-	if (!SHA224_Init(&ctx))
-		return -EFAULT;
-	SHA256_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, SHA256_DIGEST_LENGTH);
-	return 0;
-}
-
-static int partial_hash_sha256(uint8_t *data_in, uint8_t *data_out)
-{
-	SHA256_CTX ctx;
-
-	if (!SHA256_Init(&ctx))
-		return -EFAULT;
-	SHA256_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, SHA256_DIGEST_LENGTH);
-	return 0;
-}
-
-static int partial_hash_sha384(uint8_t *data_in, uint8_t *data_out)
-{
-	SHA512_CTX ctx;
-
-	if (!SHA384_Init(&ctx))
-		return -EFAULT;
-	SHA512_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, SHA512_DIGEST_LENGTH);
-	return 0;
-}
-
-static int partial_hash_sha512(uint8_t *data_in, uint8_t *data_out)
-{
-	SHA512_CTX ctx;
-
-	if (!SHA512_Init(&ctx))
-		return -EFAULT;
-	SHA512_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, SHA512_DIGEST_LENGTH);
-	return 0;
-}
-
-static int partial_hash_md5(uint8_t *data_in, uint8_t *data_out)
-{
-	MD5_CTX ctx;
-
-	if (!MD5_Init(&ctx))
-		return -EFAULT;
-	MD5_Transform(&ctx, data_in);
-	rte_memcpy(data_out, &ctx, MD5_DIGEST_LENGTH);
-
-	return 0;
-}
-
-static void aes_cmac_key_derive(uint8_t *base, uint8_t *derived)
-{
-	int i;
-
-	derived[0] = base[0] << 1;
-	for (i = 1; i < ICP_QAT_HW_AES_BLK_SZ ; i++) {
-		derived[i] = base[i] << 1;
-		derived[i - 1] |= base[i] >> 7;
-	}
-
-	if (base[0] & 0x80)
-		derived[ICP_QAT_HW_AES_BLK_SZ - 1] ^= QAT_AES_CMAC_CONST_RB;
-}
-
-static int
-partial_hash_compute(enum icp_qat_hw_auth_algo hash_alg,
-		uint8_t *data_in, uint8_t *data_out)
-{
-	int digest_size;
-	uint8_t digest[qat_hash_get_digest_size(
-			ICP_QAT_HW_AUTH_ALGO_DELIMITER)];
-	uint32_t *hash_state_out_be32;
-	uint64_t *hash_state_out_be64;
-	int i;
-
-	/* Initialize to avoid gcc warning */
-	memset(digest, 0, sizeof(digest));
-
-	digest_size = qat_hash_get_digest_size(hash_alg);
-	if (digest_size <= 0)
-		return -EFAULT;
-
-	hash_state_out_be32 = (uint32_t *)data_out;
-	hash_state_out_be64 = (uint64_t *)data_out;
-
-	switch (hash_alg) {
-	case ICP_QAT_HW_AUTH_ALGO_SHA1:
-		if (partial_hash_sha1(data_in, digest))
-			return -EFAULT;
-		for (i = 0; i < digest_size >> 2; i++, hash_state_out_be32++)
-			*hash_state_out_be32 =
-				rte_bswap32(*(((uint32_t *)digest)+i));
-		break;
-	case ICP_QAT_HW_AUTH_ALGO_SHA224:
-		if (partial_hash_sha224(data_in, digest))
-			return -EFAULT;
-		for (i = 0; i < digest_size >> 2; i++, hash_state_out_be32++)
-			*hash_state_out_be32 =
-				rte_bswap32(*(((uint32_t *)digest)+i));
-		break;
-	case ICP_QAT_HW_AUTH_ALGO_SHA256:
-		if (partial_hash_sha256(data_in, digest))
-			return -EFAULT;
-		for (i = 0; i < digest_size >> 2; i++, hash_state_out_be32++)
-			*hash_state_out_be32 =
-				rte_bswap32(*(((uint32_t *)digest)+i));
-		break;
-	case ICP_QAT_HW_AUTH_ALGO_SHA384:
-		if (partial_hash_sha384(data_in, digest))
-			return -EFAULT;
-		for (i = 0; i < digest_size >> 3; i++, hash_state_out_be64++)
-			*hash_state_out_be64 =
-				rte_bswap64(*(((uint64_t *)digest)+i));
-		break;
-	case ICP_QAT_HW_AUTH_ALGO_SHA512:
-		if (partial_hash_sha512(data_in, digest))
-			return -EFAULT;
-		for (i = 0; i < digest_size >> 3; i++, hash_state_out_be64++)
-			*hash_state_out_be64 =
-				rte_bswap64(*(((uint64_t *)digest)+i));
-		break;
-	case ICP_QAT_HW_AUTH_ALGO_MD5:
-		if (partial_hash_md5(data_in, data_out))
-			return -EFAULT;
-		break;
-	default:
-		QAT_LOG(ERR, "invalid hash alg %u", hash_alg);
-		return -EFAULT;
-	}
-
-	return 0;
-}
-
-static const uint8_t AES_CMAC_SEED[ICP_QAT_HW_AES_128_KEY_SZ];
-
-static int qat_sym_do_precomputes(enum icp_qat_hw_auth_algo hash_alg,
-				const uint8_t *auth_key,
-				uint16_t auth_keylen,
-				uint8_t *p_state_buf,
-				uint16_t *p_state_len,
-				uint8_t aes_cmac)
-{
-	int block_size;
-	uint8_t ipad[qat_hash_get_block_size(ICP_QAT_HW_AUTH_ALGO_DELIMITER)];
-	uint8_t opad[qat_hash_get_block_size(ICP_QAT_HW_AUTH_ALGO_DELIMITER)];
-	int i;
-
-	if (hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC) {
-
-		/* CMAC */
-		if (aes_cmac) {
-			AES_KEY enc_key;
-			uint8_t *in = NULL;
-			uint8_t k0[ICP_QAT_HW_AES_128_KEY_SZ];
-			uint8_t *k1, *k2;
-
-			auth_keylen = ICP_QAT_HW_AES_128_KEY_SZ;
-
-			in = rte_zmalloc("AES CMAC K1",
-					 ICP_QAT_HW_AES_128_KEY_SZ, 16);
-
-			if (in == NULL) {
-				QAT_LOG(ERR, "Failed to alloc memory");
-				return -ENOMEM;
-			}
-
-			rte_memcpy(in, AES_CMAC_SEED,
-				   ICP_QAT_HW_AES_128_KEY_SZ);
-			rte_memcpy(p_state_buf, auth_key, auth_keylen);
-
-			if (AES_set_encrypt_key(auth_key, auth_keylen << 3,
-				&enc_key) != 0) {
-				rte_free_sensitive(in);
-				return -EFAULT;
-			}
-
-			AES_encrypt(in, k0, &enc_key);
-
-			k1 = p_state_buf + ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
-			k2 = k1 + ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
-
-			aes_cmac_key_derive(k0, k1);
-			aes_cmac_key_derive(k1, k2);
-
-			rte_memzero_explicit(k0, ICP_QAT_HW_AES_128_KEY_SZ);
-			*p_state_len = ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ;
-			rte_free_sensitive(in);
-			goto out;
-		} else {
-			static uint8_t qat_aes_xcbc_key_seed[
-					ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ] = {
-				0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-				0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-				0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-				0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-				0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-				0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-			};
-
-			uint8_t *in = NULL;
-			uint8_t *out = p_state_buf;
-			int x;
-			AES_KEY enc_key;
-
-			in = rte_zmalloc("working mem for key",
-					ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ, 16);
-			if (in == NULL) {
-				QAT_LOG(ERR, "Failed to alloc memory");
-				return -ENOMEM;
-			}
-
-			rte_memcpy(in, qat_aes_xcbc_key_seed,
-					ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
-			for (x = 0; x < HASH_XCBC_PRECOMP_KEY_NUM; x++) {
-				if (AES_set_encrypt_key(auth_key,
-							auth_keylen << 3,
-							&enc_key) != 0) {
-					rte_free_sensitive(in -
-							   (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ));
-					rte_memzero_explicit(out -
-							     (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ),
-							     ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
-					return -EFAULT;
-				}
-				AES_encrypt(in, out, &enc_key);
-				in += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
-				out += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
-			}
-			*p_state_len = ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ;
-			rte_free_sensitive(in - x*ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ);
-			goto out;
-		}
-
-	} else if ((hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_128) ||
-		(hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_64)) {
-		uint8_t *in = NULL;
-		uint8_t *out = p_state_buf;
-		AES_KEY enc_key;
-
-		memset(p_state_buf, 0, ICP_QAT_HW_GALOIS_H_SZ +
-				ICP_QAT_HW_GALOIS_LEN_A_SZ +
-				ICP_QAT_HW_GALOIS_E_CTR0_SZ);
-		in = rte_zmalloc("working mem for key",
-				ICP_QAT_HW_GALOIS_H_SZ, 16);
-		if (in == NULL) {
-			QAT_LOG(ERR, "Failed to alloc memory");
-			return -ENOMEM;
-		}
-
-		rte_memzero_explicit(in, ICP_QAT_HW_GALOIS_H_SZ);
-		if (AES_set_encrypt_key(auth_key, auth_keylen << 3,
-			&enc_key) != 0) {
-			return -EFAULT;
-		}
-		AES_encrypt(in, out, &enc_key);
-		*p_state_len = ICP_QAT_HW_GALOIS_H_SZ +
-				ICP_QAT_HW_GALOIS_LEN_A_SZ +
-				ICP_QAT_HW_GALOIS_E_CTR0_SZ;
-		rte_free_sensitive(in);
-		return 0;
-	}
-
-	block_size = qat_hash_get_block_size(hash_alg);
-	if (block_size < 0)
-		return block_size;
-	/* init ipad and opad from key and xor with fixed values */
-	memset(ipad, 0, block_size);
-	memset(opad, 0, block_size);
-
-	if (auth_keylen > (unsigned int)block_size) {
-		QAT_LOG(ERR, "invalid keylen %u", auth_keylen);
-		return -EFAULT;
-	}
-
-	RTE_VERIFY(auth_keylen <= sizeof(ipad));
-	RTE_VERIFY(auth_keylen <= sizeof(opad));
-
-	rte_memcpy(ipad, auth_key, auth_keylen);
-	rte_memcpy(opad, auth_key, auth_keylen);
-
-	for (i = 0; i < block_size; i++) {
-		uint8_t *ipad_ptr = ipad + i;
-		uint8_t *opad_ptr = opad + i;
-		*ipad_ptr ^= HMAC_IPAD_VALUE;
-		*opad_ptr ^= HMAC_OPAD_VALUE;
-	}
-
-	/* do partial hash of ipad and copy to state1 */
-	if (partial_hash_compute(hash_alg, ipad, p_state_buf)) {
-		rte_memzero_explicit(ipad, block_size);
-		rte_memzero_explicit(opad, block_size);
-		QAT_LOG(ERR, "ipad precompute failed");
-		return -EFAULT;
-	}
-
-	/*
-	 * State len is a multiple of 8, so may be larger than the digest.
-	 * Put the partial hash of opad state_len bytes after state1
-	 */
-	*p_state_len = qat_hash_get_state1_size(hash_alg);
-	if (partial_hash_compute(hash_alg, opad, p_state_buf + *p_state_len)) {
-		rte_memzero_explicit(ipad, block_size);
-		rte_memzero_explicit(opad, block_size);
-		QAT_LOG(ERR, "opad precompute failed");
-		return -EFAULT;
-	}
-
-	/*  don't leave data lying around */
-	rte_memzero_explicit(ipad, block_size);
-	rte_memzero_explicit(opad, block_size);
-out:
-	return 0;
-}
-
-#else
+/* HMAC precomputes always use IPsec MB (OpenSSL 3.0 removed SHA*_Transform APIs)
+ * OpenSSL is only used for DOCSIS BPI cipher fallback (via EVP API)
+ */
 
 static int aes_ipsecmb_job(uint8_t *in, uint8_t *out, IMB_MGR *m,
 		const uint8_t *key, uint16_t auth_keylen)
@@ -1992,7 +1673,6 @@ static int qat_sym_do_precomputes_ipsec_mb(enum icp_qat_hw_auth_algo hash_alg,
 	free_mb_mgr(m);
 	return ret;
 }
-#endif
 
 static void
 qat_sym_session_init_common_hdr(struct qat_sym_session *session)
@@ -2482,16 +2162,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 			break;
 		}
 		/* SHA-1 HMAC */
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA1, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_SHA1,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 
 		if (ret) {
 			QAT_LOG(ERR, "(SHA)precompute failed");
@@ -2509,15 +2182,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 			break;
 		}
 		/* SHA-224 HMAC */
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA224, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_SHA224,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(SHA)precompute failed");
 			return -EFAULT;
@@ -2534,15 +2201,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 			break;
 		}
 		/* SHA-256 HMAC */
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA256, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_SHA256,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(SHA)precompute failed");
 			return -EFAULT;
@@ -2559,15 +2220,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 			break;
 		}
 		/* SHA-384 HMAC */
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA384, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_SHA384,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(SHA)precompute failed");
 			return -EFAULT;
@@ -2584,15 +2239,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 			break;
 		}
 		/* SHA-512 HMAC */
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA512, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_SHA512,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(SHA)precompute failed");
 			return -EFAULT;
@@ -2628,16 +2277,10 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 
 		if (cdesc->aes_cmac)
 			memset(cdesc->cd_cur_ptr, 0, state1_size);
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC,
-			authkey, authkeylen, cdesc->cd_cur_ptr + state1_size,
-			&state2_size, cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(
 			ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC,
 			authkey, authkeylen, cdesc->cd_cur_ptr + state1_size,
 			&state2_size, cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(%s)precompute failed",
 				cdesc->aes_cmac ? "CMAC" : "XCBC");
@@ -2654,15 +2297,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 	case ICP_QAT_HW_AUTH_ALGO_GALOIS_64:
 		cdesc->qat_proto_flag = QAT_CRYPTO_PROTO_FLAG_GCM;
 		state1_size = ICP_QAT_HW_GALOIS_128_STATE1_SZ;
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(cdesc->qat_hash_alg, authkey,
-			authkeylen, cdesc->cd_cur_ptr + state1_size,
-			&state2_size, cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(cdesc->qat_hash_alg, authkey,
 			authkeylen, cdesc->cd_cur_ptr + state1_size,
 			&state2_size, cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(GCM)precompute failed");
 			return -EFAULT;
@@ -2734,15 +2371,9 @@ static int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
 		auth_param->hash_state_sz = ICP_QAT_HW_ZUC_256_IV_SZ >> 3;
 		break;
 	case ICP_QAT_HW_AUTH_ALGO_MD5:
-#ifdef RTE_QAT_OPENSSL
-		ret = qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_MD5, authkey,
-			authkeylen, cdesc->cd_cur_ptr, &state1_size,
-			cdesc->aes_cmac);
-#else
 		ret = qat_sym_do_precomputes_ipsec_mb(ICP_QAT_HW_AUTH_ALGO_MD5,
 			authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size,
 			cdesc->aes_cmac);
-#endif
 		if (ret) {
 			QAT_LOG(ERR, "(MD5)precompute failed");
 			return -EFAULT;
@@ -3197,6 +2828,11 @@ qat_security_session_create(void *dev,
 			sess_private_data, SECURITY_GET_SESS_PRIV_IOVA(sess));
 	if (ret != 0) {
 		QAT_LOG(ERR, "Failed to configure session parameters");
+#ifdef RTE_QAT_OPENSSL
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L)
+		ossl_legacy_provider_unload();
+#endif
+#endif
 		return ret;
 	}
 
-- 
2.43.0


^ permalink raw reply related

* RE: [PATCH 0/6] ip_frag: fix reassembly defects and add test
From: Morten Brørup @ 2026-06-19 13:24 UTC (permalink / raw)
  To: Stephen Hemminger, dev
In-Reply-To: <20260616210656.464062-1-stephen@networkplumber.org>

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, 16 June 2026 23.06
> 
> The IP reassembly library tracks only a running byte total and reserved
> slots for the first and last fragments, with no coverage map. As a
> result
> it mishandles duplicate, overlapping, oversized, and misheadered
> fragments, and the IPv4 key is missing a field RFC 791 requires. There
> was also no functional test to catch any of it.
> 
> These came out of reviewing a duplicate-fragment report on the list.
> 
> Patches 1 and 2 are interdependent: the overlap discard relies on the
> duplicate handling so an exact duplicate is dropped on its own rather
> than discarding the whole datagram. The rest are independent.
> 
> Patch 6 adds a functional test modeled on the Linux selftest
> ip_defrag.c.
> It passes on this series; with any single fix reverted the matching
> case
> fails.

With patch 2/6 fixed,
Series-acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply

* RE: [PATCH 2/6] ip_frag: discard datagrams with overlapping fragments
From: Morten Brørup @ 2026-06-19 13:12 UTC (permalink / raw)
  To: Stephen Hemminger, dev; +Cc: stable, Konstantin Ananyev
In-Reply-To: <20260616210656.464062-3-stephen@networkplumber.org>

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Tuesday, 16 June 2026 23.06
> 
> Existing code does not handle overlapping fragments.
> 
> RFC 8200 (IPv6) requires that on overlap all reassembly is abandoned
> and all received fragments are dropped. RFC 791 (IPv4) originally called
> for trimming and rewriting, but Linux discards for IPv4 as well, since
> overlap has no legitimate use and is a known attack vector.
> 
> Depends on the duplicate-tolerance change so that an exact duplicate is
> dropped on its own rather than discarding the whole datagram.
> 
> Fixes: cc8f4d020c0b ("examples/ip_reassembly: initial import")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/ip_frag/ip_frag_internal.c | 34 ++++++++++++++++++++++++++--------
>  1 file changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/ip_frag/ip_frag_internal.c
> b/lib/ip_frag/ip_frag_internal.c
> index 9a03ef995a..2505314a29 100644
> --- a/lib/ip_frag/ip_frag_internal.c
> +++ b/lib/ip_frag/ip_frag_internal.c
> @@ -92,16 +92,34 @@ ip_frag_process(struct ip_frag_pkt *fp, struct
> rte_ip_frag_death_row *dr,
>  	uint32_t i, idx;
> 
>  	/*
> -	 * Discard an exact duplicate fragment. If a previously stored
> fragment
> -	 * already covers the same offset and length, this fragment
> carries no
> -	 * new data. Reassembly is tolerant of duplicates (RFC 791), so
> drop
> -	 * only this mbuf and keep the reassembly entry intact rather
> than
> -	 * treating it as an error. Fragments overlapping an existing one
> with
> -	 * different bounds are not handled here.
> +	 * Scan the fragments already collected for this datagram before
> +	 * storing the new one. The stored set is kept free of duplicates
> and
> +	 * overlaps, so a single pass is sufficient.
>  	 */
>  	for (i = 0; i != fp->last_idx; i++) {
> -		if (fp->frags[i].mb != NULL && fp->frags[i].ofs == ofs &&
> -				fp->frags[i].len == len) {
> +		if (fp->frags[i].mb == NULL)
> +			continue;
> +
> +		/*
> +		 * Exact duplicate: carries no new data. Reassembly
> tolerates
> +		 * duplicates (RFC 791), so drop only this mbuf and keep
> the
> +		 * entry.
> +		 */
> +		if (fp->frags[i].ofs == ofs && fp->frags[i].len == len) {
> +			IP_FRAG_MBUF2DR(dr, mb);
> +			return NULL;
> +		}
> +
> +		/*
> +		 * Overlap with an existing fragment. Per RFC 8200 section
> 4.5
> +		 * (and RFC 5722) the datagram must be discarded; the same
> is
> +		 * applied to IPv4. Free all collected fragments, drop this
> one,
> +		 * and invalidate the entry.
> +		 */
> +		if (ofs < fp->frags[i].ofs + fp->frags[i].len &&
> +				fp->frags[i].ofs < ofs + len) {

This only catches fragments that are smaller than existing fragments, i.e. fit within one of the existing fragments.
It should be:
if ((ofs >= fp->frags[i].ofs &&
		ofs < fp->frags[i].ofs + fp->frags[i].len) ||
		(ofs + len >= fp->frags[i].ofs &&
		ofs + len < fp->frags[i].ofs + fp->frags[i].len)) {

> +			ip_frag_free(fp, dr);
> +			ip_frag_key_invalidate(&fp->key);
>  			IP_FRAG_MBUF2DR(dr, mb);
>  			return NULL;
>  		}
> --
> 2.53.0


^ permalink raw reply

* [PATCH 18/18] dma/dpaa: add SG data validation and ERR050757 fix
From: Hemant Agrawal @ 2026-06-19 12:29 UTC (permalink / raw)
  To: stephen, david.marchand, dev; +Cc: Gagandeep Singh
In-Reply-To: <20260619122922.3774666-1-hemant.agrawal@nxp.com>

From: Gagandeep Singh <g.singh@nxp.com>

Add scatter-gather (SG) support to the QDMA driver, enabled by default
via the s_sg_enable flag. Add optional data validation mode controlled
by the s_data_validation flag for debugging transfer correctness.

Add a workaround for hardware errata ERR050757: when
RTE_DMA_DPAA_ERRATA_ERR050757 is defined, configure the source frame
descriptor with stride settings (sss/ssd = FSL_QDMA_CMD_SS_ERR050757_LEN)
to force PCI read transactions to stay within the errata-safe length
limit, preventing data corruption on affected silicon.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/dma/dpaa/dpaa_qdma.c | 99 +++++++++++++++++++++++++++---------
 1 file changed, 75 insertions(+), 24 deletions(-)

diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index bf2a373d70..ca615f8d8b 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -9,9 +9,14 @@
 #include "dpaa_qdma.h"
 #include "dpaa_qdma_logs.h"
 
+static int s_data_validation;
+static int s_hw_err_check;
+static int s_sg_enable = 1;
 static uint32_t s_sg_max_entry_sz = 2000;
-static bool s_hw_err_check;
 
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+static int s_pci_read = 1;
+#endif
 #define DPAA_DMA_ERROR_CHECK "dpaa_dma_err_check"
 
 static inline void
@@ -112,7 +117,8 @@ dma_pool_alloc(char *nm, int size, int aligned, dma_addr_t *phy_addr)
 	if (!virt_addr)
 		return NULL;
 
-	*phy_addr = rte_mem_virt2iova(virt_addr);
+	if (phy_addr)
+		*phy_addr = rte_mem_virt2iova(virt_addr);
 
 	return virt_addr;
 }
@@ -392,6 +398,8 @@ fsl_qdma_data_validation(struct fsl_qdma_desc *desc[],
 	char err_msg[512];
 	int offset;
 
+	if (likely(!s_data_validation))
+		return;
 
 	offset = sprintf(err_msg, "Fatal TC%d/queue%d: ",
 		fsl_queue->block_id,
@@ -716,19 +724,21 @@ fsl_qdma_enqueue_desc_single(struct fsl_qdma_queue *fsl_queue,
 	ft = fsl_queue->ft[fsl_queue->ci];
 
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050757
-	sdf = &ft->df.sdf;
-	sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
+	if (s_pci_read) {
+		sdf = &ft->df.sdf;
+		sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050265
-	sdf->prefetch = 1;
+		sdf->prefetch = 1;
 #endif
-	if (len > FSL_QDMA_CMD_SS_ERR050757_LEN) {
-		sdf->ssen = 1;
-		sdf->sss = FSL_QDMA_CMD_SS_ERR050757_LEN;
-		sdf->ssd = FSL_QDMA_CMD_SS_ERR050757_LEN;
-	} else {
-		sdf->ssen = 0;
-		sdf->sss = 0;
-		sdf->ssd = 0;
+		if (len > FSL_QDMA_CMD_SS_ERR050757_LEN) {
+			sdf->ssen = 1;
+			sdf->sss = FSL_QDMA_CMD_SS_ERR050757_LEN;
+			sdf->ssd = FSL_QDMA_CMD_SS_ERR050757_LEN;
+		} else {
+			sdf->ssen = 0;
+			sdf->sss = 0;
+			sdf->ssd = 0;
+		}
 	}
 #endif
 	csgf_src = &ft->desc_sbuf;
@@ -837,19 +847,21 @@ fsl_qdma_enqueue_desc_sg(struct fsl_qdma_queue *fsl_queue)
 	csgf_src->length = total_len;
 	csgf_dest->length = total_len;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050757
-	sdf = &ft->df.sdf;
-	sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
+	if (s_pci_read) {
+		sdf = &ft->df.sdf;
+		sdf->srttype = FSL_QDMA_CMD_RWTTYPE;
 #ifdef RTE_DMA_DPAA_ERRATA_ERR050265
-	sdf->prefetch = 1;
+		sdf->prefetch = 1;
 #endif
-	if (total_len > FSL_QDMA_CMD_SS_ERR050757_LEN) {
-		sdf->ssen = 1;
-		sdf->sss = FSL_QDMA_CMD_SS_ERR050757_LEN;
-		sdf->ssd = FSL_QDMA_CMD_SS_ERR050757_LEN;
-	} else {
-		sdf->ssen = 0;
-		sdf->sss = 0;
-		sdf->ssd = 0;
+		if (total_len > FSL_QDMA_CMD_SS_ERR050757_LEN) {
+			sdf->ssen = 1;
+			sdf->sss = FSL_QDMA_CMD_SS_ERR050757_LEN;
+			sdf->ssd = FSL_QDMA_CMD_SS_ERR050757_LEN;
+		} else {
+			sdf->ssen = 0;
+			sdf->sss = 0;
+			sdf->ssd = 0;
+		}
 	}
 #endif
 	ret = fsl_qdma_enqueue_desc_to_ring(fsl_queue, num);
@@ -888,6 +900,25 @@ fsl_qdma_enqueue_desc(struct fsl_qdma_queue *fsl_queue)
 			fsl_queue->pending_num = 0;
 		}
 		return ret;
+	} else if (!s_sg_enable) {
+		while (fsl_queue->pending_num > 0) {
+			ret = fsl_qdma_enqueue_desc_single(fsl_queue,
+				fsl_queue->pending_desc[start].dst,
+				fsl_queue->pending_desc[start].src,
+				fsl_queue->pending_desc[start].len);
+			if (!ret) {
+				start = (start + 1) &
+					(fsl_queue->pending_max - 1);
+				fsl_queue->pending_start = start;
+				fsl_queue->pending_num--;
+			} else {
+				DPAA_QDMA_ERR("Eq pending desc failed(%d)",
+					ret);
+				return -EIO;
+			}
+		}
+
+		return 0;
 	}
 
 	return fsl_qdma_enqueue_desc_sg(fsl_queue);
@@ -1344,6 +1375,26 @@ dpaa_qdma_init(struct rte_dma_dev *dmadev)
 		DPAA_QDMA_INFO("Enable DMA error checks");
 	}
 
+	if (getenv("DPAA_QDMA_DATA_VALIDATION"))
+		s_data_validation = 1;
+
+	if (getenv("DPAA_QDMA_HW_ERR_CHECK"))
+		s_hw_err_check = 1;
+
+	penv = getenv("DPAA_QDMA_SG_ENABLE");
+	if (penv)
+		s_sg_enable = atoi(penv);
+
+	penv = getenv("DPAA_QDMA_SG_MAX_ENTRY_SIZE");
+	if (penv)
+		s_sg_max_entry_sz = atoi(penv);
+
+#ifdef RTE_DMA_DPAA_ERRATA_ERR050757
+	penv = getenv("DPAA_QDMA_PCI_READ");
+	if (penv)
+		s_pci_read = atoi(penv);
+#endif
+
 	fsl_qdma->n_queues = QDMA_QUEUES * QDMA_BLOCKS;
 	fsl_qdma->num_blocks = QDMA_BLOCKS;
 	fsl_qdma->block_offset = QDMA_BLOCK_OFFSET;
-- 
2.25.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox