DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] common/cnxk: fix inline dev null dereference
From: Aarnav JP @ 2026-06-23  8:54 UTC (permalink / raw)
  To: dev, Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori,
	Satha Rao, Harman Kalra, Rakesh Kudurumalla
  Cc: jerinj, rbhansali, Aarnav JP, stable

inl_dev is initialized to NULL and only assigned within the
if (idev && idev->nix_inl_dev) block.
Move inl_dev->res_addr_offset and inl_dev->cpt_cq_ena
accesses inside this null-guarded block in
nix_inl_inb_ipsec_sa_tbl_setup() and nix_inl_reass_inb_sa_tbl_setup()
to avoid dereferencing a null pointer.

Fixes: 3fdf3e53f3c4 ("common/cnxk: enable CPT CQ for inline IPsec inbound")
Cc: stable@dpdk.org

Signed-off-by: Aarnav JP <ajp@marvell.com>
---
 drivers/common/cnxk/roc_nix_inl.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c
index db101e71a5..935dd37778 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -409,7 +409,7 @@ nix_inl_inb_ipsec_sa_tbl_setup(struct roc_nix *roc_nix)
 	struct nix_inl_dev *inl_dev = NULL;
 	uint64_t max_sa, i, sa_pow2_sz;
 	uint64_t sa_idx_w, lenm1_max;
-	uint64_t res_addr_offset;
+	uint64_t res_addr_offset = 0;
 	uint8_t profile_id = 0;
 	struct mbox *mbox;
 	size_t inb_sa_sz;
@@ -503,13 +503,12 @@ nix_inl_inb_ipsec_sa_tbl_setup(struct roc_nix *roc_nix)
 				def_cptq = 0;
 			else
 				def_cptq = inl_dev->nix_inb_qids[inl_dev->inb_cpt_lf_id];
+			res_addr_offset = (uint64_t)(inl_dev->res_addr_offset & 0xFF) << 48;
+			if (res_addr_offset)
+				res_addr_offset |= (1UL << 56);
+			cpt_cq_ena = (uint64_t)inl_dev->cpt_cq_ena << 63;
 		}
 
-		res_addr_offset = (uint64_t)(inl_dev->res_addr_offset & 0xFF) << 48;
-		if (res_addr_offset)
-			res_addr_offset |= (1UL << 56);
-
-		cpt_cq_ena = (uint64_t)inl_dev->cpt_cq_ena << 63;
 		lf_cfg->enable = 1;
 		lf_cfg->profile_id = profile_id; /* IPsec profile is 0th one */
 		lf_cfg->rx_inline_sa_base = (uintptr_t)nix->inb_sa_base[profile_id] | cpt_cq_ena;
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v5] graph: add optional profiling stats
From: saeed bishara @ 2026-06-23  8:33 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Jerin Jacob, dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram,
	Zhirun Yan
In-Reply-To: <CALBAE1OZDXfH1FoVLJ=vXVqr=9hYPgqeCzN8h278wN8FbU+XaQ@mail.gmail.com>

> > > > +               /** Fast path area cache line 3. */
> > > > +#ifdef RTE_GRAPH_PROFILE
> > > > +               struct {
> > > > +                       uint64_t calls;     /**< Calls processing
> > > resp. 0 or 1 objects. */
> > > > +                       uint64_t cycles;    /**< Cycles spent
> > > processing resp. 0 or 1 objects. */
> > > > +               } usage_stats[2];       /**< Usage when this node
> > > processed 0 or 1 objects. */
> > > > +               uint64_t full_burst_calls;  /**< Calls processing a
> > > full burst of objects. */
> > > > +               uint64_t full_burst_cycles; /**< Cycles spent
> > > processing a full burst of objects. */
> > > > +               uint64_t half_burst_calls;  /**< Calls processing a
> > > half burst of objects. */
> > > > +               uint64_t half_burst_cycles; /**< Cycles spent
> > > processing a half burst of objects. */
> > > > +               /** Fast path area cache line 4. */
> > > > +#endif
> > >
> > > Is it an ABI breakage?
Can you consider one array for all cases?
also, instead of adding cacheline for this profiling data, can we
share with line 1 that used solely for xstats?

^ permalink raw reply

* Re: [PATCH] app/dma_perf: skip case if worker maps to main lcore
From: Bruce Richardson @ 2026-06-23  8:07 UTC (permalink / raw)
  To: Rupesh Chiluka; +Cc: Cheng Jiang, Chengwen Feng, dev, gakhil, anoobj, ktejasree
In-Reply-To: <20260623045841.2602104-1-rchiluka@marvell.com>

On Tue, Jun 23, 2026 at 10:28:41AM +0530, Rupesh Chiluka wrote:
> Refuse to run DMA/CPU mem-copy cases when any worker is bound to the
> EAL main lcore.
> 

Can you explain a bit more why?

> Signed-off-by: Rupesh Chiluka <rchiluka@marvell.com>
> ---
>  app/test-dma-perf/main.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c
> index 4249dcfd3d..b6aa5b8401 100644
> --- a/app/test-dma-perf/main.c
> +++ b/app/test-dma-perf/main.c
> @@ -109,6 +109,7 @@ run_test_case(struct test_configure *case_cfg)
>  static void
>  run_test(uint32_t case_id, struct test_configure *case_cfg)
>  {
> +	uint32_t main_lcore = rte_get_main_lcore();
>  	uint32_t nb_lcores = rte_lcore_count();
>  	struct test_configure_entry *mem_size = &case_cfg->mem_size;
>  	struct test_configure_entry *buf_size = &case_cfg->buf_size;
> @@ -122,6 +123,14 @@ run_test(uint32_t case_id, struct test_configure *case_cfg)
>  		return;
>  	}
>  
> +	for (uint32_t i = 0; i < case_cfg->num_worker; i++) {
> +		if (case_cfg->dma_config[i].lcore_dma_map.lcore == main_lcore) {
> +			printf("Case %u: worker %u cannot run on the EAL main lcore (%u).\n",
> +			       case_id, i, main_lcore);
> +			return;
> +		}
> +	}
> +
>  	printf("Number of used lcores: %u.\n", nb_lcores);
>  
>  	if (mem_size->incr != 0)
> -- 
> 2.48.1
> 

^ permalink raw reply

* Re: [PATCH] common/mlx5: fix high SMMU TLB miss with mempool alignment
From: yangxingui @ 2026-06-23  7:11 UTC (permalink / raw)
  To: dev
  Cc: stephen, david.marchand, thomas, dsosnowski, viacheslavo, bingz,
	orika, suanmingm, matan, dmitry.kozliuk, fengchengwen,
	yangshuaisong, lihuisong, liuyonglong, kangfenglong
In-Reply-To: <20260612071434.2722918-1-yangxingui@huawei.com>

Friendly ping...

On 2026/6/12 15:14, Xingui Yang wrote:
> From: Shuaisong Yang <yangshuaisong@h-partners.com>
> 
> On Kunpeng SoC with mlx CX7, dpdk-l3fwd with intra-NUMA core pinning
> under SMMU nonstrict/strict mode shows about 30% performance degradation
> compared to cross-NUMA pinning. With SMMU disabled or passthrough mode,
> intra-NUMA performs as expected (slightly better than cross-NUMA).
> 
> CX7 in NUMA1
> NUMA node0 CPU(s):    0-39
> NUMA node1 CPU(s):    40-79
> 
> intra-NUMA:
> dpdk-l3fwd -l 40-55 -n 4 -a 0000:17:00.1,mprq_en=1 -- -p 0x1 -P \
>    --config='(0,0,40),(0,1,41),(0,2,42),(0,3,43),(0,4,44),\
>              (0,5,45),(0,6,46),(0,7,47),(0,8,48),(0,9,49),\
>              (0,10,50),(0,11,51),(0,12,52),(0,13,53),\
>              (0,14,54),(0,15,55)' \
>    --rx-queue-size=4096 --tx-queue-size=4096 --rx-burst=64
> 
> cross-NUMA:
> dpdk-l3fwd -l 11-26 -n 4 -a 0000:17:00.1,mprq_en=1 -- -p 0x1 -P \
>    --config='(0,0,11),(0,1,12),(0,2,13),(0,3,14),(0,4,15),\
>              (0,5,16),(0,6,17),(0,7,18),(0,8,19),(0,9,20),\
>              (0,10,21),(0,11,22),(0,12,23),(0,13,24),\
>              (0,14,25),(0,15,26)' \
>    --rx-queue-size=4096 --tx-queue-size=4096 --rx-burst=64
> 
> The root cause is that under SMMU enabled mode, the mempool allocated
> for intra-NUMA pinning is aligned to system page size instead of
> hugepage size, while cross-NUMA pinning correctly uses hugepage size
> alignment. This causes high TLB miss rates under SMMU.
> 
> Align all memory ranges to hugepage boundaries during mempool
> registration to ensure hugepage_sz alignment, thereby reducing TLB
> misses and fixing the intra-NUMA performance degradation.
> 
> Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Shuaisong Yang <yangshuaisong@h-partners.com>
> Signed-off-by: Xingui Yang <yangxingui@huawei.com>
> ---
>   .mailmap                             |  1 +
>   drivers/common/mlx5/mlx5_common_mr.c | 53 +++++++++++++++++++---------
>   2 files changed, 37 insertions(+), 17 deletions(-)
> 
> diff --git a/.mailmap b/.mailmap
> index 4001e5fb0e..e13e88db1b 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1979,3 +1979,4 @@ Zongyu Wu <wuzongyu1@huawei.com>
>   Zorik Machulsky <zorik@amazon.com>
>   Zyta Szpak <zyta@marvell.com> <zr@semihalf.com>
>   Zyta Szpak <zyta@marvell.com> <zyta.szpak@semihalf.com>
> +Shuaisong Yang <yangshuaisong@h-partners.com>
> diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
> index aa2d5e88a4..aee037abb4 100644
> --- a/drivers/common/mlx5/mlx5_common_mr.c
> +++ b/drivers/common/mlx5/mlx5_common_mr.c
> @@ -1524,7 +1524,9 @@ mlx5_get_mempool_ranges(struct rte_mempool *mp, bool is_extmem,
>    * @param[in] is_extmem
>    *   Whether the pool is contains only external pinned buffers.
>    * @param[out] out
> - *   Receives memory ranges to register, aligned to the system page size.
> + *   Receives memory ranges to register. Aligned to the hugepage size
> + *   if all ranges reside on hugepages of the same size,
> + *   otherwise aligned to the system page size.
>    *   The caller must release them with free().
>    * @param[out] out_n
>    *   Receives the number of @p out items.
> @@ -1541,7 +1543,9 @@ mlx5_mempool_reg_analyze(struct rte_mempool *mp, bool is_extmem,
>   {
>   	struct mlx5_range *ranges = NULL;
>   	unsigned int i, ranges_n = 0;
> +	bool same_hugepage_sz = true;
>   	struct rte_memseg_list *msl;
> +	uint64_t hugepage_sz = 0;
>   
>   	if (mlx5_get_mempool_ranges(mp, is_extmem, &ranges, &ranges_n) < 0) {
>   		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
> @@ -1552,28 +1556,43 @@ mlx5_mempool_reg_analyze(struct rte_mempool *mp, bool is_extmem,
>   	*share_hugepage = false;
>   	msl = rte_mem_virt2memseg_list((void *)ranges[0].start);
>   	if (msl != NULL) {
> -		uint64_t hugepage_sz = 0;
> +		hugepage_sz = msl->page_sz;
>   
>   		/* Check that all ranges are on pages of the same size. */
>   		for (i = 0; i < ranges_n; i++) {
> -			if (hugepage_sz != 0 && hugepage_sz != msl->page_sz)
> +			struct rte_memseg_list *range_msl;
> +			range_msl = rte_mem_virt2memseg_list(
> +					(void *)ranges[i].start);
> +			if (range_msl == NULL ||
> +			    range_msl->page_sz != hugepage_sz) {
> +				same_hugepage_sz = false;
>   				break;
> -			hugepage_sz = msl->page_sz;
> +			}
>   		}
> -		if (i == ranges_n) {
> -			/*
> -			 * If the entire pool is within one hugepage,
> -			 * combine all ranges into one of the hugepage size.
> -			 */
> -			uintptr_t reg_start = ranges[0].start;
> -			uintptr_t reg_end = ranges[ranges_n - 1].end;
> -			uintptr_t hugepage_start =
> -				RTE_ALIGN_FLOOR(reg_start, hugepage_sz);
> -			uintptr_t hugepage_end = hugepage_start + hugepage_sz;
> -			if (reg_end < hugepage_end) {
> -				ranges[0].start = hugepage_start;
> +	}
> +	if (same_hugepage_sz && hugepage_sz > 0) {
> +		unsigned int orig_ranges_n = ranges_n;
> +
> +		for (i = 0; i < ranges_n; i++) {
> +			ranges[i].start = RTE_ALIGN_FLOOR(ranges[i].start,
> +							  hugepage_sz);
> +			ranges[i].end = RTE_ALIGN_CEIL(ranges[i].end,
> +							hugepage_sz);
> +		}
> +		ranges_n = 1;
> +		for (i = 1; i < orig_ranges_n; i++) {
> +			if (ranges[ranges_n - 1].end >= ranges[i].start)
> +				ranges[ranges_n - 1].end =
> +					RTE_MAX(ranges[ranges_n - 1].end,
> +						ranges[i].end);
> +			else
> +				ranges[ranges_n++] = ranges[i];
> +		}
> +		if (ranges_n == 1) {
> +			uintptr_t hugepage_end = ranges[0].start + hugepage_sz;
> +
> +			if (ranges[0].end <= hugepage_end) {
>   				ranges[0].end = hugepage_end;
> -				ranges_n = 1;
>   				*share_hugepage = true;
>   			}
>   		}
> 

^ permalink raw reply

* RE: [PATCH v5] graph: add optional profiling stats
From: Morten Brørup @ 2026-06-23  7:10 UTC (permalink / raw)
  To: Jerin Jacob, thomas, david.marchand
  Cc: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
In-Reply-To: <CALBAE1OZDXfH1FoVLJ=vXVqr=9hYPgqeCzN8h278wN8FbU+XaQ@mail.gmail.com>

+Thomas Monjalon & +David Marchand, as intended by Jerin

> From: Jerin Jacob [mailto:jerinjacobk@gmail.com]
> Sent: Tuesday, 23 June 2026 08.57
> 
> On Tue, Jun 23, 2026 at 12:15 PM Morten Brørup
> <mb@smartsharesystems.com> wrote:
> >
> > > From: Jerin Jacob [mailto:jerinjacobk@gmail.com]
> > > Sent: Tuesday, 23 June 2026 07.13
> > >
> > > On Mon, Jun 22, 2026 at 12:11 AM Morten Brørup
> > > <mb@smartsharesystems.com> wrote:
> > > >
> > > > Added graph node profiling stats, build time configurable by
> enabling
> > > > RTE_GRAPH_PROFILE in rte_config.h.
> > > >
> > > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > >
> > > Please update app/test/test_graph.c to validate this featue.
> >
> > Ack.
> >
> > > > @@ -92,7 +92,62 @@ rte_graph_obj_dump(FILE *f, struct rte_graph
> *g,
> > > bool all)
> > > >                         fprintf(f, "       total_sched_fail=%"
> PRId64
> > > "\n",
> > > >                                 n->dispatch.total_sched_fail);
> > > >                 }
> > > > -               fprintf(f, "       total_calls=%" PRId64 "\n", n-
> > > >total_calls);
> > > > +               fprintf(f, "       total_calls=%" PRIu64 "\n", n-
> > > >total_calls);
> > > > +               if (rte_graph_has_stats_feature()) {
> > > > +                       fprintf(f, "       total_cycles=%" PRIu64
> ",
> > > avg cycles/call=%.1f\n",
> > > > +                                       n->total_cycles,
> > > > +                                       n->total_calls == 0 ?
> > > (double)0 :
> > > > +                                       (double)n->total_cycles /
> > > (double)n->total_calls);
> > > > +               }
> > > > +#ifdef RTE_GRAPH_PROFILE
> > >
> > >
> > > Please introduce rte_graph_has_profile_featue() similar to
> > > rte_graph_has_stats_feature() to reduce if def clutter as possible.
> >
> > Disagree, see below.
> >
> > >
> > > > +               uint64_t calls = n->usage_stats[0].calls;
> > > > +               fprintf(f, "       objs[0]\n");
> > > > +               fprintf(f, "         calls=%" PRIu64 ", cycles=%"
> > > PRIu64 ", avg cycles/call=%.1f\n",
> > > > +                               calls,
> > >
> > > >
> > > > diff --git a/lib/graph/rte_graph_worker_common.h
> > > b/lib/graph/rte_graph_worker_common.h
> > > > index 4ab53a533e..0d8039575d 100644
> > > > --- a/lib/graph/rte_graph_worker_common.h
> > > > +++ b/lib/graph/rte_graph_worker_common.h
> > > > @@ -144,12 +144,26 @@ struct __rte_cache_aligned rte_node {
> > > >                         rte_node_process_t process; /**< Process
> > > function. */
> > > >                         uint64_t process_u64;
> > > >                 };
> > > > +               /** Fast path area cache line 3. */
> > > > +#ifdef RTE_GRAPH_PROFILE
> > > > +               struct {
> > > > +                       uint64_t calls;     /**< Calls processing
> > > resp. 0 or 1 objects. */
> > > > +                       uint64_t cycles;    /**< Cycles spent
> > > processing resp. 0 or 1 objects. */
> > > > +               } usage_stats[2];       /**< Usage when this node
> > > processed 0 or 1 objects. */
> > > > +               uint64_t full_burst_calls;  /**< Calls processing
> a
> > > full burst of objects. */
> > > > +               uint64_t full_burst_cycles; /**< Cycles spent
> > > processing a full burst of objects. */
> > > > +               uint64_t half_burst_calls;  /**< Calls processing
> a
> > > half burst of objects. */
> > > > +               uint64_t half_burst_cycles; /**< Cycles spent
> > > processing a half burst of objects. */
> > > > +               /** Fast path area cache line 4. */
> > > > +#endif
> > >
> > > Is it an ABI breakage?
> >
> > No. The modifications are enclosed in #ifdef, and disabled by
> default.
> > It is generally required that when rte_config.h options are modified,
> both the application and DPDK itself are built together; and then
> API/ABI breakage becomes irrelevant.
> 
> 
> Yes. I don't know the current policy for this. Adding @Thomas Monjalon
> @David Marchand
> 
> 
> >
> > IMO, we should keep our structures lean in release builds. This means
> that fields used for detailed profiling, advanced debugging, cookie
> validation, etc. should use the #ifdef pattern rather than the
> rte_lib_has_some_feature() pattern; especially if they affect the size
> of a structure. And when those fields are not present, any code
> accessing them cannot use the rte_lib_has_some_feature() pattern.
> > The mbuf and mempool libraries also use #ifdef pattern for similar
> features.
> 
> Yes for the structure inclusion we can use #ifdef. But inside the
> function we can use rte_lib_has_some_feature() scheme. Reasons are :
> 1)It will remove the ifdef cultter
> 2)Detect the compilation issue even if the feature is disabled. This
> will make sure reduce the build options to enable build sanity
> 3) Compiler is smart enough to understand to disable the block if the
> feature is not enabled.(Just like #ifdef)

I agree with these advantages.
But a function using rte_lib_has_some_feature() cannot access non-existing fields:
https://godbolt.org/z/s3nKx45Ms

So sometimes #ifdef is required in the code too.

> 
> 
> 
> >
> > >
> > > >                 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node
> > > *nodes[]; /**< Next nodes. */
> > > >         };
> > > >  };
> > > >

^ permalink raw reply

* Re: [PATCH v5] graph: add optional profiling stats
From: Jerin Jacob @ 2026-06-23  6:56 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
In-Reply-To: <98CBD80474FA8B44BF855DF32C47DC35F65937@smartserver.smartshare.dk>

On Tue, Jun 23, 2026 at 12:15 PM Morten Brørup <mb@smartsharesystems.com> wrote:
>
> > From: Jerin Jacob [mailto:jerinjacobk@gmail.com]
> > Sent: Tuesday, 23 June 2026 07.13
> >
> > On Mon, Jun 22, 2026 at 12:11 AM Morten Brørup
> > <mb@smartsharesystems.com> wrote:
> > >
> > > Added graph node profiling stats, build time configurable by enabling
> > > RTE_GRAPH_PROFILE in rte_config.h.
> > >
> > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> >
> > Please update app/test/test_graph.c to validate this featue.
>
> Ack.
>
> > > @@ -92,7 +92,62 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g,
> > bool all)
> > >                         fprintf(f, "       total_sched_fail=%" PRId64
> > "\n",
> > >                                 n->dispatch.total_sched_fail);
> > >                 }
> > > -               fprintf(f, "       total_calls=%" PRId64 "\n", n-
> > >total_calls);
> > > +               fprintf(f, "       total_calls=%" PRIu64 "\n", n-
> > >total_calls);
> > > +               if (rte_graph_has_stats_feature()) {
> > > +                       fprintf(f, "       total_cycles=%" PRIu64 ",
> > avg cycles/call=%.1f\n",
> > > +                                       n->total_cycles,
> > > +                                       n->total_calls == 0 ?
> > (double)0 :
> > > +                                       (double)n->total_cycles /
> > (double)n->total_calls);
> > > +               }
> > > +#ifdef RTE_GRAPH_PROFILE
> >
> >
> > Please introduce rte_graph_has_profile_featue() similar to
> > rte_graph_has_stats_feature() to reduce if def clutter as possible.
>
> Disagree, see below.
>
> >
> > > +               uint64_t calls = n->usage_stats[0].calls;
> > > +               fprintf(f, "       objs[0]\n");
> > > +               fprintf(f, "         calls=%" PRIu64 ", cycles=%"
> > PRIu64 ", avg cycles/call=%.1f\n",
> > > +                               calls,
> >
> > >
> > > diff --git a/lib/graph/rte_graph_worker_common.h
> > b/lib/graph/rte_graph_worker_common.h
> > > index 4ab53a533e..0d8039575d 100644
> > > --- a/lib/graph/rte_graph_worker_common.h
> > > +++ b/lib/graph/rte_graph_worker_common.h
> > > @@ -144,12 +144,26 @@ struct __rte_cache_aligned rte_node {
> > >                         rte_node_process_t process; /**< Process
> > function. */
> > >                         uint64_t process_u64;
> > >                 };
> > > +               /** Fast path area cache line 3. */
> > > +#ifdef RTE_GRAPH_PROFILE
> > > +               struct {
> > > +                       uint64_t calls;     /**< Calls processing
> > resp. 0 or 1 objects. */
> > > +                       uint64_t cycles;    /**< Cycles spent
> > processing resp. 0 or 1 objects. */
> > > +               } usage_stats[2];       /**< Usage when this node
> > processed 0 or 1 objects. */
> > > +               uint64_t full_burst_calls;  /**< Calls processing a
> > full burst of objects. */
> > > +               uint64_t full_burst_cycles; /**< Cycles spent
> > processing a full burst of objects. */
> > > +               uint64_t half_burst_calls;  /**< Calls processing a
> > half burst of objects. */
> > > +               uint64_t half_burst_cycles; /**< Cycles spent
> > processing a half burst of objects. */
> > > +               /** Fast path area cache line 4. */
> > > +#endif
> >
> > Is it an ABI breakage?
>
> No. The modifications are enclosed in #ifdef, and disabled by default.
> It is generally required that when rte_config.h options are modified, both the application and DPDK itself are built together; and then API/ABI breakage becomes irrelevant.


Yes. I don't know the current policy for this. Adding @Thomas Monjalon
@David Marchand


>
> IMO, we should keep our structures lean in release builds. This means that fields used for detailed profiling, advanced debugging, cookie validation, etc. should use the #ifdef pattern rather than the rte_lib_has_some_feature() pattern; especially if they affect the size of a structure. And when those fields are not present, any code accessing them cannot use the rte_lib_has_some_feature() pattern.
> The mbuf and mempool libraries also use #ifdef pattern for similar features.

Yes for the structure inclusion we can use #ifdef. But inside the
function we can use rte_lib_has_some_feature() scheme. Reasons are :
1)It will remove the ifdef cultter
2)Detect the compilation issue even if the feature is disabled. This
will make sure reduce the build options to enable build sanity
3) Compiler is smart enough to understand to disable the block if the
feature is not enabled.(Just like #ifdef)



>
> >
> > >                 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node
> > *nodes[]; /**< Next nodes. */
> > >         };
> > >  };
> > >

^ permalink raw reply

* RE: [PATCH v5] graph: add optional profiling stats
From: Morten Brørup @ 2026-06-23  6:45 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
In-Reply-To: <CALBAE1OCbZ9GB1oGYgR5hRtzjRmev2ZFOaSNndf02AZptihOcw@mail.gmail.com>

> From: Jerin Jacob [mailto:jerinjacobk@gmail.com]
> Sent: Tuesday, 23 June 2026 07.13
> 
> On Mon, Jun 22, 2026 at 12:11 AM Morten Brørup
> <mb@smartsharesystems.com> wrote:
> >
> > Added graph node profiling stats, build time configurable by enabling
> > RTE_GRAPH_PROFILE in rte_config.h.
> >
> > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> 
> Please update app/test/test_graph.c to validate this featue.

Ack.

> > @@ -92,7 +92,62 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g,
> bool all)
> >                         fprintf(f, "       total_sched_fail=%" PRId64
> "\n",
> >                                 n->dispatch.total_sched_fail);
> >                 }
> > -               fprintf(f, "       total_calls=%" PRId64 "\n", n-
> >total_calls);
> > +               fprintf(f, "       total_calls=%" PRIu64 "\n", n-
> >total_calls);
> > +               if (rte_graph_has_stats_feature()) {
> > +                       fprintf(f, "       total_cycles=%" PRIu64 ",
> avg cycles/call=%.1f\n",
> > +                                       n->total_cycles,
> > +                                       n->total_calls == 0 ?
> (double)0 :
> > +                                       (double)n->total_cycles /
> (double)n->total_calls);
> > +               }
> > +#ifdef RTE_GRAPH_PROFILE
> 
> 
> Please introduce rte_graph_has_profile_featue() similar to
> rte_graph_has_stats_feature() to reduce if def clutter as possible.

Disagree, see below.

> 
> > +               uint64_t calls = n->usage_stats[0].calls;
> > +               fprintf(f, "       objs[0]\n");
> > +               fprintf(f, "         calls=%" PRIu64 ", cycles=%"
> PRIu64 ", avg cycles/call=%.1f\n",
> > +                               calls,
> 
> >
> > diff --git a/lib/graph/rte_graph_worker_common.h
> b/lib/graph/rte_graph_worker_common.h
> > index 4ab53a533e..0d8039575d 100644
> > --- a/lib/graph/rte_graph_worker_common.h
> > +++ b/lib/graph/rte_graph_worker_common.h
> > @@ -144,12 +144,26 @@ struct __rte_cache_aligned rte_node {
> >                         rte_node_process_t process; /**< Process
> function. */
> >                         uint64_t process_u64;
> >                 };
> > +               /** Fast path area cache line 3. */
> > +#ifdef RTE_GRAPH_PROFILE
> > +               struct {
> > +                       uint64_t calls;     /**< Calls processing
> resp. 0 or 1 objects. */
> > +                       uint64_t cycles;    /**< Cycles spent
> processing resp. 0 or 1 objects. */
> > +               } usage_stats[2];       /**< Usage when this node
> processed 0 or 1 objects. */
> > +               uint64_t full_burst_calls;  /**< Calls processing a
> full burst of objects. */
> > +               uint64_t full_burst_cycles; /**< Cycles spent
> processing a full burst of objects. */
> > +               uint64_t half_burst_calls;  /**< Calls processing a
> half burst of objects. */
> > +               uint64_t half_burst_cycles; /**< Cycles spent
> processing a half burst of objects. */
> > +               /** Fast path area cache line 4. */
> > +#endif
> 
> Is it an ABI breakage?

No. The modifications are enclosed in #ifdef, and disabled by default.
It is generally required that when rte_config.h options are modified, both the application and DPDK itself are built together; and then API/ABI breakage becomes irrelevant.

IMO, we should keep our structures lean in release builds. This means that fields used for detailed profiling, advanced debugging, cookie validation, etc. should use the #ifdef pattern rather than the rte_lib_has_some_feature() pattern; especially if they affect the size of a structure. And when those fields are not present, any code accessing them cannot use the rte_lib_has_some_feature() pattern.
The mbuf and mempool libraries also use #ifdef pattern for similar features.

> 
> >                 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node
> *nodes[]; /**< Next nodes. */
> >         };
> >  };
> >

^ permalink raw reply

* [PATCH v7 4/4] net/zxdh: optimize Tx xmit pkts performance
From: Junlong Wang @ 2026-06-23  6:09 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260623060909.97023-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 21243 bytes --]

Add simple Tx xmit functions (zxdh_xmit_pkts_simple)
for single-segment packet xmit.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c |  15 +-
 drivers/net/zxdh/zxdh_queue.h  |   2 +-
 drivers/net/zxdh/zxdh_rxtx.c   | 389 ++++++++++++++++++++++++++-------
 drivers/net/zxdh/zxdh_rxtx.h   |  13 +-
 4 files changed, 323 insertions(+), 96 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index fe76139f3d..cf0395aee8 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -490,7 +490,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
 		if (!vq)
 			continue;
 		while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
-			rte_pktmbuf_free(buf);
+			rte_pktmbuf_free_seg(buf);
 		PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
 		"rxq", i * 2);
 	}
@@ -499,7 +499,7 @@ zxdh_dev_free_mbufs(struct rte_eth_dev *dev)
 		if (!vq)
 			continue;
 		while ((buf = zxdh_queue_detach_unused(vq)) != NULL)
-			rte_pktmbuf_free(buf);
+			rte_pktmbuf_free_seg(buf);
 		PMD_DRV_LOG(DEBUG, "freeing %s[%d] used and unused buf",
 		"txq", i * 2 + 1);
 	}
@@ -1291,10 +1291,17 @@ static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
 static int32_t
 zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 {
-	eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+	uint64_t tx_offloads = eth_dev->data->dev_conf.txmode.offloads;
+
 	eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
 
-	eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+	if (!(tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)) {
+		eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_simple_prepare;
+		eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_simple;
+	} else {
+		eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+		eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
+	}
 
 	if (eth_dev->data->scattered_rx)
 		eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index b079272162..091d1f25db 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -374,7 +374,7 @@ zxdh_queue_full(const struct zxdh_virtqueue *vq)
 }
 
 static inline void
-zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
+zxdh_queue_store_flags_packed(volatile struct zxdh_vring_packed_desc *dp, uint16_t flags)
 {
 	rte_io_wmb();
 	dp->flags = flags;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index ab0510a753..ffacb5d94f 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -114,6 +114,22 @@
 		RTE_MBUF_F_TX_SEC_OFFLOAD |     \
 		RTE_MBUF_F_TX_UDP_SEG)
 
+#if RTE_CACHE_LINE_SIZE == 128
+#define NEXT_CACHELINE_OFF_16B   8
+#define NEXT_CACHELINE_OFF_8B   16
+#elif RTE_CACHE_LINE_SIZE == 64
+#define NEXT_CACHELINE_OFF_16B   4
+#define NEXT_CACHELINE_OFF_8B    8
+#else
+#define NEXT_CACHELINE_OFF_16B  (RTE_CACHE_LINE_SIZE / 16)
+#define NEXT_CACHELINE_OFF_8B   (RTE_CACHE_LINE_SIZE / 8)
+#endif
+#define N_PER_LOOP  NEXT_CACHELINE_OFF_8B
+#define N_PER_LOOP_MASK (N_PER_LOOP - 1)
+
+#define rxq_get_vq(q) ((q)->vq)
+#define txq_get_vq(q) ((q)->vq)
+
 uint32_t zxdh_outer_l2_type[16] = {
 	0,
 	RTE_PTYPE_L2_ETHER,
@@ -201,43 +217,6 @@ uint32_t zxdh_inner_l4_type[16] = {
 	0,
 };
 
-static void
-zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
-{
-	uint16_t used_idx = 0;
-	uint16_t id       = 0;
-	uint16_t curr_id  = 0;
-	uint16_t free_cnt = 0;
-	uint16_t size     = vq->vq_nentries;
-	struct zxdh_vring_packed_desc *desc = vq->vq_packed.ring.desc;
-	struct zxdh_vq_desc_extra     *dxp  = NULL;
-
-	used_idx = vq->vq_used_cons_idx;
-	/* desc_is_used has a load-acquire or rte_io_rmb inside
-	 * and wait for used desc in virtqueue.
-	 */
-	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
-		id = desc[used_idx].id;
-		do {
-			curr_id = used_idx;
-			dxp = &vq->vq_descx[used_idx];
-			used_idx += dxp->ndescs;
-			free_cnt += dxp->ndescs;
-			num -= dxp->ndescs;
-			if (used_idx >= size) {
-				used_idx -= size;
-				vq->used_wrap_counter ^= 1;
-			}
-			if (dxp->cookie != NULL) {
-				rte_pktmbuf_free(dxp->cookie);
-				dxp->cookie = NULL;
-			}
-		} while (curr_id != id);
-	}
-	vq->vq_used_cons_idx = used_idx;
-	vq->vq_free_cnt += free_cnt;
-}
-
 static inline uint16_t
 zxdh_get_mtu(struct zxdh_virtqueue *vq)
 {
@@ -334,7 +313,7 @@ zxdh_xmit_fill_net_hdr(struct zxdh_virtqueue *vq, struct rte_mbuf *cookie,
 }
 
 static inline void
-zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_push(struct zxdh_virtnet_tx *txvq,
 						struct rte_mbuf *cookie)
 {
 	struct zxdh_virtqueue *vq = txvq->vq;
@@ -345,7 +324,6 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
 	struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
 
-	dxp->ndescs = 1;
 	dxp->cookie = cookie;
 	hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
 	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -362,52 +340,57 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 }
 
 static inline void
-zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
+zxdh_xmit_enqueue_append(struct zxdh_virtnet_tx *txvq,
 						struct rte_mbuf *cookie,
 						uint16_t needed)
 {
 	struct zxdh_tx_region *txr = txvq->zxdh_net_hdr_mz->addr;
 	struct zxdh_virtqueue *vq = txvq->vq;
-	uint16_t id = vq->vq_avail_idx;
-	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+	struct zxdh_vq_desc_extra *dep = &vq->vq_descx[0];
 	uint16_t head_idx = vq->vq_avail_idx;
 	uint16_t idx = head_idx;
 	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
 	struct zxdh_vring_packed_desc *head_dp = &vq->vq_packed.ring.desc[idx];
 	struct zxdh_net_hdr_dl *hdr = NULL;
-
-	uint16_t head_flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+	uint16_t id = vq->vq_avail_idx;
+	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
+	uint16_t head_flags = 0;
 
-	dxp->ndescs = needed;
-	dxp->cookie = cookie;
-	head_flags |= vq->cached_flags;
+	/*
+	 * IMPORTANT: For multi-seg packets, we set the head descriptor's cookie to NULL
+	 * and store each segment's mbuf in its corresponding vq_descx[idx].cookie.
+	 * This is required for the per-descriptor mbuf free in zxdh_xmit_fast_flush()
+	 * which uses rte_pktmbuf_free_seg() to free individual segments.
+	 * Any code path that attempts to read vq_descx[head_id].cookie will see NULL
+	 * and must handle this case appropriately.
+	 */
+	dxp->cookie = NULL;
 
+	/* setup first tx ring slot to point to header stored in reserved region. */
 	start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
 	start_dp[idx].len  = hdr_len;
-	head_flags |= ZXDH_VRING_DESC_F_NEXT;
+	start_dp[idx].id = idx;
+	head_flags |= vq->cached_flags | ZXDH_VRING_DESC_F_NEXT;
 	hdr = (void *)&txr[idx].tx_hdr;
 
-	rte_prefetch1(hdr);
+	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
+
 	idx++;
 	if (idx >= vq->vq_nentries) {
 		idx -= vq->vq_nentries;
 		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 
-	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
-
 	do {
 		start_dp[idx].addr = rte_pktmbuf_iova(cookie);
 		start_dp[idx].len  = cookie->data_len;
-		start_dp[idx].id = id;
-		if (likely(idx != head_idx)) {
-			uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
-
-			flags |= vq->cached_flags;
-			start_dp[idx].flags = flags;
-		}
+		start_dp[idx].id = idx;
 
+		dep[idx].cookie = cookie;
+		uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
+		flags |= vq->cached_flags;
+		start_dp[idx].flags = flags;
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
@@ -417,7 +400,6 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
 	vq->vq_avail_idx = idx;
-
 	zxdh_queue_store_flags_packed(head_dp, head_flags);
 }
 
@@ -456,7 +438,7 @@ zxdh_update_packet_stats(struct zxdh_virtnet_stats *stats, struct rte_mbuf *mbuf
 }
 
 static void
-zxdh_xmit_flush(struct zxdh_virtqueue *vq)
+zxdh_xmit_fast_flush(struct zxdh_virtqueue *vq)
 {
 	uint16_t id       = 0;
 	uint16_t curr_id  = 0;
@@ -472,20 +454,22 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
 	 * for a used descriptor in the virtqueue.
 	 */
 	while (desc_is_used(&desc[used_idx], vq)) {
+		rte_prefetch0(&desc[used_idx + NEXT_CACHELINE_OFF_16B]);
 		id = desc[used_idx].id;
 		do {
+			desc[used_idx].id = used_idx;
 			curr_id = used_idx;
 			dxp = &vq->vq_descx[used_idx];
-			used_idx += dxp->ndescs;
-			free_cnt += dxp->ndescs;
-			if (used_idx >= size) {
-				used_idx -= size;
-				vq->used_wrap_counter ^= 1;
-			}
 			if (dxp->cookie != NULL) {
-				rte_pktmbuf_free(dxp->cookie);
+				rte_pktmbuf_free_seg(dxp->cookie);
 				dxp->cookie = NULL;
 			}
+			used_idx += 1;
+			free_cnt += 1;
+			if (unlikely(used_idx == size)) {
+				used_idx = 0;
+				vq->used_wrap_counter ^= 1;
+			}
 		} while (curr_id != id);
 	}
 	vq->vq_used_cons_idx = used_idx;
@@ -499,13 +483,12 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
 	struct zxdh_virtqueue  *vq   = txvq->vq;
 	uint16_t nb_tx = 0;
 
-	zxdh_xmit_flush(vq);
+	zxdh_xmit_fast_flush(vq);
 
 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 		struct rte_mbuf *txm = tx_pkts[nb_tx];
 		int32_t can_push     = 0;
 		int32_t slots        = 0;
-		int32_t need         = 0;
 
 		rte_prefetch0(txm);
 		/* optimize ring usage */
@@ -522,26 +505,15 @@ zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkt
 		 * default    => number of segments + 1
 		 **/
 		slots = txm->nb_segs + !can_push;
-		need = slots - vq->vq_free_cnt;
 		/* Positive value indicates it need free vring descriptors */
-		if (unlikely(need > 0)) {
-			zxdh_xmit_cleanup_inorder_packed(vq, need);
-			need = slots - vq->vq_free_cnt;
-			if (unlikely(need > 0)) {
-				PMD_TX_LOG(ERR,
-						" No enough %d free tx descriptors to transmit."
-						"freecnt %d",
-						need,
-						vq->vq_free_cnt);
-				break;
-			}
-		}
+		if (unlikely(slots >  vq->vq_free_cnt))
+			break;
 
 		/* Enqueue Packet buffers */
 		if (can_push)
-			zxdh_enqueue_xmit_packed_fast(txvq, txm);
+			zxdh_xmit_enqueue_push(txvq, txm);
 		else
-			zxdh_enqueue_xmit_packed(txvq, txm, slots);
+			zxdh_xmit_enqueue_append(txvq, txm, slots);
 		zxdh_update_packet_stats(&txvq->stats, txm);
 	}
 	txvq->stats.packets += nb_tx;
@@ -602,6 +574,48 @@ uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+uint16_t zxdh_xmit_pkts_simple_prepare(void *tx_queue, struct rte_mbuf **tx_pkts,
+				uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_tx *txvq = tx_queue;
+	struct zxdh_hw *hw = txvq->vq->hw;
+	uint16_t nb_tx;
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		struct rte_mbuf *m = tx_pkts[nb_tx];
+		int32_t error;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		error = rte_validate_tx_offload(m);
+		if (unlikely(error)) {
+			rte_errno = -error;
+			break;
+		}
+#endif
+
+		error = rte_net_intel_cksum_prepare(m);
+		if (unlikely(error)) {
+			rte_errno = -error;
+			break;
+		}
+		if (m->data_off < ZXDH_DL_NET_HDR_SIZE) {
+			PMD_TX_LOG(ERR, "HEADROOM too small for zxdh Tx downlink header");
+			txvq->stats.invalid_hdr_len_err += nb_pkts - nb_tx;
+			rte_errno = ENOMEM;
+			break;
+		}
+
+		error = dl_net_hdr_check(m, hw);
+		if (unlikely(error)) {
+			rte_errno = ENOTSUP;
+			txvq->stats.errors += nb_pkts - nb_tx;
+			txvq->stats.offload_cfg_err += nb_pkts - nb_tx;
+			break;
+		}
+	}
+	return nb_tx;
+}
+
 static uint16_t
 zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 					struct rte_mbuf **rx_pkts,
@@ -1070,7 +1084,6 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
 
 		if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
 			continue;
-		rcv_pkts[nb_rx] = rxm;
 		zxdh_update_packet_stats(&rxvq->stats, rxm);
 		nb_rx++;
 	}
@@ -1084,3 +1097,209 @@ uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint1
 	}
 	return nb_rx;
 }
+
+static inline void pkt_padding(struct rte_mbuf *cookie, struct zxdh_hw *hw)
+{
+	uint16_t mtu_or_mss = 0;
+	uint16_t pkt_flag_lw16 = ZXDH_NO_IPID_UPDATE;
+	uint16_t l3_offset;
+	uint8_t pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+	uint8_t l3_ptype = ZXDH_PI_L3TYPE_NOIP;
+	struct zxdh_pi_hdr *pi_hdr;
+	struct zxdh_pd_hdr_dl *pd_hdr;
+	struct zxdh_net_hdr_dl *net_hdr_dl = hw->net_hdr_dl;
+	uint8_t hdr_len = hw->dl_net_hdr_len;
+	uint16_t ol_flag = 0;
+	struct zxdh_net_hdr_dl *hdr;
+
+	hdr = rte_pktmbuf_mtod_offset(cookie, struct zxdh_net_hdr_dl *, -hdr_len);
+	rte_memcpy(hdr, net_hdr_dl, hdr_len);
+
+	/* Update mbuf to reflect the prepended header */
+	cookie->data_off -= hdr_len;
+	cookie->data_len += hdr_len;
+	cookie->pkt_len += hdr_len;
+
+	if (hw->has_tx_offload) {
+		pi_hdr = &hdr->pipd_hdr_dl.pi_hdr;
+		pd_hdr = &hdr->pipd_hdr_dl.pd_hdr;
+
+		pcode = ZXDH_PCODE_IP_PKT_TYPE;
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IPV6)
+			l3_ptype = ZXDH_PI_L3TYPE_IPV6;
+		else if (cookie->ol_flags & RTE_MBUF_F_TX_IPV4)
+			l3_ptype = ZXDH_PI_L3TYPE_IP;
+		else
+			pcode = ZXDH_PCODE_NO_IP_PKT_TYPE;
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
+			mtu_or_mss = (cookie->tso_segsz >= ZXDH_MIN_MSS) ?
+				cookie->tso_segsz : ZXDH_MIN_MSS;
+			pi_hdr->pkt_flag_hi8  |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT | ZXDH_TX_IP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+		} else if (cookie->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
+			mtu_or_mss = hw->eth_dev->data->mtu;
+			mtu_or_mss = (mtu_or_mss >= ZXDH_MIN_MSS) ? mtu_or_mss : ZXDH_MIN_MSS;
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT | ZXDH_TX_TCPUDP_CKSUM_CAL;
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+		} else {
+			pkt_flag_lw16 |= ZXDH_NO_IP_FRAGMENT;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_NO_TCP_FRAGMENT;
+		}
+
+		if (cookie->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
+			pkt_flag_lw16 |= ZXDH_TX_IP_CKSUM_CAL;
+
+		if ((cookie->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
+			pcode = ZXDH_PCODE_UDP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		} else if ((cookie->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) ==
+			RTE_MBUF_F_TX_TCP_CKSUM) {
+			pcode = ZXDH_PCODE_TCP_PKT_TYPE;
+			pi_hdr->pkt_flag_hi8 |= ZXDH_TX_TCPUDP_CKSUM_CAL;
+		}
+		pkt_flag_lw16 |= (mtu_or_mss >> ZXDH_MTU_MSS_UNIT_SHIFTBIT) & ZXDH_MTU_MSS_MASK;
+		pi_hdr->pkt_flag_lw16 = rte_be_to_cpu_16(pkt_flag_lw16);
+		pi_hdr->pkt_type = l3_ptype | ZXDH_PKT_FORM_CPU | pcode;
+
+		l3_offset = hdr_len + cookie->l2_len;
+		l3_offset += (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
+					cookie->outer_l2_len + cookie->outer_l3_len : 0;
+		pi_hdr->l3_offset = rte_be_to_cpu_16(l3_offset);
+		pi_hdr->l4_offset = rte_be_to_cpu_16(l3_offset + cookie->l3_len);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
+			ol_flag |= ZXDH_PD_OFFLOAD_OUTER_IPCSUM;
+	} else {
+		pd_hdr = &hdr->pd_hdr;
+	}
+
+	pd_hdr->dst_vfid = rte_be_to_cpu_16(cookie->port);
+
+	if (cookie->ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
+		ol_flag |= ZXDH_PD_OFFLOAD_CVLAN_INSERT;
+		pd_hdr->cvlan_insert = rte_be_to_cpu_16(cookie->vlan_tci);
+		if (cookie->ol_flags & RTE_MBUF_F_TX_QINQ) {
+			ol_flag |= ZXDH_PD_OFFLOAD_SVLAN_INSERT;
+			pd_hdr->svlan_insert = rte_be_to_cpu_16(cookie->vlan_tci_outer);
+		}
+	}
+
+	pd_hdr->ol_flag = rte_be_to_cpu_16(ol_flag);
+}
+
+/*
+ * Populate N_PER_LOOP descriptors with data from N_PER_LOOP single-segment mbufs.
+ * Note: The simple transmit path (zxdh_xmit_pkts_simple) is selected only when
+ * RTE_ETH_TX_OFFLOAD_MULTI_SEGS is disabled, so all packets handled here are
+ * guaranteed to be single-segment.
+ */
+static inline void
+tx_bunch(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf **pkts, uint16_t start_id)
+{
+	uint16_t flags = vq->cached_flags;
+	int i;
+	for (i = 0; i < N_PER_LOOP; ++i, ++txdp, ++pkts) {
+		/* write data to descriptor */
+		txdp->addr = rte_mbuf_data_iova(*pkts);
+		txdp->len = (*pkts)->data_len;
+		txdp->id = start_id + i;
+		txdp->flags = flags;
+	}
+}
+
+/* Populate 1 descriptor with data from 1 single-segment mbuf */
+static inline void
+tx1(struct zxdh_virtqueue *vq, volatile struct zxdh_vring_packed_desc *txdp,
+		struct rte_mbuf *pkts, uint16_t id)
+{
+	uint16_t flags = vq->cached_flags;
+	txdp->addr = rte_mbuf_data_iova(pkts);
+	txdp->len = pkts->data_len;
+	txdp->id = id;
+	zxdh_queue_store_flags_packed(txdp, flags);
+}
+
+static void submit_to_backend_simple(struct zxdh_virtqueue  *vq,
+			struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_hw *hw = vq->hw;
+	struct rte_mbuf *m = NULL;
+	uint16_t id =  vq->vq_avail_idx;
+	struct zxdh_vring_packed_desc *txdp = &vq->vq_packed.ring.desc[id];
+	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
+	int mainpart, leftover;
+	int i, j;
+
+	/*
+	 * Process most of the packets in chunks of N pkts.  Any
+	 * leftover packets will get processed one at a time.
+	 */
+	mainpart = (nb_pkts & ~N_PER_LOOP_MASK);
+	leftover = (nb_pkts & N_PER_LOOP_MASK);
+
+	for (i = 0; i < mainpart; i += N_PER_LOOP) {
+		rte_prefetch0(dxp + i);
+		rte_prefetch0(tx_pkts + i);
+		for (j = 0; j < N_PER_LOOP; ++j) {
+			m  = *(tx_pkts + i + j);
+			pkt_padding(m, hw);
+			(dxp + i + j)->cookie = (void *)m;
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+		/* write data to descriptor */
+		tx_bunch(vq, txdp + i, tx_pkts + i, id + i);
+	}
+
+	if (leftover > 0) {
+		rte_prefetch0(dxp + mainpart);
+		rte_prefetch0(tx_pkts + mainpart);
+
+		for (i = 0; i < leftover; ++i) {
+			m =  *(tx_pkts + mainpart + i);
+			pkt_padding(m, hw);
+			(dxp + mainpart + i)->cookie = m;
+			tx1(vq, txdp + mainpart + i, *(tx_pkts + mainpart + i), id + mainpart + i);
+			zxdh_update_packet_stats(&vq->txq.stats, m);
+		}
+	}
+}
+
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_tx *txvq = tx_queue;
+	struct zxdh_virtqueue  *vq   = txq_get_vq(txvq);
+	uint16_t nb_tx = 0, nb_tx_left;
+
+	zxdh_xmit_fast_flush(vq);
+
+	nb_pkts = (uint16_t)RTE_MIN(nb_pkts, vq->vq_free_cnt);
+	if (unlikely(nb_pkts == 0)) {
+		txvq->stats.idle++;
+		return 0;
+	}
+
+	nb_tx_left = nb_pkts;
+	if ((vq->vq_avail_idx + nb_pkts) >= vq->vq_nentries) {
+		nb_tx = vq->vq_nentries - vq->vq_avail_idx;
+		nb_tx_left = nb_pkts - nb_tx;
+		submit_to_backend_simple(vq, tx_pkts, nb_tx);
+		vq->vq_avail_idx = 0;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+
+		vq->vq_free_cnt -= nb_tx;
+		tx_pkts += nb_tx;
+	}
+	if (nb_tx_left) {
+		submit_to_backend_simple(vq, tx_pkts, nb_tx_left);
+		vq->vq_avail_idx  += nb_tx_left;
+		vq->vq_free_cnt  -= nb_tx_left;
+	}
+
+	zxdh_queue_notify(vq);
+	txvq->stats.packets += nb_pkts;
+
+	return nb_pkts;
+}
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index dba9567414..627e8b05c3 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -56,18 +56,19 @@ struct __rte_cache_aligned zxdh_virtnet_rx {
 
 struct __rte_cache_aligned zxdh_virtnet_tx {
 	struct zxdh_virtqueue         *vq;
-
-	rte_iova_t                zxdh_net_hdr_mem; /* hdr for each xmit packet */
-	uint16_t                  queue_id;           /* DPDK queue index. */
-	uint16_t                  port_id;            /* Device port identifier. */
+	const struct rte_memzone *zxdh_net_hdr_mz;  /* memzone to populate hdr. */
+	rte_iova_t               zxdh_net_hdr_mem; /* hdr for each xmit packet */
 	struct zxdh_virtnet_stats      stats;
 	const struct rte_memzone *mz;                 /* mem zone to populate TX ring. */
-	const struct rte_memzone *zxdh_net_hdr_mz;  /* memzone to populate hdr. */
+	uint64_t offloads;
+	uint16_t                  queue_id;           /* DPDK queue index. */
+	uint16_t                  port_id;            /* Device port identifier. */
 };
 
 uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
-
+uint16_t zxdh_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_xmit_pkts_simple_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 #endif  /* ZXDH_RXTX_H */
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 53255 bytes --]

^ permalink raw reply related

* [PATCH v7 3/4] net/zxdh: optimize Rx recv pkts performance
From: Junlong Wang @ 2026-06-23  6:09 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260623060909.97023-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 16239 bytes --]

1. Add simple RX recv functions (zxdh_recv_single_pkts)
   for single-segment packet recv.
2. And optimize Rx recv pkts packed ops.
3. Remove unnecessary ZXDH_NET_F_MRG_RXBUF negotiation check and
   some unnecessary statistical counters form the xstats name tables.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c     |  39 +++++--
 drivers/net/zxdh/zxdh_ethdev_ops.c |  23 ++--
 drivers/net/zxdh/zxdh_ethdev_ops.h |   4 +
 drivers/net/zxdh/zxdh_rxtx.c       | 174 +++++++++++++++++++++++------
 drivers/net/zxdh/zxdh_rxtx.h       |  16 +--
 5 files changed, 193 insertions(+), 63 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index a383619419..fe76139f3d 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -1263,18 +1263,43 @@ zxdh_dev_close(struct rte_eth_dev *dev)
 	return ret;
 }
 
-static int32_t
-zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+/*
+ * Determine whether the current configuration requires support for scattered
+ * receive; return 1 if scattered receive is required and 0 if not.
+ */
+static int zxdh_scattered_rx(struct rte_eth_dev *eth_dev)
 {
-	struct zxdh_hw *hw = eth_dev->data->dev_private;
+	uint16_t buf_size;
 
-	if (!zxdh_pci_with_feature(hw, ZXDH_NET_F_MRG_RXBUF)) {
-		PMD_DRV_LOG(ERR, "port %u not support rx mergeable", eth_dev->data->port_id);
-		return -1;
+	if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO) {
+		eth_dev->data->lro = 1;
+		return 1;
 	}
+
+	if (eth_dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
+		return 1;
+
+	PMD_DRV_LOG(DEBUG, "port %u min_rx_buf_size %u",
+		eth_dev->data->port_id, eth_dev->data->min_rx_buf_size);
+	buf_size = eth_dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+	if (eth_dev->data->mtu + ZXDH_ETH_OVERHEAD > buf_size)
+		return 1;
+
+	return 0;
+}
+
+static int32_t
+zxdh_set_rxtx_funcs(struct rte_eth_dev *eth_dev)
+{
 	eth_dev->tx_pkt_prepare = zxdh_xmit_pkts_prepare;
+	eth_dev->data->scattered_rx = zxdh_scattered_rx(eth_dev);
+
 	eth_dev->tx_pkt_burst = &zxdh_xmit_pkts_packed;
-	eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+
+	if (eth_dev->data->scattered_rx)
+		eth_dev->rx_pkt_burst = &zxdh_recv_pkts_packed;
+	else
+		eth_dev->rx_pkt_burst = &zxdh_recv_single_pkts;
 
 	return 0;
 }
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.c b/drivers/net/zxdh/zxdh_ethdev_ops.c
index 50247116d9..9a8e05e941 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.c
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.c
@@ -95,10 +95,6 @@ static const struct rte_zxdh_xstats_name_off zxdh_rxq_stat_strings[] = {
 	{"good_bytes",             offsetof(struct zxdh_virtnet_rx, stats.bytes)},
 	{"errors",                 offsetof(struct zxdh_virtnet_rx, stats.errors)},
 	{"idle",                   offsetof(struct zxdh_virtnet_rx, stats.idle)},
-	{"full",                   offsetof(struct zxdh_virtnet_rx, stats.full)},
-	{"norefill",               offsetof(struct zxdh_virtnet_rx, stats.norefill)},
-	{"multicast_packets",      offsetof(struct zxdh_virtnet_rx, stats.multicast)},
-	{"broadcast_packets",      offsetof(struct zxdh_virtnet_rx, stats.broadcast)},
 	{"truncated_err",          offsetof(struct zxdh_virtnet_rx, stats.truncated_err)},
 	{"offload_cfg_err",        offsetof(struct zxdh_virtnet_rx, stats.offload_cfg_err)},
 	{"invalid_hdr_len_err",    offsetof(struct zxdh_virtnet_rx, stats.invalid_hdr_len_err)},
@@ -117,14 +113,12 @@ static const struct rte_zxdh_xstats_name_off zxdh_txq_stat_strings[] = {
 	{"good_packets",           offsetof(struct zxdh_virtnet_tx, stats.packets)},
 	{"good_bytes",             offsetof(struct zxdh_virtnet_tx, stats.bytes)},
 	{"errors",                 offsetof(struct zxdh_virtnet_tx, stats.errors)},
-	{"idle",                   offsetof(struct zxdh_virtnet_tx, stats.idle)},
-	{"norefill",               offsetof(struct zxdh_virtnet_tx, stats.norefill)},
-	{"multicast_packets",      offsetof(struct zxdh_virtnet_tx, stats.multicast)},
-	{"broadcast_packets",      offsetof(struct zxdh_virtnet_tx, stats.broadcast)},
+	{"idle",                 offsetof(struct zxdh_virtnet_tx, stats.idle)},
 	{"truncated_err",          offsetof(struct zxdh_virtnet_tx, stats.truncated_err)},
 	{"offload_cfg_err",        offsetof(struct zxdh_virtnet_tx, stats.offload_cfg_err)},
 	{"invalid_hdr_len_err",    offsetof(struct zxdh_virtnet_tx, stats.invalid_hdr_len_err)},
 	{"no_segs_err",            offsetof(struct zxdh_virtnet_tx, stats.no_segs_err)},
+	{"no_free_tx_desc_err",    offsetof(struct zxdh_virtnet_tx, stats.no_free_tx_desc_err)},
 	{"undersize_packets",      offsetof(struct zxdh_virtnet_tx, stats.size_bins[0])},
 	{"size_64_packets",        offsetof(struct zxdh_virtnet_tx, stats.size_bins[1])},
 	{"size_65_127_packets",    offsetof(struct zxdh_virtnet_tx, stats.size_bins[2])},
@@ -2026,6 +2020,19 @@ int zxdh_dev_mtu_set(struct rte_eth_dev *dev, uint16_t new_mtu)
 	uint16_t vfid = zxdh_vport_to_vfid(hw->vport);
 	int ret;
 
+	/* If device is started, refuse mtu that requires the support of
+	 * scattered packets when this feature has not been enabled before.
+	 */
+	if (dev->data->dev_started) {
+		uint32_t buf_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+		uint8_t need_scatter = (uint32_t)ZXDH_MTU_TO_PKTLEN(new_mtu) > buf_size;
+
+		if (need_scatter != dev->data->scattered_rx) {
+			PMD_DRV_LOG(ERR, "Stop port first.");
+			return -EINVAL;
+		}
+	}
+
 	if (hw->is_pf) {
 		ret = zxdh_get_panel_attr(dev, &panel);
 		if (ret != 0) {
diff --git a/drivers/net/zxdh/zxdh_ethdev_ops.h b/drivers/net/zxdh/zxdh_ethdev_ops.h
index 6dfe4be473..c49d79c232 100644
--- a/drivers/net/zxdh/zxdh_ethdev_ops.h
+++ b/drivers/net/zxdh/zxdh_ethdev_ops.h
@@ -40,6 +40,10 @@
 #define ZXDH_SPM_SPEED_4X_100G         RTE_BIT32(10)
 #define ZXDH_SPM_SPEED_4X_200G         RTE_BIT32(11)
 
+#define ZXDH_VLAN_TAG_LEN   4
+#define ZXDH_ETH_OVERHEAD  (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ZXDH_VLAN_TAG_LEN * 2)
+#define ZXDH_MTU_TO_PKTLEN(mtu) ((mtu) + ZXDH_ETH_OVERHEAD)
+
 struct zxdh_np_stats_data {
 	uint64_t n_pkts_dropped;
 	uint64_t n_bytes_dropped;
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index 93506a4b49..ab0510a753 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -613,10 +613,12 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 	uint16_t i, used_idx;
 	uint16_t id;
 
+	used_idx = vq->vq_used_cons_idx;
+	rte_prefetch0(&desc[used_idx]);
+
 	for (i = 0; i < num; i++) {
 		used_idx = vq->vq_used_cons_idx;
-		/**
-		 * desc_is_used has a load-acquire or rte_io_rmb inside
+		/* desc_is_used has a load-acquire or rte_io_rmb inside
 		 * and wait for used desc in virtqueue.
 		 */
 		if (!desc_is_used(&desc[used_idx], vq))
@@ -823,17 +825,52 @@ zxdh_rx_update_mbuf(struct zxdh_hw *hw, struct rte_mbuf *m, struct zxdh_net_hdr_
 	}
 }
 
-static void zxdh_discard_rxbuf(struct zxdh_virtqueue *vq, struct rte_mbuf *m)
+static void refill_desc_unwrap(struct zxdh_virtqueue *vq,
+		struct rte_mbuf **cookie, uint16_t nb_pkts)
 {
-	int32_t error = 0;
-	/*
-	 * Requeue the discarded mbuf. This should always be
-	 * successful since it was just dequeued.
-	 */
-	error = zxdh_enqueue_recv_refill_packed(vq, &m, 1);
-	if (unlikely(error)) {
-		PMD_RX_LOG(ERR, "cannot enqueue discarded mbuf");
-		rte_pktmbuf_free(m);
+	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+	struct zxdh_vq_desc_extra *dxp;
+	uint16_t flags = vq->cached_flags;
+	int32_t i;
+	uint16_t idx;
+
+	idx = vq->vq_avail_idx;
+	for (i = 0; i < nb_pkts; i++) {
+		dxp = &vq->vq_descx[idx];
+		dxp->cookie = (void *)cookie[i];
+		start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
+		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
+		zxdh_queue_store_flags_packed(&start_dp[idx], flags);
+		idx++;
+	}
+	vq->vq_avail_idx += nb_pkts;
+	vq->vq_free_cnt = vq->vq_free_cnt - nb_pkts;
+}
+
+static void refill_que_descs(struct zxdh_virtqueue *vq, struct rte_eth_dev *dev)
+{
+	/* free_cnt may include mrg descs */
+	struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
+	uint16_t free_cnt = RTE_MIN(ZXDH_MBUF_BURST_SZ, vq->vq_free_cnt);
+	struct zxdh_virtnet_rx *rxvq = &vq->rxq;
+	uint16_t  unwrap_cnt, left_cnt;
+
+	if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
+		left_cnt = free_cnt;
+		unwrap_cnt = 0;
+		if ((vq->vq_avail_idx + free_cnt) >= vq->vq_nentries) {
+			unwrap_cnt = vq->vq_nentries - vq->vq_avail_idx;
+			left_cnt = free_cnt - unwrap_cnt;
+			refill_desc_unwrap(vq, new_pkts, unwrap_cnt);
+			vq->vq_avail_idx = 0;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		}
+		if (left_cnt)
+			refill_desc_unwrap(vq, new_pkts + unwrap_cnt, left_cnt);
+
+		rte_io_wmb();
+	} else {
+		dev->data->rx_mbuf_alloc_failed += free_cnt;
 	}
 }
 
@@ -852,7 +889,6 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t len = 0;
 	uint32_t seg_num = 0;
 	uint32_t seg_res = 0;
-	uint32_t error = 0;
 	uint16_t hdr_size = 0;
 	uint16_t nb_rx = 0;
 	uint16_t i;
@@ -873,7 +909,8 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rx_pkts[nb_rx] = rxm;
 		prev = rxm;
 		len = lens[i];
-		header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+		header = (struct zxdh_net_hdr_ul *)((char *)
+					rxm->buf_addr + RTE_PKTMBUF_HEADROOM);
 
 		seg_num  = header->type_hdr.num_buffers;
 
@@ -886,7 +923,7 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			rxvq->stats.invalid_hdr_len_err++;
 			continue;
 		}
-		rxm->data_off += hdr_size;
+		rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
 		rxm->nb_segs = seg_num;
 		rxm->ol_flags = 0;
 		rcvd_pkt_len = len - hdr_size;
@@ -902,18 +939,19 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			len = lens[i];
 			rxm = rcv_pkts[i];
 			rxm->data_len = len;
+			rxm->data_off = RTE_PKTMBUF_HEADROOM;
 			rcvd_pkt_len += len;
 			prev->next = rxm;
 			prev = rxm;
 			rxm->next = NULL;
-			seg_res -= 1;
+			seg_res--;
 		}
 
 		if (!seg_res) {
 			if (rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len) {
 				PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
 					rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
-				zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+				rte_pktmbuf_free(rx_pkts[nb_rx]);
 				rxvq->stats.errors++;
 				rxvq->stats.truncated_err++;
 				continue;
@@ -942,14 +980,14 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 			prev->next = rxm;
 			prev = rxm;
 			rxm->next = NULL;
-			extra_idx += 1;
+			extra_idx++;
 		}
 		seg_res -= rcv_cnt;
 		if (!seg_res) {
 			if (unlikely(rcvd_pkt_len != rx_pkts[nb_rx]->pkt_len)) {
 				PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d",
 					rcvd_pkt_len, rx_pkts[nb_rx]->pkt_len);
-				zxdh_discard_rxbuf(vq, rx_pkts[nb_rx]);
+				rte_pktmbuf_free(rx_pkts[nb_rx]);
 				rxvq->stats.errors++;
 				rxvq->stats.truncated_err++;
 				continue;
@@ -961,26 +999,88 @@ zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rxvq->stats.packets += nb_rx;
 
 refill:
-	/* Allocate new mbuf for the used descriptor */
-	if (likely(!zxdh_queue_full(vq))) {
-		struct rte_mbuf *new_pkts[ZXDH_MBUF_BURST_SZ];
-		/* free_cnt may include mrg descs */
-		uint16_t free_cnt = RTE_MIN(vq->vq_free_cnt, ZXDH_MBUF_BURST_SZ);
-
-		if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
-			error = zxdh_enqueue_recv_refill_packed(vq, new_pkts, free_cnt);
-			if (unlikely(error)) {
-				for (i = 0; i < free_cnt; i++)
-					rte_pktmbuf_free(new_pkts[i]);
-			}
+	if (vq->vq_free_cnt > 0) {
+		struct rte_eth_dev *dev = hw->eth_dev;
+		refill_que_descs(vq, dev);
+		zxdh_queue_notify(vq);
+	}
 
-			if (unlikely(zxdh_queue_kick_prepare_packed(vq)))
-				zxdh_queue_notify(vq);
-		} else {
-			struct rte_eth_dev *dev = hw->eth_dev;
+	return nb_rx;
+}
 
-			dev->data->rx_mbuf_alloc_failed += free_cnt;
-		}
+static inline int zxdh_init_mbuf(struct rte_mbuf *rxm, uint16_t len,
+		struct zxdh_hw *hw, struct zxdh_virtnet_rx *rxvq)
+{
+	uint16_t hdr_size = 0;
+	struct zxdh_net_hdr_ul *header;
+
+	header = rte_pktmbuf_mtod(rxm, struct zxdh_net_hdr_ul *);
+	rxm->ol_flags = 0;
+	rxm->vlan_tci = 0;
+	rxm->vlan_tci_outer = 0;
+
+	hdr_size = header->type_hdr.pd_len << 1;
+	if (unlikely(header->type_hdr.num_buffers != 1)) {
+		PMD_RX_LOG(DEBUG, "hdr_size:%u nb_segs %d is invalid",
+			hdr_size, header->type_hdr.num_buffers);
+		rte_pktmbuf_free(rxm);
+		rxvq->stats.invalid_hdr_len_err++;
+		return -1;
+	}
+	zxdh_rx_update_mbuf(hw, rxm, header);
+
+	rxm->nb_segs = 1;
+	rxm->data_off = RTE_PKTMBUF_HEADROOM + hdr_size;
+	rxm->data_len = len - hdr_size;
+	rxm->port = hw->port_id;
+
+	if (rxm->data_len != rxm->pkt_len) {
+		PMD_RX_LOG(ERR, "dropped rcvd_pkt_len %d pktlen %d  bufaddr %p.",
+					rxm->data_len, rxm->pkt_len, rxm->buf_addr);
+		rte_pktmbuf_free(rxm);
+		rxvq->stats.truncated_err++;
+		rxvq->stats.errors++;
+		return -1;
+	}
+	return 0;
+}
+
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts)
+{
+	struct zxdh_virtnet_rx *rxvq = rx_queue;
+	struct zxdh_virtqueue *vq = rxvq->vq;
+	struct zxdh_hw *hw = vq->hw;
+	uint32_t lens[ZXDH_MBUF_BURST_SZ];
+	uint16_t nb_rx = 0;
+	uint16_t num;
+	uint16_t i;
+
+	num = nb_pkts;
+	if (unlikely(num > ZXDH_MBUF_BURST_SZ))
+		num = ZXDH_MBUF_BURST_SZ;
+	num = zxdh_dequeue_burst_rx_packed(vq, rcv_pkts, lens, num);
+	if (num == 0) {
+		rxvq->stats.idle++;
+		goto refill;
+	}
+
+	for (i = 0; i < num; i++) {
+		struct rte_mbuf *rxm = rcv_pkts[i];
+		uint16_t len = lens[i];
+
+		if (unlikely(zxdh_init_mbuf(rxm, len, hw, &vq->rxq) < 0))
+			continue;
+		rcv_pkts[nb_rx] = rxm;
+		zxdh_update_packet_stats(&rxvq->stats, rxm);
+		nb_rx++;
+	}
+	rxvq->stats.packets += nb_rx;
+
+refill:
+	if (vq->vq_free_cnt > 0) {
+		struct rte_eth_dev *dev = hw->eth_dev;
+		refill_que_descs(vq, dev);
+		zxdh_queue_notify(vq);
 	}
 	return nb_rx;
 }
diff --git a/drivers/net/zxdh/zxdh_rxtx.h b/drivers/net/zxdh/zxdh_rxtx.h
index 424048607e..dba9567414 100644
--- a/drivers/net/zxdh/zxdh_rxtx.h
+++ b/drivers/net/zxdh/zxdh_rxtx.h
@@ -36,29 +36,22 @@ struct zxdh_virtnet_stats {
 	uint64_t bytes;
 	uint64_t errors;
 	uint64_t idle;
-	uint64_t full;
-	uint64_t norefill;
-	uint64_t multicast;
-	uint64_t broadcast;
 	uint64_t truncated_err;
 	uint64_t offload_cfg_err;
 	uint64_t invalid_hdr_len_err;
 	uint64_t no_segs_err;
+	uint64_t no_free_tx_desc_err;
 	uint64_t size_bins[8];
 };
 
 struct __rte_cache_aligned zxdh_virtnet_rx {
 	struct zxdh_virtqueue         *vq;
-
-	uint64_t                  mbuf_initializer; /* value to init mbufs. */
 	struct rte_mempool       *mpool;            /* mempool for mbuf allocation */
-	uint16_t                  queue_id;         /* DPDK queue index. */
-	uint16_t                  port_id;          /* Device port identifier. */
 	struct zxdh_virtnet_stats      stats;
 	const struct rte_memzone *mz;               /* mem zone to populate RX ring. */
-
-	/* dummy mbuf, for wraparound when processing RX ring. */
-	struct rte_mbuf           fake_mbuf;
+	uint64_t offloads;
+	uint16_t                  queue_id;         /* DPDK queue index. */
+	uint16_t                  port_id;          /* Device port identifier. */
 };
 
 struct __rte_cache_aligned zxdh_virtnet_tx {
@@ -75,5 +68,6 @@ struct __rte_cache_aligned zxdh_virtnet_tx {
 uint16_t zxdh_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_xmit_pkts_prepare(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 uint16_t zxdh_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+uint16_t zxdh_recv_single_pkts(void *rx_queue, struct rte_mbuf **rcv_pkts, uint16_t nb_pkts);
 
 #endif  /* ZXDH_RXTX_H */
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 39105 bytes --]

^ permalink raw reply related

* [PATCH v7 2/4] net/zxdh: optimize queue structure to improve performance
From: Junlong Wang @ 2026-06-23  6:09 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260623060909.97023-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 16846 bytes --]

1. Reorganize structure fields for better cache locality.
2. Remove RX software ring (sw_ring) to reduce memory allocation and
   copy.
3. Remove zxdh_mb(), use native rte_mb().
4. optimize zxdh_queue_notify() functions, remove unnecessary feature
   check.

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_ethdev.c |  33 +--------
 drivers/net/zxdh/zxdh_pci.c    |   2 +-
 drivers/net/zxdh/zxdh_queue.c  |  11 ++-
 drivers/net/zxdh/zxdh_queue.h  | 120 ++++++++++++++++-----------------
 drivers/net/zxdh/zxdh_rxtx.c   |  22 +++---
 5 files changed, 77 insertions(+), 111 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index 80ff19b3ea..a383619419 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	struct zxdh_virtnet_tx *txvq = NULL;
 	struct zxdh_virtqueue *vq = NULL;
 	size_t sz_hdr_mz = 0;
-	void *sw_ring = NULL;
 	int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
 	int32_t numa_node = dev->device->numa_node;
 	uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	vq->vq_queue_index = vtpci_phy_qidx;
 	vq->vq_nentries = vq_size;
 
-	vq->vq_packed.used_wrap_counter = 1;
-	vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
-	vq->vq_packed.event_flags_shadow = 0;
+	vq->used_wrap_counter = 1;
+	vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
 	if (queue_type == ZXDH_VTNET_RQ)
-		vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+		vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
 
 	/*
 	 * Reserve a memzone for vring elements
@@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	}
 
 	if (queue_type == ZXDH_VTNET_RQ) {
-		size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]);
-
-		sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node);
-		if (!sw_ring) {
-			PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
-			ret = -ENOMEM;
-			goto fail_q_alloc;
-		}
-
-		vq->sw_ring = sw_ring;
 		rxvq = &vq->rxq;
 		rxvq->vq = vq;
 		rxvq->port_id = dev->data->port_id;
@@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 		txvq->zxdh_net_hdr_mem = hdr_mz->iova;
 	}
 
-	vq->offset = offsetof(struct rte_mbuf, buf_iova);
 	if (queue_type == ZXDH_VTNET_TQ) {
 		struct zxdh_tx_region *txr = hdr_mz->addr;
-		uint32_t i;
-
 		memset(txr, 0, vq_size * sizeof(*txr));
-		for (i = 0; i < vq_size; i++) {
-			/* first indirect descriptor is always the tx header */
-			struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir;
-
-			zxdh_vring_desc_init_indirect_packed(start_dp,
-					RTE_DIM(txr[i].tx_packed_indir));
-			start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) +
-					offsetof(struct zxdh_tx_region, tx_hdr);
-			/* length will be updated to actual pi hdr size when xmit pkt */
-			start_dp->len = 0;
-		}
 	}
 	if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
 		PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx)
 	}
 	return 0;
 fail_q_alloc:
-	rte_free(sw_ring);
 	rte_memzone_free(hdr_mz);
 	rte_memzone_free(mz);
 	rte_free(vq);
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq)
 
 	notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
 	if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
-			(vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
+			(vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
 		notify_data |= RTE_BIT32(31);
 
 	PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 {
 	struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
 	struct zxdh_vq_desc_extra *dxp;
-	uint16_t flags = vq->vq_packed.cached_flags;
+	uint16_t flags = vq->cached_flags;
 	int32_t i;
 	uint16_t idx;
 
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 		idx = vq->vq_avail_idx;
 		dxp = &vq->vq_descx[idx];
 		dxp->cookie = (void *)cookie[i];
-		dxp->ndescs = 1;
 		/* rx pkt fill in data_off */
 		start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM;
 		start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq,
 		zxdh_queue_store_flags_packed(&start_dp[idx], flags);
 		if (++vq->vq_avail_idx >= vq->vq_nentries) {
 			vq->vq_avail_idx -= vq->vq_nentries;
-			vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
-			flags = vq->vq_packed.cached_flags;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+			flags = vq->cached_flags;
 		}
 	}
 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
 	int32_t cnt = 0;
 
 	i = vq->vq_used_cons_idx;
-	while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+	while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
 		dxp = &vq->vq_descx[descs[i].id];
 		if (dxp->cookie != NULL) {
 			rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
 		vq->vq_used_cons_idx++;
 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
 			vq->vq_used_cons_idx -= vq->vq_nentries;
-			vq->vq_packed.used_wrap_counter ^= 1;
+			vq->used_wrap_counter ^= 1;
 		}
 		i = vq->vq_used_cons_idx;
 	}
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 711ea291d0..b079272162 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
 
 #include <rte_common.h>
 #include <rte_atomic.h>
+#include <rte_io.h>
 
 #include "zxdh_ethdev.h"
 #include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
 };
 
 struct zxdh_vring_packed {
-	uint32_t num;
 	struct zxdh_vring_packed_desc *desc;
 	struct zxdh_vring_packed_desc_event *driver;
 	struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
 	uint16_t next;
 };
 
+struct zxdh_vring {
+	uint32_t num;
+	struct zxdh_vring_desc  *desc;
+	struct zxdh_vring_avail *avail;
+	struct zxdh_vring_used  *used;
+};
+
 struct zxdh_virtqueue {
+	union {
+		struct {
+			struct zxdh_vring ring; /**< vring keeping desc, used and avail */
+		} vq_split;
+		struct __rte_packed_begin {
+			struct zxdh_vring_packed ring;
+		} __rte_packed_end vq_packed;
+	};
 	struct zxdh_hw  *hw; /* < zxdh_hw structure pointer. */
 
-	struct {
-		/* vring keeping descs and events */
-		struct zxdh_vring_packed ring;
-		uint8_t used_wrap_counter;
-		uint8_t rsv;
-		uint16_t cached_flags; /* < cached flags for descs */
-		uint16_t event_flags_shadow;
-		uint16_t rsv1;
-	} vq_packed;
-
-	uint16_t vq_used_cons_idx; /* < last consumed descriptor */
-	uint16_t vq_nentries;  /* < vring desc numbers */
-	uint16_t vq_free_cnt;  /* < num of desc available */
-	uint16_t vq_avail_idx; /* < sync until needed */
-	uint16_t vq_free_thresh; /* < free threshold */
-	uint16_t rsv2;
-
-	void *vq_ring_virt_mem;  /* < linear address of vring */
-	uint32_t vq_ring_size;
+	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+	uint16_t vq_avail_idx; /**< sync until needed */
+	uint16_t vq_nentries;  /**< vring desc numbers */
+	uint16_t vq_free_cnt;  /**< num of desc available */
+
+	uint16_t cached_flags; /**< cached flags for descs */
+	uint8_t used_wrap_counter;
+	uint8_t rsv;
+	uint16_t vq_free_thresh; /**< free threshold */
+	uint16_t next_qidx;
+
+	void *notify_addr;
 
 	union {
 		struct zxdh_virtnet_rx rxq;
 		struct zxdh_virtnet_tx txq;
 	};
 
-	/*
-	 * physical address of vring, or virtual address
-	 */
-	rte_iova_t vq_ring_mem;
+	uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+	uint16_t event_flags_shadow;
+	uint32_t vq_ring_size;
 
-	/*
+	/**
 	 * Head of the free chain in the descriptor table. If
 	 * there are no free descriptors, this will be set to
 	 * VQ_RING_DESC_CHAIN_END.
-	 */
+	 **/
 	uint16_t  vq_desc_head_idx;
 	uint16_t  vq_desc_tail_idx;
-	uint16_t  vq_queue_index;   /* < PCI queue index */
-	uint16_t  offset; /* < relative offset to obtain addr in mbuf */
-	uint16_t *notify_addr;
-	struct rte_mbuf **sw_ring;  /* < RX software ring. */
+	uint32_t rsv_8B;
+
+	void *vq_ring_virt_mem;  /**< linear address of vring*/
+	/* physical address of vring, or virtual address for virtio_user. */
+	rte_iova_t vq_ring_mem;
+
 	struct zxdh_vq_desc_extra vq_descx[];
 };
 
@@ -296,10 +305,9 @@ static inline void
 zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
 		unsigned long align, uint32_t num)
 {
-	vr->num    = num;
 	vr->desc   = (struct zxdh_vring_packed_desc *)p;
 	vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
-				 vr->num * sizeof(struct zxdh_vring_packed_desc));
+				 num * sizeof(struct zxdh_vring_packed_desc));
 	vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
 				 sizeof(struct zxdh_vring_packed_desc_event)), align);
 }
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t
 static inline void
 zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
-		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
 	}
 }
 
 static inline void
 zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
-		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
+	if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+		vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
+		vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow;
 	}
 }
 
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
-	if (weak_barriers)
-		rte_atomic_thread_fence(rte_memory_order_seq_cst);
-	else
-		rte_mb();
-}
-
 static inline
 int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
 {
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue
 	rte_io_rmb();
 	used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
 	avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-	return avail == used && used == vq->vq_packed.used_wrap_counter;
+	return avail == used && used == vq->used_wrap_counter;
 }
 
 static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags)
 	dp->flags = flags;
 }
 
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
-	uint16_t flags;
-	uint16_t used, avail;
-
-	flags = desc->flags;
-	rte_io_rmb();
-	used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
-	avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-	return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
 static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
 {
-	ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+	/* Bit[0:15]: vq queue index
+	 * Bit[16:30]: avail index
+	 * Bit[31]: avail wrap counter
+	 */
+	uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+		ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+		((uint32_t)vq->vq_avail_idx << 16) |
+		vq->vq_queue_index;
+	rte_write32(notify_data, vq->notify_addr);
 }
 
 static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
 {
 	uint16_t flags = 0;
 
-	zxdh_mb(1);
+	rte_mb();
 	flags = vq->vq_packed.ring.device->desc_event_flags;
 
 	return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
 	/* desc_is_used has a load-acquire or rte_io_rmb inside
 	 * and wait for used desc in virtqueue.
 	 */
-	while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
 		id = desc[used_idx].id;
 		do {
 			curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num)
 			num -= dxp->ndescs;
 			if (used_idx >= size) {
 				used_idx -= size;
-				vq->vq_packed.used_wrap_counter ^= 1;
+				vq->used_wrap_counter ^= 1;
 			}
 			if (dxp->cookie != NULL) {
 				rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	struct zxdh_virtqueue *vq = txvq->vq;
 	uint16_t id = vq->vq_avail_idx;
 	struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
-	uint16_t flags = vq->vq_packed.cached_flags;
+	uint16_t flags = vq->cached_flags;
 	struct zxdh_net_hdr_dl *hdr = NULL;
 	uint8_t hdr_len = vq->hw->dl_net_hdr_len;
 	struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
 	dp->id   = id;
 	if (++vq->vq_avail_idx >= vq->vq_nentries) {
 		vq->vq_avail_idx -= vq->vq_nentries;
-		vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 	vq->vq_free_cnt--;
 	zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 
 	dxp->ndescs = needed;
 	dxp->cookie = cookie;
-	head_flags |= vq->vq_packed.cached_flags;
+	head_flags |= vq->cached_flags;
 
 	start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
 	start_dp[idx].len  = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 	idx++;
 	if (idx >= vq->vq_nentries) {
 		idx -= vq->vq_nentries;
-		vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+		vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 	}
 
 	zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 		if (likely(idx != head_idx)) {
 			uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0;
 
-			flags |= vq->vq_packed.cached_flags;
+			flags |= vq->cached_flags;
 			start_dp[idx].flags = flags;
 		}
 
 		idx++;
 		if (idx >= vq->vq_nentries) {
 			idx -= vq->vq_nentries;
-			vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+			vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
 		}
 	} while ((cookie = cookie->next) != NULL);
 
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
 			free_cnt += dxp->ndescs;
 			if (used_idx >= size) {
 				used_idx -= size;
-				vq->vq_packed.used_wrap_counter ^= 1;
+				vq->used_wrap_counter ^= 1;
 			}
 			if (dxp->cookie != NULL) {
 				rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 		 * desc_is_used has a load-acquire or rte_io_rmb inside
 		 * and wait for used desc in virtqueue.
 		 */
-		if (!zxdh_desc_used(&desc[used_idx], vq))
+		if (!desc_is_used(&desc[used_idx], vq))
 			return i;
 		len[i] = desc[used_idx].len;
 		id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
 		vq->vq_used_cons_idx++;
 		if (vq->vq_used_cons_idx >= vq->vq_nentries) {
 			vq->vq_used_cons_idx -= vq->vq_nentries;
-			vq->vq_packed.used_wrap_counter ^= 1;
+			vq->used_wrap_counter ^= 1;
 		}
 	}
 	return i;
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 38856 bytes --]

^ permalink raw reply related

* [PATCH v7 1/4] net/zxdh: fix queue enable intr issues
From: Junlong Wang @ 2026-06-23  6:09 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang, stable
In-Reply-To: <20260623060909.97023-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 1196 bytes --]

Fix incorrect condition check in zxdh_queue_enable_intr.
Change "==" to "!=", consistent with zxdh_queue_disable_intr logic,
to properly enable interrupts when event_flags_shadow is not
already set to ENABLE state.

Fixes: 7677f3871ef3 ("net/zxdh: setup Rx/Tx queues and interrupt")
Cc: stable@dpdk.org

Signed-off-by: Junlong Wang <wang.junlong1@zte.com.cn>
---
 drivers/net/zxdh/zxdh_queue.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..711ea291d0 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -340,8 +340,8 @@ zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
 static inline void
 zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
 {
-	if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
-		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+	if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+		vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
 		vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow;
 	}
 }
-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 2002 bytes --]

^ permalink raw reply related

* [PATCH v7 0/4] net/zxdh: optimize Rx/Tx path performance
From: Junlong Wang @ 2026-06-23  6:09 UTC (permalink / raw)
  To: stephen; +Cc: dev, Junlong Wang
In-Reply-To: <20260617082828.1058127-1-wang.junlong1@zte.com.cn>


[-- Attachment #1.1.1: Type: text/plain, Size: 3958 bytes --]

v7:
  - Add a new xmit prepare func for xmit_pkts_simple, which will checked the size of
    ZXDH_DL_NET_HDR_SIZE and RTE_PKTMBUF_HEADROOM.

v6:
  - Remove unnecessary error checking code in submit_to_backend_simple() and
    pkt_padding(). Since as the max dl_net_hdr_len is always less than
    RTE_PKTMBUF_HEADROOM, rte_pktmbuf_prepend() cannot fail in the
    simple path (single-segment mbufs).
v5:
  - Reorganize patch series, placing interrupt fix as the first patch
    and fix condition check to properly enable interrupts.
  - Fix zxdh_recv_single_pkts() not compacting rcv_pkts[] on failure,
    which could cause use-after-free and mbuf leak.
  - Fix tx_bunch() and tx1() missing store barrier before setting AVAIL flag,
    preventing data race on weakly-ordered architectures.
  - Fix submit_to_backend_simple() writing descriptors for packets that
    failed pkt_padding(), causing mbuf leak.
v4:
  - fix some AI review issues.
  - fix queue enable intr bug.
v3:
  - remove unnecessary NULL check in zxdh_init_queue.
  - Split Ring: Bit[31] is unused and reserved, zxdh_queue_notify(): removing the
    zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) check;
  - remove unnecessary double-free in in zxdh_recv_single_pkts();
  - used rte_pktmbuf_mtod();
  - remove rxq_get_vq(q) macro, use q->vq and apply it consistently;
  - Refactoring scatter and mtu check logic in zxdh_dev_mtu_set();
  - set txdp->id = avail_idx + i in tx_bunch/tx1.
  - add comment documenting zxdh_xmit_enqueue_append() now sets dxp->cookie = NULL for
    the head slot and stores cookies per descriptor via dep[idx].cookie.
  - add one-line comment noting tx_bunch() is the simple path handles single-segment.
  - remove unnecessary Extra initialization and the uint32_t cast.
v2:
  - zxdh_rxtx.c, pkt_padding(): modifyed the return value of pkt_padding();
  - zxdh_rxtx.c, zxdh_recv_single_pkts(): modifyed When zxdh_init_mbuf() fails
    the loop does "continue" and free mbufs;
  - zxdh_rxtx.c, refill_desc_unwrap(): Add rte_io_wmb() before writing flags
    in the refill_que_descs();
  - zxdh_queue.h, zxdh_queue_enable_intr(): Remove unnecessary function of zxdh_queue_enable_intr;
  - zxdh_ethdev.c, zxdh_init_queue(): changed the hdr_mz NULL check logic;
  - zxdh_rxtx.c, zxdh_xmit_pkts_simple()、zxdh_recv_single_pkts(): add stats.bytes count;
  - zxdh_rxtx.c, zxdh_init_mbuf():remove  rte_pktmbuf_dump(stdout, rxm, 40);
  - zxdh_ethdev.c, zxdh_dev_free_mbufs(): using rte_pktmbuf_free() to free mbufs;
  - Splitting into separate patches, structure reorganization and sw_ring removal、
    RX recv optimize、Tx xmit optimize、Tx;
v1:
  This patch optimizes the ZXDH PMD's receive and transmit path for better
  performance through several improvements:
- Add simple TX/RX burst functions (zxdh_xmit_pkts_simple and
  zxdh_recv_single_pkts) for single-segment packet scenarios.
- Remove RX software ring (sw_ring) to reduce memory allocation and
  copy.
- Optimize descriptor management with prefetching and simplified
  cleanup.
- Reorganize structure fields for better cache locality.
  These changes reduce CPU cycles and memory bandwidth consumption,
  resulting in improved packet processing throughput.

Junlong Wang (4):
  net/zxdh: fix queue enable intr issues
  net/zxdh: optimize queue structure to improve performance
  net/zxdh: optimize Rx recv pkts performance
  net/zxdh: optimize Tx xmit pkts performance

 drivers/net/zxdh/zxdh_ethdev.c     |  83 +++--
 drivers/net/zxdh/zxdh_ethdev_ops.c |  23 +-
 drivers/net/zxdh/zxdh_ethdev_ops.h |   4 +
 drivers/net/zxdh/zxdh_pci.c        |   2 +-
 drivers/net/zxdh/zxdh_queue.c      |  11 +-
 drivers/net/zxdh/zxdh_queue.h      | 122 +++---
 drivers/net/zxdh/zxdh_rxtx.c       | 571 ++++++++++++++++++++++-------
 drivers/net/zxdh/zxdh_rxtx.h       |  29 +-
 8 files changed, 584 insertions(+), 261 deletions(-)

-- 
2.27.0

[-- Attachment #1.1.2: Type: text/html , Size: 7131 bytes --]

^ permalink raw reply

* RE: [PATCH v2 0/9] ENETC driver related changes series
From: Gagandeep Singh @ 2026-06-23  6:02 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev@dpdk.org, Hemant Agrawal
In-Reply-To: <20260622080615.6f397799@phoenix.local>

Hi,

> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, June 22, 2026 8:36 PM
> To: Gagandeep Singh <G.Singh@nxp.com>
> Cc: dev@dpdk.org; Hemant Agrawal <hemant.agrawal@nxp.com>
> Subject: Re: [PATCH v2 0/9] ENETC driver related changes series
> 
> On Mon, 22 Jun 2026 17:05:08 +0530
> Gagandeep Singh <g.singh@nxp.com> wrote:
> 
> > V2 changes:
> >   - Fixed an un-used variable compilation issue reported on fedora:43-gcc-
> minsize
> >   - Fixed various AI reported issues:
> > 	- Release notes updated for all new devargs
> > 	- enect4.ini features doc updated for scattered RX.
> > 	- removed Not required RTE_PTYPE_UNKNOWN.
> > 	- Fixed mid-frame mbuf leak in SG case.
> > 	- Enabled SG for enetc4 PF also.
> > 	- move to calloc from rte_zmalloc in parse_txq_prior().
> > 	- added vaidation checks on strdup, strtoul.
> > 	- added NC devargs to use cacheable ops conditionally.
> > 	- removed dead code like bd_base_p etc.
> > 	- Fixed rte_cpu_to_le_16() conversion on flags and combined
> > 	  all flags related patches in one patch.
> > 	- Fixed memory leak issue due to TXQ priority patch.
> >    - There were some false positives, I have ignored them:
> > 	Race condition on flags field:
> > 		clean_tx_ring only touches HW-completed BDs
> (next_to_clean→hwci),
> > 		never newly-submitted BDs; doorbell hasn't fired yet.
> > 	Missing dcbf in clean_tx_ring:
> > 		DPDK is single-threaded per queue; TX path always overwrites
> > 		flags completely before dcbf.
> > 	TX dcbf granularity with wrap:
> > 		Safe (AI admits it).
> > 	RX refill flush at wrap:
> > 		In-loop dcbf at i & mask == 0 already flushes aligned groups;
> > 		trailing flush only needed for partial groups.
> > 	RX reading before invalidate:
> > 		dccivac precedes the read for every group in the loop
> >
> > Gagandeep Singh (7):
> >   net/enetc: fix TX BD structure
> >   net/enetc: fix queue initialization
> >   net/enetc: support ESP packet type in packet parsing
> >   net/enetc: update random MAC generation code
> >   net/enetc: add option to disable VSI messaging
> >   net/enetc: add devargs to control VSI-PSI timeout and delay
> >   net/enetc4: add cacheable BD ring support with SW cache maintenance
> >
> > Vanshika Shukla (2):
> >   net/enetc: support scatter-gather
> >   net/enetc: set user configurable priority to TX rings
> >
> >  doc/guides/nics/features/enetc4.ini    |   1 +
> >  doc/guides/rel_notes/release_26_07.rst |  10 +
> >  drivers/net/enetc/base/enetc_hw.h      |  13 +-
> >  drivers/net/enetc/enetc.h              |  31 +-
> >  drivers/net/enetc/enetc4_ethdev.c      | 172 ++++++++--
> >  drivers/net/enetc/enetc4_vf.c          | 204 ++++++++++--
> >  drivers/net/enetc/enetc_ethdev.c       |  25 +-
> >  drivers/net/enetc/enetc_rxtx.c         | 430 ++++++++++++++++++++++---
> >  8 files changed, 768 insertions(+), 118 deletions(-)
> >
> 
> Better but still had some AI feedback if I asked it for more complete review.
> Agree that putting new devargs in doc is needed.
> 
> Error
> =====
> 
> [PATCH v2 7/9] net/enetc: add devargs to control VSI-PSI timeout and delay
> 
> drivers/net/enetc/enetc4_vf.c, enetc4_vf_dev_init()
> 
>   kvlist is leaked on the two invalid-value error paths. It is
>   allocated by rte_kvargs_parse() (line 1347) and only freed at
>   line 1385, but both
> 
>       return -1;   /* invalid VSI Timeout, line 1367 */
>       return -1;   /* invalid VSI Delay,   line 1380 */
> 
>   return before that free. A malformed enetc4_vsi_timeout= or
>   enetc4_vsi_delay= leaks the kvargs structure on every probe.
> 
>   Free before returning, e.g.:
> 
>       if (errno != 0 || hw->vsi_timeout == 0) {
>               ENETC_PMD_ERR("Invalid VSI Timeout value = %u",
>                               hw->vsi_timeout);
>               rte_kvargs_free(kvlist);
>               return -1;
>       }
> 
>   (same for the delay path), or restructure with a goto.
> 
> 
Fixed in v3.

> Warning
> =======
> 
> Series (patches 6-9)
> 
>   The new runtime devargs - enetc4_vsi_disable, enetc4_vsi_timeout,
>   enetc4_vsi_delay, enetc4_txq_prior, and nc - are registered via
>   RTE_PMD_REGISTER_PARAM_STRING and noted in the release notes, but
>   doc/guides/nics/enetc4.rst has no Runtime Configuration section
>   describing them. Convention is to document devargs in the NIC guide
>   so users can find the syntax (e.g. the nc=1 / 'a|b|c' priority list
>   formats are non-obvious).
> 
Documentation is done in v3

> Info
> ====
> 
> [PATCH v2 5/9] and [PATCH v2 9/9] - RX multi-segment reassembly
> 
>   In enetc_clean_rx_ring_nc() and enetc_clean_rx_ring_cacheable(),
>   on the frame-last BD:
> 
>       first_seg->pkt_len -= rx_ring->crc_len;
> 
>   reduces pkt_len but leaves the final segment's data_len unchanged,
>   so pkt_len != sum(data_len) when crc_len is non-zero. The old
>   single-segment path kept them equal (pkt_len = data_len = buf_len
>   - crc_len).
> 
>   This is currently unreachable: enetc4 does not advertise
>   RTE_ETH_RX_OFFLOAD_KEEP_CRC, so crc_len is always 0 and the
>   subtraction is a no-op. Flagging only so the asymmetry is on record
>   if KEEP_CRC is ever added - at that point the last segment's
>   data_len would need the same adjustment (and the CRC may straddle
>   the last two segments).
I have noted it down, will submit the changes as a separate patch when
We will support KEEP_CRC in enetc4. 


^ permalink raw reply

* [PATCH v3 9/9] net/enetc4: add cacheable BD ring support with SW cache maintenance
From: Gagandeep Singh @ 2026-06-23  6:00 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

On non-cache-coherent platforms such as i.MX95, the BD ring memory
may be mapped as cacheable (normal memory) while the ENETC hardware
DMA engine writes and reads descriptors without CPU cache snooping.
SW must therefore perform explicit cache maintenance to keep CPU
caches and DDR coherent.

TX path (enetc_xmit_pkts_cacheable):
  - Flush each segment's payload cache lines to PoC (dcbf) before
    the BD is handed to HW, so HW DMA reads the correct data.
  - After all BDs for a burst are written, flush the BD cache lines
    (dcbf, one per 64-byte group of 4 BDs) so HW can read the
    updated descriptors.

RX refill (enetc_refill_rx_ring):
  - After writing each full 4-BD cache-line group, dcbf that group
    so HW sees the buffer addresses and cleared lstatus fields.
  - Flush any partial trailing group before updating the ring tail.

RX receive (enetc_recv_pkts_cacheable via enetc_clean_rx_ring_cacheable):
  - Before reading BD status, dccivac the current BD cache line so
    stale CPU-cached BD data is discarded and fresh HW-written
    content is fetched from DDR.
  - After a BD is consumed, dccivac each payload cache line so the
    CPU reads the DMA'd packet data, not stale cached bytes.

Add a new devarg 'nc=1' that allows selecting the non-cacheable
RX/TX ops.
When nc=1 is specified:
  eth_dev->rx_pkt_burst = &enetc_recv_pkts_nc;
  eth_dev->tx_pkt_burst = &enetc_xmit_pkts_nc;

The default (nc not set or nc=0) keeps the cacheable path with
SW cache maintenance (dccivac/dcbf):
  eth_dev->rx_pkt_burst = &enetc_recv_pkts_cacheable;
  eth_dev->tx_pkt_burst = &enetc_xmit_pkts_cacheable;

Usage:
  --allow 0000:00:01.0,nc=1

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 doc/guides/nics/enetc4.rst             |  10 +
 doc/guides/rel_notes/release_26_07.rst |   1 +
 drivers/net/enetc/enetc.h              |  21 ++
 drivers/net/enetc/enetc4_ethdev.c      |  71 +++++--
 drivers/net/enetc/enetc4_vf.c          |  35 +++-
 drivers/net/enetc/enetc_rxtx.c         | 278 ++++++++++++++++++++++++-
 6 files changed, 398 insertions(+), 18 deletions(-)

diff --git a/doc/guides/nics/enetc4.rst b/doc/guides/nics/enetc4.rst
index 844b290..16b1ba6 100644
--- a/doc/guides/nics/enetc4.rst
+++ b/doc/guides/nics/enetc4.rst
@@ -140,3 +140,13 @@ PF/Common devargs
   Usage example::
 
     dpdk-testpmd -a 0000:00:00.0,enetc4_txq_prior=1|2|3 -- -i
+
+``nc``
+  Select non-cacheable RX/TX ops (BD rings mapped as non-cacheable memory).
+  Set to ``1`` to use non-cacheable descriptor ring operations.
+  By default, cacheable BD rings with software cache maintenance are used.
+  Applies to both PF and VF.
+
+  Usage example::
+
+    dpdk-testpmd -a 0000:00:00.0,nc=1 -- -i
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 495eba0..b208229 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -196,6 +196,7 @@ New Features
   * Added devargs options ``enetc4_vsi_timeout`` and ``enetc4_vsi_delay``
     for VSI-PSI messaging timeout and delay.
   * Added devargs option ``enetc4_txq_prior`` to set TX queues priorities.
+  * Added devargs option ``nc`` to select non-cacheable RX/TX ops.
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index c12597b..d3a8b8e 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -115,6 +115,7 @@ struct enetc_eth_hw {
 	uint32_t vsi_timeout; /* VSI-PSI message wait timeout (iterations) */
 	uint32_t vsi_delay;   /* VSI-PSI message wait delay (us) */
 	uint32_t *txq_prior;  /* per-queue TX priority (TBMR priority bits) */
+	uint8_t nc_mode;      /* 1 = non-cacheable BD memory, use _nc ops */
 };
 
 /*
@@ -315,8 +316,28 @@ uint16_t enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 uint16_t enetc_recv_pkts_nc(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
+uint16_t enetc_xmit_pkts_cacheable(void *txq, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
+uint16_t enetc_recv_pkts_cacheable(void *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts);
 
 int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt);
+
+/*
+ * Cache-maintenance constants for cacheable BD ring mode.
+ *
+ * BD = 16 bytes, cache line = 64 bytes => 4 BDs per cache line.
+ * Every dcbf in enetc_refill_rx_ring() flushes a full 64-byte cache line.
+ * To ensure each dcbf covers only fully-written BDs the caller
+ * must pass a count rounded DOWN to a multiple of ENETC_BD_PER_CL so that
+ * the last partial group is left in cache to be completed and flushed in
+ * the next call.
+ */
+#define ENETC_BD_PER_CL		(RTE_CACHE_LINE_SIZE / sizeof(union enetc_rx_bd))
+#define ENETC_BD_PER_CL_MASK	(ENETC_BD_PER_CL - 1)
+/* Round n DOWN to the nearest multiple of ENETC_BD_PER_CL. */
+#define ENETC_BD_ALIGN_DOWN(n)	((n) & ~(unsigned int)ENETC_BD_PER_CL_MASK)
+
 void enetc4_dev_hw_init(struct rte_eth_dev *eth_dev);
 void enetc_print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr);
 
diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index 7e2d665..2ddd63d 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -12,6 +12,7 @@
 #include "enetc.h"
 
 #define ENETC4_TXQ_PRIORITIES	"enetc4_txq_prior"
+#define ENETC4_NC_MEMORY	"nc"
 
 static int
 parse_txq_prior(const char *key __rte_unused, const char *value, void *opaque)
@@ -42,6 +43,19 @@ parse_txq_prior(const char *key __rte_unused, const char *value, void *opaque)
 	return 0;
 }
 
+static int
+parse_nc(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	struct rte_eth_dev *dev = extra_args;
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	if (value && atoi(value) == 1)
+		hw->nc_mode = 1;
+
+	return 0;
+}
+
 static int
 enetc4_get_devargs(struct rte_eth_dev *dev, const char *key)
 {
@@ -67,6 +81,13 @@ enetc4_get_devargs(struct rte_eth_dev *dev, const char *key)
 			return 0;
 		}
 	}
+	if (!strcmp(key, ENETC4_NC_MEMORY)) {
+		if (rte_kvargs_process(kvlist, key,
+				       parse_nc, (void *)dev) < 0) {
+			rte_kvargs_free(kvlist);
+			return 0;
+		}
+	}
 
 	rte_kvargs_free(kvlist);
 	return 0;
@@ -289,12 +310,14 @@ enetc4_alloc_txbdr(struct enetc_bdr *txr, uint16_t nb_desc)
 	int size;
 
 	size = nb_desc * sizeof(struct enetc_swbd);
-	txr->q_swbd = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Zero q_swbd so buffer_addr is NULL for all uninitialized slots. */
+	txr->q_swbd = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (txr->q_swbd == NULL)
 		return -ENOMEM;
 
-	size = nb_desc * sizeof(struct enetc_bdr);
-	txr->bd_base = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Allocate the TX BD ring: each BD is struct enetc_tx_bd (16 bytes). */
+	size = nb_desc * sizeof(struct enetc_tx_bd);
+	txr->bd_base = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (txr->bd_base == NULL) {
 		rte_free(txr->q_swbd);
 		txr->q_swbd = NULL;
@@ -449,12 +472,14 @@ enetc4_alloc_rxbdr(struct enetc_bdr *rxr, uint16_t nb_desc)
 	int size;
 
 	size = nb_desc * sizeof(struct enetc_swbd);
-	rxr->q_swbd = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Zero q_swbd so buffer_addr is NULL for all uninitialized slots. */
+	rxr->q_swbd = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (rxr->q_swbd == NULL)
 		return -ENOMEM;
 
-	size = nb_desc * sizeof(struct enetc_bdr);
-	rxr->bd_base = rte_malloc(NULL, size, ENETC_BD_RING_ALIGN);
+	/* Allocate the RX BD ring: each BD is union enetc_rx_bd (16 bytes). */
+	size = nb_desc * sizeof(union enetc_rx_bd);
+	rxr->bd_base = rte_zmalloc(NULL, size, ENETC_BD_RING_ALIGN);
 	if (rxr->bd_base == NULL) {
 		rte_free(rxr->q_swbd);
 		rxr->q_swbd = NULL;
@@ -489,7 +514,7 @@ enetc4_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
 	rx_ring->mb_pool = mb_pool;
 	rx_ring->rcir = (void *)((size_t)hw->reg +
 			ENETC_BDR(RX, idx, ENETC_RBCIR));
-	enetc_refill_rx_ring(rx_ring, (enetc_bd_unused(rx_ring)));
+	enetc_refill_rx_ring(rx_ring, ENETC_BD_ALIGN_DOWN(enetc_bd_unused(rx_ring)));
 	buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rx_ring->mb_pool) -
 		   RTE_PKTMBUF_HEADROOM);
 	enetc4_rxbdr_wr(hw, idx, ENETC_RBBSR, buf_size);
@@ -751,12 +776,17 @@ enetc4_dev_configure(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
-	max_len = dev->data->dev_conf.rxmode.mtu + RTE_ETHER_HDR_LEN +
-		  RTE_ETHER_CRC_LEN;
-	enetc4_port_wr(enetc_hw, ENETC4_PM_MAXFRM(0), ENETC_SET_MAXFRM(max_len));
+	/* Port-level register writes are PF-only; skip for VF devices */
+	if (hw->device_id != ENETC4_DEV_ID_VF) {
+		max_len = dev->data->dev_conf.rxmode.mtu + RTE_ETHER_HDR_LEN +
+			  RTE_ETHER_CRC_LEN;
+		enetc4_port_wr(enetc_hw, ENETC4_PM_MAXFRM(0),
+			       ENETC_SET_MAXFRM(max_len));
 
-	val = ENETC4_MAC_MAXFRM_SIZE | SDU_TYPE_MPDU;
-	enetc4_port_wr(enetc_hw, ENETC4_PTCTMSDUR(0), val | SDU_TYPE_MPDU);
+		val = ENETC4_MAC_MAXFRM_SIZE | SDU_TYPE_MPDU;
+		enetc4_port_wr(enetc_hw, ENETC4_PTCTMSDUR(0),
+			       val | SDU_TYPE_MPDU);
+	}
 
 	/* Rx offloads which are enabled by default */
 	if (dev_rx_offloads_sup & ~rx_offloads) {
@@ -778,7 +808,8 @@ enetc4_dev_configure(struct rte_eth_dev *dev)
 	if (rx_offloads & (RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))
 		checksum &= ~L4_CKSUM;
 
-	enetc4_port_wr(enetc_hw, ENETC4_PARCSCR, checksum);
+	if (hw->device_id != ENETC4_DEV_ID_VF)
+		enetc4_port_wr(enetc_hw, ENETC4_PARCSCR, checksum);
 
 	/* Enable interrupts */
 	if (hw->device_id == ENETC4_DEV_ID_VF) {
@@ -1041,8 +1072,8 @@ enetc4_dev_hw_init(struct rte_eth_dev *eth_dev)
 		ENETC_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 	struct rte_pci_device *pci_dev = RTE_CLASS_TO_BUS_DEVICE(eth_dev, *pci_dev);
 
-	eth_dev->rx_pkt_burst = &enetc_recv_pkts_nc;
-	eth_dev->tx_pkt_burst = &enetc_xmit_pkts_nc;
+	eth_dev->rx_pkt_burst = &enetc_recv_pkts_cacheable;
+	eth_dev->tx_pkt_burst = &enetc_xmit_pkts_cacheable;
 
 	/* Retrieving and storing the HW base address of device */
 	hw->hw.reg = (void *)pci_dev->mem_resource[0].addr;
@@ -1082,7 +1113,14 @@ enetc4_dev_init(struct rte_eth_dev *eth_dev)
 	hw->max_tx_queues = si_cap & ENETC_SICAPR0_BDR_MASK;
 	hw->max_rx_queues = (si_cap >> 16) & ENETC_SICAPR0_BDR_MASK;
 
+	hw->nc_mode = 0;
 	enetc4_get_devargs(eth_dev, ENETC4_TXQ_PRIORITIES);
+	enetc4_get_devargs(eth_dev, ENETC4_NC_MEMORY);
+	if (hw->nc_mode) {
+		eth_dev->rx_pkt_burst = &enetc_recv_pkts_nc;
+		eth_dev->tx_pkt_burst = &enetc_xmit_pkts_nc;
+		ENETC_PMD_LOG(INFO, "nc=1: using non-cacheable BD ops (_nc)");
+	}
 
 	ENETC_PMD_DEBUG("Max RX queues = %d Max TX queues = %d",
 			hw->max_rx_queues, hw->max_tx_queues);
@@ -1149,5 +1187,6 @@ RTE_PMD_REGISTER_PCI(net_enetc4, rte_enetc4_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4, pci_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4, "* vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(net_enetc4,
-			      ENETC4_TXQ_PRIORITIES "=<string>");
+			      ENETC4_TXQ_PRIORITIES "=<string> "
+			      ENETC4_NC_MEMORY "=<int>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_logtype_pmd, NOTICE);
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index 62206d7..ef5f1e6 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -12,6 +12,30 @@
 #define ENETC4_VSI_DISABLE		"enetc4_vsi_disable"
 #define ENETC4_VSI_TIMEOUT		"enetc4_vsi_timeout"
 #define ENETC4_VSI_DELAY		"enetc4_vsi_delay"
+#define ENETC4_NC_MEMORY		"nc"
+
+static void
+enetc4_vf_get_devarg_nc(struct rte_eth_dev *dev)
+{
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_devargs *devargs = dev->device->devargs;
+	struct rte_kvargs *kvlist;
+	const char *val;
+
+	if (!devargs)
+		return;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return;
+
+	val = rte_kvargs_get(kvlist, ENETC4_NC_MEMORY);
+	if (val && atoi(val) == 1)
+		hw->nc_mode = 1;
+
+	rte_kvargs_free(kvlist);
+}
 
 #define ENETC_CRC_TABLE_SIZE		256
 #define ENETC_POLY			0x1021
@@ -1370,6 +1394,14 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 
 	enetc4_dev_hw_init(eth_dev);
 
+	hw->nc_mode = 0;
+	enetc4_vf_get_devarg_nc(eth_dev);
+	if (hw->nc_mode) {
+		eth_dev->rx_pkt_burst = &enetc_recv_pkts_nc;
+		eth_dev->tx_pkt_burst = &enetc_xmit_pkts_nc;
+		ENETC_PMD_LOG(INFO, "nc=1: using non-cacheable BD ops (_nc)");
+	}
+
 	si_cap = enetc_rd(enetc_hw, ENETC_SICAPR0);
 	hw->max_tx_queues = si_cap & ENETC_SICAPR0_BDR_MASK;
 	hw->max_rx_queues = (si_cap >> 16) & ENETC_SICAPR0_BDR_MASK;
@@ -1477,5 +1509,6 @@ RTE_PMD_REGISTER_KMOD_DEP(net_enetc4_vf, "* igb_uio | uio_pci_generic");
 RTE_PMD_REGISTER_PARAM_STRING(net_enetc4_vf,
 			      ENETC4_VSI_DISABLE "=<any> "
 			      ENETC4_VSI_TIMEOUT "=<uint> "
-			      ENETC4_VSI_DELAY "=<uint>");
+			      ENETC4_VSI_DELAY "=<uint> "
+			      ENETC4_NC_MEMORY "=<int>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_vf_logtype_pmd, NOTICE);
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index e4f5608..2cd74f5 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -26,6 +26,7 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
 	struct enetc_swbd *tx_swbd, *tx_swbd_base;
 	int i, hwci, bd_count;
 	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
+	struct enetc_tx_bd *txbd;
 
 	/* we don't need barriers here, we just want a relatively current value
 	 * from HW.
@@ -51,6 +52,13 @@ enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
 		/* It seems calling rte_pktmbuf_free is wasting a lot of cycles,
 		 * make a list and call _free when it's done.
 		 */
+		/* Clear flags on the reclaimed BD so that dcbf in the
+		 * cacheable TX path never flushes a stale flags_F to memory
+		 * before the new BD fields are fully written.
+		 */
+		txbd = ENETC_TXBD(*tx_ring, i);
+		txbd->flags = 0;
+
 		if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
 			rte_pktmbuf_free_bulk(m, tx_frm_cnt);
 			tx_frm_cnt = 0;
@@ -202,7 +210,8 @@ enetc_xmit_pkts_nc(void *tx_queue,
 		}
 
 		/* Set the frame-last flag on the final BD of this packet. */
-		txbd->flags |= ENETC4_TXBD_FLAGS_F;
+		if (likely(txbd))
+			txbd->flags |= ENETC4_TXBD_FLAGS_F;
 		start++;
 	}
 
@@ -217,6 +226,7 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 {
 	struct enetc_swbd *rx_swbd;
 	union enetc_rx_bd *rxbd;
+	union enetc_rx_bd *grp_start_rxbd;
 	int i, j, k = ENETC_RXBD_BUNDLE;
 	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
 	struct rte_mempool *mb_pool;
@@ -225,6 +235,7 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 	mb_pool = rx_ring->mb_pool;
 	rx_swbd = &rx_ring->q_swbd[i];
 	rxbd = ENETC_RXBD(*rx_ring, i);
+	grp_start_rxbd = rxbd;
 	for (j = 0; j < buff_cnt; j++) {
 		/* bulk alloc for the next up to 8 BDs */
 		if (k == ENETC_RXBD_BUNDLE) {
@@ -246,12 +257,29 @@ enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 		i++;
 		k++;
 		if (unlikely(i == rx_ring->bd_count)) {
+			/*
+			 * Ring wrap: flush the current partial or full group
+			 * before resetting the pointer to index 0.
+			 */
+			dcbf((void *)grp_start_rxbd);
 			i = 0;
 			rxbd = ENETC_RXBD(*rx_ring, i);
 			rx_swbd = &rx_ring->q_swbd[i];
+			grp_start_rxbd = rxbd;
+		} else if ((i & ENETC_BD_PER_CL_MASK) == 0) {
+			/*
+			 * Completed a full 4-BD group (one cache line).
+			 * Flush it to PoC so HW sees the updated descriptors.
+			 */
+			dcbf((void *)grp_start_rxbd);
+			grp_start_rxbd = rxbd;
 		}
 	}
 
+	/* Flush any remaining partial group at the end of the fill. */
+	if (j && (i & ENETC_BD_PER_CL_MASK) != 0)
+		dcbf((void *)grp_start_rxbd);
+
 	if (likely(j)) {
 		rx_ring->next_to_alloc = i;
 		rx_ring->next_to_use = i;
@@ -604,3 +632,251 @@ enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 
 	return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
 }
+
+/* --- Cacheable BD ring TX path with SW cache maintenance (dcbf) --- */
+
+uint16_t
+enetc_xmit_pkts_cacheable(void *tx_queue,
+		struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	int i, start, bds_to_use;
+	struct enetc_tx_bd *txbd;
+	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
+	unsigned int j;
+	uint8_t *data;
+	struct rte_mbuf *seg;
+	uint16_t seg_len, segs_per_pkt;
+	bool is_first_seg;
+	int first_bd_idx, bd_count;
+
+	i = tx_ring->next_to_use;
+	bds_to_use = enetc_bd_unused(tx_ring);
+	bd_count = tx_ring->bd_count;
+	start = 0;
+
+	/*
+	 * Remember the first BD index of this batch so we can flush the
+	 * BD cache lines to PoC after all descriptors are written.
+	 */
+	first_bd_idx = i;
+
+	while (start < nb_pkts) {
+		seg = tx_pkts[start];
+		segs_per_pkt = seg->nb_segs;
+
+		if (bds_to_use < segs_per_pkt)
+			break;
+
+		is_first_seg = true;
+		while (seg) {
+			tx_ring->q_swbd[i].buffer_addr = NULL;
+			seg_len = rte_pktmbuf_data_len(seg);
+			data = rte_pktmbuf_mtod(seg, void *);
+
+			/*
+			 * Flush packet data cache lines to PoC so HW DMA
+			 * reads the correct payload from memory.
+			 */
+			for (j = 0; j < seg_len; j += RTE_CACHE_LINE_SIZE)
+				dcbf(data + j);
+
+			/*
+			 * Cover the last byte of an unaligned buffer to
+			 * ensure the full payload is clean to the Point of
+			 * Coherency.
+			 */
+			dcbf(data + (seg_len - 1));
+			txbd = ENETC_TXBD(*tx_ring, i);
+			txbd->flags = 0;
+			if (is_first_seg) {
+				tx_ring->q_swbd[i].buffer_addr = seg;
+				txbd->frm_len = rte_pktmbuf_pkt_len(seg);
+				if (seg->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
+					enetc4_tx_offload_checksum(seg, txbd);
+				is_first_seg = false;
+			}
+
+			txbd->buf_len = rte_cpu_to_le_16(seg_len);
+			txbd->addr = rte_cpu_to_le_64(rte_mbuf_data_iova(seg));
+			seg = seg->next;
+			i++;
+			bds_to_use--;
+
+			if (unlikely(i == bd_count))
+				i = 0;
+		}
+
+		/*
+		 * Set the frame-last flag on the final BD of this packet.
+		 * This is the last write to the BD group; the cache flush
+		 * below will push all BDs to memory afterwards.
+		 */
+		if (likely(txbd))
+			txbd->flags |= ENETC4_TXBD_FLAGS_F;
+		start++;
+	}
+
+	/*
+	 * Flush TX BDs to PoC so HW (non-cache-coherent i.MX95) can read
+	 * the descriptors from memory.  TX BDs are 16 B each; 4 BDs share
+	 * one 64-byte cache line.  Walk from the cache-line-aligned start
+	 * of first_bd_idx to just past the last written BD, one dcbf per
+	 * cache line.
+	 *
+	 * The flush must happen AFTER all BD fields (including flags_F) are
+	 * written, so HW never sees a partial descriptor.
+	 */
+	if (likely(start > 0)) {
+		int n = first_bd_idx & ~ENETC_BD_PER_CL_MASK;
+		int written = (i - n + bd_count) % bd_count;
+
+		if (written == 0)
+			written = bd_count;
+		written = (written + ENETC_BD_PER_CL_MASK) & ~ENETC_BD_PER_CL_MASK;
+
+		while (written > 0) {
+			dcbf((void *)ENETC_TXBD(*tx_ring, n));
+			n = (n + ENETC_BD_PER_CL) % bd_count;
+			written -= ENETC_BD_PER_CL;
+		}
+	}
+
+	enetc_clean_tx_ring(tx_ring);
+	tx_ring->next_to_use = i;
+	enetc_wr_reg(tx_ring->tcir, i);
+
+	return start;
+}
+
+/* --- Cacheable BD ring RX path with SW cache maintenance (dccivac) --- */
+
+static int
+enetc_clean_rx_ring_cacheable(struct enetc_bdr *rx_ring,
+		struct rte_mbuf **rx_pkts,
+		int work_limit)
+{
+	int rx_frm_cnt = 0;
+	int cleaned_cnt, i;
+	struct enetc_swbd *rx_swbd;
+	union enetc_rx_bd *rxbd;
+	struct rte_mbuf *first_seg, *cur_seg;
+	uint32_t bd_status;
+	uint8_t *data;
+	uint32_t j;
+	struct rte_mbuf *seg;
+	uint16_t data_len;
+
+	i = rx_ring->next_to_clean;
+	rxbd = ENETC_RXBD(*rx_ring, i);
+	cleaned_cnt = enetc_bd_unused(rx_ring);
+	rx_swbd = &rx_ring->q_swbd[i];
+
+	/* Restore partial multi-segment chain from a previous burst. */
+	first_seg = rx_ring->pkt_first_seg;
+	cur_seg = rx_ring->pkt_last_seg;
+
+	/*
+	 * On i.MX95 the BD ring is in cacheable hugepage memory but the
+	 * platform is non-cache-coherent.  HW writes RX BDs to DDR
+	 * without snooping the CPU cache, so stale cached copies of BD
+	 * status fields must be discarded before the CPU reads them.
+	 *
+	 * Ideal instruction: DC IVAC (invalidate only, no writeback).
+	 * ARM64 constraint: DC IVAC requires EL1 privilege; executing it
+	 * from EL0 (DPDK userspace) raises a fault.  The only EL0-safe
+	 * cache maintenance instruction that invalidates is DC CIVAC
+	 * (clean + invalidate, dccivac).
+	 *
+	 * Safety of using dccivac here:
+	 * enetc_refill_rx_ring() issues dcbf() on every BD group before
+	 * returning ownership to HW.  After dcbf the CPU cache lines are
+	 * marked clean (no dirty data).  When dccivac runs, the "clean"
+	 * phase finds nothing dirty to write back, so it behaves as a
+	 * pure invalidate - exactly what we need.
+	 *
+	 * Granularity: BD = 16 B, cache line = 64 B, so one dccivac
+	 * covers exactly 4 BDs.  Invalidate at each 4-BD boundary.
+	 */
+	dccivac((void *)ENETC_RXBD(*rx_ring,
+			(i & ~(int)ENETC_BD_PER_CL_MASK)));
+
+	while (likely(rx_frm_cnt < work_limit)) {
+		bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
+
+		if (!(bd_status & ENETC_RXBD_LSTATUS_R))
+			break;
+		if (rxbd->r.error)
+			rx_ring->ierrors++;
+
+		seg = rx_swbd->buffer_addr;
+		data_len = rte_le_to_cpu_16(rxbd->r.buf_len);
+		seg->data_len = data_len;
+		if (!first_seg) {
+			first_seg = seg;
+			cur_seg = seg;
+			first_seg->pkt_len = data_len;
+			enetc_dev_rx_parse(first_seg,
+					   rxbd->r.parse_summary);
+			first_seg->hash.rss = rxbd->r.rss_hash;
+		} else {
+			first_seg->pkt_len += data_len;
+			first_seg->nb_segs++;
+			cur_seg->next = seg;
+			cur_seg = seg;
+		}
+
+		/*
+		 * Invalidate packet data cache lines so the CPU reads the
+		 * payload that HW DMA'd into memory, not stale cached bytes.
+		 */
+		data = rte_pktmbuf_mtod(seg, void *);
+		for (j = 0; j < data_len; j += RTE_CACHE_LINE_SIZE)
+			dccivac(data + j);
+		/* Cover the last byte of an unaligned buffer. */
+		dccivac(data + (data_len - 1));
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			seg->next = NULL;
+			first_seg->pkt_len -= rx_ring->crc_len;
+			rx_pkts[rx_frm_cnt] = first_seg;
+			rx_frm_cnt++;
+			first_seg = NULL;
+		}
+
+		cleaned_cnt++;
+		rx_swbd++;
+		i++;
+		if (unlikely(i == rx_ring->bd_count)) {
+			i = 0;
+			rx_swbd = &rx_ring->q_swbd[i];
+		}
+		rxbd = ENETC_RXBD(*rx_ring, i);
+
+		/*
+		 * Crossed a 4-BD (cache-line) boundary: invalidate the new
+		 * group so the next four status reads fetch fresh DDR data
+		 * written by HW.
+		 */
+		if ((i & ENETC_BD_PER_CL_MASK) == 0 &&
+		    likely(rx_frm_cnt < work_limit))
+			dccivac((void *)rxbd);
+	}
+
+	/* Save partial chain for the next burst if frame is incomplete. */
+	rx_ring->pkt_first_seg = first_seg;
+	rx_ring->pkt_last_seg = cur_seg;
+	rx_ring->next_to_clean = i;
+	enetc_refill_rx_ring(rx_ring, ENETC_BD_ALIGN_DOWN(cleaned_cnt));
+
+	return rx_frm_cnt;
+}
+
+uint16_t
+enetc_recv_pkts_cacheable(void *rxq, struct rte_mbuf **rx_pkts,
+		uint16_t nb_pkts)
+{
+	struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;
+
+	return enetc_clean_rx_ring_cacheable(rx_ring, rx_pkts, nb_pkts);
+}
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 8/9] net/enetc: set user configurable priority to TX rings
From: Gagandeep Singh @ 2026-06-23  6:00 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Vanshika Shukla
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

From: Vanshika Shukla <vanshika.shukla@nxp.com>

Add devarg 'enetc4_txq_prior' to allow per-queue TX ring priority
configuration. The value is a '|'-separated list of TBMR priority
bits, one per TX queue (e.g. 'enetc4_txq_prior=1|2|3').
The configuration accepts values only up to the maximum supported
TX queues. Any additional values beyond this supported range
are discarded.

Store the parsed priorities in hw->txq_prior and apply them in
enetc4_tx_queue_setup() when enabling the ring.

Signed-off-by: Vanshika Shukla <vanshika.shukla@nxp.com>
---
 doc/guides/nics/enetc4.rst             | 12 ++++
 doc/guides/rel_notes/release_26_07.rst |  1 +
 drivers/net/enetc/enetc.h              |  1 +
 drivers/net/enetc/enetc4_ethdev.c      | 81 +++++++++++++++++++++++++-
 4 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/enetc4.rst b/doc/guides/nics/enetc4.rst
index 3c4af22..844b290 100644
--- a/doc/guides/nics/enetc4.rst
+++ b/doc/guides/nics/enetc4.rst
@@ -128,3 +128,15 @@ VF-specific devargs
   Usage example::
 
     dpdk-testpmd -a 0000:00:01.0,enetc4_vsi_delay=10 -- -i
+
+PF/Common devargs
+~~~~~~~~~~~~~~~~~
+
+``enetc4_txq_prior``
+  Set per-queue TX ring priority (TBMR bits).
+  The value is a ``|``-separated list of priority values, one per TX queue.
+  Values beyond the maximum supported TX queue count are discarded.
+
+  Usage example::
+
+    dpdk-testpmd -a 0000:00:00.0,enetc4_txq_prior=1|2|3 -- -i
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 192623d..495eba0 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -195,6 +195,7 @@ New Features
     messaging.
   * Added devargs options ``enetc4_vsi_timeout`` and ``enetc4_vsi_delay``
     for VSI-PSI messaging timeout and delay.
+  * Added devargs option ``enetc4_txq_prior`` to set TX queues priorities.
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 80844e9..c12597b 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -114,6 +114,7 @@ struct enetc_eth_hw {
 	uint32_t max_tx_queues;
 	uint32_t vsi_timeout; /* VSI-PSI message wait timeout (iterations) */
 	uint32_t vsi_delay;   /* VSI-PSI message wait delay (us) */
+	uint32_t *txq_prior;  /* per-queue TX priority (TBMR priority bits) */
 };
 
 /*
diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index ad1ef4d..7e2d665 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -3,6 +3,7 @@
  */
 
 #include <stdbool.h>
+#include <rte_kvargs.h>
 #include <rte_random.h>
 #include <dpaax_iova_table.h>
 
@@ -10,6 +11,67 @@
 #include "enetc_logs.h"
 #include "enetc.h"
 
+#define ENETC4_TXQ_PRIORITIES	"enetc4_txq_prior"
+
+static int
+parse_txq_prior(const char *key __rte_unused, const char *value, void *opaque)
+{
+	struct rte_eth_dev *dev = (struct rte_eth_dev *)opaque;
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	char *input_str = strdup(value);
+	char *str;
+	uint32_t i = 0;
+
+	if (!input_str)
+		return -ENOMEM;
+
+	hw->txq_prior = calloc(hw->max_tx_queues, sizeof(uint32_t));
+	if (!hw->txq_prior) {
+		free(input_str);
+		return -ENOMEM;
+	}
+
+	str = strtok(input_str, "|");
+	while (str != NULL && i < hw->max_tx_queues) {
+		hw->txq_prior[i++] = (uint32_t)atoi(str);
+		str = strtok(NULL, "|");
+	}
+
+	free(input_str);
+	return 0;
+}
+
+static int
+enetc4_get_devargs(struct rte_eth_dev *dev, const char *key)
+{
+	struct rte_devargs *devargs = dev->device->devargs;
+	struct rte_kvargs *kvlist;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+
+	if (!strcmp(key, ENETC4_TXQ_PRIORITIES)) {
+		if (rte_kvargs_process(kvlist, key,
+				       parse_txq_prior, (void *)dev) < 0) {
+			rte_kvargs_free(kvlist);
+			return 0;
+		}
+	}
+
+	rte_kvargs_free(kvlist);
+	return 0;
+}
+
 /* Supported Rx offloads */
 static uint64_t dev_rx_offloads_sup =
 	RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
@@ -316,9 +378,14 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 	data->tx_queues[queue_idx] = tx_ring;
 	tx_ring->tx_deferred_start = tx_conf->tx_deferred_start;
 	if (!tx_conf->tx_deferred_start) {
+		uint32_t tx_en = ENETC_TBMR_EN;
+
+		/* apply TX queue priority if configured */
+		if (priv->hw.txq_prior)
+			tx_en |= priv->hw.txq_prior[tx_ring->index];
 		/* enable ring */
 		enetc4_txbdr_wr(&priv->hw.hw, tx_ring->index,
-			       ENETC_TBMR, ENETC_TBMR_EN);
+			       ENETC_TBMR, tx_en);
 		dev->data->tx_queue_state[tx_ring->index] =
 			       RTE_ETH_QUEUE_STATE_STARTED;
 	} else {
@@ -1015,6 +1082,8 @@ enetc4_dev_init(struct rte_eth_dev *eth_dev)
 	hw->max_tx_queues = si_cap & ENETC_SICAPR0_BDR_MASK;
 	hw->max_rx_queues = (si_cap >> 16) & ENETC_SICAPR0_BDR_MASK;
 
+	enetc4_get_devargs(eth_dev, ENETC4_TXQ_PRIORITIES);
+
 	ENETC_PMD_DEBUG("Max RX queues = %d Max TX queues = %d",
 			hw->max_rx_queues, hw->max_tx_queues);
 	error = enetc4_mac_init(hw, eth_dev);
@@ -1041,8 +1110,16 @@ enetc4_dev_init(struct rte_eth_dev *eth_dev)
 static int
 enetc4_dev_uninit(struct rte_eth_dev *eth_dev)
 {
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
 	PMD_INIT_FUNC_TRACE();
 
+	if (hw->txq_prior) {
+		free(hw->txq_prior);
+		hw->txq_prior = NULL;
+	}
+
 	return enetc4_dev_close(eth_dev);
 }
 
@@ -1071,4 +1148,6 @@ static struct rte_pci_driver rte_enetc4_pmd = {
 RTE_PMD_REGISTER_PCI(net_enetc4, rte_enetc4_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4, pci_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4, "* vfio-pci");
+RTE_PMD_REGISTER_PARAM_STRING(net_enetc4,
+			      ENETC4_TXQ_PRIORITIES "=<string>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 7/9] net/enetc: add devargs to control VSI-PSI timeout and delay
From: Gagandeep Singh @ 2026-06-23  6:00 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

Add two new devargs for ENETC4 VF:
- enetc4_vsi_timeout: VSI-PSI message wait timeout (iteration count)
- enetc4_vsi_delay: VSI-PSI message wait delay in microseconds

Store the values in struct enetc_eth_hw and use them in
enetc4_msg_vsi_send() instead of the hardcoded defaults.
Fall back to ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE /
ENETC4_DEF_VSI_WAIT_DELAY_UPDATE when not set.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 doc/guides/nics/enetc4.rst             | 18 +++++++
 doc/guides/rel_notes/release_26_07.rst |  2 +
 drivers/net/enetc/enetc.h              |  2 +
 drivers/net/enetc/enetc4_vf.c          | 68 +++++++++++++++++++-------
 4 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/doc/guides/nics/enetc4.rst b/doc/guides/nics/enetc4.rst
index 7b94941..3c4af22 100644
--- a/doc/guides/nics/enetc4.rst
+++ b/doc/guides/nics/enetc4.rst
@@ -110,3 +110,21 @@ VF-specific devargs
   Usage example::
 
     dpdk-testpmd -a 0000:00:01.0,enetc4_vsi_disable -- -i
+
+``enetc4_vsi_timeout``
+  Set the VSI-PSI message wait timeout as an iteration count.
+  Controls how many polling iterations the driver waits for a VSI-PSI
+  response before timing out.
+  Defaults to ``ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE`` when not set.
+
+  Usage example::
+
+    dpdk-testpmd -a 0000:00:01.0,enetc4_vsi_timeout=200 -- -i
+
+``enetc4_vsi_delay``
+  Set the VSI-PSI message wait delay in microseconds between polling iterations.
+  Defaults to ``ENETC4_DEF_VSI_WAIT_DELAY_UPDATE`` when not set.
+
+  Usage example::
+
+    dpdk-testpmd -a 0000:00:01.0,enetc4_vsi_delay=10 -- -i
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 783ad16..192623d 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -193,6 +193,8 @@ New Features
   * Added scatter-gather support for ENETC4 PFs and VFs.
   * Added devargs option ``enetc4_vsi_disable`` to disable VSI-PSI
     messaging.
+  * Added devargs options ``enetc4_vsi_timeout`` and ``enetc4_vsi_delay``
+    for VSI-PSI messaging timeout and delay.
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 01da898..80844e9 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -112,6 +112,8 @@ struct enetc_eth_hw {
 	uint32_t num_rss;
 	uint32_t max_rx_queues;
 	uint32_t max_tx_queues;
+	uint32_t vsi_timeout; /* VSI-PSI message wait timeout (iterations) */
+	uint32_t vsi_delay;   /* VSI-PSI message wait delay (us) */
 };
 
 /*
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index 44c0dc0..62206d7 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -10,6 +10,8 @@
 #include "enetc.h"
 
 #define ENETC4_VSI_DISABLE		"enetc4_vsi_disable"
+#define ENETC4_VSI_TIMEOUT		"enetc4_vsi_timeout"
+#define ENETC4_VSI_DELAY		"enetc4_vsi_delay"
 
 #define ENETC_CRC_TABLE_SIZE		256
 #define ENETC_POLY			0x1021
@@ -262,10 +264,13 @@ enetc4_process_psi_msg(struct rte_eth_dev *eth_dev, struct enetc_hw *enetc_hw)
 }
 
 static int
-enetc4_msg_vsi_send(struct enetc_hw *enetc_hw, struct enetc_msg_swbd *msg)
+enetc4_msg_vsi_send(struct enetc_eth_hw *hw, struct enetc_msg_swbd *msg)
 {
-	int timeout = ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE;
-	int delay_us = ENETC4_DEF_VSI_WAIT_DELAY_UPDATE;
+	struct enetc_hw *enetc_hw = &hw->hw;
+	int timeout = hw->vsi_timeout ? (int)hw->vsi_timeout :
+					ENETC4_DEF_VSI_WAIT_TIMEOUT_UPDATE;
+	int delay_us = hw->vsi_delay ? (int)hw->vsi_delay :
+				       ENETC4_DEF_VSI_WAIT_DELAY_UPDATE;
 	uint8_t class_id = 0;
 	int err = 0;
 	int vsimsgsr;
@@ -382,7 +387,7 @@ enetc4_vf_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *addr)
 					ENETC_CMD_ID_SET_PRIMARY_MAC, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -426,7 +431,6 @@ static int
 enetc4_vf_promisc_send_message(struct rte_eth_dev *dev, bool promisc_en)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -466,7 +470,7 @@ enetc4_vf_promisc_send_message(struct rte_eth_dev *dev, bool promisc_en)
 				ENETC_CMD_ID_SET_MAC_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -483,7 +487,6 @@ static int
 enetc4_vf_allmulti_send_message(struct rte_eth_dev *dev, bool mc_promisc)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -524,7 +527,7 @@ enetc4_vf_allmulti_send_message(struct rte_eth_dev *dev, bool mc_promisc)
 				ENETC_CMD_ID_SET_MAC_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -630,7 +633,7 @@ enetc4_vf_get_link_status(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *r
 			ENETC_CMD_ID_GET_LINK_STATUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -676,7 +679,7 @@ enetc4_vf_get_link_speed(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *re
 			ENETC_CMD_ID_GET_LINK_SPEED, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -819,7 +822,6 @@ static int
 enetc4_vf_vlan_promisc(struct rte_eth_dev *dev, bool promisc_en)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_cmd_set_vlan_promisc *cmd;
 	struct enetc_msg_swbd *msg;
 	uint32_t msg_size;
@@ -858,7 +860,7 @@ enetc4_vf_vlan_promisc(struct rte_eth_dev *dev, bool promisc_en)
 				ENETC_CMD_ID_SET_VLAN_PROMISCUOUS, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -921,7 +923,7 @@ enetc4_vf_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *addr,
 			ENETC_MSG_ADD_EXACT_MAC_ENTRIES, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -1021,7 +1023,7 @@ static int enetc4_vf_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id,
 	}
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err) {
 		ENETC_PMD_ERR("VSI message send error");
 		goto end;
@@ -1104,7 +1106,6 @@ static int
 enetc4_vf_link_register_notif(struct rte_eth_dev *dev, bool enable)
 {
 	struct enetc_eth_hw *hw = ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct enetc_hw *enetc_hw = &hw->hw;
 	struct enetc_msg_swbd *msg;
 	struct rte_eth_link link;
 	uint32_t msg_size;
@@ -1138,7 +1139,7 @@ enetc4_vf_link_register_notif(struct rte_eth_dev *dev, bool enable)
 			cmd, 0, 0, 0);
 
 	/* send the command and wait */
-	err = enetc4_msg_vsi_send(enetc_hw, msg);
+	err = enetc4_msg_vsi_send(hw, msg);
 	if (err)
 		ENETC_PMD_ERR("VSI msg error for link status notification");
 
@@ -1322,12 +1323,43 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 		kvlist = rte_kvargs_parse(eth_dev->device->devargs->args,
 					  NULL);
 		if (kvlist) {
+			const char *val;
+
 			if (rte_kvargs_count(kvlist, ENETC4_VSI_DISABLE) != 0) {
 				ENETC_PMD_NOTICE("VSI messaging disabled by devarg");
 				eth_dev->dev_ops = &enetc4_vf_ops_no_vsi_m;
 			} else {
 				eth_dev->dev_ops = &enetc4_vf_ops;
 			}
+
+			/* parse optional VSI-PSI timeout devarg */
+			val = rte_kvargs_get(kvlist, ENETC4_VSI_TIMEOUT);
+			if (val) {
+				errno = 0;
+				hw->vsi_timeout = (uint32_t)strtoul(val, NULL, 0);
+				if (errno != 0 || hw->vsi_timeout == 0) {
+					ENETC_PMD_ERR("Invalid VSI Timeout value = %u",
+							hw->vsi_timeout);
+					rte_kvargs_free(kvlist);
+					return -1;
+				}
+				ENETC_PMD_NOTICE("VSI timeout set to %u", hw->vsi_timeout);
+			}
+
+			/* parse optional VSI-PSI delay devarg */
+			val = rte_kvargs_get(kvlist, ENETC4_VSI_DELAY);
+			if (val) {
+				errno = 0;
+				hw->vsi_delay = (uint32_t)strtoul(val, NULL, 0);
+				if (errno != 0 || hw->vsi_delay == 0) {
+					ENETC_PMD_ERR("Invalid VSI Delay value = %u",
+							hw->vsi_delay);
+					rte_kvargs_free(kvlist);
+					return -1;
+				}
+				ENETC_PMD_NOTICE("VSI delay set to %u us", hw->vsi_delay);
+			}
+
 			rte_kvargs_free(kvlist);
 		} else {
 			eth_dev->dev_ops = &enetc4_vf_ops;
@@ -1443,5 +1475,7 @@ RTE_PMD_REGISTER_PCI(net_enetc4_vf, rte_enetc4_vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4_vf, pci_vf_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4_vf, "* igb_uio | uio_pci_generic");
 RTE_PMD_REGISTER_PARAM_STRING(net_enetc4_vf,
-			      ENETC4_VSI_DISABLE "=<any>");
+			      ENETC4_VSI_DISABLE "=<any> "
+			      ENETC4_VSI_TIMEOUT "=<uint> "
+			      ENETC4_VSI_DELAY "=<uint>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_vf_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 5/9] net/enetc: support scatter-gather
From: Gagandeep Singh @ 2026-06-23  6:00 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Vanshika Shukla
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

From: Vanshika Shukla <vanshika.shukla@nxp.com>

Add scatter-gather support for ENETC4 PMD:
- Add ENETC_RXBD_LSTATUS_R/F bits for RX BD status
- Add ENETC4_MAX_SEGS (63) for max segments per TX packet
- Update enetc4_vf_dev_infos_get to fill nb_seg_max, offloads,
  max queues and packet length
- Extend enetc_xmit_pkts_nc to handle multi-segment mbufs
- Extend enetc_clean_rx_ring_nc to chain scatter-gather segments
  using LSTATUS_R/F bits

Signed-off-by: Vanshika Shukla <vanshika.shukla@nxp.com>
---
 doc/guides/nics/features/enetc4.ini    |   1 +
 doc/guides/rel_notes/release_26_07.rst |   3 +-
 drivers/net/enetc/base/enetc_hw.h      |   2 +
 drivers/net/enetc/enetc.h              |   7 +-
 drivers/net/enetc/enetc4_ethdev.c      |  10 +-
 drivers/net/enetc/enetc4_vf.c          |  46 +++++++--
 drivers/net/enetc/enetc_rxtx.c         | 129 ++++++++++++++++---------
 7 files changed, 139 insertions(+), 59 deletions(-)

diff --git a/doc/guides/nics/features/enetc4.ini b/doc/guides/nics/features/enetc4.ini
index 87425f4..698140e 100644
--- a/doc/guides/nics/features/enetc4.ini
+++ b/doc/guides/nics/features/enetc4.ini
@@ -17,6 +17,7 @@ Basic stats          = Y
 L3 checksum offload  = Y
 L4 checksum offload  = Y
 Queue start/stop     = Y
+Scattered Rx         = Y
 Linux                = Y
 ARMv8                = Y
 Usage doc            = Y
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index 35476c2..f900145 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -189,7 +189,8 @@ New Features
 
 * **Updated NXP ENETC ethernet driver.**
 
-  * Added support for ESP packet type in packet parsing
+  * Added support for ESP packet type in packet parsing.
+  * Added scatter-gather support for ENETC4 PFs and VFs.
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index f79c950..6e96562 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -230,6 +230,8 @@ enum enetc_bdr_type {TX, RX};
 			(0x0005 | ENETC_PKT_TYPE_IPV4)
 #define ENETC_PKT_TYPE_IPV6_ESP \
 			(0x0005 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_RXBD_LSTATUS_R	BIT(30)
+#define ENETC_RXBD_LSTATUS_F	BIT(31)
 
 /* PCI device info */
 struct enetc_hw {
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
index 4d99b5b..01da898 100644
--- a/drivers/net/enetc/enetc.h
+++ b/drivers/net/enetc/enetc.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019,2024 NXP
+ * Copyright 2018-2019,2024-2026 NXP
  */
 
 #ifndef _ENETC_H_
@@ -28,6 +28,8 @@
 #define MIN_BD_COUNT   32
 /* BD ALIGN */
 #define BD_ALIGN       8
+/* Max segments per ENETC4 TX packet (scatter-gather) */
+#define ENETC4_MAX_SEGS	63
 
 /* minimum frame size supported */
 #define ENETC_MAC_MINFRM_SIZE	68
@@ -90,6 +92,9 @@ struct enetc_bdr {
 		int next_to_alloc; /* Rx */
 	};
 	struct rte_mempool *mb_pool;   /* mbuf pool to populate RX ring. */
+	/* Partial scatter-gather chain persisted across burst calls. */
+	struct rte_mbuf *pkt_first_seg; /* first segment of in-progress frame */
+	struct rte_mbuf *pkt_last_seg;  /* last segment linked so far */
 	struct rte_eth_dev *ndev;
 	uint64_t ierrors;
 	uint8_t rx_deferred_start;
diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index 154fc09..ad1ef4d 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -14,13 +14,15 @@
 static uint64_t dev_rx_offloads_sup =
 	RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
 	RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
-	RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+	RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_RX_OFFLOAD_SCATTER;
 
 /* Supported Tx offloads */
 static uint64_t dev_tx_offloads_sup =
 	RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
 	RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
-	RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+	RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
 
 static int
 enetc4_dev_start(struct rte_eth_dev *dev)
@@ -199,11 +201,15 @@ enetc4_dev_infos_get(struct rte_eth_dev *dev,
 		.nb_max = MAX_BD_COUNT,
 		.nb_min = MIN_BD_COUNT,
 		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
 	};
 	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
 		.nb_max = MAX_BD_COUNT,
 		.nb_min = MIN_BD_COUNT,
 		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
 	};
 	dev_info->max_rx_queues = hw->max_rx_queues;
 	dev_info->max_tx_queues = hw->max_tx_queues;
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index bec7128..9dc4e1d 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2024 NXP
+ * Copyright 2024-2026 NXP
  */
 
 #include <stdbool.h>
@@ -18,8 +18,19 @@ uint16_t enetc_crc_table[ENETC_CRC_TABLE_SIZE];
 bool enetc_crc_gen;
 
 /* Supported Rx offloads */
-static uint64_t dev_vf_rx_offloads_sup =
-	RTE_ETH_RX_OFFLOAD_VLAN_FILTER;
+static uint64_t dev_rx_offloads_sup =
+	RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
+	RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
+	RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
+	RTE_ETH_RX_OFFLOAD_SCATTER;
+
+/* Supported Tx offloads */
+static uint64_t dev_tx_offloads_sup =
+	RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
+	RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
+	RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+	RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
 
 static void
 enetc_gen_crc_table(void)
@@ -61,21 +72,38 @@ static int
 enetc4_vf_dev_infos_get(struct rte_eth_dev *dev,
 			struct rte_eth_dev_info *dev_info)
 {
-	int ret = 0;
+	struct enetc_eth_hw *hw =
+		ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
 	PMD_INIT_FUNC_TRACE();
 
-	ret = enetc4_dev_infos_get(dev, dev_info);
-	if (ret)
-		return ret;
-
+	dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
+		.nb_max = MAX_BD_COUNT,
+		.nb_min = MIN_BD_COUNT,
+		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
+	};
+	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
+		.nb_max = MAX_BD_COUNT,
+		.nb_min = MIN_BD_COUNT,
+		.nb_align = BD_ALIGN,
+		.nb_seg_max = ENETC4_MAX_SEGS,
+		.nb_mtu_seg_max = ENETC4_MAX_SEGS,
+	};
+	dev_info->max_rx_queues = hw->max_rx_queues;
+	dev_info->max_tx_queues = hw->max_tx_queues;
+	dev_info->max_rx_pktlen = ENETC4_MAC_MAXFRM_SIZE;
 	dev_info->max_mtu = dev_info->max_rx_pktlen - (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN);
 	dev_info->max_mac_addrs = ENETC4_MAC_ENTRIES;
-	dev_info->rx_offload_capa |= dev_vf_rx_offloads_sup;
+	dev_info->rx_offload_capa = dev_rx_offloads_sup;
+	dev_info->tx_offload_capa = dev_tx_offloads_sup;
+	dev_info->flow_type_rss_offloads = ENETC_RSS_OFFLOAD_ALL;
 
 	return 0;
 }
 
+
 int
 enetc4_vf_dev_stop(struct rte_eth_dev *dev __rte_unused)
 {
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index 94177bb..e4f5608 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -149,54 +149,64 @@ enetc_xmit_pkts_nc(void *tx_queue,
 		struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts)
 {
-	struct enetc_swbd *tx_swbd;
-	int i, start, bds_to_use;
-	struct enetc_tx_bd *txbd;
 	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
-	unsigned int buflen, j;
+	int i, start, bds_to_use, bd_count;
+	struct enetc_tx_bd *txbd;
+	struct rte_mbuf *seg;
+	uint16_t seg_len, segs_per_pkt;
+	bool is_first_seg;
+	unsigned int j;
 	uint8_t *data;
 
 	i = tx_ring->next_to_use;
-
 	bds_to_use = enetc_bd_unused(tx_ring);
-	if (bds_to_use < nb_pkts)
-		nb_pkts = bds_to_use;
+	bd_count = tx_ring->bd_count;
 
 	start = 0;
-	while (nb_pkts--) {
-		tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
+	while (start < nb_pkts) {
+		seg = tx_pkts[start];
+		segs_per_pkt = seg->nb_segs;
 
-		buflen = rte_pktmbuf_pkt_len(tx_ring->q_swbd[i].buffer_addr);
-		data = rte_pktmbuf_mtod(tx_ring->q_swbd[i].buffer_addr, void *);
-		for (j = 0; j <= buflen; j += RTE_CACHE_LINE_SIZE)
-			dcbf(data + j);
+		if (bds_to_use < segs_per_pkt)
+			break;
 
-		txbd = ENETC_TXBD(*tx_ring, i);
-		txbd->flags = 0;
-		if (tx_ring->q_swbd[i].buffer_addr->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
-			enetc4_tx_offload_checksum(tx_ring->q_swbd[i].buffer_addr, txbd);
+		is_first_seg = true;
+		while (seg) {
+			tx_ring->q_swbd[i].buffer_addr = NULL;
+			seg_len = rte_pktmbuf_data_len(seg);
+			data = rte_pktmbuf_mtod(seg, void *);
+
+			/* Flush payload to PoC so HW DMA reads the correct data. */
+			for (j = 0; j < seg_len; j += RTE_CACHE_LINE_SIZE)
+				dcbf(data + j);
+			/* Cover the last byte of an unaligned buffer. */
+			dcbf(data + (seg_len - 1));
+
+			txbd = ENETC_TXBD(*tx_ring, i);
+			txbd->flags = 0;
+			if (is_first_seg) {
+				tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
+				txbd->frm_len = rte_pktmbuf_pkt_len(seg);
+				if (seg->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
+					enetc4_tx_offload_checksum(seg, txbd);
+				is_first_seg = false;
+			}
+
+			txbd->buf_len = rte_cpu_to_le_16(seg_len);
+			txbd->addr = rte_cpu_to_le_64(rte_mbuf_data_iova(seg));
+			seg = seg->next;
+			i++;
+			bds_to_use--;
+			if (unlikely(i == bd_count))
+				i = 0;
+		}
 
-		tx_swbd = &tx_ring->q_swbd[i];
-		txbd->frm_len = buflen;
-		txbd->buf_len = txbd->frm_len;
-		txbd->addr = (uint64_t)(uintptr_t)
-		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
-				 tx_swbd->buffer_addr->data_off);
+		/* Set the frame-last flag on the final BD of this packet. */
 		txbd->flags |= ENETC4_TXBD_FLAGS_F;
-		i++;
 		start++;
-		if (unlikely(i == tx_ring->bd_count))
-			i = 0;
 	}
 
-	/* we're only cleaning up the Tx ring here, on the assumption that
-	 * software is slower than hardware and hardware completed sending
-	 * older frames out by now.
-	 * We're also cleaning up the ring before kicking off Tx for the new
-	 * batch to minimize chances of contention on the Tx ring
-	 */
 	enetc_clean_tx_ring(tx_ring);
-
 	tx_ring->next_to_use = i;
 	enetc_wr_reg(tx_ring->tcir, i);
 	return start;
@@ -501,38 +511,63 @@ enetc_clean_rx_ring_nc(struct enetc_bdr *rx_ring,
 	int cleaned_cnt, i;
 	struct enetc_swbd *rx_swbd;
 	union enetc_rx_bd *rxbd, rxbd_temp;
+	struct rte_mbuf *first_seg, *cur_seg;
 	uint32_t bd_status;
 	uint8_t *data;
 	uint32_t j;
+	struct rte_mbuf *seg;
+	uint16_t data_len;
 
 	/* next descriptor to process */
 	i = rx_ring->next_to_clean;
-	/* next descriptor to process */
 	rxbd = ENETC_RXBD(*rx_ring, i);
-
 	cleaned_cnt = enetc_bd_unused(rx_ring);
 	rx_swbd = &rx_ring->q_swbd[i];
 
+	/* Restore partial multi-segment chain from a previous burst. */
+	first_seg = rx_ring->pkt_first_seg;
+	cur_seg = rx_ring->pkt_last_seg;
+
 	while (likely(rx_frm_cnt < work_limit)) {
 		rxbd_temp = *rxbd;
 		bd_status = rte_le_to_cpu_32(rxbd_temp.r.lstatus);
-		if (!bd_status)
+		/* LSTATUS_R indicates this BD has been written by HW */
+		if (!(bd_status & ENETC_RXBD_LSTATUS_R))
 			break;
 		if (rxbd_temp.r.error)
 			rx_ring->ierrors++;
 
-		rx_swbd->buffer_addr->pkt_len = rxbd_temp.r.buf_len -
-						rx_ring->crc_len;
-		rx_swbd->buffer_addr->data_len = rx_swbd->buffer_addr->pkt_len;
-		rx_swbd->buffer_addr->hash.rss = rxbd_temp.r.rss_hash;
-		enetc_dev_rx_parse(rx_swbd->buffer_addr,
-				   rxbd_temp.r.parse_summary);
+		seg = rx_swbd->buffer_addr;
+		data_len = rte_le_to_cpu_16(rxbd_temp.r.buf_len);
+		seg->data_len = data_len;
+
+		if (!first_seg) {
+			first_seg = seg;
+			cur_seg = seg;
+			first_seg->pkt_len = data_len;
+			enetc_dev_rx_parse(first_seg, rxbd_temp.r.parse_summary);
+			first_seg->hash.rss = rxbd_temp.r.rss_hash;
+		} else {
+			first_seg->pkt_len += data_len;
+			first_seg->nb_segs++;
+			cur_seg->next = seg;
+			cur_seg = seg;
+		}
 
-		data = rte_pktmbuf_mtod(rx_swbd->buffer_addr, void *);
-		for (j = 0; j <= rx_swbd->buffer_addr->pkt_len; j += RTE_CACHE_LINE_SIZE)
+		/* Invalidate packet data cache lines so CPU reads HW-written data. */
+		data = rte_pktmbuf_mtod(seg, void *);
+		for (j = 0; j < data_len; j += RTE_CACHE_LINE_SIZE)
 			dccivac(data + j);
+		dccivac(data + (data_len - 1));
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			seg->next = NULL;
+			first_seg->pkt_len -= rx_ring->crc_len;
+			rx_pkts[rx_frm_cnt] = first_seg;
+			rx_frm_cnt++;
+			first_seg = NULL;
+		}
 
-		rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
 		cleaned_cnt++;
 		rx_swbd++;
 		i++;
@@ -541,9 +576,11 @@ enetc_clean_rx_ring_nc(struct enetc_bdr *rx_ring,
 			rx_swbd = &rx_ring->q_swbd[i];
 		}
 		rxbd = ENETC_RXBD(*rx_ring, i);
-		rx_frm_cnt++;
 	}
 
+	/* Save partial chain for the next burst if frame is incomplete. */
+	rx_ring->pkt_first_seg = first_seg;
+	rx_ring->pkt_last_seg = cur_seg;
 	rx_ring->next_to_clean = i;
 	enetc_refill_rx_ring(rx_ring, cleaned_cnt);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 6/9] net/enetc: add option to disable VSI messaging
From: Gagandeep Singh @ 2026-06-23  6:00 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

Add devarg 'enetc4_vsi_disable' to allow disabling features
dependent on VSI-PSI messaging. This is useful for testing DPDK
with a PF driver that does not support VSI-PSI messages.

When the devarg is present, a reduced ops table
(enetc4_vf_ops_no_vsi_m) is used that replaces link_update with
a no-op stub and omits MAC/VLAN filter ops that require VSI msgs.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 doc/guides/nics/enetc4.rst             | 22 +++++++++-
 doc/guides/rel_notes/release_26_07.rst |  2 +
 drivers/net/enetc/enetc4_vf.c          | 61 ++++++++++++++++++++++++--
 3 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/enetc4.rst b/doc/guides/nics/enetc4.rst
index 866d389..7b94941 100644
--- a/doc/guides/nics/enetc4.rst
+++ b/doc/guides/nics/enetc4.rst
@@ -1,5 +1,5 @@
 .. SPDX-License-Identifier: BSD-3-Clause
-   Copyright 2024-2025 NXP
+   Copyright 2024-2026 NXP
 
 ENETC4 Poll Mode Driver
 =======================
@@ -90,3 +90,23 @@ Driver compilation and testing
 Follow instructions available in the document
 :ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
 to launch **testpmd**.
+
+
+Driver Arguments (devargs)
+--------------------------
+
+The ENETC4 PMD supports the following device arguments (devargs)
+that can be passed via ``-a`` (allow-list) option in DPDK applications.
+
+VF-specific devargs
+~~~~~~~~~~~~~~~~~~~
+
+``enetc4_vsi_disable``
+  Disable VSI-PSI messaging for the VF.
+  When present, features that require VSI-PSI communication
+  (link update, MAC filter, VLAN filter) are replaced with no-op stubs.
+  Useful when the PF driver does not support VSI-PSI messages.
+
+  Usage example::
+
+    dpdk-testpmd -a 0000:00:01.0,enetc4_vsi_disable -- -i
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index f900145..783ad16 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -191,6 +191,8 @@ New Features
 
   * Added support for ESP packet type in packet parsing.
   * Added scatter-gather support for ENETC4 PFs and VFs.
+  * Added devargs option ``enetc4_vsi_disable`` to disable VSI-PSI
+    messaging.
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/enetc4_vf.c b/drivers/net/enetc/enetc4_vf.c
index 9dc4e1d..44c0dc0 100644
--- a/drivers/net/enetc/enetc4_vf.c
+++ b/drivers/net/enetc/enetc4_vf.c
@@ -3,11 +3,14 @@
  */
 
 #include <stdbool.h>
+#include <rte_kvargs.h>
 #include <rte_random.h>
 #include <dpaax_iova_table.h>
 #include "enetc_logs.h"
 #include "enetc.h"
 
+#define ENETC4_VSI_DISABLE		"enetc4_vsi_disable"
+
 #define ENETC_CRC_TABLE_SIZE		256
 #define ENETC_POLY			0x1021
 #define ENETC_CRC_INIT			0xffff
@@ -687,6 +690,13 @@ enetc4_vf_get_link_speed(struct rte_eth_dev *dev, struct enetc_psi_reply_msg *re
 	return err;
 }
 
+static int
+enetc4_vf_link_update_dummy(struct rte_eth_dev *dev __rte_unused,
+			    int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
 static int
 enetc4_vf_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
 {
@@ -1148,6 +1158,27 @@ static const struct rte_pci_id pci_vf_id_enetc4_map[] = {
 };
 
 /* Features supported by this driver */
+/* ops table used when VSI messaging is disabled */
+static const struct eth_dev_ops enetc4_vf_ops_no_vsi_m = {
+	.dev_configure        = enetc4_dev_configure,
+	.dev_start            = enetc4_vf_dev_start,
+	.dev_stop             = enetc4_vf_dev_stop,
+	.dev_close            = enetc4_dev_close,
+	.stats_get            = enetc4_vf_stats_get,
+	.dev_infos_get        = enetc4_vf_dev_infos_get,
+	.mtu_set              = enetc4_vf_mtu_set,
+	.link_update	      = enetc4_vf_link_update_dummy,
+	.rx_queue_setup       = enetc4_rx_queue_setup,
+	.rx_queue_start       = enetc4_rx_queue_start,
+	.rx_queue_stop        = enetc4_rx_queue_stop,
+	.rx_queue_release     = enetc4_rx_queue_release,
+	.tx_queue_setup       = enetc4_tx_queue_setup,
+	.tx_queue_start       = enetc4_tx_queue_start,
+	.tx_queue_stop        = enetc4_tx_queue_stop,
+	.tx_queue_release     = enetc4_tx_queue_release,
+	.dev_supported_ptypes_get = enetc4_supported_ptypes_get,
+};
+
 static const struct eth_dev_ops enetc4_vf_ops = {
 	.dev_configure        = enetc4_dev_configure,
 	.dev_start            = enetc4_vf_dev_start,
@@ -1283,7 +1314,28 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 	struct enetc_hw *enetc_hw = &hw->hw;
 
 	PMD_INIT_FUNC_TRACE();
-	eth_dev->dev_ops = &enetc4_vf_ops;
+
+	/* check if VSI messaging should be disabled via devarg */
+	if (eth_dev->device->devargs) {
+		struct rte_kvargs *kvlist;
+
+		kvlist = rte_kvargs_parse(eth_dev->device->devargs->args,
+					  NULL);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ENETC4_VSI_DISABLE) != 0) {
+				ENETC_PMD_NOTICE("VSI messaging disabled by devarg");
+				eth_dev->dev_ops = &enetc4_vf_ops_no_vsi_m;
+			} else {
+				eth_dev->dev_ops = &enetc4_vf_ops;
+			}
+			rte_kvargs_free(kvlist);
+		} else {
+			eth_dev->dev_ops = &enetc4_vf_ops;
+		}
+	} else {
+		eth_dev->dev_ops = &enetc4_vf_ops;
+	}
+
 	enetc4_dev_hw_init(eth_dev);
 
 	si_cap = enetc_rd(enetc_hw, ENETC_SICAPR0);
@@ -1304,8 +1356,9 @@ enetc4_vf_dev_init(struct rte_eth_dev *eth_dev)
 	ENETC_PMD_DEBUG("port_id %d vendorID=0x%x deviceID=0x%x",
 			eth_dev->data->port_id, pci_dev->id.vendor_id,
 			pci_dev->id.device_id);
-	/* update link */
-	enetc4_vf_link_update(eth_dev, 0);
+	/* update link if VSI messaging is enabled */
+	if (eth_dev->dev_ops == &enetc4_vf_ops)
+		enetc4_vf_link_update(eth_dev, 0);
 
 	return 0;
 }
@@ -1389,4 +1442,6 @@ static struct rte_pci_driver rte_enetc4_vf_pmd = {
 RTE_PMD_REGISTER_PCI(net_enetc4_vf, rte_enetc4_vf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_enetc4_vf, pci_vf_id_enetc4_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enetc4_vf, "* igb_uio | uio_pci_generic");
+RTE_PMD_REGISTER_PARAM_STRING(net_enetc4_vf,
+			      ENETC4_VSI_DISABLE "=<any>");
 RTE_LOG_REGISTER_DEFAULT(enetc4_vf_logtype_pmd, NOTICE);
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 4/9] net/enetc: update random MAC generation code
From: Gagandeep Singh @ 2026-06-23  5:59 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

Use rte_eth_random_addr() instead of manual rte_rand() based MAC
generation. Also handle VF path by writing to ENETC_SIPMAR0/1 instead
of ENETC_PSIPMAR0/1 when running as a VF.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc_ethdev.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/enetc/enetc_ethdev.c b/drivers/net/enetc/enetc_ethdev.c
index 8196377..55b0e0b 100644
--- a/drivers/net/enetc/enetc_ethdev.c
+++ b/drivers/net/enetc/enetc_ethdev.c
@@ -195,20 +195,18 @@ enetc_hardware_init(struct enetc_eth_hw *hw)
 	}
 
 	if ((high_mac | low_mac) == 0) {
-		char *first_byte;
-
 		ENETC_PMD_NOTICE("MAC is not available for this SI, "
 				"set random MAC");
-		mac = (uint32_t *)hw->mac.addr;
-		*mac = (uint32_t)rte_rand();
-		first_byte = (char *)mac;
-		*first_byte &= 0xfe;	/* clear multicast bit */
-		*first_byte |= 0x02;	/* set local assignment bit (IEEE802) */
-
-		enetc_port_wr(enetc_hw, ENETC_PSIPMAR0(0), *mac);
-		mac++;
-		*mac = (uint16_t)rte_rand();
-		enetc_port_wr(enetc_hw, ENETC_PSIPMAR1(0), *mac);
+		rte_eth_random_addr(hw->mac.addr);
+		high_mac = *(uint32_t *)hw->mac.addr;
+		low_mac = *(uint16_t *)(hw->mac.addr + 4);
+		if (hw->device_id == ENETC_DEV_ID_VF) {
+			enetc_wr(enetc_hw, ENETC_SIPMAR0, high_mac);
+			enetc_wr(enetc_hw, ENETC_SIPMAR1, low_mac);
+		} else {
+			enetc_port_wr(enetc_hw, ENETC_PSIPMAR0(0), high_mac);
+			enetc_port_wr(enetc_hw, ENETC_PSIPMAR1(0), low_mac);
+		}
 		enetc_print_ethaddr("New address: ",
 			      (const struct rte_ether_addr *)hw->mac.addr);
 	}
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 3/9] net/enetc: support ESP packet type in packet parsing
From: Gagandeep Singh @ 2026-06-23  5:59 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

Add ESP (Encapsulating Security Payload) packet type definitions and
handling to the RX packet parsing path. Also update the supported
ptypes array to advertise ESP tunnel type support.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 doc/guides/rel_notes/release_26_07.rst |  3 +++
 drivers/net/enetc/base/enetc_hw.h      |  4 ++++
 drivers/net/enetc/enetc_ethdev.c       |  3 ++-
 drivers/net/enetc/enetc_rxtx.c         | 10 ++++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index fc6e144..35476c2 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -187,6 +187,9 @@ New Features
   ``RTE_ETH_EVENT_RECOVERY_FAILED``) to notify upper layers of the
   reset lifecycle.
 
+* **Updated NXP ENETC ethernet driver.**
+
+  * Added support for ESP packet type in packet parsing
 
 Removed Items
 -------------
diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index 19efadd..f79c950 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -226,6 +226,10 @@ enum enetc_bdr_type {TX, RX};
 			(0x0003 | ENETC_PKT_TYPE_IPV4)
 #define ENETC_PKT_TYPE_IPV6_ICMP \
 			(0x0003 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_PKT_TYPE_IPV4_ESP \
+			(0x0005 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_ESP \
+			(0x0005 | ENETC_PKT_TYPE_IPV6)
 
 /* PCI device info */
 struct enetc_hw {
diff --git a/drivers/net/enetc/enetc_ethdev.c b/drivers/net/enetc/enetc_ethdev.c
index f41f3c1..8196377 100644
--- a/drivers/net/enetc/enetc_ethdev.c
+++ b/drivers/net/enetc/enetc_ethdev.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #include <stdbool.h>
@@ -95,6 +95,7 @@ enetc_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused,
 		RTE_PTYPE_L4_UDP,
 		RTE_PTYPE_L4_SCTP,
 		RTE_PTYPE_L4_ICMP,
+		RTE_PTYPE_TUNNEL_ESP
 	};
 
 	*no_of_elements = RTE_DIM(ptypes);
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index d3b98b3..94177bb 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -370,6 +370,16 @@ enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
 				 RTE_PTYPE_L3_IPV6 |
 				 RTE_PTYPE_L4_UDP;
 		return;
+	case ENETC_PKT_TYPE_IPV4_ESP:
+		m->packet_type = RTE_PTYPE_L2_ETHER |
+				 RTE_PTYPE_L3_IPV4 |
+				 RTE_PTYPE_TUNNEL_ESP;
+		return;
+	case ENETC_PKT_TYPE_IPV6_ESP:
+		m->packet_type = RTE_PTYPE_L2_ETHER |
+				 RTE_PTYPE_L3_IPV6 |
+				 RTE_PTYPE_TUNNEL_ESP;
+		return;
 	case ENETC_PKT_TYPE_IPV4_SCTP:
 		m->packet_type = RTE_PTYPE_L2_ETHER |
 				 RTE_PTYPE_L3_IPV4 |
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 2/9] net/enetc: fix queue initialization
From: Gagandeep Singh @ 2026-06-23  5:59 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, stable, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

Hardware can misbehave if the user tries to reset the consumer and
producer indexes without resetting the ring.

This patch adds the ring reset step before resetting the indexes.

Fixes: 6c9c5aadc0e0 ("net/enetc: support ENETC4 queue API")
Cc: stable@dpdk.org

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/enetc4_ethdev.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/enetc/enetc4_ethdev.c b/drivers/net/enetc/enetc4_ethdev.c
index 78eba70..154fc09 100644
--- a/drivers/net/enetc/enetc4_ethdev.c
+++ b/drivers/net/enetc/enetc4_ethdev.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2024 NXP
+ * Copyright 2024-2026 NXP
  */
 
 #include <stdbool.h>
@@ -279,6 +279,7 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 		     const struct rte_eth_txconf *tx_conf)
 {
 	int err;
+	uint32_t tx_data;
 	struct enetc_bdr *tx_ring;
 	struct rte_eth_dev_data *data = dev->data;
 	struct enetc_eth_adapter *priv =
@@ -301,6 +302,10 @@ enetc4_tx_queue_setup(struct rte_eth_dev *dev,
 		goto fail;
 
 	tx_ring->ndev = dev;
+	/* reset queue */
+	tx_data = enetc4_txbdr_rd(&priv->hw.hw, tx_ring->index, ENETC_TBMR);
+	tx_data &= ~ENETC_TBMR_EN;
+	enetc4_txbdr_wr(&priv->hw.hw, tx_ring->index, ENETC_TBMR, tx_data);
 	enetc4_setup_txbdr(&priv->hw.hw, tx_ring);
 	data->tx_queues[queue_idx] = tx_ring;
 	tx_ring->tx_deferred_start = tx_conf->tx_deferred_start;
@@ -427,6 +432,7 @@ enetc4_rx_queue_setup(struct rte_eth_dev *dev,
 		     struct rte_mempool *mb_pool)
 {
 	int err = 0;
+	uint32_t rx_enable;
 	struct enetc_bdr *rx_ring;
 	struct rte_eth_dev_data *data =  dev->data;
 	struct enetc_eth_adapter *adapter =
@@ -450,6 +456,10 @@ enetc4_rx_queue_setup(struct rte_eth_dev *dev,
 		goto fail;
 
 	rx_ring->ndev = dev;
+	/* reset queue */
+	rx_enable = enetc4_rxbdr_rd(&adapter->hw.hw, rx_ring->index, ENETC_RBMR);
+	rx_enable &= ~ENETC_RBMR_EN;
+	enetc4_rxbdr_wr(&adapter->hw.hw, rx_ring->index, ENETC_RBMR, rx_enable);
 	enetc4_setup_rxbdr(&adapter->hw.hw, rx_ring, mb_pool);
 	data->rx_queues[rx_queue_id] = rx_ring;
 	rx_ring->rx_deferred_start = rx_conf->rx_deferred_start;
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 1/9] net/enetc: fix TX BD structure
From: Gagandeep Singh @ 2026-06-23  5:59 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal, stable, Gagandeep Singh
In-Reply-To: <20260623060004.2187716-1-g.singh@nxp.com>

The flags field in struct enetc_tx_bd was declared as uint16_t but
ENETC4 TX BDs only use an 8-bit flags byte. Fix the type to uint8_t
to match the hardware descriptor layout.

Fixes: 696fa399d797 ("net/enetc: add PMD with basic operations")
Cc: stable@dpdk.org

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/net/enetc/base/enetc_hw.h |  7 +++----
 drivers/net/enetc/enetc_rxtx.c    | 17 +++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
index 173d677..19efadd 100644
--- a/drivers/net/enetc/base/enetc_hw.h
+++ b/drivers/net/enetc/base/enetc_hw.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #ifndef _ENETC_HW_H_
@@ -198,8 +198,7 @@ enum enetc_bdr_type {TX, RX};
 
 #define ENETC_TX_ADDR(txq, addr) ((void *)((txq)->enetc_txbdr + (addr)))
 
-#define ENETC_TXBD_FLAGS_IE		BIT(13)
-#define ENETC_TXBD_FLAGS_F		BIT(15)
+#define ENETC_TXBD_FLAGS_F		BIT(7)
 
 /* ENETC Parsed values (Little Endian) */
 #define ENETC_PARSE_ERROR		0x8000
@@ -262,7 +261,7 @@ struct enetc_tx_bd {
 			uint8_t l3t:1;
 			uint8_t resv:5;
 			uint8_t l4t:3;
-			uint16_t flags;
+			uint8_t flags;
 		};/* default layout */
 		uint32_t txstart;
 		uint32_t lstatus;
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
index a2b8153..d3b98b3 100644
--- a/drivers/net/enetc/enetc_rxtx.c
+++ b/drivers/net/enetc/enetc_rxtx.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2024 NXP
+ * Copyright 2018-2026 NXP
  */
 
 #include <stdbool.h>
@@ -101,7 +101,7 @@ enetc_xmit_pkts(void *tx_queue,
 		tx_swbd = &tx_ring->q_swbd[i];
 		txbd->frm_len = tx_pkts[start]->pkt_len;
 		txbd->buf_len = txbd->frm_len;
-		txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
+		txbd->flags = ENETC_TXBD_FLAGS_F;
 		txbd->addr = (uint64_t)(uintptr_t)
 		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
 				 tx_swbd->buffer_addr->data_off);
@@ -133,13 +133,13 @@ enetc4_tx_offload_checksum(struct rte_mbuf *mbuf, struct enetc_tx_bd *txbd)
 		txbd->ipcs = ENETC4_TXBD_IPCS;
 		txbd->l3_start = mbuf->l2_len;
 		txbd->l3_hdr_size = mbuf->l3_len / 4;
-		txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_L_TX_CKSUM);
+		txbd->flags |= ENETC4_TXBD_FLAGS_L_TX_CKSUM;
 		if ((mbuf->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) == RTE_MBUF_F_TX_UDP_CKSUM) {
-			txbd->l4t = rte_cpu_to_le_16(ENETC4_TXBD_L4T_UDP);
-			txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_L4CS);
+			txbd->l4t = ENETC4_TXBD_L4T_UDP;
+			txbd->flags |= ENETC4_TXBD_FLAGS_L4CS;
 		} else if ((mbuf->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) == RTE_MBUF_F_TX_TCP_CKSUM) {
-			txbd->l4t = rte_cpu_to_le_16(ENETC4_TXBD_L4T_TCP);
-			txbd->flags |= rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_L4CS);
+			txbd->l4t = ENETC4_TXBD_L4T_TCP;
+			txbd->flags |= ENETC4_TXBD_FLAGS_L4CS;
 		}
 	}
 }
@@ -172,7 +172,7 @@ enetc_xmit_pkts_nc(void *tx_queue,
 			dcbf(data + j);
 
 		txbd = ENETC_TXBD(*tx_ring, i);
-		txbd->flags = rte_cpu_to_le_16(ENETC4_TXBD_FLAGS_F);
+		txbd->flags = 0;
 		if (tx_ring->q_swbd[i].buffer_addr->ol_flags & ENETC4_TX_CKSUM_OFFLOAD_MASK)
 			enetc4_tx_offload_checksum(tx_ring->q_swbd[i].buffer_addr, txbd);
 
@@ -182,6 +182,7 @@ enetc_xmit_pkts_nc(void *tx_queue,
 		txbd->addr = (uint64_t)(uintptr_t)
 		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
 				 tx_swbd->buffer_addr->data_off);
+		txbd->flags |= ENETC4_TXBD_FLAGS_F;
 		i++;
 		start++;
 		if (unlikely(i == tx_ring->bd_count))
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 0/9] ENETC driver related changes series
From: Gagandeep Singh @ 2026-06-23  5:59 UTC (permalink / raw)
  To: dev; +Cc: hemant.agrawal
In-Reply-To: <20260622113517.1616028-1-g.singh@nxp.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 2475 bytes --]

V3 changes:
  - Added documentation for all devargs in enetc4.rst.
  - Fixed kvlist memory leak issue.

V2 changes:
  - Fixed an un-used variable compilation issue reported on fedora:43-gcc-minsize
  - Fixed various AI reported issues:
	- Release notes updated for all new devargs
	- enect4.ini features doc updated for scattered RX.
	- removed Not required RTE_PTYPE_UNKNOWN.
	- Fixed mid-frame mbuf leak in SG case.
	- Enabled SG for enetc4 PF also.
	- move to calloc from rte_zmalloc in parse_txq_prior().
	- added vaidation checks on strdup, strtoul.
	- added NC devargs to use cacheable ops conditionally.
	- removed dead code like bd_base_p etc.
	- Fixed rte_cpu_to_le_16() conversion on flags and combined
	  all flags related patches in one patch.
	- Fixed memory leak issue due to TXQ priority patch.
   - There were some false positives, I have ignored them:
	Race condition on flags field:
		clean_tx_ring only touches HW-completed BDs (next_to_clean→hwci),
		never newly-submitted BDs; doorbell hasn't fired yet.
	Missing dcbf in clean_tx_ring:
		DPDK is single-threaded per queue; TX path always overwrites
		flags completely before dcbf.
	TX dcbf granularity with wrap:
		Safe (AI admits it).
	RX refill flush at wrap:
		In-loop dcbf at i & mask == 0 already flushes aligned groups;
		trailing flush only needed for partial groups.
	RX reading before invalidate:
		dccivac precedes the read for every group in the loop

Gagandeep Singh (7):
  net/enetc: fix TX BD structure
  net/enetc: fix queue initialization
  net/enetc: support ESP packet type in packet parsing
  net/enetc: update random MAC generation code
  net/enetc: add option to disable VSI messaging
  net/enetc: add devargs to control VSI-PSI timeout and delay
  net/enetc4: add cacheable BD ring support with SW cache maintenance

Vanshika Shukla (2):
  net/enetc: support scatter-gather
  net/enetc: set user configurable priority to TX rings

 doc/guides/nics/enetc4.rst             |  62 +++-
 doc/guides/nics/features/enetc4.ini    |   1 +
 doc/guides/rel_notes/release_26_07.rst |  10 +
 drivers/net/enetc/base/enetc_hw.h      |  13 +-
 drivers/net/enetc/enetc.h              |  31 +-
 drivers/net/enetc/enetc4_ethdev.c      | 172 ++++++++--
 drivers/net/enetc/enetc4_vf.c          | 206 ++++++++++--
 drivers/net/enetc/enetc_ethdev.c       |  25 +-
 drivers/net/enetc/enetc_rxtx.c         | 430 ++++++++++++++++++++++---
 9 files changed, 831 insertions(+), 119 deletions(-)

-- 
2.25.1


^ permalink raw reply

* [PATCH v2 1/1] common/cnxk: add bulk Rx queue enable/disable
From: rkudurumalla @ 2026-06-23  5:44 UTC (permalink / raw)
  To: Nithin Dabilpuram, Kiran Kumar K, Sunil Kumar Kori, Satha Rao,
	Harman Kalra
  Cc: dev, jerinj, Rakesh Kudurumalla
In-Reply-To: <20260617054943.287815-1-rkudurumalla@marvell.com>

From: Rakesh Kudurumalla <rkudurumalla@marvell.com>

Add roc_nix_rq_multi_ena_dis() to batch RQ enable/disable mailbox
AQ operations and process them in a single mbox_process() call.

Skip mbox_process() when no messages were queued (e.g. all qid are
UINT16_MAX) by checking num_msgs and msg_size via mbox_nonempty_nolock().

Signed-off-by: Rakesh Kudurumalla <rkudurumalla@marvell.com>
---
 drivers/common/cnxk/roc_mbox_priv.h           |   6 +-
 drivers/common/cnxk/roc_nix.h                 |   2 +
 drivers/common/cnxk/roc_nix_queue.c           | 102 ++++++++++++++++++
 .../common/cnxk/roc_platform_base_symbols.c   |   1 +
 4 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/drivers/common/cnxk/roc_mbox_priv.h b/drivers/common/cnxk/roc_mbox_priv.h
index 354c8fa52a..e9da1959b6 100644
--- a/drivers/common/cnxk/roc_mbox_priv.h
+++ b/drivers/common/cnxk/roc_mbox_priv.h
@@ -113,14 +113,12 @@ mbox_rsp_init(uint16_t mbox_id, void *msghdr)
 }
 
 static inline bool
-mbox_nonempty(struct mbox *mbox, int devid)
+mbox_nonempty_nolock(struct mbox *mbox, int devid)
 {
 	struct mbox_dev *mdev = &mbox->dev[devid];
 	bool ret;
 
-	plt_spinlock_lock(&mdev->mbox_lock);
-	ret = mdev->num_msgs != 0;
-	plt_spinlock_unlock(&mdev->mbox_lock);
+	ret = mdev->num_msgs != 0 && mdev->msg_size != 0;
 
 	return ret;
 }
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 8ba8b3e0b6..f495e2a5ad 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -1094,6 +1094,8 @@ int __roc_api roc_nix_rq_modify(struct roc_nix *roc_nix, struct roc_nix_rq *rq,
 				bool ena);
 int __roc_api roc_nix_rq_cman_config(struct roc_nix *roc_nix, struct roc_nix_rq *rq);
 int __roc_api roc_nix_rq_ena_dis(struct roc_nix_rq *rq, bool enable);
+int __roc_api roc_nix_rq_multi_ena_dis(struct roc_nix *roc_nix, struct roc_nix_rq *rqs,
+				       int nb_rx_queues, bool enable);
 int __roc_api roc_nix_rq_is_sso_enable(struct roc_nix *roc_nix, uint32_t qid);
 int __roc_api roc_nix_rq_fini(struct roc_nix_rq *rq);
 int __roc_api roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq *cq);
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index ef9b651022..075c9c1591 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -10,6 +10,49 @@
 /* Default SQB slack per SQ */
 #define ROC_NIX_SQB_SLACK_DFLT 24
 
+typedef void *(*nix_aq_enq_alloc_t)(struct mbox *mbox);
+
+static inline void
+nix_aq_enq_write_rq_ena(void *msg, uint16_t qid, bool enable)
+{
+	struct nix_aq_enq_req *aq = msg;
+
+	aq->qidx = qid;
+	aq->ctype = NIX_AQ_CTYPE_RQ;
+	aq->op = NIX_AQ_INSTOP_WRITE;
+	aq->rq.ena = enable;
+	aq->rq_mask.ena = ~(aq->rq_mask.ena);
+}
+
+static inline int
+nix_rq_bulk_ena_dis_fill(struct mbox *mbox, struct roc_nix_rq *rqs, int nb_rx_queues,
+			 bool enable, nix_aq_enq_alloc_t alloc)
+{
+	int i;
+
+	for (i = 0; i < nb_rx_queues; i++) {
+		void *aq;
+		int rc;
+
+		if (rqs[i].qid == UINT16_MAX)
+			continue;
+
+		aq = alloc(mbox);
+		if (!aq) {
+			rc = mbox_process(mbox);
+			if (rc)
+				return rc;
+			aq = alloc(mbox);
+			if (!aq)
+				return -ENOSPC;
+		}
+
+		nix_aq_enq_write_rq_ena(aq, rqs[i].qid, enable);
+	}
+
+	return 0;
+}
+
 static inline uint32_t
 nix_qsize_to_val(enum nix_q_size qsize)
 {
@@ -47,6 +90,28 @@ nix_rq_vwqe_flush(struct roc_nix_rq *rq, uint16_t vwqe_interval)
 	}
 }
 
+static int
+nix_rq_bulk_ena_dis(struct nix *nix, struct roc_nix_rq *rqs, int nb_rx_queues, bool enable)
+{
+	struct mbox *mbox = mbox_get((&nix->dev)->mbox);
+	nix_aq_enq_alloc_t alloc;
+	int rc;
+
+	if (roc_model_is_cn9k())
+		alloc = (nix_aq_enq_alloc_t)mbox_alloc_msg_nix_aq_enq;
+	else if (roc_model_is_cn10k())
+		alloc = (nix_aq_enq_alloc_t)mbox_alloc_msg_nix_cn10k_aq_enq;
+	else /* CN20K */
+		alloc = (nix_aq_enq_alloc_t)mbox_alloc_msg_nix_cn20k_aq_enq;
+
+	rc = nix_rq_bulk_ena_dis_fill(mbox, rqs, nb_rx_queues, enable, alloc);
+	if (!rc && mbox_nonempty_nolock(mbox, 0))
+		rc = mbox_process(mbox);
+
+	mbox_put(mbox);
+	return rc;
+}
+
 int
 nix_rq_ena_dis(struct dev *dev, struct roc_nix_rq *rq, bool enable)
 {
@@ -126,6 +191,43 @@ roc_nix_sq_ena_dis(struct roc_nix_sq *sq, bool enable)
 	return rc;
 }
 
+int
+roc_nix_rq_multi_ena_dis(struct roc_nix *roc_nix, struct roc_nix_rq *rqs, int nb_rx_queues,
+			 bool enable)
+{
+	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+	struct roc_nix_rq *rq;
+	int rc, i;
+
+	rc = nix_rq_bulk_ena_dis(nix, rqs, nb_rx_queues, enable);
+	if (rc) {
+		plt_err("Failed to %s Rx queues rc=%d pf=%d vf=%d nb_rx_queues=%d",
+			enable ? "enable" : "disable", rc, nix->dev.pf, nix->dev.vf,
+			nb_rx_queues);
+		return rc;
+	}
+
+	for (i = 0; i < nb_rx_queues; i++) {
+		rq = &rqs[i];
+
+		if (rq->qid == UINT16_MAX)
+			continue;
+
+		nix_rq_vwqe_flush(rq, nix->vwqe_interval);
+
+		/* Check for meta aura if RQ is enabled */
+		if (enable && nix->need_meta_aura) {
+			rc = roc_nix_inl_meta_aura_check(rq->roc_nix, rq);
+			if (rc) {
+				plt_err("Failed meta aura check for rq=%u rc=%d pf=%d vf=%d",
+					rq->qid, rc, nix->dev.pf, nix->dev.vf);
+				return rc;
+			}
+		}
+	}
+	return 0;
+}
+
 int
 roc_nix_rq_ena_dis(struct roc_nix_rq *rq, bool enable)
 {
diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c
index ed34d4b05b..08b2f4c6f8 100644
--- a/drivers/common/cnxk/roc_platform_base_symbols.c
+++ b/drivers/common/cnxk/roc_platform_base_symbols.c
@@ -353,6 +353,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_ena_dis)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_is_sso_enable)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_init)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_modify)
+RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_multi_ena_dis)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_cman_config)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rq_fini)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_cq_init)
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v5] graph: add optional profiling stats
From: Jerin Jacob @ 2026-06-23  5:13 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Jerin Jacob, Kiran Kumar K, Nithin Dabilpuram, Zhirun Yan
In-Reply-To: <20260621184140.2878132-1-mb@smartsharesystems.com>

On Mon, Jun 22, 2026 at 12:11 AM Morten Brørup <mb@smartsharesystems.com> wrote:
>
> Added graph node profiling stats, build time configurable by enabling
> RTE_GRAPH_PROFILE in rte_config.h.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v5:
> * Added stats for a half burst and a full burst.
> v4:
> * Added documentation. (AI)
> * Added more comments. (AI)
> * Improved dump. (AI)
> * Debug shows both cycles/call and cycles/obj.
> v3:
> * Debug shows cycles/obj instead of cycles/call.
> * Fixed missing --in-reply-to.
> v2:
> * Fixed indentation.
> ---
>  config/rte_config.h                 |  1 +
>  doc/guides/prog_guide/graph_lib.rst |  1 +
>  lib/graph/graph_debug.c             | 57 ++++++++++++++++++++++++++++-
>  lib/graph/node.c                    |  2 +
>  lib/graph/rte_graph_worker_common.h | 33 +++++++++++++++--

Please update app/test/test_graph.c to validate this featue.

>  5 files changed, 90 insertions(+), 4 deletions(-)
>
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 0447cdf2ad..1942c1b1ec 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -106,6 +106,7 @@
>  /* rte_graph defines */
>  #define RTE_GRAPH_BURST_SIZE 256
>  #define RTE_LIBRTE_GRAPH_STATS 1
> +/* RTE_GRAPH_PROFILE is not set */
>
>  /****** driver defines ********/
>
> diff --git a/doc/guides/prog_guide/graph_lib.rst b/doc/guides/prog_guide/graph_lib.rst
> index 8dd49c19d2..bc36042296 100644
> --- a/doc/guides/prog_guide/graph_lib.rst
> +++ b/doc/guides/prog_guide/graph_lib.rst
> @@ -49,6 +49,7 @@ Performance tuning parameters
>    RTE_GRAPH_BURST_SIZE config option.
>    The testing shows, on x86 and arm64 servers, The sweet spot is 256 burst
>    size. While on arm64 embedded SoCs, it is either 64 or 128.
> +- Enable the ``RTE_GRAPH_PROFILE`` config option for more profiling details.
>  - Disable node statistics (using ``RTE_LIBRTE_GRAPH_STATS`` config option)
>    if not needed.
>
> diff --git a/lib/graph/graph_debug.c b/lib/graph/graph_debug.c
> index e3b8cccdc1..7c97e23748 100644
> --- a/lib/graph/graph_debug.c
> +++ b/lib/graph/graph_debug.c
> @@ -92,7 +92,62 @@ rte_graph_obj_dump(FILE *f, struct rte_graph *g, bool all)
>                         fprintf(f, "       total_sched_fail=%" PRId64 "\n",
>                                 n->dispatch.total_sched_fail);
>                 }
> -               fprintf(f, "       total_calls=%" PRId64 "\n", n->total_calls);
> +               fprintf(f, "       total_calls=%" PRIu64 "\n", n->total_calls);
> +               if (rte_graph_has_stats_feature()) {
> +                       fprintf(f, "       total_cycles=%" PRIu64 ", avg cycles/call=%.1f\n",
> +                                       n->total_cycles,
> +                                       n->total_calls == 0 ? (double)0 :
> +                                       (double)n->total_cycles / (double)n->total_calls);
> +               }
> +#ifdef RTE_GRAPH_PROFILE


Please introduce rte_graph_has_profile_featue() similar to
rte_graph_has_stats_feature() to reduce if def clutter as possible.

> +               uint64_t calls = n->usage_stats[0].calls;
> +               fprintf(f, "       objs[0]\n");
> +               fprintf(f, "         calls=%" PRIu64 ", cycles=%" PRIu64 ", avg cycles/call=%.1f\n",
> +                               calls,

>
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index 4ab53a533e..0d8039575d 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -144,12 +144,26 @@ struct __rte_cache_aligned rte_node {
>                         rte_node_process_t process; /**< Process function. */
>                         uint64_t process_u64;
>                 };
> +               /** Fast path area cache line 3. */
> +#ifdef RTE_GRAPH_PROFILE
> +               struct {
> +                       uint64_t calls;     /**< Calls processing resp. 0 or 1 objects. */
> +                       uint64_t cycles;    /**< Cycles spent processing resp. 0 or 1 objects. */
> +               } usage_stats[2];       /**< Usage when this node processed 0 or 1 objects. */
> +               uint64_t full_burst_calls;  /**< Calls processing a full burst of objects. */
> +               uint64_t full_burst_cycles; /**< Cycles spent processing a full burst of objects. */
> +               uint64_t half_burst_calls;  /**< Calls processing a half burst of objects. */
> +               uint64_t half_burst_cycles; /**< Cycles spent processing a half burst of objects. */
> +               /** Fast path area cache line 4. */
> +#endif

Is it an ABI breakage?

>                 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
>         };
>  };
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox