public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
* [PATCH] net/intel: introduce a dedicated idpf complq struct
@ 2026-04-06  4:41 Shaiq Wani
  2026-04-08 15:29 ` Bruce Richardson
  2026-04-08 15:34 ` Bruce Richardson
  0 siblings, 2 replies; 4+ messages in thread
From: Shaiq Wani @ 2026-04-06  4:41 UTC (permalink / raw)
  To: dev, bruce.richardson, aman.deep.singh

The IDPF split-queue completion queue was using ci_tx_queue, a structure
designed for TX descriptor queues, wasting ~96 bytes per completion
queue. Additionally, the CQ-only fields (compl_ring, txqs, tx_start_qid,
expected_gen_id) bloated ci_tx_queue for every other Intel driver that
shares it.

Introduce struct idpf_complq with exactly the fields needed by the
completion queue. This brings the CQ allocation down from ~150 bytes
to ~48 bytes.

Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Shaiq Wani <shaiq.wani@intel.com>
---
 drivers/net/intel/common/tx.h                    | 11 +++--------
 drivers/net/intel/cpfl/cpfl_ethdev.h             |  2 +-
 drivers/net/intel/cpfl/cpfl_rxtx.c               | 11 ++++++-----
 drivers/net/intel/idpf/idpf_common_rxtx.c        |  4 ++--
 drivers/net/intel/idpf/idpf_common_rxtx.h        | 16 +++++++++++++++-
 drivers/net/intel/idpf/idpf_common_rxtx_avx2.c   |  4 ++--
 drivers/net/intel/idpf/idpf_common_rxtx_avx512.c |  4 ++--
 drivers/net/intel/idpf/idpf_rxtx.c               |  2 +-
 8 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 283bd58d5d..9da9366046 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -114,6 +114,7 @@ struct ci_tx_desc {
 
 /* forward declaration of the common intel (ci) queue structure */
 struct ci_tx_queue;
+struct idpf_complq;
 
 /**
  * Structure associated with each descriptor of the TX ring of a TX queue.
@@ -209,18 +210,12 @@ struct ci_tx_queue {
 			uint8_t vf_ctx_initialized; /**< VF context descriptors initialized */
 		};
 		struct { /* idpf specific values */
-				volatile union {
-						struct idpf_flex_tx_sched_desc *desc_ring;
-						struct idpf_splitq_tx_compl_desc *compl_ring;
-				};
-				struct ci_tx_queue *complq;
-				void **txqs;   /*only valid for split queue mode*/
-				uint32_t tx_start_qid;
+				struct idpf_flex_tx_sched_desc *desc_ring;
+				struct idpf_complq *complq;
 				uint32_t latch_idx; /* Tx timestamp latch index */
 				uint16_t sw_nb_desc;
 				uint16_t sw_tail;
 				uint16_t rs_compl_count;
-				uint8_t expected_gen_id;
 		};
 	};
 };
diff --git a/drivers/net/intel/cpfl/cpfl_ethdev.h b/drivers/net/intel/cpfl/cpfl_ethdev.h
index e05a0901d5..d26b2bb0dc 100644
--- a/drivers/net/intel/cpfl/cpfl_ethdev.h
+++ b/drivers/net/intel/cpfl/cpfl_ethdev.h
@@ -188,7 +188,7 @@ struct cpfl_vport {
 	uint16_t nb_p2p_txq;
 
 	struct idpf_rx_queue *p2p_rx_bufq;
-	struct ci_tx_queue *p2p_tx_complq;
+	struct idpf_complq *p2p_tx_complq;
 	bool p2p_manual_bind;
 };
 
diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c
index ad622b267d..e7e370a208 100644
--- a/drivers/net/intel/cpfl/cpfl_rxtx.c
+++ b/drivers/net/intel/cpfl/cpfl_rxtx.c
@@ -27,7 +27,7 @@ cpfl_tx_hairpin_descq_reset(struct ci_tx_queue *txq)
 }
 
 static inline void
-cpfl_tx_hairpin_complq_reset(struct ci_tx_queue *cq)
+cpfl_tx_hairpin_complq_reset(struct idpf_complq *cq)
 {
 	uint32_t i, size;
 
@@ -483,7 +483,7 @@ cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct ci_tx_queue *txq,
 	struct cpfl_vport *cpfl_vport = dev->data->dev_private;
 	struct idpf_vport *vport = &cpfl_vport->base;
 	const struct rte_memzone *mz;
-	struct ci_tx_queue *cq;
+	struct idpf_complq *cq;
 	int ret;
 
 	cq = rte_zmalloc_socket("cpfl splitq cq",
@@ -813,7 +813,8 @@ cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	struct cpfl_txq_hairpin_info *hairpin_info;
 	struct idpf_hw *hw = &adapter_base->hw;
 	struct cpfl_tx_queue *cpfl_txq;
-	struct ci_tx_queue *txq, *cq;
+	struct ci_tx_queue *txq;
+	struct idpf_complq *cq;
 	const struct rte_memzone *mz;
 	uint32_t ring_size;
 	uint16_t peer_port, peer_q;
@@ -894,7 +895,7 @@ cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				  logic_qid, cpfl_vport->p2p_q_chunks_info->tx_qtail_spacing);
 	if (cpfl_vport->p2p_tx_complq == NULL) {
 		cq = rte_zmalloc_socket("cpfl hairpin cq",
-					sizeof(struct ci_tx_queue),
+					sizeof(struct idpf_complq),
 					RTE_CACHE_LINE_SIZE,
 					dev->device->numa_node);
 		if (!cq) {
@@ -996,7 +997,7 @@ cpfl_hairpin_rxq_config(struct idpf_vport *vport, struct cpfl_rx_queue *cpfl_rxq
 int
 cpfl_hairpin_tx_complq_config(struct cpfl_vport *cpfl_vport)
 {
-	struct ci_tx_queue *tx_complq = cpfl_vport->p2p_tx_complq;
+	struct idpf_complq *tx_complq = cpfl_vport->p2p_tx_complq;
 	struct virtchnl2_txq_info txq_info;
 
 	memset(&txq_info, 0, sizeof(txq_info));
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c
index f73716e57c..f69ae8b5f0 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.c
@@ -237,7 +237,7 @@ idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq)
 
 RTE_EXPORT_INTERNAL_SYMBOL(idpf_qc_split_tx_complq_reset)
 void
-idpf_qc_split_tx_complq_reset(struct ci_tx_queue *cq)
+idpf_qc_split_tx_complq_reset(struct idpf_complq *cq)
 {
 	uint32_t i, size;
 
@@ -782,7 +782,7 @@ idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 }
 
 static inline void
-idpf_split_tx_free(struct ci_tx_queue *cq)
+idpf_split_tx_free(struct idpf_complq *cq)
 {
 	volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring;
 	volatile struct idpf_splitq_tx_compl_desc *txd;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h
index f512700d5f..b2d33287df 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx.h
+++ b/drivers/net/intel/idpf/idpf_common_rxtx.h
@@ -181,6 +181,20 @@ struct idpf_rxq_ops {
 	void (*release_mbufs)(struct idpf_rx_queue *rxq);
 };
 
+/* Dedicated completion queue structure for IDPF split queue model. */
+struct idpf_complq {
+	volatile struct idpf_splitq_tx_compl_desc *compl_ring;
+	void **txqs;
+	rte_iova_t tx_ring_dma;
+	const struct rte_memzone *mz;
+	uint32_t tx_start_qid;
+	uint16_t nb_tx_desc;
+	uint16_t tx_tail;
+	uint16_t queue_id;
+	uint16_t port_id;
+	uint8_t expected_gen_id;
+};
+
 extern int idpf_timestamp_dynfield_offset;
 extern uint64_t idpf_timestamp_dynflag;
 
@@ -202,7 +216,7 @@ void idpf_qc_single_rx_queue_reset(struct idpf_rx_queue *rxq);
 __rte_internal
 void idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq);
 __rte_internal
-void idpf_qc_split_tx_complq_reset(struct ci_tx_queue *cq);
+void idpf_qc_split_tx_complq_reset(struct idpf_complq *cq);
 __rte_internal
 void idpf_splitq_rearm_common(struct idpf_rx_queue *rx_bufq);
 __rte_internal
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
index db7728afad..3fb2efdb56 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c
@@ -781,9 +781,9 @@ idpf_dp_singleq_xmit_pkts_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 static __rte_always_inline void
-idpf_splitq_scan_cq_ring(struct ci_tx_queue *cq)
+idpf_splitq_scan_cq_ring(struct idpf_complq *cq)
 {
-	struct idpf_splitq_tx_compl_desc *compl_ring;
+	volatile struct idpf_splitq_tx_compl_desc *compl_ring;
 	struct ci_tx_queue *txq;
 	uint16_t genid, txq_qid, cq_qid, i;
 	uint8_t ctype;
diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
index 9af275cd9d..8db4c64106 100644
--- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c
@@ -1101,9 +1101,9 @@ idpf_dp_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 static __rte_always_inline void
-idpf_splitq_scan_cq_ring(struct ci_tx_queue *cq)
+idpf_splitq_scan_cq_ring(struct idpf_complq *cq)
 {
-	struct idpf_splitq_tx_compl_desc *compl_ring;
+	volatile struct idpf_splitq_tx_compl_desc *compl_ring;
 	struct ci_tx_queue *txq;
 	uint16_t genid, txq_qid, cq_qid, i;
 	uint8_t ctype;
diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c
index b316c77b62..31005ce210 100644
--- a/drivers/net/intel/idpf/idpf_rxtx.c
+++ b/drivers/net/intel/idpf/idpf_rxtx.c
@@ -360,7 +360,7 @@ idpf_tx_complq_setup(struct rte_eth_dev *dev, struct ci_tx_queue *txq,
 {
 	struct idpf_vport *vport = dev->data->dev_private;
 	const struct rte_memzone *mz;
-	struct ci_tx_queue *cq;
+	struct idpf_complq *cq;
 	int ret;
 
 	cq = rte_zmalloc_socket("idpf splitq cq",
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/intel: introduce a dedicated idpf complq struct
  2026-04-06  4:41 [PATCH] net/intel: introduce a dedicated idpf complq struct Shaiq Wani
@ 2026-04-08 15:29 ` Bruce Richardson
  2026-04-08 15:34 ` Bruce Richardson
  1 sibling, 0 replies; 4+ messages in thread
From: Bruce Richardson @ 2026-04-08 15:29 UTC (permalink / raw)
  To: Shaiq Wani; +Cc: dev, aman.deep.singh

On Mon, Apr 06, 2026 at 10:11:25AM +0530, Shaiq Wani wrote:
> The IDPF split-queue completion queue was using ci_tx_queue, a structure
> designed for TX descriptor queues, wasting ~96 bytes per completion
> queue. Additionally, the CQ-only fields (compl_ring, txqs, tx_start_qid,
> expected_gen_id) bloated ci_tx_queue for every other Intel driver that
> shares it.
> 
> Introduce struct idpf_complq with exactly the fields needed by the
> completion queue. This brings the CQ allocation down from ~150 bytes
> to ~48 bytes.
> 
> Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
> Signed-off-by: Shaiq Wani <shaiq.wani@intel.com>
> ---
>  drivers/net/intel/common/tx.h                    | 11 +++--------
>  drivers/net/intel/cpfl/cpfl_ethdev.h             |  2 +-
>  drivers/net/intel/cpfl/cpfl_rxtx.c               | 11 ++++++-----
>  drivers/net/intel/idpf/idpf_common_rxtx.c        |  4 ++--
>  drivers/net/intel/idpf/idpf_common_rxtx.h        | 16 +++++++++++++++-
>  drivers/net/intel/idpf/idpf_common_rxtx_avx2.c   |  4 ++--
>  drivers/net/intel/idpf/idpf_common_rxtx_avx512.c |  4 ++--
>  drivers/net/intel/idpf/idpf_rxtx.c               |  2 +-
>  8 files changed, 32 insertions(+), 22 deletions(-)
>
Recheck-request: iol-unit-arm64-testing, iol-intel-Functional, rebase=next-net-intel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/intel: introduce a dedicated idpf complq struct
  2026-04-06  4:41 [PATCH] net/intel: introduce a dedicated idpf complq struct Shaiq Wani
  2026-04-08 15:29 ` Bruce Richardson
@ 2026-04-08 15:34 ` Bruce Richardson
  2026-04-09 10:45   ` Bruce Richardson
  1 sibling, 1 reply; 4+ messages in thread
From: Bruce Richardson @ 2026-04-08 15:34 UTC (permalink / raw)
  To: Shaiq Wani; +Cc: dev, aman.deep.singh

On Mon, Apr 06, 2026 at 10:11:25AM +0530, Shaiq Wani wrote:
> The IDPF split-queue completion queue was using ci_tx_queue, a structure
> designed for TX descriptor queues, wasting ~96 bytes per completion
> queue. Additionally, the CQ-only fields (compl_ring, txqs, tx_start_qid,
> expected_gen_id) bloated ci_tx_queue for every other Intel driver that
> shares it.
> 
> Introduce struct idpf_complq with exactly the fields needed by the
> completion queue. This brings the CQ allocation down from ~150 bytes
> to ~48 bytes.
> 
> Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
> Signed-off-by: Shaiq Wani <shaiq.wani@intel.com>
> ---
>  drivers/net/intel/common/tx.h                    | 11 +++--------
>  drivers/net/intel/cpfl/cpfl_ethdev.h             |  2 +-
>  drivers/net/intel/cpfl/cpfl_rxtx.c               | 11 ++++++-----
>  drivers/net/intel/idpf/idpf_common_rxtx.c        |  4 ++--
>  drivers/net/intel/idpf/idpf_common_rxtx.h        | 16 +++++++++++++++-
>  drivers/net/intel/idpf/idpf_common_rxtx_avx2.c   |  4 ++--
>  drivers/net/intel/idpf/idpf_common_rxtx_avx512.c |  4 ++--
>  drivers/net/intel/idpf/idpf_rxtx.c               |  2 +-
>  8 files changed, 32 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
> index 283bd58d5d..9da9366046 100644
> --- a/drivers/net/intel/common/tx.h
> +++ b/drivers/net/intel/common/tx.h
> @@ -114,6 +114,7 @@ struct ci_tx_desc {
>  
>  /* forward declaration of the common intel (ci) queue structure */
>  struct ci_tx_queue;
> +struct idpf_complq;
>  

Minor nit: you probably want a "struct idpf_flex_tx_sched_desc;" forward
declaration here too, since that struct is also referenced below.
If no other issues with this, I can make that change on apply.

Acked-by: Bruce Richardson <bruce.richardson@intel.com>


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] net/intel: introduce a dedicated idpf complq struct
  2026-04-08 15:34 ` Bruce Richardson
@ 2026-04-09 10:45   ` Bruce Richardson
  0 siblings, 0 replies; 4+ messages in thread
From: Bruce Richardson @ 2026-04-09 10:45 UTC (permalink / raw)
  To: Shaiq Wani; +Cc: dev, aman.deep.singh

On Wed, Apr 08, 2026 at 04:34:32PM +0100, Bruce Richardson wrote:
> On Mon, Apr 06, 2026 at 10:11:25AM +0530, Shaiq Wani wrote:
> > The IDPF split-queue completion queue was using ci_tx_queue, a structure
> > designed for TX descriptor queues, wasting ~96 bytes per completion
> > queue. Additionally, the CQ-only fields (compl_ring, txqs, tx_start_qid,
> > expected_gen_id) bloated ci_tx_queue for every other Intel driver that
> > shares it.
> > 
> > Introduce struct idpf_complq with exactly the fields needed by the
> > completion queue. This brings the CQ allocation down from ~150 bytes
> > to ~48 bytes.
> > 
> > Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
> > Signed-off-by: Shaiq Wani <shaiq.wani@intel.com>
> > ---
> >  drivers/net/intel/common/tx.h                    | 11 +++--------
> >  drivers/net/intel/cpfl/cpfl_ethdev.h             |  2 +-
> >  drivers/net/intel/cpfl/cpfl_rxtx.c               | 11 ++++++-----
> >  drivers/net/intel/idpf/idpf_common_rxtx.c        |  4 ++--
> >  drivers/net/intel/idpf/idpf_common_rxtx.h        | 16 +++++++++++++++-
> >  drivers/net/intel/idpf/idpf_common_rxtx_avx2.c   |  4 ++--
> >  drivers/net/intel/idpf/idpf_common_rxtx_avx512.c |  4 ++--
> >  drivers/net/intel/idpf/idpf_rxtx.c               |  2 +-
> >  8 files changed, 32 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
> > index 283bd58d5d..9da9366046 100644
> > --- a/drivers/net/intel/common/tx.h
> > +++ b/drivers/net/intel/common/tx.h
> > @@ -114,6 +114,7 @@ struct ci_tx_desc {
> >  
> >  /* forward declaration of the common intel (ci) queue structure */
> >  struct ci_tx_queue;
> > +struct idpf_complq;
> >  
> 
> Minor nit: You probably want to have a "struct idpf_flex_tx_sched_desc;" line here
> too, since you reference that struct below also.
> If no other issues with this, I can make that change on apply.

Actually, since none of the other pointer types used within the tx_queue
struct have forward declarations here, we can instead just remove the new
complq one as well, for consistency.

> 
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> 

Applied to dpdk-next-net-intel.
Thanks,
/Bruce

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-04-09 10:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-06  4:41 [PATCH] net/intel: introduce a dedicated idpf complq struct Shaiq Wani
2026-04-08 15:29 ` Bruce Richardson
2026-04-08 15:34 ` Bruce Richardson
2026-04-09 10:45   ` Bruce Richardson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox