public inbox for dev@dpdk.org
 help / color / mirror / Atom feed
From: Bruce Richardson <bruce.richardson@intel.com>
To: dev@dpdk.org
Cc: mb@smartsharesystems.com, Bruce Richardson <bruce.richardson@intel.com>
Subject: [PATCH v2] net/intel: optimize for fast-free hint
Date: Wed,  8 Apr 2026 14:25:15 +0100	[thread overview]
Message-ID: <20260408132515.1314728-1-bruce.richardson@intel.com> (raw)
In-Reply-To: <20260123112032.2174361-1-bruce.richardson@intel.com>

When the fast-free hint is provided to the driver we know that the mbufs
have refcnt of 1 and are from the same mempool. Therefore, we can
optimize a bit for this case by:

* resetting the necessary mbuf fields, i.e. nb_segs and next pointer, when
  we are accessing the mbuf on writing the descriptor.
* on cleanup of buffers after transmit, we can just write those buffers
  straight to the mempool without accessing them.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
V2: Fix issues with original submission:
* missed check for NULL mbufs
* fixed issue with freeing directly from sw_ring in scalar path which
  doesn't work as that's not a flat array of pointers
* fixed missing null assignment in case of large segments for TSO
---
 drivers/net/intel/common/tx.h        | 21 ++++--
 drivers/net/intel/common/tx_scalar.h | 95 ++++++++++++++++++++++------
 2 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 283bd58d5d..f2123f069c 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -363,13 +363,22 @@ ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
 		return;
 
 	if (!txq->use_vec_entry) {
-		/* Regular scalar path uses sw_ring with ci_tx_entry */
-		for (uint16_t i = 0; i < txq->nb_tx_desc; i++) {
-			if (txq->sw_ring[i].mbuf != NULL) {
-				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
-				txq->sw_ring[i].mbuf = NULL;
-			}
+		/* Free mbufs from (last_desc_cleaned + 1) to (tx_tail - 1). */
+		const uint16_t start = (txq->last_desc_cleaned + 1) % txq->nb_tx_desc;
+		const uint16_t nb_desc = txq->nb_tx_desc;
+		const uint16_t end = txq->tx_tail;
+
+		uint16_t i = start;
+		if (end < i) {
+			for (; i < nb_desc; i++)
+				if (txq->sw_ring[i].mbuf != NULL)
+					rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+			i = 0;
 		}
+		for (; i < end; i++)
+			if (txq->sw_ring[i].mbuf != NULL)
+				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+		memset(txq->sw_ring, 0, sizeof(txq->sw_ring[0]) * nb_desc);
 		return;
 	}
 
diff --git a/drivers/net/intel/common/tx_scalar.h b/drivers/net/intel/common/tx_scalar.h
index 9fcd2e4733..adbc4bafee 100644
--- a/drivers/net/intel/common/tx_scalar.h
+++ b/drivers/net/intel/common/tx_scalar.h
@@ -197,16 +197,63 @@ ci_tx_xmit_cleanup(struct ci_tx_queue *txq)
 	const uint16_t rs_idx = (last_desc_cleaned == nb_tx_desc - 1) ?
 			0 :
 			(last_desc_cleaned + 1) >> txq->log2_rs_thresh;
-	uint16_t desc_to_clean_to = (rs_idx << txq->log2_rs_thresh) + (txq->tx_rs_thresh - 1);
+	const uint16_t dd_idx = txq->rs_last_id[rs_idx];
+	const uint16_t first_to_clean = rs_idx << txq->log2_rs_thresh;
 
-	/* Check if descriptor is done  */
-	if ((txd[txq->rs_last_id[rs_idx]].cmd_type_offset_bsz &
-			rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
-				rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
+	/* Check if descriptor is done - all drivers use 0xF as done value in bits 3:0 */
+	if ((txd[dd_idx].cmd_type_offset_bsz & rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+			rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
+		/* Descriptor not yet processed by hardware */
 		return -1;
 
+	/* DD bit is set, descriptors are done. Now free the mbufs. */
+	/* Note: nb_tx_desc is guaranteed to be a multiple of tx_rs_thresh,
+	 * validated during queue setup. This means cleanup never wraps around
+	 * the ring within a single burst (e.g., ring=256, rs_thresh=32 gives
+	 * bursts of 0-31, 32-63, ..., 224-255).
+	 */
+	const uint16_t nb_to_clean = txq->tx_rs_thresh;
+	struct ci_tx_entry *sw_ring = txq->sw_ring;
+
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* FAST_FREE path: mbufs are already reset, just return to pool */
+		void *free[CI_TX_MAX_FREE_BUF_SZ];
+		uint16_t nb_free = 0;
+
+		/* Get cached mempool pointer, or cache it on first use */
+		struct rte_mempool *mp =
+			likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
+			txq->fast_free_mp :
+			(txq->fast_free_mp = sw_ring[dd_idx].mbuf->pool);
+
+		/* Pack non-NULL mbufs in-place at start of sw_ring range.
+		 * No modulo needed in loop since we're guaranteed not to wrap.
+		 */
+		for (uint16_t i = 0; i < nb_to_clean; i++) {
+			struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
+			if (m == NULL)
+				continue;
+			free[nb_free++] = m;
+			if (unlikely(nb_free == CI_TX_MAX_FREE_BUF_SZ)) {
+				rte_mempool_put_bulk(mp, free, nb_free);
+				nb_free = 0;
+			}
+		}
+
+		/* Bulk return to mempool using packed sw_ring entries directly */
+		if (nb_free > 0)
+			rte_mempool_put_bulk(mp, free, nb_free);
+	} else {
+		/* Non-FAST_FREE path: use prefree_seg for refcount checks */
+		for (uint16_t i = 0; i < nb_to_clean; i++) {
+			struct rte_mbuf *m = sw_ring[first_to_clean + i].mbuf;
+			if (m != NULL)
+				rte_pktmbuf_free_seg(m);
+		}
+	}
+
 	/* Update the txq to reflect the last descriptor that was cleaned */
-	txq->last_desc_cleaned = desc_to_clean_to;
+	txq->last_desc_cleaned = first_to_clean + txq->tx_rs_thresh - 1;
 	txq->nb_tx_free += txq->tx_rs_thresh;
 
 	return 0;
@@ -450,8 +497,6 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 			txd = &ci_tx_ring[tx_id];
 			tx_id = txe->next_id;
 
-			if (txe->mbuf)
-				rte_pktmbuf_free_seg(txe->mbuf);
 			txe->mbuf = tx_pkt;
 			/* Setup TX Descriptor */
 			td_cmd |= CI_TX_DESC_CMD_EOP;
@@ -472,10 +517,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 
 			txn = &sw_ring[txe->next_id];
 			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-			if (txe->mbuf) {
-				rte_pktmbuf_free_seg(txe->mbuf);
-				txe->mbuf = NULL;
-			}
+			txe->mbuf = NULL;
 
 			write_txd(ctx_txd, cd_qw0, cd_qw1);
 
@@ -489,10 +531,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 
 			txn = &sw_ring[txe->next_id];
 			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
-			if (txe->mbuf) {
-				rte_pktmbuf_free_seg(txe->mbuf);
-				txe->mbuf = NULL;
-			}
+			txe->mbuf = NULL;
 
 			ipsec_txd[0] = ipsec_qw0;
 			ipsec_txd[1] = ipsec_qw1;
@@ -507,10 +546,21 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 			txd = &ci_tx_ring[tx_id];
 			txn = &sw_ring[txe->next_id];
 
-			if (txe->mbuf)
-				rte_pktmbuf_free_seg(txe->mbuf);
 			txe->mbuf = m_seg;
 
+			/* For FAST_FREE: reset mbuf fields while we have it in cache.
+			 * FAST_FREE guarantees refcnt=1 and direct mbufs, so we only
+			 * need to reset nb_segs and next pointer as per rte_pktmbuf_prefree_seg.
+			 * Save next pointer before resetting since we need it for loop iteration.
+			 */
+			struct rte_mbuf *next_seg = m_seg->next;
+			if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+				if (m_seg->nb_segs != 1)
+					m_seg->nb_segs = 1;
+				if (next_seg != NULL)
+					m_seg->next = NULL;
+			}
+
 			/* Setup TX Descriptor */
 			/* Calculate segment length, using IPsec callback if provided */
 			if (ipsec_ops != NULL)
@@ -528,18 +578,23 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 					((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) |
 					((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S);
 				write_txd(txd, buf_dma_addr, cmd_type_offset_bsz);
+				/* txe for this slot has already been written (e.g. above outside
+				 * loop), so we write the extra NULL mbuf pointer for this
+				 * descriptor after we increment txe below.
+				 */
 
 				buf_dma_addr += CI_MAX_DATA_PER_TXD;
 				slen -= CI_MAX_DATA_PER_TXD;
 
 				tx_id = txe->next_id;
 				txe = txn;
+				txe->mbuf = NULL;
 				txd = &ci_tx_ring[tx_id];
 				txn = &sw_ring[txe->next_id];
 			}
 
 			/* fill the last descriptor with End of Packet (EOP) bit */
-			if (m_seg->next == NULL)
+			if (next_seg == NULL)
 				td_cmd |= CI_TX_DESC_CMD_EOP;
 
 			const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA |
@@ -551,7 +606,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
 
 			tx_id = txe->next_id;
 			txe = txn;
-			m_seg = m_seg->next;
+			m_seg = next_seg;
 		} while (m_seg);
 end_pkt:
 		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
-- 
2.51.0


  parent reply	other threads:[~2026-04-08 13:25 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-15 11:06 mbuf fast-free requirements analysis Morten Brørup
2025-12-15 11:46 ` Bruce Richardson
2026-01-14 15:31   ` Morten Brørup
2026-01-14 16:36     ` Bruce Richardson
2026-01-14 18:05       ` Morten Brørup
2026-01-15  8:46         ` Bruce Richardson
2026-01-15  9:04           ` Morten Brørup
2026-01-23 11:20     ` [PATCH] net/intel: optimize for fast-free hint Bruce Richardson
2026-01-23 12:05       ` Morten Brørup
2026-01-23 12:09         ` Bruce Richardson
2026-01-23 12:27           ` Morten Brørup
2026-01-23 12:53             ` Bruce Richardson
2026-01-23 13:06               ` Morten Brørup
2026-04-08 13:25       ` Bruce Richardson [this message]
2026-04-08 19:27         ` [PATCH v2] " Morten Brørup
2026-01-23 11:33     ` mbuf fast-free requirements analysis Bruce Richardson
2025-12-15 14:41 ` Konstantin Ananyev
2025-12-15 16:14   ` Morten Brørup
2025-12-19 17:08     ` Konstantin Ananyev
2025-12-20  7:33       ` Morten Brørup
2025-12-22 15:22         ` Konstantin Ananyev
2025-12-22 17:11           ` Morten Brørup
2025-12-22 17:43             ` Bruce Richardson
2026-01-13 14:48               ` Konstantin Ananyev
2026-01-13 16:07                 ` Stephen Hemminger
2026-01-14 17:01 ` Bruce Richardson
2026-01-14 17:31   ` Morten Brørup
2026-01-14 17:45     ` Bruce Richardson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260408132515.1314728-1-bruce.richardson@intel.com \
    --to=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=mb@smartsharesystems.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox