All of lore.kernel.org
 help / color / mirror / Atom feed
From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
To: "Morten Brørup" <mb@smartsharesystems.com>,
	"Bruce Richardson" <bruce.richardson@intel.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Subject: RE: [PATCH] net/i40e: Fast release optimizations
Date: Mon, 30 Jun 2025 11:40:55 +0000	[thread overview]
Message-ID: <cda18d9197ed4ee9bd5ed73fd02bd032@huawei.com> (raw)
In-Reply-To: <20250624061238.89259-1-mb@smartsharesystems.com>



> When fast releasing mbufs, the mbufs are not accessed, so do not prefetch
> them.
> This saves a mbuf load operation for each fast released TX mbuf.
> 
> When fast release of mbufs is enabled for a TX queue, cache the mbuf
> mempool pointer in the TX queue structure.
> This saves one mbuf load operation for each burst of fast released TX
> mbufs.
> 
> The txep->mbuf pointer is not used after the mbuf has been freed, so do
> not reset the pointer.
> This saves a txep store operation for each TX mbuf freed.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
>  drivers/net/intel/common/tx.h                 |  5 +++
>  .../i40e/i40e_recycle_mbufs_vec_common.c      |  4 +-
>  drivers/net/intel/i40e/i40e_rxtx.c            | 39 ++++++++++---------
>  3 files changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
> index b0a68bae44..54c9b845f7 100644
> --- a/drivers/net/intel/common/tx.h
> +++ b/drivers/net/intel/common/tx.h
> @@ -62,6 +62,11 @@ struct ci_tx_queue {
>  	uint16_t tx_next_dd;
>  	uint16_t tx_next_rs;
>  	uint64_t offloads;
> +	/* Mempool pointer for fast release of mbufs.
> +	 * NULL if disabled, UINTPTR_MAX if enabled and not yet known.
> +	 * Initialized at first use.
> +	 */
> +	struct rte_mempool *fast_free_mp;
>  	uint64_t mbuf_errors;
>  	rte_iova_t tx_ring_dma;        /* TX ring DMA address */
>  	bool tx_deferred_start; /* don't start this queue in dev start */
> diff --git a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> index 2875c578af..a46605cee9 100644
> --- a/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> +++ b/drivers/net/intel/i40e/i40e_recycle_mbufs_vec_common.c
> @@ -106,7 +106,9 @@ i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>  	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
>  		/* Avoid txq contains buffers from unexpected mempool. */
>  		if (unlikely(recycle_rxq_info->mp
> -					!= txep[0].mbuf->pool))
> +				!= (likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
> +				txq->fast_free_mp :
> +				(txq->fast_free_mp = txep[0].mbuf->pool))))
>  			return 0;
> 
>  		/* Directly put mbufs from Tx to Rx. */
> diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
> index c3ff2e05c3..679c1340b8 100644
> --- a/drivers/net/intel/i40e/i40e_rxtx.c
> +++ b/drivers/net/intel/i40e/i40e_rxtx.c
> @@ -1332,7 +1332,7 @@ static __rte_always_inline int
>  i40e_tx_free_bufs(struct ci_tx_queue *txq)
>  {
>  	struct ci_tx_entry *txep;
> -	uint16_t tx_rs_thresh = txq->tx_rs_thresh;
> +	const uint16_t tx_rs_thresh = txq->tx_rs_thresh;
>  	uint16_t i = 0, j = 0;
>  	struct rte_mbuf *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
>  	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, RTE_I40E_TX_MAX_FREE_BUF_SZ);
> @@ -1345,41 +1345,40 @@ i40e_tx_free_bufs(struct ci_tx_queue *txq)
> 
>  	txep = &txq->sw_ring[txq->tx_next_dd - (tx_rs_thresh - 1)];
> 
> -	for (i = 0; i < tx_rs_thresh; i++)
> -		rte_prefetch0((txep + i)->mbuf);
> -
>  	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> +		struct rte_mempool * const fast_free_mp =
> +				likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
> +				txq->fast_free_mp :
> +				(txq->fast_free_mp = txep[0].mbuf->pool);
> +

Nit idea.
Acked-by: Konstantin Ananyev <konstantin.ananyev@huawei.com>

Just as a suggestion for further improvement:
can we update (& check) txq->fast_free_mp not at tx_free_bufs() time,
but when we fill txep[] and filling txd[] based on mbuf values?
In theory it should allow to remove the check above.
Also, again in theory, it opens opportunity (with some extra effort) to use  similar optimization rte_mempool_put_bulk)
even for cases when  RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE is not set. 
 

>  		if (k) {
>  			for (j = 0; j != k; j += RTE_I40E_TX_MAX_FREE_BUF_SZ) {
> -				for (i = 0; i < RTE_I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
> +				for (i = 0; i < RTE_I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep)
>  					free[i] = txep->mbuf;
> -					txep->mbuf = NULL;
> -				}
> -				rte_mempool_put_bulk(free[0]->pool, (void **)free,
> +				rte_mempool_put_bulk(fast_free_mp, (void **)free,
>  						RTE_I40E_TX_MAX_FREE_BUF_SZ);
>  			}
>  		}
> 
>  		if (m) {
> -			for (i = 0; i < m; ++i, ++txep) {
> +			for (i = 0; i < m; ++i, ++txep)
>  				free[i] = txep->mbuf;
> -				txep->mbuf = NULL;
> -			}
> -			rte_mempool_put_bulk(free[0]->pool, (void **)free, m);
> +			rte_mempool_put_bulk(fast_free_mp, (void **)free, m);
>  		}
>  	} else {
> -		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
> +		for (i = 0; i < tx_rs_thresh; i++)
> +			rte_prefetch0((txep + i)->mbuf);
> +
> +		for (i = 0; i < tx_rs_thresh; ++i, ++txep)
>  			rte_pktmbuf_free_seg(txep->mbuf);
> -			txep->mbuf = NULL;
> -		}
>  	}
> 
> -	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> -	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + tx_rs_thresh);
> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + tx_rs_thresh);
>  	if (txq->tx_next_dd >= txq->nb_tx_desc)
> -		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> +		txq->tx_next_dd = (uint16_t)(tx_rs_thresh - 1);
> 
> -	return txq->tx_rs_thresh;
> +	return tx_rs_thresh;
>  }
> 
>  /* Populate 4 descriptors with data from 4 mbufs */
> @@ -2546,6 +2545,8 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
>  	txq->reg_idx = reg_idx;
>  	txq->port_id = dev->data->port_id;
>  	txq->offloads = offloads;
> +	txq->fast_free_mp = offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> +			(void *)UINTPTR_MAX : NULL;
>  	txq->i40e_vsi = vsi;
>  	txq->tx_deferred_start = tx_conf->tx_deferred_start;
> 
> --
> 2.43.0


  parent reply	other threads:[~2025-06-30 11:41 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-24  6:12 [PATCH] net/i40e: Fast release optimizations Morten Brørup
2025-06-25 10:47 ` Bruce Richardson
2025-06-25 11:15   ` Morten Brørup
2025-06-30 11:40 ` Konstantin Ananyev [this message]
2025-06-30 13:45   ` Morten Brørup
2025-06-30 16:06     ` Morten Brørup
2025-07-01  7:31       ` Morten Brørup
2025-07-01  8:16       ` Konstantin Ananyev
2025-07-01  9:09         ` Morten Brørup
2025-07-03  8:12           ` Konstantin Ananyev
2025-07-03 12:02             ` Morten Brørup
2025-08-11 12:47 ` Morten Brørup
2025-12-10 17:00   ` Bruce Richardson
2025-12-11 21:45     ` Morten Brørup
2025-12-12 11:16     ` Morten Brørup

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cda18d9197ed4ee9bd5ed73fd02bd032@huawei.com \
    --to=konstantin.ananyev@huawei.com \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=mb@smartsharesystems.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.