* [PATCH] net/mlx5: cleanup unused olx parameter in Tx free routines
@ 2026-03-23 10:13 Viacheslav Ovsiienko
2026-03-23 12:41 ` [PATCH v2] " Viacheslav Ovsiienko
0 siblings, 1 reply; 3+ messages in thread
From: Viacheslav Ovsiienko @ 2026-03-23 10:13 UTC (permalink / raw)
To: dev; +Cc: rasland, matan, suanmingm, dsosnowski, stable
The olx parameter is intended to be known at compile time and
widely used for static optimizations while generating the tx_burst
routines code from the template.
However, in the mlx5_tx_free_mbuf routine we have the olx parameter
provided at runtime only, for all possible execution paths. And the
only intended optimization (that actually does not happen) is to check
whether multi-buf packets are supported. As fast free offload is not
supported for multi-buf packets, we can simplify the code and get rid of
the unused olx parameter in the entire call chain.
Cc: stable@dpdk.org
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/net/mlx5/mlx5_tx.c | 18 +++++-------------
drivers/net/mlx5/mlx5_tx.h | 28 +++++++++-------------------
2 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 8085b5c306..94644bc3b9 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -144,14 +144,10 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
* Pointer to TX queue structure.
* @param last_cqe
* valid CQE pointer, if not NULL update txq->wqe_pi and flush the buffers.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
- volatile struct mlx5_cqe *last_cqe,
- unsigned int olx __rte_unused)
+ volatile struct mlx5_cqe *last_cqe)
{
if (likely(last_cqe != NULL)) {
uint16_t tail;
@@ -159,7 +155,7 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
if (likely(tail != txq->elts_tail)) {
- mlx5_tx_free_elts(txq, tail, olx);
+ mlx5_tx_free_elts(txq, tail);
MLX5_ASSERT(tail == txq->elts_tail);
}
}
@@ -172,16 +168,12 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
*
* @param txq
* Pointer to TX queue structure.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*
* NOTE: not inlined intentionally, it makes tx_burst
* routine smaller, simple and faster - from experiments.
*/
void
-mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
- unsigned int olx __rte_unused)
+mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq)
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
@@ -259,7 +251,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
/* Ring doorbell to notify hardware. */
rte_compiler_barrier();
*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
- mlx5_tx_comp_flush(txq, last_cqe, olx);
+ mlx5_tx_comp_flush(txq, last_cqe);
}
}
@@ -280,7 +272,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
struct mlx5_txq_data *__rte_restrict txq = tx_queue;
uint16_t used;
- mlx5_tx_handle_completion(txq, 0);
+ mlx5_tx_handle_completion(txq);
used = txq->elts_head - txq->elts_tail;
if (offset < used)
return RTE_ETH_TX_DESC_FULL;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index 0134a2e003..48dbb2d867 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -234,8 +234,7 @@ struct mlx5_external_q *mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx);
/* mlx5_tx.c */
-void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
- unsigned int olx __rte_unused);
+void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
struct rte_eth_txq_info *qinfo);
@@ -526,15 +525,11 @@ txq_ol_cksum_to_cs(struct rte_mbuf *buf)
* Pointer to array of packets to be free.
* @param pkts_n
* Number of packets to be freed.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
struct rte_mbuf **__rte_restrict pkts,
- unsigned int pkts_n,
- unsigned int olx __rte_unused)
+ unsigned int pkts_n)
{
struct rte_mempool *pool = NULL;
struct rte_mbuf **p_free = NULL;
@@ -552,7 +547,7 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
* Free mbufs directly to the pool in bulk
* if fast free offload is engaged
*/
- if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) {
+ if (txq->fast_free) {
mbuf = *pkts;
pool = mbuf->pool;
rte_mempool_put_bulk(pool, (void *)pkts, pkts_n);
@@ -642,10 +637,9 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
static __rte_noinline void
__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
struct rte_mbuf **__rte_restrict pkts,
- unsigned int pkts_n,
- unsigned int olx __rte_unused)
+ unsigned int pkts_n)
{
- mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx);
+ mlx5_tx_free_mbuf(txq, pkts, pkts_n);
}
/**
@@ -655,14 +649,10 @@ __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
* Pointer to Tx queue structure.
* @param tail
* Index in elts to free up to, becomes new elts tail.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
- uint16_t tail,
- unsigned int olx __rte_unused)
+ uint16_t tail)
{
uint16_t n_elts = tail - txq->elts_tail;
@@ -681,7 +671,7 @@ mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
MLX5_ASSERT(part <= txq->elts_s);
mlx5_tx_free_mbuf(txq,
&txq->elts[txq->elts_tail & txq->elts_m],
- part, olx);
+ part);
txq->elts_tail += part;
n_elts -= part;
} while (n_elts);
@@ -3580,7 +3570,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
* - doorbell the NIC about processed CQEs
*/
rte_prefetch0(*(pkts + loc.pkts_sent));
- mlx5_tx_handle_completion(txq, olx);
+ mlx5_tx_handle_completion(txq);
/*
* Calculate the number of available resources - elts and WQEs.
* There are two possible different scenarios:
@@ -3829,7 +3819,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
txq->stats.opackets += loc.pkts_sent;
#endif
if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
- __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
+ __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free);
/* Trace productive bursts only. */
if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
rte_pmd_mlx5_trace_tx_exit(mlx5_read_pcibar_clock_from_txq(txq),
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2] net/mlx5: cleanup unused olx parameter in Tx free routines
2026-03-23 10:13 [PATCH] net/mlx5: cleanup unused olx parameter in Tx free routines Viacheslav Ovsiienko
@ 2026-03-23 12:41 ` Viacheslav Ovsiienko
2026-03-24 15:28 ` Raslan Darawsheh
0 siblings, 1 reply; 3+ messages in thread
From: Viacheslav Ovsiienko @ 2026-03-23 12:41 UTC (permalink / raw)
To: dev; +Cc: rasland, matan, suanmingm, dsosnowski
The olx parameter is intended to be known at compile time and
widely used for static optimizations while generating the tx_burst
routines code from the template.
However, in the mlx5_tx_free_mbuf routine we have the olx parameter
provided at runtime only, for all possible execution paths. And the
only intended optimization (that actually does not happen) is to check
whether multi-buf packets are supported. As fast free offload is not
supported for multi-buf packets, we can simplify the code and get rid of
the unused olx parameter in the entire call chain.
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
v2: updated commit message
---
drivers/net/mlx5/mlx5_tx.c | 18 +++++-------------
drivers/net/mlx5/mlx5_tx.h | 28 +++++++++-------------------
2 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 8085b5c306..94644bc3b9 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -144,14 +144,10 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
* Pointer to TX queue structure.
* @param last_cqe
* valid CQE pointer, if not NULL update txq->wqe_pi and flush the buffers.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
- volatile struct mlx5_cqe *last_cqe,
- unsigned int olx __rte_unused)
+ volatile struct mlx5_cqe *last_cqe)
{
if (likely(last_cqe != NULL)) {
uint16_t tail;
@@ -159,7 +155,7 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
if (likely(tail != txq->elts_tail)) {
- mlx5_tx_free_elts(txq, tail, olx);
+ mlx5_tx_free_elts(txq, tail);
MLX5_ASSERT(tail == txq->elts_tail);
}
}
@@ -172,16 +168,12 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
*
* @param txq
* Pointer to TX queue structure.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*
* NOTE: not inlined intentionally, it makes tx_burst
* routine smaller, simple and faster - from experiments.
*/
void
-mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
- unsigned int olx __rte_unused)
+mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq)
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
@@ -259,7 +251,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
/* Ring doorbell to notify hardware. */
rte_compiler_barrier();
*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
- mlx5_tx_comp_flush(txq, last_cqe, olx);
+ mlx5_tx_comp_flush(txq, last_cqe);
}
}
@@ -280,7 +272,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
struct mlx5_txq_data *__rte_restrict txq = tx_queue;
uint16_t used;
- mlx5_tx_handle_completion(txq, 0);
+ mlx5_tx_handle_completion(txq);
used = txq->elts_head - txq->elts_tail;
if (offset < used)
return RTE_ETH_TX_DESC_FULL;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index 2f4402eb50..016dba0b03 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -234,8 +234,7 @@ struct mlx5_external_q *mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx);
/* mlx5_tx.c */
-void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
- unsigned int olx __rte_unused);
+void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
struct rte_eth_txq_info *qinfo);
@@ -511,15 +510,11 @@ txq_ol_cksum_to_cs(struct rte_mbuf *buf)
* Pointer to array of packets to be free.
* @param pkts_n
* Number of packets to be freed.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
struct rte_mbuf **__rte_restrict pkts,
- unsigned int pkts_n,
- unsigned int olx __rte_unused)
+ unsigned int pkts_n)
{
struct rte_mempool *pool = NULL;
struct rte_mbuf **p_free = NULL;
@@ -537,7 +532,7 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
* Free mbufs directly to the pool in bulk
* if fast free offload is engaged
*/
- if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) {
+ if (txq->fast_free) {
mbuf = *pkts;
pool = mbuf->pool;
rte_mempool_put_bulk(pool, (void *)pkts, pkts_n);
@@ -627,10 +622,9 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
static __rte_noinline void
__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
struct rte_mbuf **__rte_restrict pkts,
- unsigned int pkts_n,
- unsigned int olx __rte_unused)
+ unsigned int pkts_n)
{
- mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx);
+ mlx5_tx_free_mbuf(txq, pkts, pkts_n);
}
/**
@@ -640,14 +634,10 @@ __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
* Pointer to Tx queue structure.
* @param tail
* Index in elts to free up to, becomes new elts tail.
- * @param olx
- * Configured Tx offloads mask. It is fully defined at
- * compile time and may be used for optimization.
*/
static __rte_always_inline void
mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
- uint16_t tail,
- unsigned int olx __rte_unused)
+ uint16_t tail)
{
uint16_t n_elts = tail - txq->elts_tail;
@@ -666,7 +656,7 @@ mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
MLX5_ASSERT(part <= txq->elts_s);
mlx5_tx_free_mbuf(txq,
&txq->elts[txq->elts_tail & txq->elts_m],
- part, olx);
+ part);
txq->elts_tail += part;
n_elts -= part;
} while (n_elts);
@@ -3565,7 +3555,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
* - doorbell the NIC about processed CQEs
*/
rte_prefetch0(*(pkts + loc.pkts_sent));
- mlx5_tx_handle_completion(txq, olx);
+ mlx5_tx_handle_completion(txq);
/*
* Calculate the number of available resources - elts and WQEs.
* There are two possible different scenarios:
@@ -3814,7 +3804,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
txq->stats.opackets += loc.pkts_sent;
#endif
if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
- __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
+ __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free);
/* Trace productive bursts only. */
if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
rte_pmd_mlx5_trace_tx_exit(mlx5_read_pcibar_clock_from_txq(txq),
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] net/mlx5: cleanup unused olx parameter in Tx free routines
2026-03-23 12:41 ` [PATCH v2] " Viacheslav Ovsiienko
@ 2026-03-24 15:28 ` Raslan Darawsheh
0 siblings, 0 replies; 3+ messages in thread
From: Raslan Darawsheh @ 2026-03-24 15:28 UTC (permalink / raw)
To: Viacheslav Ovsiienko, dev; +Cc: matan, suanmingm, dsosnowski
Hi,
On 23/03/2026 2:41 PM, Viacheslav Ovsiienko wrote:
> The olx parameter is intended to be known in compile time and
> widely used for static optimizations while generating the tx_burst
> rotuines code from the template.
>
> However, in the mlx5_tx_free_mbuf routine we have the olx parameter
> provided in runtime only, for all possible execution paths. And the
> only intended optimization (that actually does not happen) is to check
> whether multi-buf packets are supported. As fast free offload is not
> supported for multi-buf packets, we can simplify the code and get rid of
> the unused olx parameter in the entire call chain.
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
>
Patch applied to next-net-mlx,
Kindest regards
Raslan Darawsheh
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-03-24 15:28 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-23 10:13 [PATCH] net/mlx5: cleanup unused olx parameter in Tx free routines Viacheslav Ovsiienko
2026-03-23 12:41 ` [PATCH v2] " Viacheslav Ovsiienko
2026-03-24 15:28 ` Raslan Darawsheh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox