* [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
@ 2026-02-23 0:05 Finn Dayton
2026-02-23 13:46 ` Tariq Toukan
2026-02-24 7:32 ` Tariq Toukan
0 siblings, 2 replies; 5+ messages in thread
From: Finn Dayton @ 2026-02-23 0:05 UTC (permalink / raw)
To: netdev@vger.kernel.org
Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, Saeed Mahameed, Leon Romanovsky,
Tariq Toukan, Mark Bloch, Andrew Lunn, Eric Dumazet,
Paolo Abeni, stable@vger.kernel.org, bpf@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
(CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>= priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
redirect fails.
Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
CPU IDs into the channel range, but modulo/division is too costly in
the hot path.
Instead, this solution precomputes per-cpu priv->xdpsq assignments when
channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
Because multiple CPUs map to the same xdpsq when CPU count exceeds
channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
Link: https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com/
Link: https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com
Cc: stable@vger.kernel.org # 6.12+
Signed-off-by: Finn Dayton <finnius.dayton@spacex.com>
---
Testing:
- XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
CPU ids greater than the number of send queues.
- No -ENXIO observed, successful forwarding.
drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
.../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
3 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ea2cd1f5d1d0..387954201640 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
+ /* serialize writes by multiple CPUs to this send queue */
+ spinlock_t xdp_tx_lock;
} ____cacheline_aligned_in_smp;
struct mlx5e_xdp_buff {
@@ -909,6 +911,8 @@ struct mlx5e_priv {
struct mlx5e_rq drop_rq;
struct mlx5e_channels channels;
+ /* selects the xdpsq during mlx5e_xdp_xmit() */
+ int __percpu *send_queue_idx_ptr;
struct mlx5e_rx_res *rx_res;
u32 *tx_rates;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 80f9fc10877a..2dd44ad873a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_xdpsq *sq;
int nxmit = 0;
- int sq_num;
+ int send_queue_idx = 0;
int i;
/* this flag is sufficient, no need to test internal sq state */
@@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
- sq_num = smp_processor_id();
- if (unlikely(sq_num >= priv->channels.num))
+ if (unlikely(!priv->send_queue_idx_ptr))
return -ENXIO;
- sq = priv->channels.c[sq_num]->xdpsq;
+ send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
+ if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
+ return -ENXIO;
+ sq = priv->channels.c[send_queue_idx]->xdpsq;
+ /* The number of queues configured on a netdev may be smaller than the
+ * CPU pool, so two CPUs might map to this queue. We must serialize writes.
+ */
+ spin_lock(&sq->xdp_tx_lock);
for (i = 0; i < n; i++) {
struct mlx5e_xmit_data_frags xdptxdf = {};
struct xdp_frame *xdpf = frames[i];
@@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
if (flags & XDP_XMIT_FLUSH)
mlx5e_xmit_xdp_doorbell(sq);
-
+ spin_unlock(&sq->xdp_tx_lock);
return nxmit;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7eb691c2a1bd..adef35d06b89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->pdev = c->pdev;
sq->mkey_be = c->mkey_be;
sq->channel = c;
+ spin_lock_init(&sq->xdp_tx_lock);
sq->uar_map = c->bfreg->map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
@@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
smp_wmb();
}
+static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
+{
+ /*
+ * Build the mapping from CPU to XDP send queue index for priv.
+ * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
+ * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
+ * and the target priv (netdev).
+ */
+ int send_queue_idx, cpu;
+
+ if (unlikely(priv->channels.num == 0))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ send_queue_idx = cpu % priv->channels.num;
+ *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
+ }
+}
+
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(priv, &priv->channels);
+ build_priv_to_xdpsq_associations(priv);
mlx5e_xdp_tx_enable(priv);
/* dev_watchdog() wants all TX queues to be started when the carrier is
@@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
if (!priv->fec_ranges)
goto err_free_channel_stats;
+ priv->send_queue_idx_ptr = alloc_percpu(int);
+ if (!priv->send_queue_idx_ptr)
+ goto err_free_fec_ranges;
+
return 0;
+err_free_fec_ranges:
+ kfree(priv->fec_ranges);
err_free_channel_stats:
kfree(priv->channel_stats);
err_free_tx_rates:
@@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
for (i = 0; i < priv->stats_nch; i++)
kvfree(priv->channel_stats[i]);
kfree(priv->channel_stats);
+ free_percpu(priv->send_queue_idx_ptr);
kfree(priv->tx_rates);
kfree(priv->txq2sq_stats);
kfree(priv->txq2sq);
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
2026-02-23 0:05 [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit() Finn Dayton
@ 2026-02-23 13:46 ` Tariq Toukan
2026-02-24 4:32 ` Finn Dayton
2026-02-24 7:32 ` Tariq Toukan
1 sibling, 1 reply; 5+ messages in thread
From: Tariq Toukan @ 2026-02-23 13:46 UTC (permalink / raw)
To: Finn Dayton, netdev@vger.kernel.org
Cc: Alexei Starovoitov ,, Daniel Borkmann ,, David S. Miller ,,
Jakub Kicinski ,, Jesper Dangaard Brouer ,, John Fastabend ,,
Stanislav Fomichev ,, Saeed Mahameed ,, Leon Romanovsky ,,
Tariq Toukan ,, Mark Bloch ,, Andrew Lunn ,, Eric Dumazet ,,
Paolo Abeni ,, stable@vger.kernel.org, bpf@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On 23/02/2026 2:05, Finn Dayton wrote:
> mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
> (CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>> = priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
> redirect fails.
>
> Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
> CPU IDs into the channel range, but modulo/division is too costly in
> the hot path.
>
That discussion reached an agreement to use a while loop with
subtraction. It's expected to be fast, and optimizes the common case.
> Instead, this solution precomputes per-cpu priv->xdpsq assignments when
> channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
>
> Because multiple CPUs map to the same xdpsq when CPU count exceeds
> channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
>
What's the advantage of this solution over the one we already agreed to?
> Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
> Link: https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com/
> Link: https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com
> Cc: stable@vger.kernel.org # 6.12+
> Signed-off-by: Finn Dayton <finnius.dayton@spacex.com>
> ---
> Testing:
> - XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
> CPU ids > than number of send queues.
> - No -ENXIO observed, successful forwarding.
>
> drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
> .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
> 3 files changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index ea2cd1f5d1d0..387954201640 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
> /* control path */
> struct mlx5_wq_ctrl wq_ctrl;
> struct mlx5e_channel *channel;
> + /* serialize writes by multiple CPUs to this send queue */
> + spinlock_t xdp_tx_lock;
> } ____cacheline_aligned_in_smp;
>
> struct mlx5e_xdp_buff {
> @@ -909,6 +911,8 @@ struct mlx5e_priv {
> struct mlx5e_rq drop_rq;
>
> struct mlx5e_channels channels;
> + /* selects the xdpsq during mlx5e_xdp_xmit() */
> + int __percpu *send_queue_idx_ptr;
> struct mlx5e_rx_res *rx_res;
> u32 *tx_rates;
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index 80f9fc10877a..2dd44ad873a1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> struct mlx5e_priv *priv = netdev_priv(dev);
> struct mlx5e_xdpsq *sq;
> int nxmit = 0;
> - int sq_num;
> + int send_queue_idx = 0;
> int i;
>
> /* this flag is sufficient, no need to test internal sq state */
> @@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> return -EINVAL;
>
> - sq_num = smp_processor_id();
>
> - if (unlikely(sq_num >= priv->channels.num))
> + if (unlikely(!priv->send_queue_idx_ptr))
> return -ENXIO;
>
> - sq = priv->channels.c[sq_num]->xdpsq;
> + send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
> + if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
> + return -ENXIO;
>
> + sq = priv->channels.c[send_queue_idx]->xdpsq;
> + /* The number of queues configured on a netdev may be smaller than the
> + * CPU pool, so two CPUs might map to this queue. We must serialize writes.
> + */
> + spin_lock(&sq->xdp_tx_lock);
> for (i = 0; i < n; i++) {
> struct mlx5e_xmit_data_frags xdptxdf = {};
> struct xdp_frame *xdpf = frames[i];
> @@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>
> if (flags & XDP_XMIT_FLUSH)
> mlx5e_xmit_xdp_doorbell(sq);
> -
> + spin_unlock(&sq->xdp_tx_lock);
> return nxmit;
> }
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 7eb691c2a1bd..adef35d06b89 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
> sq->pdev = c->pdev;
> sq->mkey_be = c->mkey_be;
> sq->channel = c;
> + spin_lock_init(&sq->xdp_tx_lock);
> sq->uar_map = c->bfreg->map;
> sq->min_inline_mode = params->tx_min_inline_mode;
> sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
> @@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
> smp_wmb();
> }
>
> +static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
> +{
> + /*
> + * Build the mapping from CPU to XDP send queue index for priv.
> + * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
> + * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
> + * and the target priv (netdev).
> + */
> + int send_queue_idx, cpu;
> +
> + if (unlikely(priv->channels.num == 0))
> + return;
> +
> + for_each_possible_cpu(cpu) {
> + send_queue_idx = cpu % priv->channels.num;
> + *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
> + }
> +}
> +
> void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
> {
> mlx5e_build_txq_maps(priv);
> mlx5e_activate_channels(priv, &priv->channels);
> + build_priv_to_xdpsq_associations(priv);
> mlx5e_xdp_tx_enable(priv);
>
> /* dev_watchdog() wants all TX queues to be started when the carrier is
> @@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
> if (!priv->fec_ranges)
> goto err_free_channel_stats;
>
> + priv->send_queue_idx_ptr = alloc_percpu(int);
> + if (!priv->send_queue_idx_ptr)
> + goto err_free_fec_ranges;
> +
> return 0;
>
> +err_free_fec_ranges:
> + kfree(priv->fec_ranges);
> err_free_channel_stats:
> kfree(priv->channel_stats);
> err_free_tx_rates:
> @@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
> for (i = 0; i < priv->stats_nch; i++)
> kvfree(priv->channel_stats[i]);
> kfree(priv->channel_stats);
> + free_percpu(priv->send_queue_idx_ptr);
> kfree(priv->tx_rates);
> kfree(priv->txq2sq_stats);
> kfree(priv->txq2sq);
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
2026-02-23 13:46 ` Tariq Toukan
@ 2026-02-24 4:32 ` Finn Dayton
2026-02-24 7:33 ` Tariq Toukan
0 siblings, 1 reply; 5+ messages in thread
From: Finn Dayton @ 2026-02-24 4:32 UTC (permalink / raw)
To: Tariq Toukan, netdev@vger.kernel.org
Cc: Alexei Starovoitov ,, Daniel Borkmann ,, David S. Miller ,,
Jakub Kicinski ,, Jesper Dangaard Brouer ,, John Fastabend ,,
Stanislav Fomichev ,, Saeed Mahameed ,, Leon Romanovsky ,,
Tariq Toukan ,, Mark Bloch ,, Andrew Lunn ,, Eric Dumazet ,,
Paolo Abeni ,, stable@vger.kernel.org, bpf@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On 2/23/26, 5:46 AM, "Tariq Toukan" <ttoukan.linux@gmail.com <mailto:ttoukan.linux@gmail.com>> wrote:
On 23/02/2026 2:05, Finn Dayton wrote:
> mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
> (CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>> = priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
> redirect fails.
>
> Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
> CPU IDs into the channel range, but modulo/division is too costly in
> the hot path.
>
That discussion reached to an agreement of using a while loop with
subtraction. It's expected to be fast, and optimizes the common case.
> Instead, this solution precomputes per-cpu priv->xdpsq assignments when
> channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
>
> Because multiple CPUs map to the same xdpsq when CPU count exceeds
> channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
>
What's the advantage of this solution over the one we already agreed to?
Subtraction in a while loop is great for the common case (cpu_id < channels.num).
But in the worst case, i.e. CPUs >> XDP SQs, the while-subtraction approach
is O(cpu_id / channels.num) iterations. With 256 CPUs and 8 channels, cpu_id=255 does
31 subtractions; with 2 channels it can be 127 iterations in the mlx5e_xdp_xmit() hot path.
Precomputing moves the expensive operation out of the hot path into (re)configuration
time and makes runtime lookups constant. The tradeoff is added per-cpu state that needs
to be rebuilt whenever channels are reconfigured.
Given modern systems where CPU count can greatly exceed the number of XDP SQs, constant-
time lookups avoid a large/variable loop in the hot path.
Thank you,
Finn
> Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
> Link: https://urldefense.us/v3/__https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com <mailto:20251031231038.1092673-1-zijianzhang@bytedance.com>/__;!!Fqb0NABsjhF0Kh8I!Y83cZX7QBDtN-kWQrfMsOLg2GhV5biqKsT4Dkj0Zk677rm_78pu6_4ij7TtkVuh859B_YQf8n0yDiBMAtI0Caie0lpk$
> Link: https://urldefense.us/v3/__https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com <mailto:44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com>__;!!Fqb0NABsjhF0Kh8I!Y83cZX7QBDtN-kWQrfMsOLg2GhV5biqKsT4Dkj0Zk677rm_78pu6_4ij7TtkVuh859B_YQf8n0yDiBMAtI0CNvBcoVA$
> Cc: stable@vger.kernel.org <mailto:stable@vger.kernel.org> # 6.12+
> Signed-off-by: Finn Dayton <finnius.dayton@spacex.com <mailto:finnius.dayton@spacex.com>>
> ---
> Testing:
> - XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
> CPU ids > than number of send queues.
> - No -ENXIO observed, successful forwarding.
>
> drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
> .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
> 3 files changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index ea2cd1f5d1d0..387954201640 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
> /* control path */
> struct mlx5_wq_ctrl wq_ctrl;
> struct mlx5e_channel *channel;
> + /* serialize writes by multiple CPUs to this send queue */
> + spinlock_t xdp_tx_lock;
> } ____cacheline_aligned_in_smp;
>
> struct mlx5e_xdp_buff {
> @@ -909,6 +911,8 @@ struct mlx5e_priv {
> struct mlx5e_rq drop_rq;
>
> struct mlx5e_channels channels;
> + /* selects the xdpsq during mlx5e_xdp_xmit() */
> + int __percpu *send_queue_idx_ptr;
> struct mlx5e_rx_res *rx_res;
> u32 *tx_rates;
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index 80f9fc10877a..2dd44ad873a1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> struct mlx5e_priv *priv = netdev_priv(dev);
> struct mlx5e_xdpsq *sq;
> int nxmit = 0;
> - int sq_num;
> + int send_queue_idx = 0;
> int i;
>
> /* this flag is sufficient, no need to test internal sq state */
> @@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> return -EINVAL;
>
> - sq_num = smp_processor_id();
>
> - if (unlikely(sq_num >= priv->channels.num))
> + if (unlikely(!priv->send_queue_idx_ptr))
> return -ENXIO;
>
> - sq = priv->channels.c[sq_num]->xdpsq;
> + send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
> + if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
> + return -ENXIO;
>
> + sq = priv->channels.c[send_queue_idx]->xdpsq;
> + /* The number of queues configured on a netdev may be smaller than the
> + * CPU pool, so two CPUs might map to this queue. We must serialize writes.
> + */
> + spin_lock(&sq->xdp_tx_lock);
> for (i = 0; i < n; i++) {
> struct mlx5e_xmit_data_frags xdptxdf = {};
> struct xdp_frame *xdpf = frames[i];
> @@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>
> if (flags & XDP_XMIT_FLUSH)
> mlx5e_xmit_xdp_doorbell(sq);
> -
> + spin_unlock(&sq->xdp_tx_lock);
> return nxmit;
> }
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 7eb691c2a1bd..adef35d06b89 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
> sq->pdev = c->pdev;
> sq->mkey_be = c->mkey_be;
> sq->channel = c;
> + spin_lock_init(&sq->xdp_tx_lock);
> sq->uar_map = c->bfreg->map;
> sq->min_inline_mode = params->tx_min_inline_mode;
> sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
> @@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
> smp_wmb();
> }
>
> +static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
> +{
> + /*
> + * Build the mapping from CPU to XDP send queue index for priv.
> + * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
> + * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
> + * and the target priv (netdev).
> + */
> + int send_queue_idx, cpu;
> +
> + if (unlikely(priv->channels.num == 0))
> + return;
> +
> + for_each_possible_cpu(cpu) {
> + send_queue_idx = cpu % priv->channels.num;
> + *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
> + }
> +}
> +
> void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
> {
> mlx5e_build_txq_maps(priv);
> mlx5e_activate_channels(priv, &priv->channels);
> + build_priv_to_xdpsq_associations(priv);
> mlx5e_xdp_tx_enable(priv);
>
> /* dev_watchdog() wants all TX queues to be started when the carrier is
> @@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
> if (!priv->fec_ranges)
> goto err_free_channel_stats;
>
> + priv->send_queue_idx_ptr = alloc_percpu(int);
> + if (!priv->send_queue_idx_ptr)
> + goto err_free_fec_ranges;
> +
> return 0;
>
> +err_free_fec_ranges:
> + kfree(priv->fec_ranges);
> err_free_channel_stats:
> kfree(priv->channel_stats);
> err_free_tx_rates:
> @@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
> for (i = 0; i < priv->stats_nch; i++)
> kvfree(priv->channel_stats[i]);
> kfree(priv->channel_stats);
> + free_percpu(priv->send_queue_idx_ptr);
> kfree(priv->tx_rates);
> kfree(priv->txq2sq_stats);
> kfree(priv->txq2sq);
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
2026-02-23 0:05 [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit() Finn Dayton
2026-02-23 13:46 ` Tariq Toukan
@ 2026-02-24 7:32 ` Tariq Toukan
1 sibling, 0 replies; 5+ messages in thread
From: Tariq Toukan @ 2026-02-24 7:32 UTC (permalink / raw)
To: Finn Dayton, netdev@vger.kernel.org
Cc: Alexei Starovoitov ,, Daniel Borkmann ,, David S. Miller ,,
Jakub Kicinski ,, Jesper Dangaard Brouer ,, John Fastabend ,,
Stanislav Fomichev ,, Saeed Mahameed ,, Leon Romanovsky ,,
Tariq Toukan ,, Mark Bloch ,, Andrew Lunn ,, Eric Dumazet ,,
Paolo Abeni ,, stable@vger.kernel.org, bpf@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On 23/02/2026 2:05, Finn Dayton wrote:
> mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
> (CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>> = priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
> redirect fails.
>
> Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
> CPU IDs into the channel range, but modulo/division is too costly in
> the hot path.
>
> Instead, this solution precomputes per-cpu priv->xdpsq assignments when
> channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
>
> Because multiple CPUs map to the same xdpsq when CPU count exceeds
> channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
>
> Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
> Link: https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com/
> Link: https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com
> Cc: stable@vger.kernel.org # 6.12+
Do not introduce it as a fix. No bug here.
> Signed-off-by: Finn Dayton <finnius.dayton@spacex.com>
> ---
> Testing:
> - XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
> CPU ids > than number of send queues.
> - No -ENXIO observed, successful forwarding.
>
> drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
> .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
> .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
> 3 files changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index ea2cd1f5d1d0..387954201640 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
> /* control path */
> struct mlx5_wq_ctrl wq_ctrl;
> struct mlx5e_channel *channel;
> + /* serialize writes by multiple CPUs to this send queue */
> + spinlock_t xdp_tx_lock;
You're already inside xdpsq, no need to repeat xdp_tx in field name.
Move field to the section with /* dirtied @xmit */.
Try to find a proper placement with minimal cacheline impact.
> } ____cacheline_aligned_in_smp;
>
> struct mlx5e_xdp_buff {
> @@ -909,6 +911,8 @@ struct mlx5e_priv {
> struct mlx5e_rq drop_rq;
>
> struct mlx5e_channels channels;
> + /* selects the xdpsq during mlx5e_xdp_xmit() */
> + int __percpu *send_queue_idx_ptr;
Move to section with /* priv data path fields - start */.
Similarly to txq2sq, I'd switch the mapping to give xdpsq pointer,
rather than index.
> struct mlx5e_rx_res *rx_res;
> u32 *tx_rates;
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index 80f9fc10877a..2dd44ad873a1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> struct mlx5e_priv *priv = netdev_priv(dev);
> struct mlx5e_xdpsq *sq;
> int nxmit = 0;
> - int sq_num;
> + int send_queue_idx = 0;
Do not break RCT.
> int i;
>
> /* this flag is sufficient, no need to test internal sq state */
> @@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> return -EINVAL;
>
> - sq_num = smp_processor_id();
>
> - if (unlikely(sq_num >= priv->channels.num))
> + if (unlikely(!priv->send_queue_idx_ptr))
Should be guaranteed by the safety mechanisms.
Not needed.
> return -ENXIO;
>
> - sq = priv->channels.c[sq_num]->xdpsq;
> + send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
> + if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
> + return -ENXIO;
Should be guaranteed by the safety mechanisms.
Not needed.
>
> + sq = priv->channels.c[send_queue_idx]->xdpsq;
> + /* The number of queues configured on a netdev may be smaller than the
> + * CPU pool, so two CPUs might map to this queue. We must serialize writes.
> + */
> + spin_lock(&sq->xdp_tx_lock);
> for (i = 0; i < n; i++) {
> struct mlx5e_xmit_data_frags xdptxdf = {};
> struct xdp_frame *xdpf = frames[i];
> @@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>
> if (flags & XDP_XMIT_FLUSH)
> mlx5e_xmit_xdp_doorbell(sq);
> -
> + spin_unlock(&sq->xdp_tx_lock);
> return nxmit;
> }
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 7eb691c2a1bd..adef35d06b89 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
> sq->pdev = c->pdev;
> sq->mkey_be = c->mkey_be;
> sq->channel = c;
> + spin_lock_init(&sq->xdp_tx_lock);
placement looks arbitrary.
move it so it doesn't interfere the fields assignments/inits.
> sq->uar_map = c->bfreg->map;
> sq->min_inline_mode = params->tx_min_inline_mode;
> sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
> @@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
> smp_wmb();
> }
>
> +static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
Rename. How about mlx5e_build_xdpsq_maps? Similar to mlx5e_build_txq_maps.
> +{
> + /*
Start comment here, don't keep an empty comment line.
> + * Build the mapping from CPU to XDP send queue index for priv.
> + * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
> + * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
> + * and the target priv (netdev).
> + */
Comment is too long. Can be short and to the point.
> + int send_queue_idx, cpu;
> +
> + if (unlikely(priv->channels.num == 0))
> + return;
> +
Shouldn't be possible.
This will just hide bugs. Remove it.
> + for_each_possible_cpu(cpu) {
> + send_queue_idx = cpu % priv->channels.num;
Not sure this deserves a local var..
> + *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
> + }
We probably need smp_wmb(); here.
Need to think about it...
> +}
> +
> void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
> {
> mlx5e_build_txq_maps(priv);
> mlx5e_activate_channels(priv, &priv->channels);
> + build_priv_to_xdpsq_associations(priv);
> mlx5e_xdp_tx_enable(priv);
>
> /* dev_watchdog() wants all TX queues to be started when the carrier is
> @@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
> if (!priv->fec_ranges)
> goto err_free_channel_stats;
>
> + priv->send_queue_idx_ptr = alloc_percpu(int);
> + if (!priv->send_queue_idx_ptr)
> + goto err_free_fec_ranges;
> +
> return 0;
>
> +err_free_fec_ranges:
> + kfree(priv->fec_ranges);
> err_free_channel_stats:
> kfree(priv->channel_stats);
> err_free_tx_rates:
> @@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
> for (i = 0; i < priv->stats_nch; i++)
> kvfree(priv->channel_stats[i]);
> kfree(priv->channel_stats);
> + free_percpu(priv->send_queue_idx_ptr);
Keep order symmetric to the priv_init calls.
> kfree(priv->tx_rates);
> kfree(priv->txq2sq_stats);
> kfree(priv->txq2sq);
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
2026-02-24 4:32 ` Finn Dayton
@ 2026-02-24 7:33 ` Tariq Toukan
0 siblings, 0 replies; 5+ messages in thread
From: Tariq Toukan @ 2026-02-24 7:33 UTC (permalink / raw)
To: Finn Dayton, netdev@vger.kernel.org
Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
Stanislav Fomichev, Saeed Mahameed, Leon Romanovsky,
Tariq Toukan, Mark Bloch, Andrew Lunn, Eric Dumazet,
Paolo Abeni, stable@vger.kernel.org, bpf@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
On 24/02/2026 6:32, Finn Dayton wrote:
>
>
> On 2/23/26, 5:46 AM, "Tariq Toukan" <ttoukan.linux@gmail.com <mailto:ttoukan.linux@gmail.com>> wrote:
>
>
>
>
>
>
> On 23/02/2026 2:05, Finn Dayton wrote:
>> mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
>> (CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>> >= priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
>> redirect fails.
>>
>> Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
>> CPU IDs into the channel range, but modulo/division is too costly in
>> the hot path.
>>
>
>
> That discussion reached an agreement to use a while loop with
> subtraction. It's expected to be fast, and it optimizes the common case.
>
>
>> Instead, this solution precomputes per-cpu priv->xdpsq assignments when
>> channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().
>>
>> Because multiple CPUs map to the same xdpsq when CPU count exceeds
>> channel count, serialize xdp_xmit on the ring with xdp_tx_lock.
>>
>
>
> What's the advantage of this solution over the one we already agreed to?
>
> Subtraction in a while loop is great for the common case (cpu_id < channels.num).
> But in the worst case, i.e. CPUs >> XDP SQs, the while-subtraction approach
> is O(cpu_id / channels.num) iterations. With 256 CPUs and 8 channels, cpu_id=255 does
> 31 subtractions; with 2 channels it can be 127 iterations in the mlx5e_xdp_xmit() hot path.
> Precomputing moves the expensive operation out of the hot path into (re)configuration
> time and makes runtime lookups constant. The tradeoff is added per-cpu state that needs
> to be rebuilt whenever channels are reconfigured.
>
> Given modern systems where CPU count can greatly exceed the number of XDP SQs, constant-
> time lookups avoid a large/variable loop in the hot path.
>
Valid point.
The while loop solution is significantly simpler.
We can go with this one, but must make sure we take care of all needed
syncs, for safety.
See comments on original mail.
Also, is the performance impact measurable?
Do you see the regression in your tests?
> Thank you,
> Finn
>
>> Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
>> Link: https://urldefense.us/v3/__https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com <mailto:20251031231038.1092673-1-zijianzhang@bytedance.com>/__;!!Fqb0NABsjhF0Kh8I!Y83cZX7QBDtN-kWQrfMsOLg2GhV5biqKsT4Dkj0Zk677rm_78pu6_4ij7TtkVuh859B_YQf8n0yDiBMAtI0Caie0lpk$
>> Link: https://urldefense.us/v3/__https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com <mailto:44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com>__;!!Fqb0NABsjhF0Kh8I!Y83cZX7QBDtN-kWQrfMsOLg2GhV5biqKsT4Dkj0Zk677rm_78pu6_4ij7TtkVuh859B_YQf8n0yDiBMAtI0CNvBcoVA$
>> Cc: stable@vger.kernel.org <mailto:stable@vger.kernel.org> # 6.12+
>> Signed-off-by: Finn Dayton <finnius.dayton@spacex.com <mailto:finnius.dayton@spacex.com>>
>> ---
>> Testing:
>> - XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
>> CPU ids greater than the number of send queues.
>> - No -ENXIO observed, successful forwarding.
>>
>> drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++
>> .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 16 +++++++----
>> .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
>> 3 files changed, 43 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> index ea2cd1f5d1d0..387954201640 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> @@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
>> /* control path */
>> struct mlx5_wq_ctrl wq_ctrl;
>> struct mlx5e_channel *channel;
>> + /* serialize writes by multiple CPUs to this send queue */
>> + spinlock_t xdp_tx_lock;
>> } ____cacheline_aligned_in_smp;
>>
>> struct mlx5e_xdp_buff {
>> @@ -909,6 +911,8 @@ struct mlx5e_priv {
>> struct mlx5e_rq drop_rq;
>>
>> struct mlx5e_channels channels;
>> + /* selects the xdpsq during mlx5e_xdp_xmit() */
>> + int __percpu *send_queue_idx_ptr;
>> struct mlx5e_rx_res *rx_res;
>> u32 *tx_rates;
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
>> index 80f9fc10877a..2dd44ad873a1 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
>> @@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>> struct mlx5e_priv *priv = netdev_priv(dev);
>> struct mlx5e_xdpsq *sq;
>> int nxmit = 0;
>> - int sq_num;
>> + int send_queue_idx = 0;
>> int i;
>>
>> /* this flag is sufficient, no need to test internal sq state */
>> @@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>> if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>> return -EINVAL;
>>
>> - sq_num = smp_processor_id();
>>
>> - if (unlikely(sq_num >= priv->channels.num))
>> + if (unlikely(!priv->send_queue_idx_ptr))
>> return -ENXIO;
>>
>> - sq = priv->channels.c[sq_num]->xdpsq;
>> + send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
>> + if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
>> + return -ENXIO;
>>
>> + sq = priv->channels.c[send_queue_idx]->xdpsq;
>> + /* The number of queues configured on a netdev may be smaller than the
>> + * CPU pool, so two CPUs might map to this queue. We must serialize writes.
>> + */
>> + spin_lock(&sq->xdp_tx_lock);
>> for (i = 0; i < n; i++) {
>> struct mlx5e_xmit_data_frags xdptxdf = {};
>> struct xdp_frame *xdpf = frames[i];
>> @@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
>>
>> if (flags & XDP_XMIT_FLUSH)
>> mlx5e_xmit_xdp_doorbell(sq);
>> -
>> + spin_unlock(&sq->xdp_tx_lock);
>> return nxmit;
>> }
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> index 7eb691c2a1bd..adef35d06b89 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> @@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
>> sq->pdev = c->pdev;
>> sq->mkey_be = c->mkey_be;
>> sq->channel = c;
>> + spin_lock_init(&sq->xdp_tx_lock);
>> sq->uar_map = c->bfreg->map;
>> sq->min_inline_mode = params->tx_min_inline_mode;
>> sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
>> @@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
>> smp_wmb();
>> }
>>
>> +static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
>> +{
>> + /*
>> + * Build the mapping from CPU to XDP send queue index for priv.
>> + * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
>> + * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
>> + * and the target priv (netdev).
>> + */
>> + int send_queue_idx, cpu;
>> +
>> + if (unlikely(priv->channels.num == 0))
>> + return;
>> +
>> + for_each_possible_cpu(cpu) {
>> + send_queue_idx = cpu % priv->channels.num;
>> + *per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
>> + }
>> +}
>> +
>> void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
>> {
>> mlx5e_build_txq_maps(priv);
>> mlx5e_activate_channels(priv, &priv->channels);
>> + build_priv_to_xdpsq_associations(priv);
>> mlx5e_xdp_tx_enable(priv);
>>
>> /* dev_watchdog() wants all TX queues to be started when the carrier is
>> @@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
>> if (!priv->fec_ranges)
>> goto err_free_channel_stats;
>>
>> + priv->send_queue_idx_ptr = alloc_percpu(int);
>> + if (!priv->send_queue_idx_ptr)
>> + goto err_free_fec_ranges;
>> +
>> return 0;
>>
>> +err_free_fec_ranges:
>> + kfree(priv->fec_ranges);
>> err_free_channel_stats:
>> kfree(priv->channel_stats);
>> err_free_tx_rates:
>> @@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
>> for (i = 0; i < priv->stats_nch; i++)
>> kvfree(priv->channel_stats[i]);
>> kfree(priv->channel_stats);
>> + free_percpu(priv->send_queue_idx_ptr);
>> kfree(priv->tx_rates);
>> kfree(priv->txq2sq_stats);
>> kfree(priv->txq2sq);
>
>
>
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-02-24 7:33 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-23 0:05 [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit() Finn Dayton
2026-02-23 13:46 ` Tariq Toukan
2026-02-24 4:32 ` Finn Dayton
2026-02-24 7:33 ` Tariq Toukan
2026-02-24 7:32 ` Tariq Toukan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox