public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit()
@ 2026-02-23  0:05 Finn Dayton
  2026-02-23 13:46 ` Tariq Toukan
  2026-02-24  7:32 ` Tariq Toukan
  0 siblings, 2 replies; 5+ messages in thread
From: Finn Dayton @ 2026-02-23  0:05 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: Alexei Starovoitov, Daniel Borkmann, David S. Miller,
	Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
	Stanislav Fomichev, Saeed Mahameed, Leon Romanovsky,
	Tariq Toukan, Mark Bloch, Andrew Lunn, Eric Dumazet,
	Paolo Abeni, stable@vger.kernel.org, bpf@vger.kernel.org,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

mlx5e_xdp_xmit() selects an XDP SQ (Send Queue) using smp_processor_id()
(CPU ID). When doing XDP_REDIRECT from a CPU whose ID is
>= priv->channels.num, mlx5e_xdp_xmit() returns -ENXIO and the
redirect fails.

Previous discussion proposed using modulo in mlx5e_xdp_xmit() to map
CPU IDs into the channel range, but modulo/division is too costly in
the hot path.

Instead, this solution precomputes per-cpu priv->xdpsq assignments when
channels are (re)configured and does a single lookup in mlx5e_xdp_xmit().

Because multiple CPUs map to the same xdpsq when CPU count exceeds
channel count, serialize xdp_xmit on the ring with xdp_tx_lock.

Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side")
Link: https://lore.kernel.org/netdev/20251031231038.1092673-1-zijianzhang@bytedance.com/
Link: https://lore.kernel.org/netdev/44f69955-b566-4fb1-904d-f551046ff2d4@gmail.com
Cc: stable@vger.kernel.org # 6.12+
Signed-off-by: Finn Dayton <finnius.dayton@spacex.com>
---
Testing:
 - XDP forwarding / XDP_REDIRECT verified with both low CPU ids and
   CPU ids greater than the number of send queues.
 - No -ENXIO observed, successful forwarding.

 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  4 +++
 .../net/ethernet/mellanox/mlx5/core/en/xdp.c  | 16 +++++++----
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 28 +++++++++++++++++++
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ea2cd1f5d1d0..387954201640 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -519,6 +519,8 @@ struct mlx5e_xdpsq {
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
 	struct mlx5e_channel      *channel;
+	/* serialize writes by multiple CPUs to this send queue */
+	spinlock_t xdp_tx_lock;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_xdp_buff {
@@ -909,6 +911,8 @@ struct mlx5e_priv {
 	struct mlx5e_rq            drop_rq;
 
 	struct mlx5e_channels      channels;
+	/* selects the xdpsq during mlx5e_xdp_xmit() */
+	int __percpu              *send_queue_idx_ptr;
 	struct mlx5e_rx_res       *rx_res;
 	u32                       *tx_rates;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 80f9fc10877a..2dd44ad873a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -845,7 +845,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_xdpsq *sq;
 	int nxmit = 0;
-	int sq_num;
+	int send_queue_idx = 0;
 	int i;
 
 	/* this flag is sufficient, no need to test internal sq state */
@@ -855,13 +855,19 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
 
-	sq_num = smp_processor_id();
 
-	if (unlikely(sq_num >= priv->channels.num))
+	if (unlikely(!priv->send_queue_idx_ptr))
 		return -ENXIO;
 
-	sq = priv->channels.c[sq_num]->xdpsq;
+	send_queue_idx = *this_cpu_ptr(priv->send_queue_idx_ptr);
+	if (unlikely(send_queue_idx >= priv->channels.num || send_queue_idx < 0))
+		return -ENXIO;
 
+	sq = priv->channels.c[send_queue_idx]->xdpsq;
+	/* The number of queues configured on a netdev may be smaller than the
+	 * CPU pool, so two CPUs might map to this queue. We must serialize writes.
+	 */
+	spin_lock(&sq->xdp_tx_lock);
 	for (i = 0; i < n; i++) {
 		struct mlx5e_xmit_data_frags xdptxdf = {};
 		struct xdp_frame *xdpf = frames[i];
@@ -941,7 +947,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 
 	if (flags & XDP_XMIT_FLUSH)
 		mlx5e_xmit_xdp_doorbell(sq);
-
+	spin_unlock(&sq->xdp_tx_lock);
 	return nxmit;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7eb691c2a1bd..adef35d06b89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1492,6 +1492,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 	sq->pdev      = c->pdev;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
+	spin_lock_init(&sq->xdp_tx_lock);
 	sq->uar_map   = c->bfreg->map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
@@ -3283,10 +3284,30 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
 	smp_wmb();
 }
 
+static void build_priv_to_xdpsq_associations(struct mlx5e_priv *priv)
+{
+	/*
+	 * Build the mapping from CPU to XDP send queue index for priv.
+	 * This is used by mlx5e_xdp_xmit() to determine which xdpsq (send queue)
+	 * should handle the xdptx data, based on the CPU running mlx5e_xdp_xmit()
+	 * and the target priv (netdev).
+	 */
+	int send_queue_idx, cpu;
+
+	if (unlikely(priv->channels.num == 0))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		send_queue_idx = cpu % priv->channels.num;
+		*per_cpu_ptr(priv->send_queue_idx_ptr, cpu) = send_queue_idx;
+	}
+}
+
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 {
 	mlx5e_build_txq_maps(priv);
 	mlx5e_activate_channels(priv, &priv->channels);
+	build_priv_to_xdpsq_associations(priv);
 	mlx5e_xdp_tx_enable(priv);
 
 	/* dev_watchdog() wants all TX queues to be started when the carrier is
@@ -6263,8 +6284,14 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
 	if (!priv->fec_ranges)
 		goto err_free_channel_stats;
 
+	priv->send_queue_idx_ptr = alloc_percpu(int);
+	if (!priv->send_queue_idx_ptr)
+		goto err_free_fec_ranges;
+
 	return 0;
 
+err_free_fec_ranges:
+	kfree(priv->fec_ranges);
 err_free_channel_stats:
 	kfree(priv->channel_stats);
 err_free_tx_rates:
@@ -6295,6 +6322,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
 	for (i = 0; i < priv->stats_nch; i++)
 		kvfree(priv->channel_stats[i]);
 	kfree(priv->channel_stats);
+	free_percpu(priv->send_queue_idx_ptr);
 	kfree(priv->tx_rates);
 	kfree(priv->txq2sq_stats);
 	kfree(priv->txq2sq);
-- 
2.43.0




^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-02-24  7:33 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-23  0:05 [PATCH net] net/mlx5e: Precompute xdpsq assignments for mlx5e_xdp_xmit() Finn Dayton
2026-02-23 13:46 ` Tariq Toukan
2026-02-24  4:32   ` Finn Dayton
2026-02-24  7:33     ` Tariq Toukan
2026-02-24  7:32 ` Tariq Toukan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox