public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net/mlx5: poll mlx5 eq during irq migration
@ 2026-03-04 16:17 Praveen Kumar Kannoju
  2026-03-04 20:11 ` Jason Gunthorpe
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Praveen Kumar Kannoju @ 2026-03-04 16:17 UTC (permalink / raw)
  To: saeedm, leon, tariqt, mbloch, andrew+netdev, davem, edumazet,
	kuba, pabeni, netdev, linux-rdma, linux-kernel
  Cc: rama.nichanamatlu, manjunath.b.patil, anand.a.khoje,
	Praveen Kumar Kannoju

Lost interrupts have been observed in multiple issues during IRQ
migration caused by CPU scaling activity. The lost interrupts leave
unhandled EQEs behind, causing the corresponding Mellanox transmit
queues to fill up and time out. This patch overcomes the situation by
polling the EQ associated with the IRQ that undergoes migration, to
recover any unhandled EQEs and keep transmission on the corresponding
queue uninterrupted.

Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 41 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/eq.h  |  1 +
 2 files changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 25499da177bc..4f0653305f46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -22,6 +22,10 @@
 #include "devlink.h"
 #include "en_accel/ipsec.h"
 
+static unsigned int mlx5_reap_eq_irq_aff_change; /* 1 = reap EQ on IRQ affinity change */
+module_param(mlx5_reap_eq_irq_aff_change, uint, 0644);
+MODULE_PARM_DESC(mlx5_reap_eq_irq_aff_change, "mlx5_reap_eq_irq_aff_change: 0 = Disable MLX5 EQ Reap upon IRQ affinity change, "
+		 "1 = Enable MLX5 EQ Reap upon IRQ affinity change. Default=0");
 enum {
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
 };
@@ -951,10 +955,36 @@ static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
 static void free_rmap(struct mlx5_core_dev *mdev) {}
 #endif
 
+/* IRQ affinity notifier: poll the EQ to reap EQEs left unhandled by a lost interrupt. */
+static void mlx5_eq_reap_irq_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
+{
+	struct mlx5_eq_comp *eq = container_of(notify, struct mlx5_eq_comp, notify);
+	u32 eqe_count;
+
+	if (!mlx5_reap_eq_irq_aff_change)
+		return;
+
+	mlx5_core_warn(eq->core.dev, "irqn = 0x%x migration notified, EQ 0x%x: Cons = 0x%x\n",
+		       eq->core.irqn, eq->core.eqn, eq->core.cons_index);
+	while (!rtnl_trylock()) /* retry every 20 ms until RTNL is held */
+		msleep(20);
+	eqe_count = mlx5_eq_poll_irq_disabled(eq);
+	if (eqe_count)
+		mlx5_core_warn(eq->core.dev, "Recovered %u eqes on EQ 0x%x\n",
+			       eqe_count, eq->core.eqn);
+	rtnl_unlock();
+}
+
+static void mlx5_eq_reap_irq_release(struct kref *ref) {} /* notify is embedded; nothing to free */
+
 static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 
+	if (irq_set_affinity_notifier(eq->core.irqn, NULL))
+		mlx5_core_warn(dev, "failed to unset EQ 0x%x to irq 0x%x affinty\n",
+			       eq->core.eqn, eq->core.irqn);
+
 	xa_erase(&table->comp_eqs, vecidx);
 	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
 	if (destroy_unmap_eq(dev, &eq->core))
@@ -990,6 +1020,7 @@ static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 	struct mlx5_irq *irq;
 	int nent;
 	int err;
+	int ret;
 
 	lockdep_assert_held(&table->comp_lock);
 	if (table->curr_comp_eqs == table->max_comp_eqs) {
@@ -1036,6 +1067,16 @@ static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 	if (err)
 		goto disable_eq;
 
+	eq->notify.notify = mlx5_eq_reap_irq_notify;
+	eq->notify.release = mlx5_eq_reap_irq_release;
+	ret = irq_set_affinity_notifier(eq->core.irqn, &eq->notify);
+	if (ret) {
+		mlx5_core_warn(dev, "mlx5_eq_reap_irq_nofifier: EQ 0x%x irqn = 0x%x irq_set_affinity_notifier failed: %d\n",
+			       eq->core.eqn, eq->core.irqn, ret);
+	}
+	mlx5_core_dbg(dev, "mlx5_eq_reap_irq_nofifier: EQ 0x%x irqn = 0x%x irq_set_affinity_notifier set.\n",
+		      eq->core.eqn, eq->core.irqn);
+
 	table->curr_comp_eqs++;
 	return eq->core.eqn;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index b1edc71ffc6d..669bacb9e390 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -46,6 +46,7 @@ struct mlx5_eq_comp {
 	struct notifier_block   irq_nb;
 	struct mlx5_eq_tasklet  tasklet_ctx;
 	struct list_head        list;
+	struct irq_affinity_notify notify;
 };
 
 static inline u32 eq_get_size(struct mlx5_eq *eq)
-- 
2.43.7


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2026-03-20 16:32 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-04 16:17 [PATCH] net/mlx5: poll mlx5 eq during irq migration Praveen Kumar Kannoju
2026-03-04 20:11 ` Jason Gunthorpe
     [not found]   ` <CH3PR10MB7704DD1E6B9A671796FC6B528C7DA@CH3PR10MB7704.namprd10.prod.outlook.com>
2026-03-06  0:32     ` Jason Gunthorpe
2026-03-06 14:19       ` Praveen Kannoju
2026-03-06 23:10         ` Jason Gunthorpe
2026-03-07  5:43           ` Praveen Kannoju
2026-03-12  0:35             ` Jason Gunthorpe
2026-03-20 16:31               ` Praveen Kannoju
2026-03-05  4:17 ` kernel test robot
2026-03-05  8:45 ` kernel test robot
2026-03-05  9:29 ` kernel test robot
2026-03-05 11:16 ` kernel test robot
2026-03-05 13:15   ` Praveen Kannoju

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox