All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Leon Romanovsky <leon@kernel.org>, Jason Gunthorpe <jgg@ziepe.ca>,
	"Saeed Mahameed" <saeedm@nvidia.com>,
	Tariq Toukan <tariqt@nvidia.com>
Cc: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	Mark Bloch <mbloch@nvidia.com>, <linux-kernel@vger.kernel.org>,
	<linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
	Gal Pressman <gal@nvidia.com>,
	Dragos Tatulea <dtatulea@nvidia.com>,
	Moshe Shemesh <moshe@nvidia.com>, Shay Drory <shayd@nvidia.com>,
	Alexei Lazar <alazar@nvidia.com>
Subject: [PATCH mlx5-next 4/8] net/mlx5: LAG, use xa_alloc to manage LAG device indices
Date: Sun, 8 Mar 2026 08:55:55 +0200	[thread overview]
Message-ID: <20260308065559.1837449-5-tariqt@nvidia.com> (raw)
In-Reply-To: <20260308065559.1837449-1-tariqt@nvidia.com>

Replace the use of mlx5_get_dev_index() for xarray indexing with
xa_alloc() to dynamically allocate indices. This decouples the LAG
xarray index from the physical device index.

Update mlx5_ldev_add_netdev() and mlx5_ldev_remove_mdev() to find
entries by dev pointer, and replace
mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1) calls with
mlx5_lag_get_master_idx() where appropriate.

No functional changes intended.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 242 ++++++++++++++----
 .../net/ethernet/mellanox/mlx5/core/lag/lag.h |  29 +++
 .../ethernet/mellanox/mlx5/core/lag/mpesw.c   |   3 +-
 .../mellanox/mlx5/core/lag/port_sel.c         |  12 +-
 4 files changed, 230 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 81b1f84f902e..4beee64c937a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -288,7 +288,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
 
 	kref_init(&ldev->ref);
 	mutex_init(&ldev->lock);
-	xa_init(&ldev->pfs);
+	xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
 
@@ -326,14 +326,42 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 	return -ENOENT;
 }
 
+static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
+{
+	unsigned long idx = 0;
+	void *entry;
+
+	if (!ldev)
+		return -ENOENT;
+
+	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+	if (!entry)
+		return -ENOENT;
+
+	return (int)idx;
+}
+
 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
 {
-	int i, num = 0;
+	int master_idx, i, num = 0;
 
 	if (!ldev)
 		return -ENOENT;
 
+	master_idx = mlx5_lag_get_master_idx(ldev);
+
+	/* If seq 0 is requested and there's a primary PF, return it */
+	if (master_idx >= 0) {
+		if (seq == 0)
+			return master_idx;
+		num++;
+	}
+
 	mlx5_ldev_for_each(i, 0, ldev) {
+		/* Skip the primary PF in the loop */
+		if (i == master_idx)
+			continue;
+
 		if (num == seq)
 			return i;
 		num++;
@@ -341,6 +369,75 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
 	return -ENOENT;
 }
 
+/* Devcom events for LAG master marking */
+#define LAG_DEVCOM_PAIR		(0)
+#define LAG_DEVCOM_UNPAIR	(1)
+
+static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
+{
+	int lowest_dev_idx = INT_MAX;
+	struct lag_func *pf;
+	int master_xa_idx = -1;
+	int dev_idx;
+	int i;
+
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		dev_idx = mlx5_get_dev_index(pf->dev);
+		if (dev_idx < lowest_dev_idx) {
+			lowest_dev_idx = dev_idx;
+			master_xa_idx = i;
+		}
+	}
+
+	if (master_xa_idx >= 0)
+		xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
+{
+	unsigned long idx = 0;
+	void *entry;
+
+	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+	if (!entry)
+		return;
+
+	xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+/* Devcom event handler to manage LAG master marking */
+static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
+{
+	struct mlx5_core_dev *dev = my_data;
+	struct mlx5_lag *ldev;
+	int idx;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return 0;
+
+	mutex_lock(&ldev->lock);
+	switch (event) {
+	case LAG_DEVCOM_PAIR:
+		/* No need to mark more than once */
+		idx = mlx5_lag_get_master_idx(ldev);
+		if (idx >= 0)
+			break;
+		/* Check if all LAG ports are now registered */
+		if (mlx5_lag_num_devs(ldev) == ldev->ports)
+			mlx5_lag_mark_master(ldev);
+		break;
+
+	case LAG_DEVCOM_UNPAIR:
+		/* Clear master mark when a device is removed */
+		mlx5_lag_clear_master(ldev);
+		break;
+	}
+	mutex_unlock(&ldev->lock);
+	return 0;
+}
+
 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
 {
 	int i, num = 0;
@@ -411,11 +508,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 
 	/* Use native mapping by default where each port's buckets
 	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+	 * ports[] values are 1-indexed device indices for FW.
 	 */
 	mlx5_ldev_for_each(i, 0, ldev) {
 		for (j = 0; j < buckets; j++) {
 			idx = i * buckets + j;
-			ports[idx] = i + 1;
+			ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
 		}
 	}
 
@@ -427,8 +525,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 	/* Go over the disabled ports and for each assign a random active port */
 	for (i = 0; i < disabled_ports_num; i++) {
 		for (j = 0; j < buckets; j++) {
+			int rand_xa_idx;
+
 			get_random_bytes(&rand, 4);
-			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+			rand_xa_idx = enabled[rand % enabled_ports_num];
+			ports[disabled[i] * buckets + j] =
+				mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
 		}
 	}
 }
@@ -683,20 +785,23 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
 
 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_eswitch *master_esw;
 	struct mlx5_core_dev *dev0;
 	int i, j;
 	int err;
 
-	if (first_idx < 0)
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	master_esw = dev0->priv.eswitch;
-	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
 		struct mlx5_eswitch *slave_esw;
 
+		if (i == master_idx)
+			continue;
+
 		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
 
 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
@@ -706,9 +811,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
 	}
 	return 0;
 err:
-	mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
+	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
+		if (j == master_idx)
+			continue;
 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
 							 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
+	}
 	return err;
 }
 
@@ -717,8 +825,8 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 			   enum mlx5_lag_mode mode,
 			   unsigned long flags)
 {
-	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	struct mlx5_core_dev *dev0;
 	int err;
@@ -764,16 +872,17 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      enum mlx5_lag_mode mode,
 		      bool shared_fdb)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
 	struct mlx5_core_dev *dev0;
 	unsigned long flags = 0;
+	int master_idx;
 	int err;
 
-	if (first_idx < 0)
+	master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
 	if (err)
 		return err;
@@ -817,7 +926,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 
 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	bool roce_lag = __mlx5_lag_is_roce(ldev);
 	unsigned long flags = ldev->mode_flags;
@@ -826,19 +935,22 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 	int err;
 	int i;
 
-	if (first_idx < 0)
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	master_esw = dev0->priv.eswitch;
 	ldev->mode = MLX5_LAG_MODE_NONE;
 	ldev->mode_flags = 0;
 	mlx5_lag_mp_reset(ldev);
 
 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
-		mlx5_ldev_for_each(i, first_idx + 1, ldev)
+		mlx5_ldev_for_each(i, 0, ldev) {
+			if (i == master_idx)
+				continue;
 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
 								 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
+		}
 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	}
 
@@ -868,7 +980,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 #ifdef CONFIG_MLX5_ESWITCH
 	struct mlx5_core_dev *dev;
 	u8 mode;
@@ -877,7 +989,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 	bool roce_support;
 	int i;
 
-	if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
+	if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
 		return false;
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -888,7 +1000,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 			return false;
 	}
 
-	pf = mlx5_lag_pf(ldev, first_idx);
+	pf = mlx5_lag_pf(ldev, master_idx);
 	dev = pf->dev;
 	mode = mlx5_eswitch_mode(dev);
 	mlx5_ldev_for_each(i, 0, ldev) {
@@ -904,9 +1016,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 			return false;
 	}
 #endif
-	pf = mlx5_lag_pf(ldev, first_idx);
+	pf = mlx5_lag_pf(ldev, master_idx);
 	roce_support = mlx5_get_roce_state(pf->dev);
-	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == master_idx)
+			continue;
 		pf = mlx5_lag_pf(ldev, i);
 		if (mlx5_get_roce_state(pf->dev) != roce_support)
 			return false;
@@ -967,8 +1081,11 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
 		}
-		mlx5_ldev_for_each(i, idx + 1, ldev)
+		mlx5_ldev_for_each(i, 0, ldev) {
+			if (i == idx)
+				continue;
 			mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
+		}
 	}
 
 	err = mlx5_deactivate_lag(ldev);
@@ -986,14 +1103,18 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 
 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
 {
-	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_core_dev *dev;
+	bool ret = false;
+	int idx;
 	int i;
 
+	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	if (idx < 0)
 		return false;
 
-	mlx5_ldev_for_each(i, idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == idx)
+			continue;
 		dev = mlx5_lag_pf(ldev, i)->dev;
 		if (is_mdev_switchdev_mode(dev) &&
 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
@@ -1011,9 +1132,9 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
-		return true;
+		ret = true;
 
-	return false;
+	return ret;
 }
 
 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
@@ -1239,12 +1360,16 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 			}
 
 			return;
-		} else if (roce_lag) {
+		}
+
+		if (roce_lag) {
 			struct mlx5_core_dev *dev;
 
 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
-			mlx5_ldev_for_each(i, idx + 1, ldev) {
+			mlx5_ldev_for_each(i, 0, ldev) {
+				if (i == idx)
+					continue;
 				dev = mlx5_lag_pf(ldev, i)->dev;
 				if (mlx5_get_roce_state(dev))
 					mlx5_nic_vport_enable_roce(dev);
@@ -1598,15 +1723,21 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
 				struct mlx5_core_dev *dev,
 				struct net_device *netdev)
 {
-	unsigned int fn = mlx5_get_dev_index(dev);
 	struct lag_func *pf;
 	unsigned long flags;
+	int i;
 
 	spin_lock_irqsave(&lag_lock, flags);
-	pf = mlx5_lag_pf(ldev, fn);
-	pf->netdev = netdev;
-	ldev->tracker.netdev_state[fn].link_up = 0;
-	ldev->tracker.netdev_state[fn].tx_enabled = 0;
+	/* Find pf entry by matching dev pointer */
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev) {
+			pf->netdev = netdev;
+			ldev->tracker.netdev_state[i].link_up = 0;
+			ldev->tracker.netdev_state[i].tx_enabled = 0;
+			break;
+		}
+	}
 	spin_unlock_irqrestore(&lag_lock, flags);
 }
 
@@ -1631,23 +1762,22 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
 			      struct mlx5_core_dev *dev)
 {
-	unsigned int fn = mlx5_get_dev_index(dev);
 	struct lag_func *pf;
+	u32 idx;
 	int err;
 
-	pf = xa_load(&ldev->pfs, fn);
-	if (!pf) {
-		pf = kzalloc_obj(*pf);
-		if (!pf)
-			return -ENOMEM;
+	pf = kzalloc_obj(*pf);
+	if (!pf)
+		return -ENOMEM;
 
-		err = xa_err(xa_store(&ldev->pfs, fn, pf, GFP_KERNEL));
-		if (err) {
-			kfree(pf);
-			return err;
-		}
+	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
+		       GFP_KERNEL);
+	if (err) {
+		kfree(pf);
+		return err;
 	}
 
+	pf->idx = idx;
 	pf->dev = dev;
 	dev->priv.lag = ldev;
 
@@ -1662,11 +1792,14 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 				  struct mlx5_core_dev *dev)
 {
 	struct lag_func *pf;
-	int fn;
+	int i;
 
-	fn = mlx5_get_dev_index(dev);
-	pf = xa_load(&ldev->pfs, fn);
-	if (!pf || pf->dev != dev)
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev)
+			break;
+	}
+	if (i >= MLX5_MAX_PORTS)
 		return;
 
 	if (pf->port_change_nb.nb.notifier_call)
@@ -1674,7 +1807,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 
 	pf->dev = NULL;
 	dev->priv.lag = NULL;
-	xa_erase(&ldev->pfs, fn);
+	xa_erase(&ldev->pfs, pf->idx);
 	kfree(pf);
 }
 
@@ -1744,7 +1877,8 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
 	dev->priv.hca_devcom_comp =
 		mlx5_devcom_register_component(dev->priv.devc,
 					       MLX5_DEVCOM_HCA_PORTS,
-					       &attr, NULL, dev);
+					       &attr, mlx5_lag_devcom_event,
+					       dev);
 	if (!dev->priv.hca_devcom_comp) {
 		mlx5_core_err(dev,
 			      "Failed to register devcom HCA component.");
@@ -1775,6 +1909,9 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
 	}
 	mlx5_ldev_remove_mdev(ldev, dev);
 	mutex_unlock(&ldev->lock);
+	/* Send devcom event to notify peers that a device is being removed */
+	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
 	mlx5_lag_unregister_hca_devcom_comp(dev);
 	mlx5_ldev_put(ldev);
 }
@@ -1798,6 +1935,9 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 		msleep(100);
 		goto recheck;
 	}
+	/* Send devcom event to notify peers that a device was added */
+	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
 	mlx5_ldev_add_debugfs(dev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 09758871b3da..30cbd61768f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -7,6 +7,12 @@
 #include <linux/debugfs.h>
 
 #define MLX5_LAG_MAX_HASH_BUCKETS 16
+/* XArray mark for the LAG master device
+ * (device with lowest mlx5_get_dev_index).
+ * Note: XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking.
+ */
+#define MLX5_LAG_XA_MARK_MASTER XA_MARK_1
+
 #include "mlx5_core.h"
 #include "mp.h"
 #include "port_sel.h"
@@ -39,6 +45,7 @@ struct lag_func {
 	struct mlx5_core_dev *dev;
 	struct net_device    *netdev;
 	bool has_drop;
+	unsigned int idx; /* xarray index assigned by LAG */
 	struct mlx5_nb port_change_nb;
 };
 
@@ -90,6 +97,28 @@ mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx)
 	return xa_load(&ldev->pfs, idx);
 }
 
+/* Get device index (mlx5_get_dev_index) from xarray index */
+static inline int mlx5_lag_xa_to_dev_idx(struct mlx5_lag *ldev, int xa_idx)
+{
+	struct lag_func *pf = mlx5_lag_pf(ldev, xa_idx);
+
+	return pf ? mlx5_get_dev_index(pf->dev) : -ENOENT;
+}
+
+/* Find lag_func by device index (reverse lookup from mlx5_get_dev_index) */
+static inline struct lag_func *
+mlx5_lag_pf_by_dev_idx(struct mlx5_lag *ldev, int dev_idx)
+{
+	struct lag_func *pf;
+	unsigned long idx;
+
+	xa_for_each(&ldev->pfs, idx, pf) {
+		if (mlx5_get_dev_index(pf->dev) == dev_idx)
+			return pf;
+	}
+	return NULL;
+}
+
 static inline bool
 __mlx5_lag_is_active(struct mlx5_lag *ldev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 0e7d206cd594..5eea12a6887a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -67,9 +67,9 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
 
 static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
 {
+	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_core_dev *dev0;
 	int err;
-	int idx;
 	int i;
 
 	if (ldev->mode == MLX5_LAG_MODE_MPESW)
@@ -78,7 +78,6 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
 	if (ldev->mode != MLX5_LAG_MODE_NONE)
 		return -EINVAL;
 
-	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	if (idx < 0)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index 7e9e3e81977d..2a034b2a3eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -84,8 +84,11 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
 			idx = i * ldev->buckets + j;
 			affinity = ports[idx];
 
+			/* affinity is 1-indexed device index,
+			 * use reverse lookup.
+			 */
 			dest.vport.vhca_id =
-				MLX5_CAP_GEN(mlx5_lag_pf(ldev, affinity - 1)->dev,
+				MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, affinity - 1)->dev,
 					     vhca_id);
 			lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
 								      NULL, &flow_act,
@@ -358,7 +361,7 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
 		return;
 
 	dev = mlx5_lag_pf(ldev, first_idx)->dev;
-	mlx5_ldev_for_each(i, first_idx, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
 		for (j = 0; j < ldev->buckets; j++) {
 			idx = i * ldev->buckets + j;
 			mlx5_del_flow_rules(lag_definer->rules[idx]);
@@ -595,8 +598,11 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
 			if (ldev->v2p_map[idx] == ports[idx])
 				continue;
 
+			/* ports[] contains 1-indexed device indices,
+			 * use reverse lookup.
+			 */
 			dest.vport.vhca_id =
-				MLX5_CAP_GEN(mlx5_lag_pf(ldev, ports[idx] - 1)->dev,
+				MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, ports[idx] - 1)->dev,
 					     vhca_id);
 			err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
 			if (err)
-- 
2.44.0


  parent reply	other threads:[~2026-03-08  6:57 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-08  6:55 [PATCH mlx5-next 0/8] mlx5-next updates 2026-03-08 Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 1/8] net/mlx5: Add IFC bits for shared headroom pool PBMC support Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 2/8] net/mlx5: Add silent mode set/query and VHCA RX IFC bits Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 3/8] net/mlx5: LAG, replace pf array with xarray Tariq Toukan
2026-03-08  6:55 ` Tariq Toukan [this message]
2026-03-08  6:55 ` [PATCH mlx5-next 5/8] net/mlx5: E-switch, modify peer miss rule index to vhca_id Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 6/8] net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 7/8] net/mlx5: Add VHCA RX flow destination support for FW steering Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 8/8] {net/RDMA}/mlx5: Add LAG demux table API and vport demux rules Tariq Toukan
2026-03-08 15:52   ` Jakub Kicinski
2026-03-08 18:34     ` Mark Bloch
2026-03-09 21:33       ` Jakub Kicinski
2026-03-10  6:05         ` Mark Bloch
2026-03-10 23:58           ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260308065559.1837449-5-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=alazar@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dtatulea@nvidia.com \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.