public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Leon Romanovsky <leon@kernel.org>, Jason Gunthorpe <jgg@ziepe.ca>,
	"Saeed Mahameed" <saeedm@nvidia.com>,
	Tariq Toukan <tariqt@nvidia.com>
Cc: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>,
	Mark Bloch <mbloch@nvidia.com>, <linux-kernel@vger.kernel.org>,
	<linux-rdma@vger.kernel.org>, <netdev@vger.kernel.org>,
	Gal Pressman <gal@nvidia.com>,
	Dragos Tatulea <dtatulea@nvidia.com>,
	Moshe Shemesh <moshe@nvidia.com>, Shay Drory <shayd@nvidia.com>,
	Alexei Lazar <alazar@nvidia.com>
Subject: [PATCH mlx5-next 4/8] net/mlx5: LAG, use xa_alloc to manage LAG device indices
Date: Sun, 8 Mar 2026 08:55:55 +0200	[thread overview]
Message-ID: <20260308065559.1837449-5-tariqt@nvidia.com> (raw)
In-Reply-To: <20260308065559.1837449-1-tariqt@nvidia.com>

Replace the use of mlx5_get_dev_index() for xarray indexing with
xa_alloc() to dynamically allocate indices. This decouples the LAG
xarray index from the physical device index.

Update mlx5_ldev_add_netdev() and mlx5_ldev_remove_mdev() to find
entries by dev pointer, and replace mlx5_lag_get_dev_index_by_seq(ldev,
MLX5_LAG_P1) calls with mlx5_lag_get_master_idx() where appropriate.

No functional changes intended.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 242 ++++++++++++++----
 .../net/ethernet/mellanox/mlx5/core/lag/lag.h |  29 +++
 .../ethernet/mellanox/mlx5/core/lag/mpesw.c   |   3 +-
 .../mellanox/mlx5/core/lag/port_sel.c         |  12 +-
 4 files changed, 230 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 81b1f84f902e..4beee64c937a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -288,7 +288,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
 
 	kref_init(&ldev->ref);
 	mutex_init(&ldev->lock);
-	xa_init(&ldev->pfs);
+	xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
 
@@ -326,14 +326,42 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 	return -ENOENT;
 }
 
+static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
+{
+	unsigned long idx = 0;
+	void *entry;
+
+	if (!ldev)
+		return -ENOENT;
+
+	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+	if (!entry)
+		return -ENOENT;
+
+	return (int)idx;
+}
+
 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
 {
-	int i, num = 0;
+	int master_idx, i, num = 0;
 
 	if (!ldev)
 		return -ENOENT;
 
+	master_idx = mlx5_lag_get_master_idx(ldev);
+
+	/* If seq 0 is requested and there's a primary PF, return it */
+	if (master_idx >= 0) {
+		if (seq == 0)
+			return master_idx;
+		num++;
+	}
+
 	mlx5_ldev_for_each(i, 0, ldev) {
+		/* Skip the primary PF in the loop */
+		if (i == master_idx)
+			continue;
+
 		if (num == seq)
 			return i;
 		num++;
@@ -341,6 +369,75 @@ int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
 	return -ENOENT;
 }
 
+/* Devcom events for LAG master marking */
+#define LAG_DEVCOM_PAIR		(0)
+#define LAG_DEVCOM_UNPAIR	(1)
+
+static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
+{
+	int lowest_dev_idx = INT_MAX;
+	struct lag_func *pf;
+	int master_xa_idx = -1;
+	int dev_idx;
+	int i;
+
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		dev_idx = mlx5_get_dev_index(pf->dev);
+		if (dev_idx < lowest_dev_idx) {
+			lowest_dev_idx = dev_idx;
+			master_xa_idx = i;
+		}
+	}
+
+	if (master_xa_idx >= 0)
+		xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
+{
+	unsigned long idx = 0;
+	void *entry;
+
+	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
+	if (!entry)
+		return;
+
+	xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
+}
+
+/* Devcom event handler to manage LAG master marking */
+static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
+{
+	struct mlx5_core_dev *dev = my_data;
+	struct mlx5_lag *ldev;
+	int idx;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return 0;
+
+	mutex_lock(&ldev->lock);
+	switch (event) {
+	case LAG_DEVCOM_PAIR:
+		/* No need to mark more than once */
+		idx = mlx5_lag_get_master_idx(ldev);
+		if (idx >= 0)
+			break;
+		/* Check if all LAG ports are now registered */
+		if (mlx5_lag_num_devs(ldev) == ldev->ports)
+			mlx5_lag_mark_master(ldev);
+		break;
+
+	case LAG_DEVCOM_UNPAIR:
+		/* Clear master mark when a device is removed */
+		mlx5_lag_clear_master(ldev);
+		break;
+	}
+	mutex_unlock(&ldev->lock);
+	return 0;
+}
+
 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
 {
 	int i, num = 0;
@@ -411,11 +508,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 
 	/* Use native mapping by default where each port's buckets
 	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+	 * ports[] values are 1-indexed device indices for FW.
 	 */
 	mlx5_ldev_for_each(i, 0, ldev) {
 		for (j = 0; j < buckets; j++) {
 			idx = i * buckets + j;
-			ports[idx] = i + 1;
+			ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
 		}
 	}
 
@@ -427,8 +525,12 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 	/* Go over the disabled ports and for each assign a random active port */
 	for (i = 0; i < disabled_ports_num; i++) {
 		for (j = 0; j < buckets; j++) {
+			int rand_xa_idx;
+
 			get_random_bytes(&rand, 4);
-			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+			rand_xa_idx = enabled[rand % enabled_ports_num];
+			ports[disabled[i] * buckets + j] =
+				mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
 		}
 	}
 }
@@ -683,20 +785,23 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
 
 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_eswitch *master_esw;
 	struct mlx5_core_dev *dev0;
 	int i, j;
 	int err;
 
-	if (first_idx < 0)
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	master_esw = dev0->priv.eswitch;
-	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
 		struct mlx5_eswitch *slave_esw;
 
+		if (i == master_idx)
+			continue;
+
 		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
 
 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
@@ -706,9 +811,12 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
 	}
 	return 0;
 err:
-	mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
+	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
+		if (j == master_idx)
+			continue;
 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
 							 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
+	}
 	return err;
 }
 
@@ -717,8 +825,8 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 			   enum mlx5_lag_mode mode,
 			   unsigned long flags)
 {
-	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	struct mlx5_core_dev *dev0;
 	int err;
@@ -764,16 +872,17 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 		      enum mlx5_lag_mode mode,
 		      bool shared_fdb)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
 	struct mlx5_core_dev *dev0;
 	unsigned long flags = 0;
+	int master_idx;
 	int err;
 
-	if (first_idx < 0)
+	master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
 	if (err)
 		return err;
@@ -817,7 +926,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 
 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	bool roce_lag = __mlx5_lag_is_roce(ldev);
 	unsigned long flags = ldev->mode_flags;
@@ -826,19 +935,22 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 	int err;
 	int i;
 
-	if (first_idx < 0)
+	if (master_idx < 0)
 		return -EINVAL;
 
-	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
+	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
 	master_esw = dev0->priv.eswitch;
 	ldev->mode = MLX5_LAG_MODE_NONE;
 	ldev->mode_flags = 0;
 	mlx5_lag_mp_reset(ldev);
 
 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
-		mlx5_ldev_for_each(i, first_idx + 1, ldev)
+		mlx5_ldev_for_each(i, 0, ldev) {
+			if (i == master_idx)
+				continue;
 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
 								 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
+		}
 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
 	}
 
@@ -868,7 +980,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 {
-	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
+	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 #ifdef CONFIG_MLX5_ESWITCH
 	struct mlx5_core_dev *dev;
 	u8 mode;
@@ -877,7 +989,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 	bool roce_support;
 	int i;
 
-	if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
+	if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
 		return false;
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -888,7 +1000,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 			return false;
 	}
 
-	pf = mlx5_lag_pf(ldev, first_idx);
+	pf = mlx5_lag_pf(ldev, master_idx);
 	dev = pf->dev;
 	mode = mlx5_eswitch_mode(dev);
 	mlx5_ldev_for_each(i, 0, ldev) {
@@ -904,9 +1016,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 			return false;
 	}
 #endif
-	pf = mlx5_lag_pf(ldev, first_idx);
+	pf = mlx5_lag_pf(ldev, master_idx);
 	roce_support = mlx5_get_roce_state(pf->dev);
-	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == master_idx)
+			continue;
 		pf = mlx5_lag_pf(ldev, i);
 		if (mlx5_get_roce_state(pf->dev) != roce_support)
 			return false;
@@ -967,8 +1081,11 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
 		}
-		mlx5_ldev_for_each(i, idx + 1, ldev)
+		mlx5_ldev_for_each(i, 0, ldev) {
+			if (i == idx)
+				continue;
 			mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
+		}
 	}
 
 	err = mlx5_deactivate_lag(ldev);
@@ -986,14 +1103,18 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
 
 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
 {
-	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_core_dev *dev;
+	bool ret = false;
+	int idx;
 	int i;
 
+	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	if (idx < 0)
 		return false;
 
-	mlx5_ldev_for_each(i, idx + 1, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (i == idx)
+			continue;
 		dev = mlx5_lag_pf(ldev, i)->dev;
 		if (is_mdev_switchdev_mode(dev) &&
 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
@@ -1011,9 +1132,9 @@ bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
-		return true;
+		ret = true;
 
-	return false;
+	return ret;
 }
 
 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
@@ -1239,12 +1360,16 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 			}
 
 			return;
-		} else if (roce_lag) {
+		}
+
+		if (roce_lag) {
 			struct mlx5_core_dev *dev;
 
 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
-			mlx5_ldev_for_each(i, idx + 1, ldev) {
+			mlx5_ldev_for_each(i, 0, ldev) {
+				if (i == idx)
+					continue;
 				dev = mlx5_lag_pf(ldev, i)->dev;
 				if (mlx5_get_roce_state(dev))
 					mlx5_nic_vport_enable_roce(dev);
@@ -1598,15 +1723,21 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
 				struct mlx5_core_dev *dev,
 				struct net_device *netdev)
 {
-	unsigned int fn = mlx5_get_dev_index(dev);
 	struct lag_func *pf;
 	unsigned long flags;
+	int i;
 
 	spin_lock_irqsave(&lag_lock, flags);
-	pf = mlx5_lag_pf(ldev, fn);
-	pf->netdev = netdev;
-	ldev->tracker.netdev_state[fn].link_up = 0;
-	ldev->tracker.netdev_state[fn].tx_enabled = 0;
+	/* Find pf entry by matching dev pointer */
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev) {
+			pf->netdev = netdev;
+			ldev->tracker.netdev_state[i].link_up = 0;
+			ldev->tracker.netdev_state[i].tx_enabled = 0;
+			break;
+		}
+	}
 	spin_unlock_irqrestore(&lag_lock, flags);
 }
 
@@ -1631,23 +1762,22 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
 			      struct mlx5_core_dev *dev)
 {
-	unsigned int fn = mlx5_get_dev_index(dev);
 	struct lag_func *pf;
+	u32 idx;
 	int err;
 
-	pf = xa_load(&ldev->pfs, fn);
-	if (!pf) {
-		pf = kzalloc_obj(*pf);
-		if (!pf)
-			return -ENOMEM;
+	pf = kzalloc_obj(*pf);
+	if (!pf)
+		return -ENOMEM;
 
-		err = xa_err(xa_store(&ldev->pfs, fn, pf, GFP_KERNEL));
-		if (err) {
-			kfree(pf);
-			return err;
-		}
+	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
+		       GFP_KERNEL);
+	if (err) {
+		kfree(pf);
+		return err;
 	}
 
+	pf->idx = idx;
 	pf->dev = dev;
 	dev->priv.lag = ldev;
 
@@ -1662,11 +1792,14 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 				  struct mlx5_core_dev *dev)
 {
 	struct lag_func *pf;
-	int fn;
+	int i;
 
-	fn = mlx5_get_dev_index(dev);
-	pf = xa_load(&ldev->pfs, fn);
-	if (!pf || pf->dev != dev)
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev)
+			break;
+	}
+	if (i >= MLX5_MAX_PORTS)
 		return;
 
 	if (pf->port_change_nb.nb.notifier_call)
@@ -1674,7 +1807,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 
 	pf->dev = NULL;
 	dev->priv.lag = NULL;
-	xa_erase(&ldev->pfs, fn);
+	xa_erase(&ldev->pfs, pf->idx);
 	kfree(pf);
 }
 
@@ -1744,7 +1877,8 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
 	dev->priv.hca_devcom_comp =
 		mlx5_devcom_register_component(dev->priv.devc,
 					       MLX5_DEVCOM_HCA_PORTS,
-					       &attr, NULL, dev);
+					       &attr, mlx5_lag_devcom_event,
+					       dev);
 	if (!dev->priv.hca_devcom_comp) {
 		mlx5_core_err(dev,
 			      "Failed to register devcom HCA component.");
@@ -1775,6 +1909,9 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
 	}
 	mlx5_ldev_remove_mdev(ldev, dev);
 	mutex_unlock(&ldev->lock);
+	/* Send devcom event to notify peers that a device is being removed */
+	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
 	mlx5_lag_unregister_hca_devcom_comp(dev);
 	mlx5_ldev_put(ldev);
 }
@@ -1798,6 +1935,9 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 		msleep(100);
 		goto recheck;
 	}
+	/* Send devcom event to notify peers that a device was added */
+	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
+			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
 	mlx5_ldev_add_debugfs(dev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 09758871b3da..30cbd61768f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -7,6 +7,12 @@
 #include <linux/debugfs.h>
 
 #define MLX5_LAG_MAX_HASH_BUCKETS 16
+/* XArray mark for the LAG master device
+ * (device with lowest mlx5_get_dev_index).
+ * Note: XA_MARK_0 is reserved by XA_FLAGS_ALLOC for free-slot tracking.
+ */
+#define MLX5_LAG_XA_MARK_MASTER XA_MARK_1
+
 #include "mlx5_core.h"
 #include "mp.h"
 #include "port_sel.h"
@@ -39,6 +45,7 @@ struct lag_func {
 	struct mlx5_core_dev *dev;
 	struct net_device    *netdev;
 	bool has_drop;
+	unsigned int idx; /* xarray index assigned by LAG */
 	struct mlx5_nb port_change_nb;
 };
 
@@ -90,6 +97,28 @@ mlx5_lag_pf(struct mlx5_lag *ldev, unsigned int idx)
 	return xa_load(&ldev->pfs, idx);
 }
 
+/* Get device index (mlx5_get_dev_index) from xarray index */
+static inline int mlx5_lag_xa_to_dev_idx(struct mlx5_lag *ldev, int xa_idx)
+{
+	struct lag_func *pf = mlx5_lag_pf(ldev, xa_idx);
+
+	return pf ? mlx5_get_dev_index(pf->dev) : -ENOENT;
+}
+
+/* Find lag_func by device index (reverse lookup from mlx5_get_dev_index) */
+static inline struct lag_func *
+mlx5_lag_pf_by_dev_idx(struct mlx5_lag *ldev, int dev_idx)
+{
+	struct lag_func *pf;
+	unsigned long idx;
+
+	xa_for_each(&ldev->pfs, idx, pf) {
+		if (mlx5_get_dev_index(pf->dev) == dev_idx)
+			return pf;
+	}
+	return NULL;
+}
+
 static inline bool
 __mlx5_lag_is_active(struct mlx5_lag *ldev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 0e7d206cd594..5eea12a6887a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -67,9 +67,9 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
 
 static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
 {
+	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	struct mlx5_core_dev *dev0;
 	int err;
-	int idx;
 	int i;
 
 	if (ldev->mode == MLX5_LAG_MODE_MPESW)
@@ -78,7 +78,6 @@ static int mlx5_lag_enable_mpesw(struct mlx5_lag *ldev)
 	if (ldev->mode != MLX5_LAG_MODE_NONE)
 		return -EINVAL;
 
-	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
 	if (idx < 0)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index 7e9e3e81977d..2a034b2a3eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -84,8 +84,11 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
 			idx = i * ldev->buckets + j;
 			affinity = ports[idx];
 
+			/* affinity is 1-indexed device index,
+			 * use reverse lookup.
+			 */
 			dest.vport.vhca_id =
-				MLX5_CAP_GEN(mlx5_lag_pf(ldev, affinity - 1)->dev,
+				MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, affinity - 1)->dev,
 					     vhca_id);
 			lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
 								      NULL, &flow_act,
@@ -358,7 +361,7 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev,
 		return;
 
 	dev = mlx5_lag_pf(ldev, first_idx)->dev;
-	mlx5_ldev_for_each(i, first_idx, ldev) {
+	mlx5_ldev_for_each(i, 0, ldev) {
 		for (j = 0; j < ldev->buckets; j++) {
 			idx = i * ldev->buckets + j;
 			mlx5_del_flow_rules(lag_definer->rules[idx]);
@@ -595,8 +598,11 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
 			if (ldev->v2p_map[idx] == ports[idx])
 				continue;
 
+			/* ports[] contains 1-indexed device indices,
+			 * use reverse lookup.
+			 */
 			dest.vport.vhca_id =
-				MLX5_CAP_GEN(mlx5_lag_pf(ldev, ports[idx] - 1)->dev,
+				MLX5_CAP_GEN(mlx5_lag_pf_by_dev_idx(ldev, ports[idx] - 1)->dev,
 					     vhca_id);
 			err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
 			if (err)
-- 
2.44.0


  parent reply	other threads:[~2026-03-08  6:57 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-08  6:55 [PATCH mlx5-next 0/8] mlx5-next updates 2026-03-08 Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 1/8] net/mlx5: Add IFC bits for shared headroom pool PBMC support Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 2/8] net/mlx5: Add silent mode set/query and VHCA RX IFC bits Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 3/8] net/mlx5: LAG, replace pf array with xarray Tariq Toukan
2026-03-08  6:55 ` Tariq Toukan [this message]
2026-03-08  6:55 ` [PATCH mlx5-next 5/8] net/mlx5: E-switch, modify peer miss rule index to vhca_id Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 6/8] net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 7/8] net/mlx5: Add VHCA RX flow destination support for FW steering Tariq Toukan
2026-03-08  6:55 ` [PATCH mlx5-next 8/8] {net/RDMA}/mlx5: Add LAG demux table API and vport demux rules Tariq Toukan
2026-03-08 15:52   ` Jakub Kicinski
2026-03-08 18:34     ` Mark Bloch
2026-03-09 21:33       ` Jakub Kicinski
2026-03-10  6:05         ` Mark Bloch
2026-03-10 23:58           ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260308065559.1837449-5-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=alazar@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dtatulea@nvidia.com \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=jgg@ziepe.ca \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox