All of lore.kernel.org
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	linux-rdma@vger.kernel.org, Shay Drory <shayd@nvidia.com>,
	Mark Bloch <mbloch@nvidia.com>
Subject: [net-next V2 02/14] {net/RDMA}/mlx5: introduce lag_for_each_peer
Date: Wed,  7 Jun 2023 14:03:58 -0700	[thread overview]
Message-ID: <20230607210410.88209-3-saeed@kernel.org> (raw)
In-Reply-To: <20230607210410.88209-1-saeed@kernel.org>

From: Shay Drory <shayd@nvidia.com>

Introduce a generic API to iterate over all the devices which are part
of the LAG. This API replaces mlx5_lag_get_peer_mdev(), which retrieves
only a single peer device from the LAG.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c           | 98 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 24 +++--
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 21 +++-
 include/linux/mlx5/driver.h                   |  8 +-
 4 files changed, 100 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index a4db22fe1883..c7a4ee896121 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
 
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+	struct mlx5_core_dev *peer_dev;
+	int i;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+		if (mlx5_lag_is_mpesw(peer_dev))
+			*num_ports += peer_num_ports;
+		else
+			/* Only 1 ib port is the representor for all uplinks */
+			*num_ports += peer_num_ports - 1;
+	}
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
 	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+	struct mlx5_core_dev *lag_master = dev;
 	const struct mlx5_ib_profile *profile;
 	struct mlx5_core_dev *peer_dev;
 	struct mlx5_ib_dev *ibdev;
-	int second_uplink = false;
-	u32 peer_num_ports;
+	int new_uplink = false;
 	int vport_index;
 	int ret;
+	int i;
 
 	vport_index = rep->vport_index;
 
 	if (mlx5_lag_is_shared_fdb(dev)) {
-		peer_dev = mlx5_lag_get_peer_mdev(dev);
-		peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
 		if (mlx5_lag_is_master(dev)) {
-			if (mlx5_lag_is_mpesw(dev))
-				num_ports += peer_num_ports;
-			else
-				num_ports += peer_num_ports - 1;
-
+			mlx5_ib_num_ports_update(dev, &num_ports);
 		} else {
 			if (rep->vport == MLX5_VPORT_UPLINK) {
 				if (!mlx5_lag_is_mpesw(dev))
 					return 0;
-				second_uplink = true;
+				new_uplink = true;
 			}
+			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+				if (mlx5_lag_is_master(peer_dev))
+					lag_master = peer_dev;
+				else if (!mlx5_lag_is_mpesw(dev))
+				/* Only 1 ib port is the representor for all uplinks */
+					peer_n_ports--;
 
-			vport_index += peer_num_ports;
-			dev = peer_dev;
+				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+					vport_index += peer_n_ports;
+			}
 		}
 	}
 
-	if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
 		profile = &raw_eth_profile;
 	else
-		return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
 	ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
 	if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	vport_index = rep->vport_index;
 	ibdev->port[vport_index].rep = rep;
 	ibdev->port[vport_index].roce.netdev =
-		mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
-	ibdev->mdev = dev;
+		mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+	ibdev->mdev = lag_master;
 	ibdev->num_ports = num_ports;
 
 	ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		goto fail_add;
 
 	rep->rep_data[REP_IB].priv = ibdev;
-	if (mlx5_lag_is_shared_fdb(dev))
-		mlx5_ib_register_peer_vport_reps(dev);
+	if (mlx5_lag_is_shared_fdb(lag_master))
+		mlx5_ib_register_peer_vport_reps(lag_master);
 
 	return 0;
 
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
 	int vport_index = rep->vport_index;
 	struct mlx5_ib_port *port;
+	int i;
 
 	if (WARN_ON(!mdev))
 		return;
 
+	if (!dev)
+		return;
+
 	if (mlx5_lag_is_shared_fdb(mdev) &&
 	    !mlx5_lag_is_master(mdev)) {
-		struct mlx5_core_dev *peer_mdev;
-
 		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
 			return;
-		peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-		vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+		for (i = 0; i < dev->num_ports; i++) {
+			if (dev->port[i].rep == rep)
+				break;
+		}
+		if (WARN_ON(i == dev->num_ports))
+			return;
+		vport_index = i;
 	}
 
-	if (!dev)
-		return;
-
 	port = &dev->port[vport_index];
 	write_lock(&port->roce.netdev_lock);
 	port->roce.netdev = NULL;
@@ -143,16 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	port->rep = NULL;
 
 	if (rep->vport == MLX5_VPORT_UPLINK) {
-		struct mlx5_core_dev *peer_mdev;
-		struct mlx5_eswitch *esw;
 
 		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
 			return;
 
 		if (mlx5_lag_is_shared_fdb(mdev)) {
-			peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-			esw = peer_mdev->priv.eswitch;
-			mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+			struct mlx5_core_dev *peer_mdev;
+			struct mlx5_eswitch *esw;
+
+			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+				esw = peer_mdev->priv.eswitch;
+				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+			}
 		}
 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 	}
@@ -166,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
 {
-	struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+	struct mlx5_core_dev *peer_mdev;
 	struct mlx5_eswitch *esw;
+	int i;
 
-	if (!peer_mdev)
-		return;
-
-	esw = peer_mdev->priv.eswitch;
-	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+		esw = peer_mdev->priv.eswitch;
+		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+	}
 }
 
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 11374c3744c5..8a10ed4d8cbb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -244,16 +244,22 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 	    ft->type == FS_FT_FDB &&
 	    mlx5_lag_is_shared_fdb(dev) &&
 	    mlx5_lag_is_master(dev)) {
-		err = mlx5_cmd_set_slave_root_fdb(dev,
-						  mlx5_lag_get_peer_mdev(dev),
-						  !disconnect, (!disconnect) ?
-						  ft->id : 0);
-		if (err && !disconnect) {
-			MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
-			MLX5_SET(set_flow_table_root_in, in, table_id,
-				 ns->root_ft->id);
-			mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+		struct mlx5_core_dev *peer_dev;
+		int i;
+
+		mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+			err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
+							  (!disconnect) ? ft->id : 0);
+			if (err && !disconnect) {
+				MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+				MLX5_SET(set_flow_table_root_in, in, table_id,
+					 ns->root_ft->id);
+				mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+			}
+			if (err)
+				break;
 		}
+
 	}
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index c820f7d266de..c55e36e0571d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1519,26 +1519,37 @@ u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
 
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
 {
 	struct mlx5_core_dev *peer_dev = NULL;
 	struct mlx5_lag *ldev;
 	unsigned long flags;
+	int idx;
 
 	spin_lock_irqsave(&lag_lock, flags);
 	ldev = mlx5_lag_dev(dev);
 	if (!ldev)
 		goto unlock;
 
-	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
-			   ldev->pf[MLX5_LAG_P2].dev :
-			   ldev->pf[MLX5_LAG_P1].dev;
+	if (*i == ldev->ports)
+		goto unlock;
+	for (idx = *i; idx < ldev->ports; idx++)
+		if (ldev->pf[idx].dev != dev)
+			break;
+
+	if (idx == ldev->ports) {
+		*i = idx;
+		goto unlock;
+	}
+	*i = idx + 1;
+
+	peer_dev = ldev->pf[idx].dev;
 
 unlock:
 	spin_unlock_irqrestore(&lag_lock, flags);
 	return peer_dev;
 }
-EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
 
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 u64 *values,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 94d2be5848ae..9a744c48eec2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1174,7 +1174,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 u64 *values,
 				 int num_counters,
 				 size_t *offsets);
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i);
+
+#define mlx5_lag_for_each_peer_mdev(dev, peer, i)				\
+	for (i = 0, peer = mlx5_lag_get_next_peer_mdev(dev, &i);		\
+	     peer;								\
+	     peer = mlx5_lag_get_next_peer_mdev(dev, &i))
+
 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
-- 
2.40.1


  parent reply	other threads:[~2023-06-07 21:05 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-07 21:03 [pull request][net-next V2 00/14] mlx5 updates 2023-06-06 Saeed Mahameed
2023-06-07 21:03 ` [net-next V2 01/14] RDMA/mlx5: Free second uplink ib port Saeed Mahameed
2023-06-09  2:40   ` patchwork-bot+netdevbpf
2023-06-07 21:03 ` Saeed Mahameed [this message]
2023-06-07 21:03 ` [net-next V2 03/14] net/mlx5: LAG, check if all eswitches are paired for shared FDB Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 04/14] net/mlx5: LAG, generalize handling of " Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 05/14] net/mlx5: LAG, change mlx5_shared_fdb_supported() to static Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 06/14] net/mlx5: LAG, block multipath LAG in case ldev have more than 2 ports Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 07/14] net/mlx5: LAG, block multiport eswitch " Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 08/14] net/mlx5: Enable 4 ports VF LAG Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 09/14] net/mlx5e: Expose catastrophic steering error counters Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 10/14] net/mlx5e: Remove RX page cache leftovers Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 11/14] net/mlx5e: TC, refactor access to hash key Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 12/14] net/mlx5: Skip inline mode check after mlx5_eswitch_enable_locked() failure Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 13/14] mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager Saeed Mahameed
2023-06-07 21:04 ` [net-next V2 14/14] net/mlx5e: simplify condition after napi budget handling change Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230607210410.88209-3-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.