netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Leon Romanovsky <leonro@nvidia.com>,
	linux-rdma@vger.kernel.org, Shay Drory <shayd@nvidia.com>,
	Mark Bloch <mbloch@nvidia.com>
Subject: [net-next 02/15] {net/RDMA}/mlx5: introduce lag_for_each_peer
Date: Tue,  6 Jun 2023 00:12:06 -0700	[thread overview]
Message-ID: <20230606071219.483255-3-saeed@kernel.org> (raw)
In-Reply-To: <20230606071219.483255-1-saeed@kernel.org>

From: Shay Drory <shayd@nvidia.com>

Introduce a generic API to iterate over all the devices which are part
of the LAG. This API replaces mlx5_lag_get_peer_mdev() which retrieves
only a single peer device from the lag.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c           | 98 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 24 +++--
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 21 +++-
 include/linux/mlx5/driver.h                   |  8 +-
 4 files changed, 100 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index a4db22fe1883..c7a4ee896121 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
 
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+	struct mlx5_core_dev *peer_dev;
+	int i;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+		if (mlx5_lag_is_mpesw(peer_dev))
+			*num_ports += peer_num_ports;
+		else
+			/* Only 1 ib port is the representor for all uplinks */
+			*num_ports += peer_num_ports - 1;
+	}
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
 	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+	struct mlx5_core_dev *lag_master = dev;
 	const struct mlx5_ib_profile *profile;
 	struct mlx5_core_dev *peer_dev;
 	struct mlx5_ib_dev *ibdev;
-	int second_uplink = false;
-	u32 peer_num_ports;
+	int new_uplink = false;
 	int vport_index;
 	int ret;
+	int i;
 
 	vport_index = rep->vport_index;
 
 	if (mlx5_lag_is_shared_fdb(dev)) {
-		peer_dev = mlx5_lag_get_peer_mdev(dev);
-		peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
 		if (mlx5_lag_is_master(dev)) {
-			if (mlx5_lag_is_mpesw(dev))
-				num_ports += peer_num_ports;
-			else
-				num_ports += peer_num_ports - 1;
-
+			mlx5_ib_num_ports_update(dev, &num_ports);
 		} else {
 			if (rep->vport == MLX5_VPORT_UPLINK) {
 				if (!mlx5_lag_is_mpesw(dev))
 					return 0;
-				second_uplink = true;
+				new_uplink = true;
 			}
+			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+				if (mlx5_lag_is_master(peer_dev))
+					lag_master = peer_dev;
+				else if (!mlx5_lag_is_mpesw(dev))
+				/* Only 1 ib port is the representor for all uplinks */
+					peer_n_ports--;
 
-			vport_index += peer_num_ports;
-			dev = peer_dev;
+				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+					vport_index += peer_n_ports;
+			}
 		}
 	}
 
-	if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
 		profile = &raw_eth_profile;
 	else
-		return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
 	ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
 	if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	vport_index = rep->vport_index;
 	ibdev->port[vport_index].rep = rep;
 	ibdev->port[vport_index].roce.netdev =
-		mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
-	ibdev->mdev = dev;
+		mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+	ibdev->mdev = lag_master;
 	ibdev->num_ports = num_ports;
 
 	ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		goto fail_add;
 
 	rep->rep_data[REP_IB].priv = ibdev;
-	if (mlx5_lag_is_shared_fdb(dev))
-		mlx5_ib_register_peer_vport_reps(dev);
+	if (mlx5_lag_is_shared_fdb(lag_master))
+		mlx5_ib_register_peer_vport_reps(lag_master);
 
 	return 0;
 
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
 	int vport_index = rep->vport_index;
 	struct mlx5_ib_port *port;
+	int i;
 
 	if (WARN_ON(!mdev))
 		return;
 
+	if (!dev)
+		return;
+
 	if (mlx5_lag_is_shared_fdb(mdev) &&
 	    !mlx5_lag_is_master(mdev)) {
-		struct mlx5_core_dev *peer_mdev;
-
 		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
 			return;
-		peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-		vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+		for (i = 0; i < dev->num_ports; i++) {
+			if (dev->port[i].rep == rep)
+				break;
+		}
+		if (WARN_ON(i == dev->num_ports))
+			return;
+		vport_index = i;
 	}
 
-	if (!dev)
-		return;
-
 	port = &dev->port[vport_index];
 	write_lock(&port->roce.netdev_lock);
 	port->roce.netdev = NULL;
@@ -143,16 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	port->rep = NULL;
 
 	if (rep->vport == MLX5_VPORT_UPLINK) {
-		struct mlx5_core_dev *peer_mdev;
-		struct mlx5_eswitch *esw;
 
 		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
 			return;
 
 		if (mlx5_lag_is_shared_fdb(mdev)) {
-			peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-			esw = peer_mdev->priv.eswitch;
-			mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+			struct mlx5_core_dev *peer_mdev;
+			struct mlx5_eswitch *esw;
+
+			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+				esw = peer_mdev->priv.eswitch;
+				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+			}
 		}
 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 	}
@@ -166,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
 {
-	struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+	struct mlx5_core_dev *peer_mdev;
 	struct mlx5_eswitch *esw;
+	int i;
 
-	if (!peer_mdev)
-		return;
-
-	esw = peer_mdev->priv.eswitch;
-	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+		esw = peer_mdev->priv.eswitch;
+		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+	}
 }
 
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 11374c3744c5..8a10ed4d8cbb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -244,16 +244,22 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 	    ft->type == FS_FT_FDB &&
 	    mlx5_lag_is_shared_fdb(dev) &&
 	    mlx5_lag_is_master(dev)) {
-		err = mlx5_cmd_set_slave_root_fdb(dev,
-						  mlx5_lag_get_peer_mdev(dev),
-						  !disconnect, (!disconnect) ?
-						  ft->id : 0);
-		if (err && !disconnect) {
-			MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
-			MLX5_SET(set_flow_table_root_in, in, table_id,
-				 ns->root_ft->id);
-			mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+		struct mlx5_core_dev *peer_dev;
+		int i;
+
+		mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+			err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
+							  (!disconnect) ? ft->id : 0);
+			if (err && !disconnect) {
+				MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+				MLX5_SET(set_flow_table_root_in, in, table_id,
+					 ns->root_ft->id);
+				mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+			}
+			if (err)
+				break;
 		}
+
 	}
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index c820f7d266de..c55e36e0571d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1519,26 +1519,37 @@ u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
 
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
 {
 	struct mlx5_core_dev *peer_dev = NULL;
 	struct mlx5_lag *ldev;
 	unsigned long flags;
+	int idx;
 
 	spin_lock_irqsave(&lag_lock, flags);
 	ldev = mlx5_lag_dev(dev);
 	if (!ldev)
 		goto unlock;
 
-	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
-			   ldev->pf[MLX5_LAG_P2].dev :
-			   ldev->pf[MLX5_LAG_P1].dev;
+	if (*i == ldev->ports)
+		goto unlock;
+	for (idx = *i; idx < ldev->ports; idx++)
+		if (ldev->pf[idx].dev != dev)
+			break;
+
+	if (idx == ldev->ports) {
+		*i = idx;
+		goto unlock;
+	}
+	*i = idx + 1;
+
+	peer_dev = ldev->pf[idx].dev;
 
 unlock:
 	spin_unlock_irqrestore(&lag_lock, flags);
 	return peer_dev;
 }
-EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
 
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 u64 *values,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 94d2be5848ae..9a744c48eec2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1174,7 +1174,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 u64 *values,
 				 int num_counters,
 				 size_t *offsets);
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i);
+
+#define mlx5_lag_for_each_peer_mdev(dev, peer, i)				\
+	for (i = 0, peer = mlx5_lag_get_next_peer_mdev(dev, &i);		\
+	     peer;								\
+	     peer = mlx5_lag_get_next_peer_mdev(dev, &i))
+
 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
-- 
2.40.1


  parent reply	other threads:[~2023-06-06  7:12 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-06  7:12 [pull request][net-next 00/15] mlx5 updates 2023-06-06 Saeed Mahameed
2023-06-06  7:12 ` [net-next 01/15] RDMA/mlx5: Free second uplink ib port Saeed Mahameed
2023-06-06  7:12 ` Saeed Mahameed [this message]
2023-06-06  7:12 ` [net-next 03/15] net/mlx5: LAG, check if all eswitches are paired for shared FDB Saeed Mahameed
2023-06-06  7:12 ` [net-next 04/15] net/mlx5: LAG, generalize handling of " Saeed Mahameed
2023-06-06  7:12 ` [net-next 05/15] net/mlx5: LAG, change mlx5_shared_fdb_supported() to static Saeed Mahameed
2023-06-06  7:12 ` [net-next 06/15] net/mlx5: LAG, block multipath LAG in case ldev have more than 2 ports Saeed Mahameed
2023-06-06  7:12 ` [net-next 07/15] net/mlx5: LAG, block multiport eswitch " Saeed Mahameed
2023-06-06  7:12 ` [net-next 08/15] net/mlx5: Enable 4 ports VF LAG Saeed Mahameed
2023-06-06  7:12 ` [net-next 09/15] net/mlx5e: RX, Log error when page_pool size is too large Saeed Mahameed
2023-06-07 16:24   ` Alexander Lobakin
2023-06-07 16:40     ` Dragos Tatulea
2023-06-06  7:12 ` [net-next 10/15] net/mlx5e: Expose catastrophic steering error counters Saeed Mahameed
2023-06-06  7:12 ` [net-next 11/15] net/mlx5e: Remove RX page cache leftovers Saeed Mahameed
2023-06-06  7:12 ` [net-next 12/15] net/mlx5e: TC, refactor access to hash key Saeed Mahameed
2023-06-06  7:12 ` [net-next 13/15] net/mlx5: Skip inline mode check after mlx5_eswitch_enable_locked() failure Saeed Mahameed
2023-06-07  5:01   ` Jakub Kicinski
2023-06-07 14:10     ` Jason Gunthorpe
2023-06-07 16:19       ` Jakub Kicinski
2023-06-07 23:18         ` Jason Gunthorpe
2023-06-07 15:46     ` Jiri Pirko
2023-06-07 16:22       ` Jakub Kicinski
2023-06-06  7:12 ` [net-next 14/15] mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager Saeed Mahameed
2023-06-06  7:12 ` [net-next 15/15] net/mlx5e: simplify condition after napi budget handling change Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230606071219.483255-3-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).