From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
Mark Bloch <mbloch@nvidia.com>, Roi Dayan <roid@nvidia.com>
Subject: [net-next 05/15] net/mlx5: Lag, Add single RDMA device in multiport mode
Date: Fri, 10 Feb 2023 14:18:11 -0800 [thread overview]
Message-ID: <20230210221821.271571-6-saeed@kernel.org> (raw)
In-Reply-To: <20230210221821.271571-1-saeed@kernel.org>
From: Mark Bloch <mbloch@nvidia.com>
In MultiPort E-Switch mode a single RDMA device is created. This device has
multiple RDMA ports that represent the uplink ports that are connected to the
E-Switch. Account for this when creating the RDMA device so it has an
additional port for the non-native uplink.
As a side effect of this patch, use shared fdb in multiport eswitch mode.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
drivers/infiniband/hw/mlx5/ib_rep.c | 18 ++++++---
.../net/ethernet/mellanox/mlx5/core/lag/lag.c | 6 +--
.../net/ethernet/mellanox/mlx5/core/lag/lag.h | 3 ++
.../ethernet/mellanox/mlx5/core/lag/mpesw.c | 37 +++++++++++++++----
include/linux/mlx5/driver.h | 1 +
5 files changed, 49 insertions(+), 16 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 52821485371a..ddcfc116b19a 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -37,6 +37,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
const struct mlx5_ib_profile *profile;
struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
+ int second_uplink = false;
u32 peer_num_ports;
int vport_index;
int ret;
@@ -47,17 +48,24 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
peer_dev = mlx5_lag_get_peer_mdev(dev);
peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
if (mlx5_lag_is_master(dev)) {
- /* Only 1 ib port is the representor for both uplinks */
- num_ports += peer_num_ports - 1;
+ if (mlx5_lag_is_mpesw(dev))
+ num_ports += peer_num_ports;
+ else
+ num_ports += peer_num_ports - 1;
+
} else {
- if (rep->vport == MLX5_VPORT_UPLINK)
- return 0;
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ if (!mlx5_lag_is_mpesw(dev))
+ return 0;
+ second_uplink = true;
+ }
+
vport_index += peer_num_ports;
dev = peer_dev;
}
}
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
profile = &raw_eth_profile;
else
return mlx5_ib_set_vport_rep(dev, rep, vport_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 301994741b08..5d331b940f4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -644,7 +644,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
return 0;
}
-static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
+int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
@@ -721,7 +721,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
return true;
}
-static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
+void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
int i;
@@ -738,7 +738,7 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
}
}
-static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
+void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
int i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 2dbd96a86ef8..bc1f1dd3e283 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -120,5 +120,8 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
void mlx5_ldev_remove_debugfs(struct dentry *dbg);
void mlx5_disable_lag(struct mlx5_lag *ldev);
+void mlx5_lag_remove_devices(struct mlx5_lag *ldev);
+int mlx5_deactivate_lag(struct mlx5_lag *ldev);
+void mlx5_lag_add_devices(struct mlx5_lag *ldev);
#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 2f7f2af312d7..0c0ef600f643 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -67,15 +67,16 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev)
static int enable_mpesw(struct mlx5_lag *ldev)
{
- struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int err;
if (ldev->mode != MLX5_LAG_MODE_NONE)
return -EINVAL;
- if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS ||
- !MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table) ||
- !MLX5_CAP_GEN(dev, create_lag_when_not_master_up) ||
+ if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS ||
+ !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) ||
+ !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) ||
!mlx5_lag_check_prereq(ldev))
return -EOPNOTSUPP;
@@ -83,15 +84,32 @@ static int enable_mpesw(struct mlx5_lag *ldev)
if (err)
return err;
- err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
+ mlx5_lag_remove_devices(ldev);
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true);
if (err) {
- mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
- goto out_err;
+ mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err);
+ goto err_add_devices;
}
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ if (err)
+ goto err_rescan_drivers;
+
return 0;
-out_err:
+err_rescan_drivers:
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+err_add_devices:
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
mlx5_mpesw_metadata_cleanup(ldev);
return err;
}
@@ -109,6 +127,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work);
struct mlx5_lag *ldev = mpesww->lag;
+ mlx5_dev_list_lock();
mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
mpesww->result = -EAGAIN;
@@ -121,6 +140,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
disable_mpesw(ldev);
unlock:
mutex_unlock(&ldev->lock);
+ mlx5_dev_list_unlock();
complete(&mpesww->comp);
}
@@ -187,3 +207,4 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
}
+EXPORT_SYMBOL(mlx5_lag_is_mpesw);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a4bb5842a948..c9259350cdfc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1162,6 +1162,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave);
--
2.39.1
next prev parent reply other threads:[~2023-02-10 22:19 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-10 22:18 [pull request][net-next 00/15] mlx5 updates 2023-02-10 Saeed Mahameed
2023-02-10 22:18 ` [net-next 01/15] net/mlx5: Lag, Let user configure multiport eswitch Saeed Mahameed
2023-02-11 4:03 ` Jakub Kicinski
2023-02-13 11:32 ` Jiri Pirko
2023-02-13 19:00 ` Mark Bloch
2023-02-14 2:02 ` Jakub Kicinski
2023-02-14 7:31 ` Mark Bloch
2023-02-14 20:40 ` Jakub Kicinski
2023-02-14 21:50 ` Saeed Mahameed
2023-02-14 13:38 ` Jiri Pirko
2023-02-14 17:07 ` Alexander Lobakin
2023-02-14 21:49 ` Saeed Mahameed
2023-02-15 11:46 ` Leon Romanovsky
2023-02-15 17:04 ` Alexander Lobakin
2023-02-15 19:41 ` Saeed Mahameed
2023-02-10 22:18 ` [net-next 02/15] net/mlx5e: TC, Add peer flow in mpesw mode Saeed Mahameed
2023-02-10 22:18 ` [net-next 03/15] net/mlx5: E-Switch, rename bond update function to be reused Saeed Mahameed
2023-02-10 22:18 ` [net-next 04/15] net/mlx5: Lag, set different uplink vport metadata in multiport eswitch mode Saeed Mahameed
2023-02-10 22:18 ` Saeed Mahameed [this message]
2023-02-10 22:18 ` [net-next 06/15] net/mlx5e: Use a simpler comparison for uplink rep Saeed Mahameed
2023-02-10 22:18 ` [net-next 07/15] net/mlx5e: TC, Remove redundant parse_attr argument Saeed Mahameed
2023-02-10 22:18 ` [net-next 08/15] net/mlx5: Remove outdated comment Saeed Mahameed
2023-02-10 22:18 ` [net-next 09/15] net/mlx5e: Pass mdev to mlx5e_devlink_port_register() Saeed Mahameed
2023-02-10 22:18 ` [net-next 10/15] net/mlx5e: Replace usage of mlx5e_devlink_get_dl_port() by netdev->devlink_port Saeed Mahameed
2023-02-10 22:18 ` [net-next 11/15] net/mlx5e: Move dl_port to struct mlx5e_dev Saeed Mahameed
2023-02-10 22:18 ` [net-next 12/15] net/mlx5e: Move devlink port registration to be done before netdev alloc Saeed Mahameed
2023-02-10 22:18 ` [net-next 13/15] net/mlx5e: Create auxdev devlink instance in the same ns as parent devlink Saeed Mahameed
2023-02-10 22:18 ` [net-next 14/15] net/mlx5: Remove "recovery" arg from mlx5_load_one() function Saeed Mahameed
2023-02-10 22:18 ` [net-next 15/15] net/mlx5: Suspend auxiliary devices only in case of PCI device suspend Saeed Mahameed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230210221821.271571-6-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=mbloch@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=roid@nvidia.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.