Netdev List
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	Leon Romanovsky <leon@kernel.org>,
	Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
	Nimrod Oren <noren@nvidia.com>, Yael Chemla <ychemla@nvidia.com>,
	Shay Drory <shayd@nvidia.com>, Or Har-Toov <ohartoov@nvidia.com>,
	Edward Srouji <edwards@nvidia.com>,
	Simon Horman <horms@kernel.org>,
	Maher Sanalla <msanalla@nvidia.com>,
	Parav Pandit <parav@nvidia.com>, Kees Cook <kees@kernel.org>,
	Patrisious Haddad <phaddad@nvidia.com>,
	Moshe Shemesh <moshe@nvidia.com>, <linux-kernel@vger.kernel.org>,
	<netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	Gal Pressman <gal@nvidia.com>,
	Jacob Keller <jacob.e.keller@intel.com>
Subject: [PATCH net-next V2 07/13] net/mlx5: SD, introduce Socket Direct LAG
Date: Sun, 31 May 2026 14:39:47 +0300	[thread overview]
Message-ID: <20260531113954.395443-8-tariqt@nvidia.com> (raw)
In-Reply-To: <20260531113954.395443-1-tariqt@nvidia.com>

From: Shay Drory <shayd@nvidia.com>

Register SD secondary devices with the existing LAG structure by
adding them to the primary's ldev xarray with a shared group_id.
This ties the SD LAG lifecycle to the SD group lifecycle.

Add an sd_lag_state debugfs entry for LAG state visibility. To avoid a
race between this entry and LAG deletion, create the debugfs entries
last on SD init and remove them first on SD cleanup.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lib/sd.c  | 135 ++++++++++++++++--
 1 file changed, 121 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index bbd77ae11e84..e341d814873a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
 
 #include "lib/sd.h"
+#include "../lag/lag.h"
 #include "mlx5_core.h"
 #include "lib/mlx5.h"
 #include "fs_cmd.h"
@@ -223,6 +224,108 @@ static void sd_cleanup(struct mlx5_core_dev *dev)
 	kfree(sd);
 }
 
+static int sd_lag_state_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	struct lag_func *pf;
+	bool active = false;
+	int i;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return -EINVAL;
+
+	mutex_lock(&ldev->lock);
+	mlx5_ldev_for_each(i, 0, ldev) {
+		pf = mlx5_lag_pf(ldev, i);
+		if (pf->dev == dev) {
+			active = pf->sd_fdb_active;
+			break;
+		}
+	}
+	mutex_unlock(&ldev->lock);
+
+	seq_printf(file, "%s\n", active ? "active" : "disabled");
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sd_lag_state);
+
+/* SD LAG integration is optional. If LAG isn't available on this device
+ * (e.g. lag caps are off), or registering secondaries fails, just warn
+ * and continue - SD can operate without the LAG-side bookkeeping.
+ */
+static void sd_lag_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev);
+	struct mlx5_sd *sd = mlx5_get_sd(primary);
+	struct mlx5_core_dev *pos, *to;
+	struct mlx5_lag *ldev;
+	struct lag_func *pf;
+	int err;
+	int i;
+
+	ldev = mlx5_lag_dev(primary);
+	if (!ldev) {
+		sd_warn(primary, "%s: no ldev (LAG caps off?), skipping\n",
+			__func__);
+		return;
+	}
+
+	mutex_lock(&ldev->lock);
+	pf = mlx5_lag_pf_by_dev(ldev, primary);
+	if (!pf) {
+		sd_warn(primary, "%s: primary not registered in ldev, skipping\n",
+			__func__);
+		goto out;
+	}
+
+	pf->group_id = sd->group_id;
+
+	mlx5_sd_for_each_secondary(i, primary, pos) {
+		err = mlx5_ldev_add_mdev(ldev, pos, sd->group_id);
+		if (err) {
+			sd_warn(primary, "%s: failed to add secondary %s to ldev: %d\n",
+				__func__, dev_name(pos->device), err);
+			goto err;
+		}
+	}
+
+out:
+	mutex_unlock(&ldev->lock);
+	return;
+
+err:
+	to = pos;
+	mlx5_sd_for_each_secondary_to(i, primary, to, pos)
+		mlx5_ldev_remove_mdev(ldev, pos);
+	pf->group_id = 0;
+	mutex_unlock(&ldev->lock);
+}
+
+static void sd_lag_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev);
+	struct mlx5_core_dev *pos;
+	struct mlx5_lag *ldev;
+	struct lag_func *pf;
+	int i;
+
+	ldev = mlx5_lag_dev(primary);
+	if (!ldev)
+		return;
+
+	mutex_lock(&ldev->lock);
+	mlx5_sd_for_each_secondary(i, primary, pos)
+		mlx5_ldev_remove_mdev(ldev, pos);
+
+	pf = mlx5_lag_pf_by_dev(ldev, primary);
+	if (pf)
+		pf->group_id = 0;
+	mutex_unlock(&ldev->lock);
+}
+
 static int sd_register(struct mlx5_core_dev *dev)
 {
 	struct mlx5_devcom_comp_dev *devcom, *pos;
@@ -473,27 +576,32 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto err_sd_unregister;
 
+	mlx5_sd_for_each_secondary(i, primary, pos) {
+		err = sd_cmd_set_secondary(pos, primary, alias_key);
+		if (err)
+			goto err_unset_secondaries;
+	}
+
+	sd_lag_init(primary);
+
 	primary_sd->dfs =
 		debugfs_create_dir("multi-pf",
 				   mlx5_debugfs_get_dev_root(primary));
-	debugfs_create_x32("group_id", 0400, primary_sd->dfs,
-			   &primary_sd->group_id);
-	debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
-			    &dev_fops);
-
 	mlx5_sd_for_each_secondary(i, primary, pos) {
 		char name[32];
 
-		err = sd_cmd_set_secondary(pos, primary, alias_key);
-		if (err)
-			goto err_unset_secondaries;
-
 		snprintf(name, sizeof(name), "secondary_%d", i - 1);
 		debugfs_create_file(name, 0400, primary_sd->dfs, pos,
 				    &dev_fops);
-
 	}
 
+	debugfs_create_file("sd_lag_state", 0400, primary_sd->dfs, primary,
+			    &sd_lag_state_fops);
+	debugfs_create_x32("group_id", 0400, primary_sd->dfs,
+			   &primary_sd->group_id);
+	debugfs_create_file("primary", 0400, primary_sd->dfs, primary,
+			    &dev_fops);
+
 	sd_info(primary, "group id %#x, size %d, combined\n",
 		sd->group_id, mlx5_devcom_comp_get_size(sd->devcom));
 	sd_print_group(primary);
@@ -508,8 +616,6 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
 	mlx5_sd_for_each_secondary_to(i, primary, to, pos)
 		sd_cmd_unset_secondary(pos);
 	sd_cmd_unset_primary(primary);
-	debugfs_remove_recursive(primary_sd->dfs);
-	primary_sd->dfs = NULL;
 err_sd_unregister:
 	mlx5_sd_for_each_secondary(i, primary, pos) {
 		struct mlx5_sd *peer_sd = mlx5_get_sd(pos);
@@ -548,11 +654,12 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
 	if (primary_sd->state != MLX5_SD_STATE_UP)
 		goto out_clear_peers;
 
+	debugfs_remove_recursive(primary_sd->dfs);
+	primary_sd->dfs = NULL;
+	sd_lag_cleanup(primary);
 	mlx5_sd_for_each_secondary(i, primary, pos)
 		sd_cmd_unset_secondary(pos);
 	sd_cmd_unset_primary(primary);
-	debugfs_remove_recursive(primary_sd->dfs);
-	primary_sd->dfs = NULL;
 
 	sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
 	primary_sd->state = MLX5_SD_STATE_DOWN;
-- 
2.44.0


  parent reply	other threads:[~2026-05-31 11:41 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-31 11:39 [PATCH net-next V2 00/13] net/mlx5: Add switchdev mode support for Socket Direct single netdev, part 1/2 Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 01/13] net/mlx5: LAG, factor out shared FDB code into dedicated file Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 02/13] net/mlx5: E-Switch, align disable sequence with switchdev-to-legacy transition Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 03/13] net/mlx5: E-Switch, move devcom init from TC to eswitch layer Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 04/13] net/mlx5: LAG, replace peer count check with direct peer lookup Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 05/13] net/mlx5: LAG, prepare for SD device integration Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 06/13] net/mlx5: LAG, extend shared FDB API with group_id filter Tariq Toukan
2026-05-31 11:39 ` Tariq Toukan [this message]
2026-05-31 11:39 ` [PATCH net-next V2 08/13] net/mlx5: LAG, block RoCE and VF LAG for SD devices Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 09/13] net/mlx5: LAG, block multipath " Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 10/13] net/mlx5: SD, keep netdev resources on same PF in switchdev mode Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 11/13] net/mlx5e: TC, track peer flow slots with bitmap Tariq Toukan
2026-05-31 11:39 ` [PATCH net-next V2 12/13] net/mlx5e: TC, enable steering for SD LAG Tariq Toukan
2026-06-03  2:26   ` Jakub Kicinski
2026-06-03  6:43     ` Shay Drori
2026-05-31 11:39 ` [PATCH net-next V2 13/13] net/mlx5e: Verify unique vhca_id count instead of range Tariq Toukan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260531113954.395443-8-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=edwards@nvidia.com \
    --cc=gal@nvidia.com \
    --cc=horms@kernel.org \
    --cc=jacob.e.keller@intel.com \
    --cc=kees@kernel.org \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=msanalla@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=noren@nvidia.com \
    --cc=ohartoov@nvidia.com \
    --cc=pabeni@redhat.com \
    --cc=parav@nvidia.com \
    --cc=phaddad@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=ychemla@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox