Netdev List
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	Leon Romanovsky <leon@kernel.org>,
	Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
	Nimrod Oren <noren@nvidia.com>, Yael Chemla <ychemla@nvidia.com>,
	Shay Drory <shayd@nvidia.com>, Or Har-Toov <ohartoov@nvidia.com>,
	Edward Srouji <edwards@nvidia.com>,
	Maher Sanalla <msanalla@nvidia.com>,
	Simon Horman <horms@kernel.org>, Parav Pandit <parav@nvidia.com>,
	Patrisious Haddad <phaddad@nvidia.com>,
	Kees Cook <kees@kernel.org>, Moshe Shemesh <moshe@nvidia.com>,
	<linux-kernel@vger.kernel.org>, <netdev@vger.kernel.org>,
	<linux-rdma@vger.kernel.org>, Gal Pressman <gal@nvidia.com>
Subject: [PATCH net-next 08/13] net/mlx5: LAG, block RoCE and VF LAG for SD devices
Date: Wed, 27 May 2026 15:54:22 +0300	[thread overview]
Message-ID: <20260527125427.385976-9-tariqt@nvidia.com> (raw)
In-Reply-To: <20260527125427.385976-1-tariqt@nvidia.com>

From: Shay Drory <shayd@nvidia.com>

Socket Direct (SD) devices manage their own LAG via the SD LAG
infrastructure. Block the standard netdev-event-driven LAG path (RoCE LAG
and VF LAG) for SD devices to prevent conflicting LAG configurations.

Expose mlx5_sd_is_supported() as a public helper that encapsulates all
SD eligibility checks. Use it in mlx5_lag_dev_alloc() to skip netdev
notifier registration for SD-capable devices at alloc time. Some SD
code is reordered to expose the new function; no logic is changed.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 13 ++--
 .../net/ethernet/mellanox/mlx5/core/lib/sd.c  | 60 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/lib/sd.h  | 11 ++++
 3 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 3decb49e9f19..a2c7e2927431 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -293,11 +293,14 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
 
-	ldev->nb.notifier_call = mlx5_lag_netdev_event;
-	write_pnet(&ldev->net, mlx5_core_net(dev));
-	if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
-		ldev->nb.notifier_call = NULL;
-		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+	if (!mlx5_sd_is_supported(dev)) {
+		ldev->nb.notifier_call = mlx5_lag_netdev_event;
+		write_pnet(&ldev->net, mlx5_core_net(dev));
+		if (register_netdevice_notifier_net(read_pnet(&ldev->net),
+						    &ldev->nb)) {
+			ldev->nb.notifier_call = NULL;
+			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+		}
 	}
 	ldev->mode = MLX5_LAG_MODE_NONE;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index e341d814873a..8991db3a19cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -115,7 +115,28 @@ static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
 	return true;
 }
 
-static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
+static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
+			 u8 *host_buses)
+{
+	u32 out[MLX5_ST_SZ_DW(mpir_reg)];
+	int err;
+
+	err = mlx5_query_mpir_reg(dev, out);
+	if (err)
+		return err;
+
+	*sdm = MLX5_GET(mpir_reg, out, sdm);
+	*host_buses = MLX5_GET(mpir_reg, out, host_buses);
+
+	return 0;
+}
+
+static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group)
+{
+	return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group);
+}
+
+static bool mlx5_sd_caps_supported(struct mlx5_core_dev *dev, u8 host_buses)
 {
 	/* Honor the SW implementation limit */
 	if (host_buses > MLX5_SD_MAX_GROUP_SZ)
@@ -142,25 +163,32 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
 	return true;
 }
 
-static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm,
-			 u8 *host_buses)
+bool mlx5_sd_is_supported(struct mlx5_core_dev *dev)
 {
-	u32 out[MLX5_ST_SZ_DW(mpir_reg)];
+	u8 host_buses, sd_group;
+	bool sdm;
 	int err;
 
-	err = mlx5_query_mpir_reg(dev, out);
-	if (err)
-		return err;
+	/* Feature is currently implemented for PFs only */
+	if (!mlx5_core_is_pf(dev))
+		return false;
 
-	*sdm = MLX5_GET(mpir_reg, out, sdm);
-	*host_buses = MLX5_GET(mpir_reg, out, host_buses);
+	/* Block on embedded CPU PFs */
+	if (mlx5_core_is_ecpf(dev))
+		return false;
 
-	return 0;
-}
+	err = mlx5_query_nic_vport_sd_group(dev, &sd_group);
+	if (err || !sd_group)
+		return false;
 
-static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group)
-{
-	return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group);
+	if (!MLX5_CAP_MCAM_REG(dev, mpir))
+		return false;
+
+	err = mlx5_query_sd(dev, &sdm, &host_buses);
+	if (err || !sdm)
+		return false;
+
+	return mlx5_sd_caps_supported(dev, host_buses);
 }
 
 static int sd_init(struct mlx5_core_dev *dev)
@@ -198,8 +226,8 @@ static int sd_init(struct mlx5_core_dev *dev)
 
 	group_id = mlx5_sd_group_id(dev, sd_group);
 
-	if (!mlx5_sd_is_supported(dev, host_buses)) {
-		sd_warn(dev, "can't support requested netdev combining for group id 0x%x), skipping\n",
+	if (!mlx5_sd_caps_supported(dev, host_buses)) {
+		sd_warn(dev, "can't support requested netdev combining for group id 0x%x, skipping\n",
 			group_id);
 		return 0;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
index 2ab259095d7e..bf59903ab23f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h
@@ -4,6 +4,8 @@
 #ifndef __MLX5_LIB_SD_H__
 #define __MLX5_LIB_SD_H__
 
+#include <linux/types.h>
+
 #define MLX5_SD_MAX_GROUP_SZ 2
 
 struct mlx5_sd;
@@ -18,6 +20,15 @@ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
 void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
 		      struct auxiliary_device *adev);
 
+#ifdef CONFIG_MLX5_CORE_EN
+bool mlx5_sd_is_supported(struct mlx5_core_dev *dev);
+#else
+static inline bool mlx5_sd_is_supported(struct mlx5_core_dev *dev)
+{
+	return false;
+}
+#endif
+
 int mlx5_sd_init(struct mlx5_core_dev *dev);
 void mlx5_sd_cleanup(struct mlx5_core_dev *dev);
 
-- 
2.44.0


  parent reply	other threads:[~2026-05-27 12:56 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-27 12:54 [PATCH net-next 00/13] net/mlx5: Add switchdev mode support for Socket Direct single netdev, part 1/2 Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 01/13] net/mlx5: LAG, factor out shared FDB code into dedicated file Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 02/13] net/mlx5: E-Switch, align disable sequence with switchdev-to-legacy transition Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 03/13] net/mlx5: E-Switch, move devcom init from TC to eswitch layer Tariq Toukan
2026-05-28 18:48   ` Shay Drori
2026-05-27 12:54 ` [PATCH net-next 04/13] net/mlx5: LAG, replace peer count check with direct peer lookup Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 05/13] net/mlx5: LAG, prepare for SD device integration Tariq Toukan
2026-05-28 18:56   ` Shay Drori
2026-05-27 12:54 ` [PATCH net-next 06/13] net/mlx5: LAG, extend shared FDB API with group_id filter Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 07/13] net/mlx5: SD, introduce Socket Direct LAG Tariq Toukan
2026-05-27 12:54 ` Tariq Toukan [this message]
2026-05-27 12:54 ` [PATCH net-next 09/13] net/mlx5: LAG, block multipath LAG for SD devices Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 10/13] net/mlx5: SD, keep netdev resources on same PF in switchdev mode Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 11/13] net/mlx5e: TC, track peer flow slots with bitmap Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 12/13] net/mlx5e: TC, enable steering for SD LAG Tariq Toukan
2026-05-27 12:54 ` [PATCH net-next 13/13] net/mlx5e: Verify unique vhca_id count instead of range Tariq Toukan
2026-05-27 22:08 ` [PATCH net-next 00/13] net/mlx5: Add switchdev mode support for Socket Direct single netdev, part 1/2 Jacob Keller
2026-05-28  9:18   ` Shay Drori
2026-05-28 17:59     ` Jacob Keller
2026-05-29  0:40 ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260527125427.385976-9-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=edwards@nvidia.com \
    --cc=gal@nvidia.com \
    --cc=horms@kernel.org \
    --cc=kees@kernel.org \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=msanalla@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=noren@nvidia.com \
    --cc=ohartoov@nvidia.com \
    --cc=pabeni@redhat.com \
    --cc=parav@nvidia.com \
    --cc=phaddad@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=ychemla@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox