Netdev List
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	Leon Romanovsky <leon@kernel.org>,
	Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
	Shay Drory <shayd@nvidia.com>, Or Har-Toov <ohartoov@nvidia.com>,
	Edward Srouji <edwards@nvidia.com>,
	Simon Horman <horms@kernel.org>,
	Maher Sanalla <msanalla@nvidia.com>,
	Parav Pandit <parav@nvidia.com>, Kees Cook <kees@kernel.org>,
	Moshe Shemesh <moshe@nvidia.com>,
	Patrisious Haddad <phaddad@nvidia.com>, <netdev@vger.kernel.org>,
	<linux-rdma@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	Gal Pressman <gal@nvidia.com>
Subject: [PATCH net-next 05/15] net/mlx5: SD, add L2 table silent mode query support
Date: Thu, 4 Jun 2026 14:44:45 +0300	[thread overview]
Message-ID: <20260604114455.434711-6-tariqt@nvidia.com> (raw)
In-Reply-To: <20260604114455.434711-1-tariqt@nvidia.com>

From: Shay Drory <shayd@nvidia.com>

Add mlx5_fs_cmd_query_l2table_silent() to query the current silent mode
state from firmware. This allows the driver to detect whether firmware
has already put secondary devices into silent mode.

During SD group registration, query the silent mode of each device. If
a device is already in silent mode (set by firmware), record this in
the fw_silents_secondaries flag and use it to help determine the
primary/secondary roles.

When fw_silents_secondaries is set, skip the driver-initiated silent
mode set/unset operations since firmware manages this state. This
handles configurations where firmware persistently silences secondary
devices.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  21 ++++
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |   2 +
 .../net/ethernet/mellanox/mlx5/core/lib/sd.c  | 105 +++++++++++++++---
 3 files changed, 114 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1cd4cd898ec2..8af73393770c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -1217,3 +1217,24 @@ int mlx5_fs_cmd_set_tx_flow_table_root(struct mlx5_core_dev *dev, u32 ft_id, boo
 
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+
+int mlx5_fs_cmd_query_l2table_silent(struct mlx5_core_dev *dev, u8 *silent_mode)
+{
+	u32 out[MLX5_ST_SZ_DW(query_l2_table_entry_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_l2_table_entry_in)] = {};
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, silent_mode_query))
+		return -EOPNOTSUPP;
+
+	MLX5_SET(query_l2_table_entry_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY);
+	MLX5_SET(query_l2_table_entry_in, in, silent_mode_query, 1);
+
+	err = mlx5_cmd_exec_inout(dev, query_l2_table_entry, in, out);
+	if (err)
+		return err;
+
+	*silent_mode = MLX5_GET(query_l2_table_entry_out, out, silent_mode);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 7eb7b3ffe3d8..60280ff7da50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -124,6 +124,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 
 int mlx5_fs_cmd_set_l2table_entry_silent(struct mlx5_core_dev *dev, u8 silent_mode);
 int mlx5_fs_cmd_set_tx_flow_table_root(struct mlx5_core_dev *dev, u32 ft_id, bool disconnect);
+int mlx5_fs_cmd_query_l2table_silent(struct mlx5_core_dev *dev,
+				     u8 *silent_mode);
 
 static inline bool mlx5_fs_cmd_is_fw_term_table(struct mlx5_flow_table *ft)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 41979bf6a615..afad05a1e3fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -22,6 +22,7 @@ struct mlx5_sd {
 	struct dentry *dfs;
 	u8 state;
 	bool primary;
+	bool fw_silents_secondaries;
 	union {
 		struct { /* primary */
 			struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1];
@@ -167,7 +168,8 @@ static bool mlx5_sd_caps_supported(struct mlx5_core_dev *dev, u8 host_buses)
 	/* Disconnect secondaries from the network */
 	if (!MLX5_CAP_GEN(dev, eswitch_manager))
 		return false;
-	if (!MLX5_CAP_GEN(dev, silent_mode_set))
+	if (!MLX5_CAP_GEN(dev, silent_mode_set) &&
+	    !MLX5_CAP_GEN(dev, silent_mode_query))
 		return false;
 
 	/* RX steering from primary to secondaries */
@@ -379,23 +381,77 @@ static void sd_lag_cleanup(struct mlx5_core_dev *dev)
 enum {
 	SD_PRIMARY_SET,
 	SD_SECONDARIES_SET,
+	SD_FW_SILENT_CHECK,
 };
 
-static void sd_handle_primary_set(struct mlx5_core_dev *dev,
-				  struct mlx5_core_dev *peer)
+static int sd_handle_fw_silent_check(struct mlx5_core_dev *dev,
+				     struct mlx5_core_dev *peer)
+{
+	struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
+	struct mlx5_sd *sd = mlx5_get_sd(dev);
+	u8 dev_silent = 0, peer_silent = 0;
+	int err;
+
+	if (peer_sd->fw_silents_secondaries) {
+		sd->fw_silents_secondaries = true;
+		return 0;
+	}
+
+	err = mlx5_fs_cmd_query_l2table_silent(dev, &dev_silent);
+	if (err) {
+		sd_warn(dev, "Failed to query silent mode for dev: %d\n", err);
+		return err;
+	}
+
+	err = mlx5_fs_cmd_query_l2table_silent(peer, &peer_silent);
+	if (err) {
+		sd_warn(dev, "Failed to query silent mode for peer: %d\n", err);
+		return err;
+	}
+
+	if (dev_silent || peer_silent) {
+		sd->fw_silents_secondaries = true;
+		peer_sd->fw_silents_secondaries = true;
+		sd_info(dev, "FW indicates at least one device is silent\n");
+	}
+	return 0;
+}
+
+static int sd_handle_primary_set(struct mlx5_core_dev *dev,
+				 struct mlx5_core_dev *peer)
 {
 	struct mlx5_sd *peer_sd = mlx5_get_sd(peer);
 	struct mlx5_sd *sd = mlx5_get_sd(dev);
 	struct mlx5_core_dev *candidate;
 	struct mlx5_sd *candidate_sd;
+	bool dev_should_be_primary;
 
 	/* Peer is the device that being sent to all the other devices in the
 	 * group. Hence, use peer to get the candidate device.
 	 */
 	candidate = peer_sd->primary ? peer : peer_sd->primary_dev;
 
-	if (dev->pdev->bus->number >= candidate->pdev->bus->number)
-		return;
+	if (sd->fw_silents_secondaries) {
+		u8 candidate_silent = 0;
+		int err;
+
+		err = mlx5_fs_cmd_query_l2table_silent(candidate,
+						       &candidate_silent);
+		if (err) {
+			sd_warn(candidate, "Failed to query silent mode for dev: %d\n",
+				err);
+			return err;
+		}
+		/* Candidate is silent, dev should be primary */
+		dev_should_be_primary = candidate_silent;
+	} else {
+		/* No FW silent mode, use bus number */
+		dev_should_be_primary =
+			dev->pdev->bus->number < candidate->pdev->bus->number;
+	}
+
+	if (!dev_should_be_primary)
+		return 0;
 
 	candidate_sd = mlx5_get_sd(candidate);
 
@@ -404,6 +460,7 @@ static void sd_handle_primary_set(struct mlx5_core_dev *dev,
 	candidate_sd->primary_dev = dev;
 	peer_sd->primary = false;
 	peer_sd->primary_dev = dev;
+	return 0;
 }
 
 static void sd_handle_secondaries_set(struct mlx5_core_dev *dev,
@@ -431,12 +488,13 @@ static int mlx5_sd_devcom_event(int event, void *my_data, void *event_data)
 	struct mlx5_core_dev *dev = my_data;
 
 	switch (event) {
+	case SD_FW_SILENT_CHECK:
+		return sd_handle_fw_silent_check(dev, peer);
 	case SD_PRIMARY_SET:
-		sd_handle_primary_set(dev, peer);
-		break;
+		return sd_handle_primary_set(dev, peer);
 	case SD_SECONDARIES_SET:
 		sd_handle_secondaries_set(dev, peer);
-		break;
+		return 0;
 	}
 
 	return 0;
@@ -468,9 +526,21 @@ static int sd_register(struct mlx5_core_dev *dev)
 	    mlx5_devcom_comp_is_ready(devcom))
 		goto out;
 
+	/* If silent mode query is supported, ask each device whether it is
+	 * silent and propagate the result to the whole group. In each group
+	 * only one device is not silent.
+	 */
+	if (MLX5_CAP_GEN(dev, silent_mode_query)) {
+		err = mlx5_devcom_locked_send_event(devcom, SD_FW_SILENT_CHECK,
+						    SD_FW_SILENT_CHECK, dev);
+		if (err)
+			goto err_devcom_unreg;
+	}
+
 	/* Send SD_PRIMARY_SET event with this device.
 	 * All peers will receive this event and compare to this device.
-	 * The one with lowest bus number will be marked as primary.
+	 * If fw_silents_secondaries is set, the non-silent device is chosen.
+	 * Otherwise the one with the lowest bus number is marked as primary.
 	 */
 	sd->primary = true;
 	err = mlx5_devcom_locked_send_event(devcom, SD_PRIMARY_SET,
@@ -586,9 +656,11 @@ static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
 	struct mlx5_sd *sd = mlx5_get_sd(secondary);
 	int err;
 
-	err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1);
-	if (err)
-		return err;
+	if (!primary_sd->fw_silents_secondaries) {
+		err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1);
+		if (err)
+			return err;
+	}
 
 	err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft,
 					   &sd->alias_obj_id, alias_key);
@@ -604,15 +676,20 @@ static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
 err_destroy_alias_ft:
 	sd_secondary_destroy_alias_ft(secondary);
 err_unset_silent:
-	mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+	if (!primary_sd->fw_silents_secondaries)
+		mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
 	return err;
 }
 
 static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary)
 {
+	struct mlx5_sd *primary_sd;
+
+	primary_sd = mlx5_get_sd(mlx5_sd_get_primary(secondary));
 	mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
 	sd_secondary_destroy_alias_ft(secondary);
-	mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+	if (!primary_sd->fw_silents_secondaries)
+		mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
 }
 
 static void sd_print_group(struct mlx5_core_dev *primary)
-- 
2.44.0


  parent reply	other threads:[~2026-06-04 11:48 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 11:44 [PATCH net-next 00/15] net/mlx5: Add switchdev mode support for Socket Direct single netdev, part 2/2 Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 01/15] net/mlx5: E-Switch, skip uplink IB rep load for SD secondary devices Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 02/15] net/mlx5: devcom, expose locked variant of send_event Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 03/15] net/mlx5: devcom, add DEVCOM_CANT_FAIL for non-rollback events Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 04/15] net/mlx5: SD, make primary/secondary role determination more robust Tariq Toukan
2026-06-04 11:44 ` Tariq Toukan [this message]
2026-06-04 11:44 ` [PATCH net-next 06/15] net/mlx5: SD, expend vport metadata for SD secondary devices Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 07/15] net/mlx5: SD, support switchdev mode transition with shared FDB Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 08/15] net/mlx5: E-Switch, notify SD on eswitch disable Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 09/15] net/mlx5: LAG, store demux resources per master lag_func Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 10/15] net/mlx5: LAG, disable both regular and SD LAG on lag_disable_change Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 11/15] net/mlx5: LAG, introduce software vport LAG implementation Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 12/15] net/mlx5: LAG, add MPESW over SD LAG support Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 13/15] net/mlx5: E-Switch, defer rep load while SD LAG is not active Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 14/15] net/mlx5: SD, defer vport metadata init until SD is ready Tariq Toukan
2026-06-04 11:44 ` [PATCH net-next 15/15] net/mlx5: SD, enable SD over ECPF and allow switchdev transition Tariq Toukan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260604114455.434711-6-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=edwards@nvidia.com \
    --cc=gal@nvidia.com \
    --cc=horms@kernel.org \
    --cc=kees@kernel.org \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=msanalla@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=ohartoov@nvidia.com \
    --cc=pabeni@redhat.com \
    --cc=parav@nvidia.com \
    --cc=phaddad@nvidia.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.