netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Moshe Shemesh <moshe@nvidia.com>, Shay Drory <shayd@nvidia.com>
Subject: [net-next 04/15] net/mlx5: Handle sync reset unload event
Date: Fri, 16 Jun 2023 13:11:02 -0700	[thread overview]
Message-ID: <20230616201113.45510-5-saeed@kernel.org> (raw)
In-Reply-To: <20230616201113.45510-1-saeed@kernel.org>

From: Moshe Shemesh <moshe@nvidia.com>

Added a new event handler to firmware sync reset, which is used to
support firmware sync reset flow on smart NIC. Adding this new stage to
the flow enables the firmware to ensure host PFs unload before ECPFs
unload, to avoid race of PFs recovery.

If firmware sends sync_reset_unload event to driver the driver should
unload and close all HW resources of the function. Once the driver
finishes unloading part, it can't get any more events from firmware as
event queues are closed, so it polls the reset state field to know when
to continue to next stage of the sync reset flow.

Added capability bit for supporting sync_reset_unload event.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/fw_reset.c    | 103 +++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/main.c    |   3 +
 include/linux/mlx5/device.h                   |   1 +
 include/linux/mlx5/mlx5_ifc.h                 |   3 +-
 4 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 952cc340b510..7af2b14ab5d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -21,6 +21,7 @@ struct mlx5_fw_reset {
 	struct workqueue_struct *wq;
 	struct work_struct fw_live_patch_work;
 	struct work_struct reset_request_work;
+	struct work_struct reset_unload_work;
 	struct work_struct reset_reload_work;
 	struct work_struct reset_now_work;
 	struct work_struct reset_abort_work;
@@ -30,6 +31,26 @@ struct mlx5_fw_reset {
 	int ret;
 };
 
+enum {
+	MLX5_FW_RST_STATE_IDLE = 0,
+	MLX5_FW_RST_STATE_TOGGLE_REQ = 4,
+};
+
+enum {
+	MLX5_RST_STATE_BIT_NUM = 12,
+	MLX5_RST_ACK_BIT_NUM = 22,
+};
+
+static u8 mlx5_get_fw_rst_state(struct mlx5_core_dev *dev)
+{
+	return (ioread32be(&dev->iseg->initializing) >> MLX5_RST_STATE_BIT_NUM) & 0xF;
+}
+
+static void mlx5_set_fw_rst_ack(struct mlx5_core_dev *dev)
+{
+	iowrite32be(BIT(MLX5_RST_ACK_BIT_NUM), &dev->iseg->initializing);
+}
+
 static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id,
 						     struct devlink_param_gset_ctx *ctx)
 {
@@ -155,7 +176,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
 	return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
 }
 
-static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
+static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded)
 {
 	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
 
@@ -163,7 +184,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
 	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
 		complete(&fw_reset->done);
 	} else {
-		mlx5_unload_one(dev, false);
+		if (!unloaded)
+			mlx5_unload_one(dev, false);
 		if (mlx5_health_wait_pci_up(dev))
 			mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
 		else
@@ -204,7 +226,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
 
 	mlx5_sync_reset_clear_reset_requested(dev, false);
 	mlx5_enter_error_state(dev, true);
-	mlx5_fw_reset_complete_reload(dev);
+	mlx5_fw_reset_complete_reload(dev, false);
 }
 
 #define MLX5_RESET_POLL_INTERVAL	(HZ / 10)
@@ -458,7 +480,70 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
 	mlx5_enter_error_state(dev, true);
 done:
 	fw_reset->ret = err;
-	mlx5_fw_reset_complete_reload(dev);
+	mlx5_fw_reset_complete_reload(dev, false);
+}
+
+static void mlx5_sync_reset_unload_event(struct work_struct *work)
+{
+	struct mlx5_fw_reset *fw_reset;
+	struct mlx5_core_dev *dev;
+	unsigned long timeout;
+	bool reset_action;
+	u8 rst_state;
+	int err;
+
+	fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work);
+	dev = fw_reset->dev;
+
+	if (mlx5_sync_reset_clear_reset_requested(dev, false))
+		return;
+
+	mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n");
+
+	err = mlx5_cmd_fast_teardown_hca(dev);
+	if (err)
+		mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err);
+	else
+		mlx5_enter_error_state(dev, true);
+
+	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags))
+		mlx5_unload_one_devl_locked(dev, false);
+	else
+		mlx5_unload_one(dev, false);
+
+	mlx5_set_fw_rst_ack(dev);
+	mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n");
+
+	reset_action = false;
+	timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD));
+	do {
+		rst_state = mlx5_get_fw_rst_state(dev);
+		if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ ||
+		    rst_state == MLX5_FW_RST_STATE_IDLE) {
+			reset_action = true;
+			break;
+		}
+		msleep(20);
+	} while (!time_after(jiffies, timeout));
+
+	if (!reset_action) {
+		mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n",
+			      rst_state);
+		fw_reset->ret = -ETIMEDOUT;
+		goto done;
+	}
+
+	mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state);
+	if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) {
+		err = mlx5_pci_link_toggle(dev);
+		if (err) {
+			mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err);
+			fw_reset->ret = err;
+		}
+	}
+
+done:
+	mlx5_fw_reset_complete_reload(dev, true);
 }
 
 static void mlx5_sync_reset_abort_event(struct work_struct *work)
@@ -483,6 +568,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
 	case MLX5_SYNC_RST_STATE_RESET_REQUEST:
 		queue_work(fw_reset->wq, &fw_reset->reset_request_work);
 		break;
+	case MLX5_SYNC_RST_STATE_RESET_UNLOAD:
+		queue_work(fw_reset->wq, &fw_reset->reset_unload_work);
+		break;
 	case MLX5_SYNC_RST_STATE_RESET_NOW:
 		queue_work(fw_reset->wq, &fw_reset->reset_now_work);
 		break;
@@ -517,10 +605,13 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
 {
 	unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE);
-	unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout);
 	struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+	unsigned long timeout;
 	int err;
 
+	if (MLX5_CAP_GEN(dev, pci_sync_for_fw_update_with_driver_unload))
+		pci_sync_update_timeout += mlx5_tout_ms(dev, RESET_UNLOAD);
+	timeout = msecs_to_jiffies(pci_sync_update_timeout);
 	if (!wait_for_completion_timeout(&fw_reset->done, timeout)) {
 		mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n",
 			       pci_sync_update_timeout / 1000);
@@ -557,6 +648,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
 	set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
 	cancel_work_sync(&fw_reset->fw_live_patch_work);
 	cancel_work_sync(&fw_reset->reset_request_work);
+	cancel_work_sync(&fw_reset->reset_unload_work);
 	cancel_work_sync(&fw_reset->reset_reload_work);
 	cancel_work_sync(&fw_reset->reset_now_work);
 	cancel_work_sync(&fw_reset->reset_abort_work);
@@ -595,6 +687,7 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
 
 	INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event);
 	INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
+	INIT_WORK(&fw_reset->reset_unload_work, mlx5_sync_reset_unload_event);
 	INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
 	INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
 	INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6fa314f8e5ee..88dbea6631d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -619,6 +619,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 
 	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event))
 		MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1);
+	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload))
+		MLX5_SET(cmd_hca_cap, set_hca_cap,
+			 pci_sync_for_fw_update_with_driver_unload, 1);
 
 	if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
 		MLX5_SET(cmd_hca_cap,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index c0af74efd3cb..80cc12a9a531 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -716,6 +716,7 @@ enum sync_rst_state_type {
 	MLX5_SYNC_RST_STATE_RESET_REQUEST	= 0x0,
 	MLX5_SYNC_RST_STATE_RESET_NOW		= 0x1,
 	MLX5_SYNC_RST_STATE_RESET_ABORT		= 0x2,
+	MLX5_SYNC_RST_STATE_RESET_UNLOAD	= 0x3,
 };
 
 struct mlx5_eqe_sync_fw_update {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 14892e795808..d61dcb5d7cd5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1755,7 +1755,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_328[0x2];
 	u8	   relaxed_ordering_read[0x1];
 	u8         log_max_pd[0x5];
-	u8         reserved_at_330[0x7];
+	u8         reserved_at_330[0x6];
+	u8         pci_sync_for_fw_update_with_driver_unload[0x1];
 	u8         vnic_env_cnt_steering_fail[0x1];
 	u8         reserved_at_338[0x1];
 	u8         q_counter_aggregation[0x1];
-- 
2.40.1


  parent reply	other threads:[~2023-06-16 20:11 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-16 20:10 [pull request][net-next 00/15] mlx5 updates 2023-06-16 Saeed Mahameed
2023-06-16 20:10 ` [net-next 01/15] net/mlx5: Ack on sync_reset_request only if PF can do reset_now Saeed Mahameed
2023-06-18 18:00   ` patchwork-bot+netdevbpf
2023-06-16 20:11 ` [net-next 02/15] net/mlx5: Expose timeout for sync reset unload stage Saeed Mahameed
2023-06-16 20:11 ` [net-next 03/15] net/mlx5: Check DTOR entry value is not zero Saeed Mahameed
2023-06-16 20:11 ` Saeed Mahameed [this message]
2023-06-16 20:11 ` [net-next 05/15] net/mlx5: Create eswitch debugfs root directory Saeed Mahameed
2023-06-16 20:11 ` [net-next 06/15] net/mlx5: Bridge, pass net device when linking vport to bridge Saeed Mahameed
2023-06-16 20:11 ` [net-next 07/15] net/mlx5: Bridge, expose FDB state via debugfs Saeed Mahameed
2023-06-17  7:48   ` Jakub Kicinski
2023-06-19  8:37     ` Vlad Buslov
2023-06-19 18:28       ` Jakub Kicinski
2023-06-19 18:34         ` Vlad Buslov
2023-06-19 19:05           ` Jakub Kicinski
2023-06-19 19:13             ` Vlad Buslov
2023-06-16 20:11 ` [net-next 08/15] net/mlx5: E-Switch, remove redundant else statements Saeed Mahameed
2023-06-16 20:11 ` [net-next 09/15] net/mlx5e: Remove mlx5e_dbg() and msglvl support Saeed Mahameed
2023-06-16 20:11 ` [net-next 10/15] net/mlx5: Expose bits for local loopback counter Saeed Mahameed
2023-06-16 20:11 ` [net-next 11/15] net/mlx5e: Add local loopback counter to vport stats Saeed Mahameed
2023-06-16 20:11 ` [net-next 12/15] net/mlx5: Fix the macro for accessing EC VF vports Saeed Mahameed
2023-06-16 20:11 ` [net-next 13/15] net/mlx5: DR, update query of HCA caps for EC VFs Saeed Mahameed
2023-06-16 20:11 ` [net-next 14/15] net/mlx5: Add header file for events Saeed Mahameed
2023-06-16 20:11 ` [net-next 15/15] net/mlx5: Remove unused ecpu field from struct mlx5_sf_table Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230616201113.45510-5-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=shayd@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).