From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
Moshe Shemesh <moshe@nvidia.com>
Subject: [PATCH net-next 11/15] net/mlx5: Start health poll at earlier stage of driver load
Date: Sat, 1 Oct 2022 21:56:28 -0700 [thread overview]
Message-ID: <20221002045632.291612-12-saeed@kernel.org> (raw)
In-Reply-To: <20221002045632.291612-1-saeed@kernel.org>
From: Moshe Shemesh <moshe@nvidia.com>
Start health poll at earlier stage, so if fw fatal issue occurred before
or during initialization commands such as init_hca or set_hca_cap the
poll health can detect and indicate that the driver is already in error
state.
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/health.c | 11 ++++++++---
drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 ++++++++++-------
include/linux/mlx5/driver.h | 1 +
3 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 59205ba2ef7b..5bfc54a10621 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -843,9 +843,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
add_timer(&health->timer);
-
- if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
- queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
}
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
@@ -862,6 +859,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
del_timer_sync(&health->timer);
}
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
+}
+
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index daa7442f31c9..0b459d841c3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1092,7 +1092,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
int err;
@@ -1130,10 +1130,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+ mlx5_start_health_poll(dev);
+
err = mlx5_core_enable_hca(dev, 0);
if (err) {
mlx5_core_err(dev, "enable hca failed\n");
- goto err_cmd_cleanup;
+ goto stop_health_poll;
}
err = mlx5_core_set_issi(dev);
@@ -1185,8 +1187,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_core_err(dev, "query hca failed\n");
goto reclaim_boot_pages;
}
-
- mlx5_start_health_poll(dev);
+ mlx5_start_health_fw_log_up(dev);
return 0;
@@ -1194,6 +1195,8 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_reclaim_startup_pages(dev);
err_disable_hca:
mlx5_core_disable_hca(dev, 0);
+stop_health_poll:
+ mlx5_stop_health_poll(dev, boot);
err_cmd_cleanup:
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1205,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
int err;
- mlx5_stop_health_poll(dev, boot);
err = mlx5_cmd_teardown_hca(dev);
if (err) {
mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
@@ -1213,6 +1215,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
}
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
+ mlx5_stop_health_poll(dev, boot);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1362,7 +1365,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex);
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+ err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err)
goto err_function;
@@ -1450,7 +1453,7 @@ int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
else
timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
- err = mlx5_function_setup(dev, timeout);
+ err = mlx5_function_setup(dev, false, timeout);
if (err)
goto err_function;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 285f301a6390..a12929bc31b2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1017,6 +1017,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
--
2.37.3
next prev parent reply other threads:[~2022-10-02 5:14 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-02 4:56 [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 01/15] net/mlx5e: xsk: Flush RQ on XSK activation to save memory Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 02/15] net/mlx5e: xsk: Set napi_id to support busy polling Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 03/15] net/mlx5e: xsk: Include XSK skb_from_cqe callbacks in INDIRECT_CALL Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 04/15] net/mlx5e: xsk: Improve need_wakeup logic Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 05/15] net/mlx5e: xsk: Use umr_mode to calculate striding RQ parameters Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 06/15] net/mlx5e: Improve MTT/KSM alignment Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 07/15] net/mlx5e: xsk: Use KLM to protect frame overrun in unaligned mode Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 08/15] net/mlx5e: xsk: Print a warning in slow configurations Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 09/15] net/mlx5e: xsk: Optimize for unaligned mode with 3072-byte frames Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 10/15] net/mlx5e: Expose rx_oversize_pkts_buffer counter Saeed Mahameed
2022-10-02 4:56 ` Saeed Mahameed [this message]
2022-10-02 4:56 ` [PATCH net-next 12/15] net/mlx5: Set default grace period based on function type Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 13/15] net/mlx5: E-Switch, Allow offloading fwd dest flow table with vport Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 14/15] net/mlx5: E-switch, Don't update group if qos is not enabled Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 15/15] net/mlx5: E-Switch, Return EBUSY if can't get mode lock Saeed Mahameed
2022-10-04 0:10 ` [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221002045632.291612-12-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=moshe@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).