From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
Moshe Shemesh <moshe@nvidia.com>
Subject: [PATCH net-next 11/15] net/mlx5: Start health poll at earlier stage of driver load
Date: Sat, 1 Oct 2022 21:56:28 -0700 [thread overview]
Message-ID: <20221002045632.291612-12-saeed@kernel.org> (raw)
In-Reply-To: <20221002045632.291612-1-saeed@kernel.org>
From: Moshe Shemesh <moshe@nvidia.com>
Start the health poll at an earlier stage, so that if a fatal fw issue
occurs before or during initialization commands such as init_hca or
set_hca_cap, the health poll can detect it and indicate that the driver
is already in an error state.
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/health.c | 11 ++++++++---
drivers/net/ethernet/mellanox/mlx5/core/main.c | 17 ++++++++++-------
include/linux/mlx5/driver.h | 1 +
3 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 59205ba2ef7b..5bfc54a10621 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -843,9 +843,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
add_timer(&health->timer);
-
- if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
- queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
}
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
@@ -862,6 +859,14 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
del_timer_sync(&health->timer);
}
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+ queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
+}
+
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index daa7442f31c9..0b459d841c3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1092,7 +1092,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_device(dev->priv.devcom);
}
-static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
+static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
int err;
@@ -1130,10 +1130,12 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
+ mlx5_start_health_poll(dev);
+
err = mlx5_core_enable_hca(dev, 0);
if (err) {
mlx5_core_err(dev, "enable hca failed\n");
- goto err_cmd_cleanup;
+ goto stop_health_poll;
}
err = mlx5_core_set_issi(dev);
@@ -1185,8 +1187,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_core_err(dev, "query hca failed\n");
goto reclaim_boot_pages;
}
-
- mlx5_start_health_poll(dev);
+ mlx5_start_health_fw_log_up(dev);
return 0;
@@ -1194,6 +1195,8 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
mlx5_reclaim_startup_pages(dev);
err_disable_hca:
mlx5_core_disable_hca(dev, 0);
+stop_health_poll:
+ mlx5_stop_health_poll(dev, boot);
err_cmd_cleanup:
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1205,7 +1208,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
int err;
- mlx5_stop_health_poll(dev, boot);
err = mlx5_cmd_teardown_hca(dev);
if (err) {
mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
@@ -1213,6 +1215,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
}
mlx5_reclaim_startup_pages(dev);
mlx5_core_disable_hca(dev, 0);
+ mlx5_stop_health_poll(dev, boot);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
mlx5_cmd_cleanup(dev);
@@ -1362,7 +1365,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex);
dev->state = MLX5_DEVICE_STATE_UP;
- err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+ err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
if (err)
goto err_function;
@@ -1450,7 +1453,7 @@ int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
else
timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
- err = mlx5_function_setup(dev, timeout);
+ err = mlx5_function_setup(dev, false, timeout);
if (err)
goto err_function;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 285f301a6390..a12929bc31b2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1017,6 +1017,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
+void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
--
2.37.3
next prev parent reply other threads:[~2022-10-02 5:14 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-02 4:56 [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 01/15] net/mlx5e: xsk: Flush RQ on XSK activation to save memory Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 02/15] net/mlx5e: xsk: Set napi_id to support busy polling Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 03/15] net/mlx5e: xsk: Include XSK skb_from_cqe callbacks in INDIRECT_CALL Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 04/15] net/mlx5e: xsk: Improve need_wakeup logic Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 05/15] net/mlx5e: xsk: Use umr_mode to calculate striding RQ parameters Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 06/15] net/mlx5e: Improve MTT/KSM alignment Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 07/15] net/mlx5e: xsk: Use KLM to protect frame overrun in unaligned mode Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 08/15] net/mlx5e: xsk: Print a warning in slow configurations Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 09/15] net/mlx5e: xsk: Optimize for unaligned mode with 3072-byte frames Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 10/15] net/mlx5e: Expose rx_oversize_pkts_buffer counter Saeed Mahameed
2022-10-02 4:56 ` Saeed Mahameed [this message]
2022-10-02 4:56 ` [PATCH net-next 12/15] net/mlx5: Set default grace period based on function type Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 13/15] net/mlx5: E-Switch, Allow offloading fwd dest flow table with vport Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 14/15] net/mlx5: E-switch, Don't update group if qos is not enabled Saeed Mahameed
2022-10-02 4:56 ` [PATCH net-next 15/15] net/mlx5: E-Switch, Return EBUSY if can't get mode lock Saeed Mahameed
2022-10-04 0:10 ` [PATCH net-next 00/15] ] mlx5 xsk updates part4 and more patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221002045632.291612-12-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=moshe@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.