All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: Shannon Nelson <shannon.nelson@amd.com>
Cc: brett.creeley@amd.com, davem@davemloft.net,
	netdev@vger.kernel.org, kuba@kernel.org, drivers@pensando.io,
	leon@kernel.org
Subject: Re: [PATCH RFC v4 net-next 03/13] pds_core: health timer and workqueue
Date: Wed, 8 Mar 2023 10:39:21 +0100	[thread overview]
Message-ID: <ZAhXycSgSiNFwpNl@nanopsycho> (raw)
In-Reply-To: <20230308051310.12544-4-shannon.nelson@amd.com>

Wed, Mar 08, 2023 at 06:13:00AM CET, shannon.nelson@amd.com wrote:
>Add in the periodic health check and the related workqueue,
>as well as the handlers for when a FW reset is seen.

Why don't you use devlink health to let the user know that something odd
happened with HW?


>
>Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
>---
> drivers/net/ethernet/amd/pds_core/core.c | 60 ++++++++++++++++++++++++
> drivers/net/ethernet/amd/pds_core/dev.c  |  3 ++
> drivers/net/ethernet/amd/pds_core/main.c | 51 ++++++++++++++++++++
> include/linux/pds/pds_core.h             | 10 ++++
> 4 files changed, 124 insertions(+)
>
>diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
>index 88a6aa42cc28..87717bf40e80 100644
>--- a/drivers/net/ethernet/amd/pds_core/core.c
>+++ b/drivers/net/ethernet/amd/pds_core/core.c
>@@ -34,3 +34,63 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
> 
> 	set_bit(PDSC_S_FW_DEAD, &pdsc->state);
> }
>+
>+static void pdsc_fw_down(struct pdsc *pdsc)
>+{
>+	mutex_lock(&pdsc->config_lock);
>+
>+	if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
>+		dev_err(pdsc->dev, "%s: already happening\n", __func__);
>+		mutex_unlock(&pdsc->config_lock);
>+		return;
>+	}
>+
>+	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
>+
>+	mutex_unlock(&pdsc->config_lock);
>+}
>+
>+static void pdsc_fw_up(struct pdsc *pdsc)
>+{
>+	int err;
>+
>+	mutex_lock(&pdsc->config_lock);
>+
>+	if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
>+		dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
>+		mutex_unlock(&pdsc->config_lock);
>+		return;
>+	}
>+
>+	err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
>+	if (err)
>+		goto err_out;
>+
>+	mutex_unlock(&pdsc->config_lock);
>+
>+	return;
>+
>+err_out:
>+	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
>+	mutex_unlock(&pdsc->config_lock);
>+}
>+
>+void pdsc_health_thread(struct work_struct *work)
>+{
>+	struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
>+	bool healthy;
>+
>+	healthy = pdsc_is_fw_good(pdsc);
>+	dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
>+		__func__, healthy, pdsc->fw_status, pdsc->last_hb);
>+
>+	if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
>+		if (healthy)
>+			pdsc_fw_up(pdsc);
>+	} else {
>+		if (!healthy)
>+			pdsc_fw_down(pdsc);
>+	}
>+
>+	pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
>+}
>diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
>index 7a9db1505c1f..43229c149b2f 100644
>--- a/drivers/net/ethernet/amd/pds_core/dev.c
>+++ b/drivers/net/ethernet/amd/pds_core/dev.c
>@@ -177,6 +177,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
> 	err = pdsc_devcmd_wait(pdsc, max_seconds);
> 	memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
> 
>+	if (err == -ENXIO || err == -ETIMEDOUT)
>+		pdsc_queue_health_check(pdsc);
>+
> 	return err;
> }
> 
>diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
>index 0c63bbe8b417..ac1b30f78e0e 100644
>--- a/drivers/net/ethernet/amd/pds_core/main.c
>+++ b/drivers/net/ethernet/amd/pds_core/main.c
>@@ -25,6 +25,31 @@ static const struct pci_device_id pdsc_id_table[] = {
> };
> MODULE_DEVICE_TABLE(pci, pdsc_id_table);
> 
>+void pdsc_queue_health_check(struct pdsc *pdsc)
>+{
>+	unsigned long mask;
>+
>+	/* Don't do a check when in a transition state */
>+	mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
>+	       BIT_ULL(PDSC_S_STOPPING_DRIVER);
>+	if (pdsc->state & mask)
>+		return;
>+
>+	/* Queue a new health check if one isn't already queued */
>+	queue_work(pdsc->wq, &pdsc->health_work);
>+}
>+
>+static void pdsc_wdtimer_cb(struct timer_list *t)
>+{
>+	struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
>+
>+	dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
>+	mod_timer(&pdsc->wdtimer,
>+		  round_jiffies(jiffies + pdsc->wdtimer_period));
>+
>+	pdsc_queue_health_check(pdsc);
>+}
>+
> static void pdsc_unmap_bars(struct pdsc *pdsc)
> {
> 	struct pdsc_dev_bar *bars = pdsc->bars;
>@@ -135,8 +160,11 @@ static int pdsc_init_vf(struct pdsc *pdsc)
> 	return -1;
> }
> 
>+#define PDSC_WQ_NAME_LEN 24
>+
> static int pdsc_init_pf(struct pdsc *pdsc)
> {
>+	char wq_name[PDSC_WQ_NAME_LEN];
> 	int err;
> 
> 	pcie_print_link_status(pdsc->pdev);
>@@ -151,6 +179,13 @@ static int pdsc_init_pf(struct pdsc *pdsc)
> 	if (err)
> 		goto err_out_release_regions;
> 
>+	/* General workqueue and timer, but don't start timer yet */
>+	snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->id);
>+	pdsc->wq = create_singlethread_workqueue(wq_name);
>+	INIT_WORK(&pdsc->health_work, pdsc_health_thread);
>+	timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
>+	pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
>+
> 	mutex_init(&pdsc->devcmd_lock);
> 	mutex_init(&pdsc->config_lock);
> 
>@@ -167,11 +202,20 @@ static int pdsc_init_pf(struct pdsc *pdsc)
> 
> 	mutex_unlock(&pdsc->config_lock);
> 
>+	/* Lastly, start the health check timer */
>+	mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
>+
> 	return 0;
> 
> err_out_teardown:
> 	pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
> err_out_unmap_bars:
>+	del_timer_sync(&pdsc->wdtimer);
>+	if (pdsc->wq) {
>+		flush_workqueue(pdsc->wq);
>+		destroy_workqueue(pdsc->wq);
>+		pdsc->wq = NULL;
>+	}
> 	mutex_unlock(&pdsc->config_lock);
> 	mutex_destroy(&pdsc->config_lock);
> 	mutex_destroy(&pdsc->devcmd_lock);
>@@ -262,6 +306,13 @@ static void pdsc_remove(struct pci_dev *pdev)
> 		mutex_lock(&pdsc->config_lock);
> 		set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
> 
>+		del_timer_sync(&pdsc->wdtimer);
>+		if (pdsc->wq) {
>+			flush_workqueue(pdsc->wq);
>+			destroy_workqueue(pdsc->wq);
>+			pdsc->wq = NULL;
>+		}
>+
> 		pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
> 		mutex_unlock(&pdsc->config_lock);
> 		mutex_destroy(&pdsc->config_lock);
>diff --git a/include/linux/pds/pds_core.h b/include/linux/pds/pds_core.h
>index da0663925018..e73af422fae0 100644
>--- a/include/linux/pds/pds_core.h
>+++ b/include/linux/pds/pds_core.h
>@@ -12,6 +12,8 @@
> #include <linux/pds/pds_intr.h>
> 
> #define PDSC_DRV_DESCRIPTION	"AMD/Pensando Core Driver"
>+
>+#define PDSC_WATCHDOG_SECS	5
> #define PDSC_TEARDOWN_RECOVERY	false
> #define PDSC_TEARDOWN_REMOVING	true
> #define PDSC_SETUP_RECOVERY	false
>@@ -63,12 +65,17 @@ struct pdsc {
> 	u8 fw_generation;
> 	unsigned long last_fw_time;
> 	u32 last_hb;
>+	struct timer_list wdtimer;
>+	unsigned int wdtimer_period;
>+	struct work_struct health_work;
> 
> 	struct pdsc_devinfo dev_info;
> 	struct pds_core_dev_identity dev_ident;
> 	unsigned int nintrs;
> 	struct pdsc_intr_info *intr_info;	/* array of nintrs elements */
> 
>+	struct workqueue_struct *wq;
>+
> 	unsigned int devcmd_timeout;
> 	struct mutex devcmd_lock;	/* lock for dev_cmd operations */
> 	struct mutex config_lock;	/* lock for configuration operations */
>@@ -81,6 +88,8 @@ struct pdsc {
> 	u64 __iomem *kern_dbpage;
> };
> 
>+void pdsc_queue_health_check(struct pdsc *pdsc);
>+
> struct pdsc *pdsc_dl_alloc(struct device *dev, bool is_pf);
> void pdsc_dl_free(struct pdsc *pdsc);
> int pdsc_dl_register(struct pdsc *pdsc);
>@@ -116,5 +125,6 @@ int pdsc_dev_init(struct pdsc *pdsc);
> 
> int pdsc_setup(struct pdsc *pdsc, bool init);
> void pdsc_teardown(struct pdsc *pdsc, bool removing);
>+void pdsc_health_thread(struct work_struct *work);
> 
> #endif /* _PDSC_H_ */
>-- 
>2.17.1
>

  reply	other threads:[~2023-03-08  9:39 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-08  5:12 [PATCH RFC v4 net-next 00/13] pds_core driver Shannon Nelson
2023-03-08  5:12 ` [PATCH RFC v4 net-next 01/13] pds_core: initial framework for pds_core PF driver Shannon Nelson
2023-03-08  5:12 ` [PATCH RFC v4 net-next 02/13] pds_core: add devcmd device interfaces Shannon Nelson
2023-03-08  9:37   ` Jiri Pirko
2023-03-09  2:05     ` Shannon Nelson
2023-03-09  9:29       ` Jiri Pirko
2023-03-09 22:25         ` Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 03/13] pds_core: health timer and workqueue Shannon Nelson
2023-03-08  9:39   ` Jiri Pirko [this message]
2023-03-09  2:08     ` Shannon Nelson
2023-03-09  9:30       ` Jiri Pirko
2023-03-09 22:26         ` Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 04/13] pds_core: set up device and adminq Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 05/13] pds_core: Add adminq processing and commands Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 06/13] pds_core: add FW update feature to devlink Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 07/13] pds_core: set up the VIF definitions and defaults Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 08/13] pds_core: add initial VF device handling Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 09/13] pds_core: add auxiliary_bus devices Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 10/13] pds_core: devlink params for enabling VIF support Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 11/13] pds_core: add the aux client API Shannon Nelson
2023-03-14 12:14   ` Leon Romanovsky
2023-03-14 16:53     ` Shannon Nelson
2023-03-15  8:21       ` Leon Romanovsky
2023-03-16  1:08         ` Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 12/13] pds_core: publish events to the clients Shannon Nelson
2023-03-08  5:13 ` [PATCH RFC v4 net-next 13/13] pds_core: Kconfig and pds_core.rst Shannon Nelson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZAhXycSgSiNFwpNl@nanopsycho \
    --to=jiri@resnulli.us \
    --cc=brett.creeley@amd.com \
    --cc=davem@davemloft.net \
    --cc=drivers@pensando.io \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=shannon.nelson@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.