From: Fenghua Yu <fenghua.yu@intel.com>
To: "Vinod Koul" <vkoul@kernel.org>, "Dave Jiang" <dave.jiang@intel.com>
Cc: dmaengine@vger.kernel.org,
"linux-kernel" <linux-kernel@vger.kernel.org>,
Fenghua Yu <fenghua.yu@intel.com>
Subject: [PATCH v2 5/5] dmaengine: idxd: Enable Function Level Reset (FLR) for halt
Date: Fri, 22 Nov 2024 15:30:28 -0800 [thread overview]
Message-ID: <20241122233028.2762809-6-fenghua.yu@intel.com> (raw)
In-Reply-To: <20241122233028.2762809-1-fenghua.yu@intel.com>
When a DSA/IAA device hits a fatal error, the device enters a halt state.
The driver can reset the device, depending on the Reset Type required by
the hardware, to recover the device.
Supported Reset Types are:
0: Reset Device command
1: Function Level Reset (FLR)
2: Warm reset
3: Cold reset
Currently, the driver only supports Reset Type 0.
This patch adds support for Reset Type 1 (FLR) recovery. Before issuing a
PCIe FLR, the IDXD device and WQ states are saved. After the FLR
completes, the device is restored to its previous state, allowing
the user to continue using the device.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
---
drivers/dma/idxd/init.c | 123 ++++++++++++++++++++++++++++++++++++++++
drivers/dma/idxd/irq.c | 28 ++++++++-
2 files changed, 148 insertions(+), 3 deletions(-)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index da5b76a1e208..ea44974e927c 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -981,6 +981,118 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
kfree(idxd_saved->saved_wqs);
}
+/*
+ * PCI error-handler .reset_prepare callback.
+ *
+ * Snapshots the IDXD software configuration into a freshly allocated
+ * struct idxd_saved_states and publishes it in idxd->idxd_saved, then
+ * saves the PCI device state. On any failure idxd->idxd_saved is left
+ * NULL, which idxd_reset_done() treats as "nothing to restore".
+ *
+ * @pdev: PCI device that is about to be reset.
+ */
+static void idxd_reset_prepare(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	struct device *dev = &idxd->pdev->dev;
+	const char *idxd_name = dev_name(idxd_confdev(idxd));
+	int rc;
+
+	/*
+	 * idxd_reset_done() only checks idxd_saved for NULL, so make sure a
+	 * failed save below cannot leave a pointer from an earlier reset
+	 * cycle behind for it to pick up.
+	 * NOTE(review): assumes nothing else owns idxd->idxd_saved between
+	 * reset cycles - confirm against the rest of the driver.
+	 */
+	idxd->idxd_saved = NULL;
+
+	/* Freed automatically on early return unless handed over below. */
+	struct idxd_saved_states *idxd_saved __free(kfree) =
+		kzalloc_node(sizeof(*idxd_saved), GFP_KERNEL,
+			     dev_to_node(&pdev->dev));
+	if (!idxd_saved) {
+		dev_err(dev, "HALT: no memory\n");
+
+		return;
+	}
+
+	/* Save IDXD configurations. */
+	rc = idxd_device_config_save(idxd, idxd_saved);
+	if (rc < 0) {
+		dev_err(dev, "HALT: cannot save %s configs\n", idxd_name);
+
+		return;
+	}
+
+	/* Hand ownership of the snapshot to the idxd device. */
+	idxd->idxd_saved = no_free_ptr(idxd_saved);
+
+	/* Save PCI device state. */
+	pci_save_state(idxd->pdev);
+}
+
+/*
+ * PCI error-handler .reset_done callback.
+ *
+ * Returns immediately when idxd_reset_prepare() left no saved state in
+ * idxd->idxd_saved. Otherwise it restores the PCI state, unbinds the idxd
+ * device, re-probes it while reusing the existing idxd software data,
+ * restores the saved configuration, and re-binds the device and every WQ
+ * whose bit is set in wq_enable_map. The saved-state buffer is released,
+ * and the pointer cleared, on every path past the initial check.
+ *
+ * @pdev: PCI device that has just been reset.
+ */
+static void idxd_reset_done(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	const char *idxd_name;
+	struct device *dev;
+	int rc, i;
+
+	if (!idxd->idxd_saved)
+		return;
+
+	dev = &idxd->pdev->dev;
+	idxd_name = dev_name(idxd_confdev(idxd));
+
+	/* Restore PCI device state. */
+	pci_restore_state(idxd->pdev);
+
+	/* Unbind idxd device from driver. */
+	idxd_unbind(&idxd_drv.drv, idxd_name);
+
+	/*
+	 * Probe PCI device without allocating or changing
+	 * idxd software data which keeps the same as before FLR.
+	 *
+	 * NOTE(review): the return value is ignored here; confirm whether
+	 * a failed re-probe should abort the restore.
+	 */
+	idxd_pci_probe_alloc(idxd, NULL, NULL);
+
+	/* Restore IDXD configurations. */
+	idxd_device_config_restore(idxd, idxd->idxd_saved);
+
+	/* Re-configure IDXD device if allowed. */
+	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+		rc = idxd_device_config(idxd);
+		if (rc < 0) {
+			dev_err(dev, "HALT: %s config fails\n", idxd_name);
+			goto out;
+		}
+	}
+
+	/* Bind IDXD device to driver. */
+	rc = idxd_bind(&idxd_drv.drv, idxd_name);
+	if (rc < 0) {
+		dev_err(dev, "HALT: binding %s to driver fails\n", idxd_name);
+		goto out;
+	}
+
+	/* Bind enabled wq in the IDXD device to driver. */
+	for (i = 0; i < idxd->max_wqs; i++) {
+		if (test_bit(i, idxd->wq_enable_map)) {
+			struct idxd_wq *wq = idxd->wqs[i];
+			char wq_name[32];
+
+			wq->state = IDXD_WQ_DISABLED;
+			/* Bounded write: never overrun wq_name. */
+			snprintf(wq_name, sizeof(wq_name), "wq%d.%d",
+				 idxd->id, wq->id);
+			/*
+			 * Bind to user driver depending on wq type.
+			 *
+			 * Currently only support user type WQ. Will support
+			 * kernel type WQ in the future.
+			 */
+			if (wq->type == IDXD_WQT_USER)
+				rc = idxd_bind(&idxd_user_drv.drv, wq_name);
+			else
+				rc = -EINVAL;
+			if (rc < 0) {
+				/* Drop the WQ from the enabled set on failure. */
+				clear_bit(i, idxd->wq_enable_map);
+				dev_err(dev,
+					"HALT: unable to re-enable wq %s\n",
+					dev_name(wq_confdev(wq)));
+			}
+		}
+	}
+out:
+	kfree(idxd->idxd_saved);
+	/*
+	 * Clear the pointer so the NULL check at the top of this function
+	 * stays meaningful on the next reset and the freed buffer is never
+	 * reused or freed twice.
+	 */
+	idxd->idxd_saved = NULL;
+}
+
+/*
+ * PCI error-handler table for the idxd PCI driver. Only the reset hooks
+ * are provided: idxd_reset_prepare() saves device state before a function
+ * reset and idxd_reset_done() restores it afterwards.
+ */
+static const struct pci_error_handlers idxd_error_handler = {
+ .reset_prepare = idxd_reset_prepare,
+ .reset_done = idxd_reset_done,
+};
+
/*
* Probe idxd PCI device.
* If idxd is not given, need to allocate idxd and set up its data.
@@ -1054,6 +1166,16 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev,
dev_warn(dev, "IDXD debugfs failed to setup\n");
}
+ if (!alloc_idxd) {
+ /* Release interrupts in the IDXD device. */
+ idxd_cleanup_interrupts(idxd);
+
+ /* Re-enable interrupts in the IDXD device. */
+ rc = idxd_setup_interrupts(idxd);
+ if (rc)
+ dev_warn(dev, "IDXD interrupts failed to setup\n");
+ }
+
dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
idxd->hw.version);
@@ -1144,6 +1266,7 @@ static struct pci_driver idxd_pci_driver = {
.probe = idxd_pci_probe,
.remove = idxd_remove,
.shutdown = idxd_shutdown,
+ .err_handler = &idxd_error_handler,
};
static int __init idxd_init_module(void)
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index a46e58b756a5..1107db3ce0a3 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -383,6 +383,20 @@ static void process_evl_entries(struct idxd_device *idxd)
mutex_unlock(&evl->lock);
}
+/*
+ * Work handler queued from the halt path when the device reports that it
+ * needs a Function Level Reset (FLR).
+ *
+ * @work: the work item embedded in struct idxd_device.
+ */
+static void idxd_device_flr(struct work_struct *work)
+{
+	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
+	struct pci_dev *pdev = idxd->pdev;
+
+	/* pci_reset_function() resets the device with FLR. */
+	if (pci_reset_function(pdev) != 0)
+		dev_err(&pdev->dev, "FLR failed\n");
+}
+
static irqreturn_t idxd_halt(struct idxd_device *idxd)
{
union gensts_reg gensts;
@@ -398,15 +412,23 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
*/
INIT_WORK(&idxd->work, idxd_device_reinit);
queue_work(idxd->wq, &idxd->work);
+ } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
+ idxd->state = IDXD_DEV_HALTED;
+ idxd_mask_error_interrupts(idxd);
+ dev_dbg(&idxd->pdev->dev,
+ "idxd halted, doing FLR. After FLR, configs are restored\n");
+ INIT_WORK(&idxd->work, idxd_device_flr);
+ queue_work(idxd->wq, &idxd->work);
+
} else {
idxd->state = IDXD_DEV_HALTED;
idxd_wqs_quiesce(idxd);
idxd_wqs_unmap_portal(idxd);
idxd_device_clear_state(idxd);
dev_err(&idxd->pdev->dev,
- "idxd halted, need %s.\n",
- gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
- "FLR" : "system reset");
+ "idxd halted, need system reset");
+
+ return -ENXIO;
}
}
--
2.37.1
next prev parent reply other threads:[~2024-11-22 23:30 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-22 23:30 [PATCH v2 0/5] Enable FLR for IDXD halt Fenghua Yu
2024-11-22 23:30 ` [PATCH v2 1/5] dmaengine: idxd: Add idxd_pci_probe_alloc() helper Fenghua Yu
2024-11-22 23:30 ` [PATCH v2 2/5] dmaengine: idxd: Binding and unbinding IDXD device and driver Fenghua Yu
2024-11-22 23:30 ` [PATCH v2 3/5] dmaengine: idxd: Add idxd_device_config_save() and idxd_device_config_restore() helpers Fenghua Yu
2024-11-22 23:30 ` [PATCH v2 4/5] dmaengine: idxd: Refactor halt handler Fenghua Yu
2024-11-22 23:30 ` Fenghua Yu [this message]
2024-12-02 22:15 ` [PATCH v2 0/5] Enable FLR for IDXD halt Dave Jiang
2024-12-24 10:42 ` Vinod Koul
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241122233028.2762809-6-fenghua.yu@intel.com \
--to=fenghua.yu@intel.com \
--cc=dave.jiang@intel.com \
--cc=dmaengine@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=vkoul@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox