* [PATCH 0/6] accel/ivpu: Changes for 6.15
@ 2025-01-29 12:56 Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 1/6] accel/ivpu: Add support for hardware fault injection Jacek Lawrynowicz
` (6 more replies)
0 siblings, 7 replies; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel; +Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Jacek Lawrynowicz
Most notable is the addition of hardware fault injection support, which allows
testing error handling paths in the driver.
Jacek Lawrynowicz (2):
accel/ivpu: Add support for hardware fault injection
accel/ivpu: Update last_busy in IRQ handler
Karol Wachowski (3):
accel/ivpu: Fix missing MMU events if file_priv is unbound
accel/ivpu: Turn on HWS by default on all platforms
accel/ivpu: Move recovery work to system_unbound_wq
Tomasz Rusinowicz (1):
accel/ivpu: Enable recovery and adjust timeouts for fpga
drivers/accel/ivpu/ivpu_debugfs.c | 5 +++
drivers/accel/ivpu/ivpu_fw.c | 10 +----
drivers/accel/ivpu/ivpu_hw.c | 33 +++++++++++----
drivers/accel/ivpu/ivpu_hw_ip.c | 4 +-
drivers/accel/ivpu/ivpu_hw_reg_io.h | 64 +++++++++++++++++------------
drivers/accel/ivpu/ivpu_job.c | 8 +++-
drivers/accel/ivpu/ivpu_pm.c | 7 +---
7 files changed, 78 insertions(+), 53 deletions(-)
--
2.45.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 1/6] accel/ivpu: Add support for hardware fault injection
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:41 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler Jacek Lawrynowicz
` (5 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel; +Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Jacek Lawrynowicz
This commit introduces the capability to simulate hardware faults for
testing purposes. The new `fail_hw` fault can be injected in
`ivpu_hw_reg_poll_fld()`, which is used in various parts of the driver
to wait for the hardware to reach a specific state. This allows testing
failures during NPU boot and shutdown, IPC message handling and more.
Fault injection can be enabled using debugfs or a module parameter.
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_debugfs.c | 5 +++
drivers/accel/ivpu/ivpu_hw.c | 14 +++++++
drivers/accel/ivpu/ivpu_hw_ip.c | 4 +-
drivers/accel/ivpu/ivpu_hw_reg_io.h | 64 +++++++++++++++++------------
4 files changed, 58 insertions(+), 29 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 8180b95ed69dc..e79715c53f2a0 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -4,6 +4,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_file.h>
@@ -430,4 +431,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_root, vdev, &fw_profiling_freq_fops);
debugfs_create_file("dct", 0644, debugfs_root, vdev, &ivpu_dct_fops);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("fail_hw", debugfs_root, &ivpu_hw_failure);
+#endif
}
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index e332f19ab51de..8099ab047bfe6 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -9,6 +9,15 @@
#include "ivpu_hw_ip.h"
#include <linux/dmi.h>
+#include <linux/fault-inject.h>
+
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(ivpu_hw_failure);
+
+static char *ivpu_fail_hw;
+module_param_named_unsafe(fail_hw, ivpu_fail_hw, charp, 0444);
+MODULE_PARM_DESC(fail_hw, "<interval>,<probability>,<space>,<times>");
+#endif
static char *platform_to_str(u32 platform)
{
@@ -247,6 +256,11 @@ int ivpu_hw_init(struct ivpu_device *vdev)
timeouts_init(vdev);
atomic_set(&vdev->hw->firewall_irq_counter, 0);
+#ifdef CONFIG_FAULT_INJECTION
+ if (ivpu_fail_hw)
+ setup_fault_attr(&ivpu_hw_failure, ivpu_fail_hw);
+#endif
+
return 0;
}
diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
index 029dd065614b2..823f6a57dc546 100644
--- a/drivers/accel/ivpu/ivpu_hw_ip.c
+++ b/drivers/accel/ivpu/ivpu_hw_ip.c
@@ -968,14 +968,14 @@ void ivpu_hw_ip_wdt_disable(struct ivpu_device *vdev)
static u32 ipc_rx_count_get_37xx(struct ivpu_device *vdev)
{
- u32 count = REGV_RD32_SILENT(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
+ u32 count = readl(vdev->regv + VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT);
return REG_GET_FLD(VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
}
static u32 ipc_rx_count_get_40xx(struct ivpu_device *vdev)
{
- u32 count = REGV_RD32_SILENT(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
+ u32 count = readl(vdev->regv + VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT);
return REG_GET_FLD(VPU_40XX_HOST_SS_TIM_IPC_FIFO_STAT, FILL_LEVEL, count);
}
diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h
index 79b3f441eac4d..66259b0ead026 100644
--- a/drivers/accel/ivpu/ivpu_hw_reg_io.h
+++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h
@@ -7,6 +7,7 @@
#define __IVPU_HW_REG_IO_H__
#include <linux/bitfield.h>
+#include <linux/fault-inject.h>
#include <linux/io.h>
#include <linux/iopoll.h>
@@ -16,13 +17,11 @@
#define REG_IO_ERROR 0xffffffff
#define REGB_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regb, (reg), #reg, __func__)
-#define REGB_RD32_SILENT(reg) readl(vdev->regb + (reg))
#define REGB_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regb, (reg), #reg, __func__)
#define REGB_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGB_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regb, (reg), (val), #reg, __func__)
#define REGV_RD32(reg) ivpu_hw_reg_rd32(vdev, vdev->regv, (reg), #reg, __func__)
-#define REGV_RD32_SILENT(reg) readl(vdev->regv + (reg))
#define REGV_RD64(reg) ivpu_hw_reg_rd64(vdev, vdev->regv, (reg), #reg, __func__)
#define REGV_WR32(reg, val) ivpu_hw_reg_wr32(vdev, vdev->regv, (reg), (val), #reg, __func__)
#define REGV_WR64(reg, val) ivpu_hw_reg_wr64(vdev, vdev->regv, (reg), (val), #reg, __func__)
@@ -47,31 +46,42 @@
#define REG_TEST_FLD_NUM(REG, FLD, num, val) \
((num) == FIELD_GET(REG##_##FLD##_MASK, val))
-#define REGB_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
-
-#define REGV_POLL_FLD(reg, fld, val, timeout_us) \
-({ \
- u32 var; \
- int r; \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
- __func__, #reg, reg, #fld, val); \
- r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
- REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
- ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
- __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
- r; \
-})
+#define REGB_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regb, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+#define REGV_POLL_FLD(reg, fld, exp_fld_val, timeout_us) \
+ ivpu_hw_reg_poll_fld(vdev, vdev->regv, reg, reg##_##fld##_MASK, \
+ FIELD_PREP(reg##_##fld##_MASK, exp_fld_val), timeout_us, \
+ __func__, #reg, #fld)
+
+extern struct fault_attr ivpu_hw_failure;
+
+static inline int __must_check
+ivpu_hw_reg_poll_fld(struct ivpu_device *vdev, void __iomem *base,
+ u32 reg_offset, u32 reg_mask, u32 exp_masked_val, u32 timeout_us,
+ const char *func_name, const char *reg_name, const char *fld_name)
+{
+ u32 reg_val;
+ int ret;
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s started (exp_val 0x%x)\n",
+ func_name, reg_name, reg_offset, fld_name, exp_masked_val);
+
+ ret = read_poll_timeout(readl, reg_val, (reg_val & reg_mask) == exp_masked_val,
+ REG_POLL_SLEEP_US, timeout_us, false, base + reg_offset);
+
+#ifdef CONFIG_FAULT_INJECTION
+ if (should_fail(&ivpu_hw_failure, 1))
+ ret = -ETIMEDOUT;
+#endif
+
+ ivpu_dbg(vdev, REG, "%s : %s (0x%08x) POLL %s %s (reg_val 0x%08x)\n",
+ func_name, reg_name, reg_offset, fld_name, ret ? "ETIMEDOUT" : "OK", reg_val);
+
+ return ret;
+}
static inline u32
ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 1/6] accel/ivpu: Add support for hardware fault injection Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:43 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound Jacek Lawrynowicz
` (4 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel
Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Jacek Lawrynowicz,
Karol Wachowski
Call pm_runtime_mark_last_busy() in top half of IRQ handler to prevent
device from being runtime suspended before bottom half is executed on
a workqueue.
Reviewed-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_hw.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 8099ab047bfe6..4199f0bbb435b 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -10,6 +10,7 @@
#include <linux/dmi.h>
#include <linux/fault-inject.h>
+#include <linux/pm_runtime.h>
#ifdef CONFIG_FAULT_INJECTION
DECLARE_FAULT_ATTR(ivpu_hw_failure);
@@ -331,7 +332,9 @@ irqreturn_t ivpu_hw_irq_handler(int irq, void *ptr)
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
ivpu_hw_btrs_global_int_enable(vdev);
- if (ip_handled || btrs_handled)
- return IRQ_HANDLED;
- return IRQ_NONE;
+ if (!ip_handled && !btrs_handled)
+ return IRQ_NONE;
+
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ return IRQ_HANDLED;
}
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 1/6] accel/ivpu: Add support for hardware fault injection Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:45 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms Jacek Lawrynowicz
` (3 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel
Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Karol Wachowski,
Jacek Lawrynowicz
From: Karol Wachowski <karol.wachowski@intel.com>
Move the ivpu_mmu_discard_events() function to the common portion of
the abort work function. This ensures it is called only once, even if
there are no faulty contexts in context_xa, to guarantee that MMU events
are discarded and new events are not missed.
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_job.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 3c162ac41a1dc..c1013f511efa6 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -375,7 +375,6 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
ivpu_mmu_disable_ssid_events(vdev, file_priv->ctx.id);
- ivpu_mmu_discard_events(vdev);
file_priv->aborted = true;
}
@@ -980,6 +979,13 @@ void ivpu_context_abort_work_fn(struct work_struct *work)
}
mutex_unlock(&vdev->context_list_lock);
+ /*
+ * We will not receive new MMU event interrupts until existing events are discarded
+ * however, we want to discard these events only after aborting the faulty context
+ * to avoid generating new faults from that context
+ */
+ ivpu_mmu_discard_events(vdev);
+
if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
return;
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
` (2 preceding siblings ...)
2025-01-29 12:56 ` [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:47 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga Jacek Lawrynowicz
` (2 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel
Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Karol Wachowski,
Jacek Lawrynowicz
From: Karol Wachowski <karol.wachowski@intel.com>
Hardware scheduling (HWS) is supposed to be supported on all existing
platforms with recent FW, including pre-silicon ones. Turn on HWS by
default.
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_fw.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 6cf1fb826d1ba..cfe8f79ec5a6f 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -148,15 +148,7 @@ ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_he
if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, JSM, 3, 24))
return VPU_SCHEDULING_MODE_OS;
- switch (ivpu_device_id(vdev)) {
- case PCI_DEVICE_ID_MTL:
- case PCI_DEVICE_ID_ARL:
- case PCI_DEVICE_ID_LNL:
- case PCI_DEVICE_ID_PTL_P:
- return VPU_SCHEDULING_MODE_HW;
- default:
- return VPU_SCHEDULING_MODE_OS;
- }
+ return VPU_SCHEDULING_MODE_HW;
}
static int ivpu_fw_parse(struct ivpu_device *vdev)
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
` (3 preceding siblings ...)
2025-01-29 12:56 ` [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:48 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq Jacek Lawrynowicz
2025-02-03 9:42 ` [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel
Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Tomasz Rusinowicz,
Jacek Lawrynowicz
From: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Recovery now works on fpga. JSM state dump timeout needs to
be really long for the new fpga model releases.
Enable punit on fpga.
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_hw.c | 10 +++++-----
drivers/accel/ivpu/ivpu_pm.c | 5 -----
2 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
index 4199f0bbb435b..2057907c5aa6a 100644
--- a/drivers/accel/ivpu/ivpu_hw.c
+++ b/drivers/accel/ivpu/ivpu_hw.c
@@ -58,7 +58,7 @@ static void platform_init(struct ivpu_device *vdev)
static void wa_init(struct ivpu_device *vdev)
{
- vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
+ vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
@@ -87,12 +87,12 @@ static void timeouts_init(struct ivpu_device *vdev)
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = -1;
} else if (ivpu_is_fpga(vdev)) {
- vdev->timeout.boot = 100000;
- vdev->timeout.jsm = 50000;
- vdev->timeout.tdr = 2000000;
+ vdev->timeout.boot = 50;
+ vdev->timeout.jsm = 15000;
+ vdev->timeout.tdr = 30000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
- vdev->timeout.state_dump_msg = 10;
+ vdev->timeout.state_dump_msg = 10000;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index f41b3bfe40af4..8f6222d157204 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -161,11 +161,6 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
return;
}
- if (ivpu_is_fpga(vdev)) {
- ivpu_err(vdev, "Recovery not available on FPGA\n");
- return;
- }
-
/* Trigger recovery if it's not in progress */
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
ivpu_hw_diagnose_failure(vdev);
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
` (4 preceding siblings ...)
2025-01-29 12:56 ` [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga Jacek Lawrynowicz
@ 2025-01-29 12:56 ` Jacek Lawrynowicz
2025-01-31 18:50 ` Jeffrey Hugo
2025-02-03 9:42 ` [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
6 siblings, 1 reply; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-01-29 12:56 UTC (permalink / raw)
To: dri-devel
Cc: oded.gabbay, quic_jhugo, maciej.falkowski, Karol Wachowski,
Jacek Lawrynowicz
From: Karol Wachowski <karol.wachowski@intel.com>
Recovery work doesn't need to be bound to any specific CPU, so move it
to unbound workqueue to improve execution time and system latency.
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
drivers/accel/ivpu/ivpu_pm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 8f6222d157204..d3db944ad8643 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -165,7 +165,7 @@ void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
ivpu_hw_diagnose_failure(vdev);
ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
- queue_work(system_long_wq, &vdev->pm->recovery_work);
+ queue_work(system_unbound_wq, &vdev->pm->recovery_work);
}
}
--
2.45.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/6] accel/ivpu: Add support for hardware fault injection
2025-01-29 12:56 ` [PATCH 1/6] accel/ivpu: Add support for hardware fault injection Jacek Lawrynowicz
@ 2025-01-31 18:41 ` Jeffrey Hugo
2025-02-03 9:41 ` Jacek Lawrynowicz
0 siblings, 1 reply; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:41 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel; +Cc: oded.gabbay, maciej.falkowski
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> This commit introduces the capability to simulate hardware faults for
Nit - "This commit" is redundant.
> testing purposes. The new `fail_hw` fault can be injected in
> `ivpu_hw_reg_poll_fld()`, which is used in various parts of the driver
> to wait for the hardware to reach a specific state. This allows to test
> failures during NPU boot and shutdown, IPC message handling and more.
>
> Fault injection can be enabled using debugfs or a module parameter.
>
> Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler
2025-01-29 12:56 ` [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler Jacek Lawrynowicz
@ 2025-01-31 18:43 ` Jeffrey Hugo
0 siblings, 0 replies; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:43 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel
Cc: oded.gabbay, maciej.falkowski, Karol Wachowski
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> Call pm_runtime_mark_last_busy() in top half of IRQ handler to prevent
> device from being runtime suspended before bottom half is executed on
> a workqueue.
>
> Reviewed-by: Karol Wachowski <karol.wachowski@intel.com>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound
2025-01-29 12:56 ` [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound Jacek Lawrynowicz
@ 2025-01-31 18:45 ` Jeffrey Hugo
0 siblings, 0 replies; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:45 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel
Cc: oded.gabbay, maciej.falkowski, Karol Wachowski
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> From: Karol Wachowski <karol.wachowski@intel.com>
>
> Move the ivpu_mmu_discard_events() function to the common portion of
> the abort work function. This ensures it is called only once, even if
> there are no faulty contexts in context_xa, to guarantee that MMU events
> are discarded and new events are not missed.
>
> Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms
2025-01-29 12:56 ` [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms Jacek Lawrynowicz
@ 2025-01-31 18:47 ` Jeffrey Hugo
2025-02-03 9:26 ` Jacek Lawrynowicz
0 siblings, 1 reply; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:47 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel
Cc: oded.gabbay, maciej.falkowski, Karol Wachowski
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> From: Karol Wachowski <karol.wachowski@intel.com>
>
> Hardware scheduling (HWS) is supposed to be supported on all existing
> platform with recent FW including pre-silicon ones. Turn on HWS by
> default.
Is there released firmware which does not have this enabled/supported?
Should this be a "on by default, if FW VER > X"?
-Jeff
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga
2025-01-29 12:56 ` [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga Jacek Lawrynowicz
@ 2025-01-31 18:48 ` Jeffrey Hugo
0 siblings, 0 replies; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:48 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel
Cc: oded.gabbay, maciej.falkowski, Tomasz Rusinowicz
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> From: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
>
> Recovery now works on fpga. JSM state dump timeout needs to
> be really long for the new fpga model releases.
>
> Enable punit on fpga.
>
> Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq
2025-01-29 12:56 ` [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq Jacek Lawrynowicz
@ 2025-01-31 18:50 ` Jeffrey Hugo
0 siblings, 0 replies; 16+ messages in thread
From: Jeffrey Hugo @ 2025-01-31 18:50 UTC (permalink / raw)
To: Jacek Lawrynowicz, dri-devel
Cc: oded.gabbay, maciej.falkowski, Karol Wachowski
On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
> From: Karol Wachowski <karol.wachowski@intel.com>
>
> Recovery work doesn't need to be bound to any specific CPU, so move it
> to unbound workqueue to improve execution time and system latency.
>
> Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms
2025-01-31 18:47 ` Jeffrey Hugo
@ 2025-02-03 9:26 ` Jacek Lawrynowicz
0 siblings, 0 replies; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-02-03 9:26 UTC (permalink / raw)
To: Jeffrey Hugo, dri-devel; +Cc: oded.gabbay, maciej.falkowski, Karol Wachowski
Yes, and there is a check for the FW version in ivpu_fw_sched_mode_select() that verifies this.
It is just above the changed lines from this patch.
On 1/31/2025 7:47 PM, Jeffrey Hugo wrote:
> On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
>> From: Karol Wachowski <karol.wachowski@intel.com>
>>
>> Hardware scheduling (HWS) is supposed to be supported on all existing
>> platform with recent FW including pre-silicon ones. Turn on HWS by
>> default.
>
> Is there released firmware which does not have this enabled/supported? Should this be a "on by default, if FW VER > X"?
>
> -Jeff
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/6] accel/ivpu: Add support for hardware fault injection
2025-01-31 18:41 ` Jeffrey Hugo
@ 2025-02-03 9:41 ` Jacek Lawrynowicz
0 siblings, 0 replies; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-02-03 9:41 UTC (permalink / raw)
To: Jeffrey Hugo, dri-devel; +Cc: oded.gabbay, maciej.falkowski
On 1/31/2025 7:41 PM, Jeffrey Hugo wrote:
> On 1/29/2025 5:56 AM, Jacek Lawrynowicz wrote:
>> This commit introduces the capability to simulate hardware faults for
>
> Nit - "This commit" is redundant.
Sure, removed.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 0/6] accel/ivpu: Changes for 6.15
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
` (5 preceding siblings ...)
2025-01-29 12:56 ` [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq Jacek Lawrynowicz
@ 2025-02-03 9:42 ` Jacek Lawrynowicz
6 siblings, 0 replies; 16+ messages in thread
From: Jacek Lawrynowicz @ 2025-02-03 9:42 UTC (permalink / raw)
To: dri-devel; +Cc: oded.gabbay, quic_jhugo, maciej.falkowski
Applied to drm-misc-next
On 1/29/2025 1:56 PM, Jacek Lawrynowicz wrote:
> Most notable is the addition of hardware fault injection support which allows
> to test error handling paths in the driver.
>
> Jacek Lawrynowicz (2):
> accel/ivpu: Add support for hardware fault injection
> accel/ivpu: Update last_busy in IRQ handler
>
> Karol Wachowski (3):
> accel/ivpu: Fix missing MMU events if file_priv is unbound
> accel/ivpu: Turn on HWS by default on all platforms
> accel/ivpu: Move recovery work to system_unbound_wq
>
> Tomasz Rusinowicz (1):
> accel/ivpu: Enable recovery and adjust timeouts for fpga
>
> drivers/accel/ivpu/ivpu_debugfs.c | 5 +++
> drivers/accel/ivpu/ivpu_fw.c | 10 +----
> drivers/accel/ivpu/ivpu_hw.c | 33 +++++++++++----
> drivers/accel/ivpu/ivpu_hw_ip.c | 4 +-
> drivers/accel/ivpu/ivpu_hw_reg_io.h | 64 +++++++++++++++++------------
> drivers/accel/ivpu/ivpu_job.c | 8 +++-
> drivers/accel/ivpu/ivpu_pm.c | 7 +---
> 7 files changed, 78 insertions(+), 53 deletions(-)
>
> --
> 2.45.1
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2025-02-03 9:42 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-01-29 12:56 [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 1/6] accel/ivpu: Add support for hardware fault injection Jacek Lawrynowicz
2025-01-31 18:41 ` Jeffrey Hugo
2025-02-03 9:41 ` Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 2/6] accel/ivpu: Update last_busy in IRQ handler Jacek Lawrynowicz
2025-01-31 18:43 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 3/6] accel/ivpu: Fix missing MMU events if file_priv is unbound Jacek Lawrynowicz
2025-01-31 18:45 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 4/6] accel/ivpu: Turn on HWS by default on all platforms Jacek Lawrynowicz
2025-01-31 18:47 ` Jeffrey Hugo
2025-02-03 9:26 ` Jacek Lawrynowicz
2025-01-29 12:56 ` [PATCH 5/6] accel/ivpu: Enable recovery and adjust timeouts for fpga Jacek Lawrynowicz
2025-01-31 18:48 ` Jeffrey Hugo
2025-01-29 12:56 ` [PATCH 6/6] accel/ivpu: Move recovery work to system_unbound_wq Jacek Lawrynowicz
2025-01-31 18:50 ` Jeffrey Hugo
2025-02-03 9:42 ` [PATCH 0/6] accel/ivpu: Changes for 6.15 Jacek Lawrynowicz
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.