* [PATCH v3 1/7] drm/panthor: Make panthor_irq::state a non-atomic field
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 2/7] drm/panthor: Move the register accessors before the IRQ helpers Boris Brezillon
` (5 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
The only place where panthor_irq::state is accessed without
panthor_irq::mask_lock held is in the epilogue of _irq_suspend(),
which is not really a fast-path. So let's simplify things by assuming
panthor_irq::state must always be accessed with the mask_lock held,
and add a scoped_guard() in _irq_suspend().
While at it, rename the lock so it's clear it doesn't just protect
access to the panthor_irq::mask or the INT_MASK register.
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_device.h | 53 ++++++++++++++++----------------
1 file changed, 26 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 35679bfa1f3a..c4a03ac0812e 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -92,17 +92,21 @@ struct panthor_irq {
u32 mask;
/**
- * @mask_lock: protects modifications to _INT_MASK and @mask.
+ * @lock: protects modifications to _INT_MASK, @mask and @state.
*
* In paths where _INT_MASK is updated based on a state
* transition/check, it's crucial for the state update/check to be
* inside the locked section, otherwise it introduces a race window
* leading to potential _INT_MASK inconsistencies.
*/
- spinlock_t mask_lock;
+ spinlock_t lock;
- /** @state: one of &enum panthor_irq_state reflecting the current state. */
- atomic_t state;
+ /**
+ * @state: one of &enum panthor_irq_state reflecting the current state.
+ *
+ * Must be accessed with lock held.
+ */
+ enum panthor_irq_state state;
};
/**
@@ -510,18 +514,15 @@ const char *panthor_exception_name(struct panthor_device *ptdev,
static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \
{ \
struct panthor_irq *pirq = data; \
- enum panthor_irq_state old_state; \
\
if (!gpu_read(pirq->iomem, INT_STAT)) \
return IRQ_NONE; \
\
- guard(spinlock_irqsave)(&pirq->mask_lock); \
- old_state = atomic_cmpxchg(&pirq->state, \
- PANTHOR_IRQ_STATE_ACTIVE, \
- PANTHOR_IRQ_STATE_PROCESSING); \
- if (old_state != PANTHOR_IRQ_STATE_ACTIVE) \
+ guard(spinlock_irqsave)(&pirq->lock); \
+ if (pirq->state != PANTHOR_IRQ_STATE_ACTIVE) \
return IRQ_NONE; \
\
+ pirq->state = PANTHOR_IRQ_STATE_PROCESSING; \
gpu_write(pirq->iomem, INT_MASK, 0); \
return IRQ_WAKE_THREAD; \
} \
@@ -550,14 +551,11 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
ret = IRQ_HANDLED; \
} \
\
- scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \
- enum panthor_irq_state old_state; \
- \
- old_state = atomic_cmpxchg(&pirq->state, \
- PANTHOR_IRQ_STATE_PROCESSING, \
- PANTHOR_IRQ_STATE_ACTIVE); \
- if (old_state == PANTHOR_IRQ_STATE_PROCESSING) \
+ scoped_guard(spinlock_irqsave, &pirq->lock) { \
+ if (pirq->state == PANTHOR_IRQ_STATE_PROCESSING) { \
+ pirq->state = PANTHOR_IRQ_STATE_ACTIVE; \
gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
+ } \
} \
\
return ret; \
@@ -565,19 +563,20 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
\
static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \
{ \
- scoped_guard(spinlock_irqsave, &pirq->mask_lock) { \
- atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDING); \
+ scoped_guard(spinlock_irqsave, &pirq->lock) { \
+ pirq->state = PANTHOR_IRQ_STATE_SUSPENDING; \
gpu_write(pirq->iomem, INT_MASK, 0); \
} \
synchronize_irq(pirq->irq); \
- atomic_set(&pirq->state, PANTHOR_IRQ_STATE_SUSPENDED); \
+ scoped_guard(spinlock_irqsave, &pirq->lock) \
+ pirq->state = PANTHOR_IRQ_STATE_SUSPENDED; \
} \
\
static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq) \
{ \
- guard(spinlock_irqsave)(&pirq->mask_lock); \
+ guard(spinlock_irqsave)(&pirq->lock); \
\
- atomic_set(&pirq->state, PANTHOR_IRQ_STATE_ACTIVE); \
+ pirq->state = PANTHOR_IRQ_STATE_ACTIVE; \
gpu_write(pirq->iomem, INT_CLEAR, pirq->mask); \
gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
} \
@@ -590,7 +589,7 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
pirq->irq = irq; \
pirq->mask = mask; \
pirq->iomem = iomem; \
- spin_lock_init(&pirq->mask_lock); \
+ spin_lock_init(&pirq->lock); \
panthor_ ## __name ## _irq_resume(pirq); \
\
return devm_request_threaded_irq(ptdev->base.dev, irq, \
@@ -602,7 +601,7 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
\
static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask) \
{ \
- guard(spinlock_irqsave)(&pirq->mask_lock); \
+ guard(spinlock_irqsave)(&pirq->lock); \
pirq->mask |= mask; \
\
/* The only situation where we need to write the new mask is if the IRQ is active. \
@@ -610,13 +609,13 @@ static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *
* on the PROCESSING -> ACTIVE transition. \
* If the IRQ is suspended/suspending, the mask is restored at resume time. \
*/ \
- if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \
+ if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE) \
gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
} \
\
static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\
{ \
- guard(spinlock_irqsave)(&pirq->mask_lock); \
+ guard(spinlock_irqsave)(&pirq->lock); \
pirq->mask &= ~mask; \
\
/* The only situation where we need to write the new mask is if the IRQ is active. \
@@ -624,7 +623,7 @@ static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq
* on the PROCESSING -> ACTIVE transition. \
* If the IRQ is suspended/suspending, the mask is restored at resume time. \
*/ \
- if (atomic_read(&pirq->state) == PANTHOR_IRQ_STATE_ACTIVE) \
+ if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE) \
gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
}
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 2/7] drm/panthor: Move the register accessors before the IRQ helpers
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 1/7] drm/panthor: Make panthor_irq::state a non-atomic field Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 3/7] drm/panthor: Replace the panthor_irq macro machinery by inline helpers Boris Brezillon
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
We're about to add an IRQ inline helper using gpu_read(). Move things
around to avoid forward declarations.
No functional changes.
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_device.h | 142 +++++++++++++++----------------
1 file changed, 71 insertions(+), 71 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index c4a03ac0812e..a7fcfc2e7899 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -495,6 +495,77 @@ panthor_exception_is_fault(u32 exception_code)
const char *panthor_exception_name(struct panthor_device *ptdev,
u32 exception_code);
+static inline void gpu_write(void __iomem *iomem, u32 reg, u32 data)
+{
+ writel(data, iomem + reg);
+}
+
+static inline u32 gpu_read(void __iomem *iomem, u32 reg)
+{
+ return readl(iomem + reg);
+}
+
+static inline u32 gpu_read_relaxed(void __iomem *iomem, u32 reg)
+{
+ return readl_relaxed(iomem + reg);
+}
+
+static inline void gpu_write64(void __iomem *iomem, u32 reg, u64 data)
+{
+ gpu_write(iomem, reg, lower_32_bits(data));
+ gpu_write(iomem, reg + 4, upper_32_bits(data));
+}
+
+static inline u64 gpu_read64(void __iomem *iomem, u32 reg)
+{
+ return (gpu_read(iomem, reg) | ((u64)gpu_read(iomem, reg + 4) << 32));
+}
+
+static inline u64 gpu_read64_relaxed(void __iomem *iomem, u32 reg)
+{
+ return (gpu_read_relaxed(iomem, reg) |
+ ((u64)gpu_read_relaxed(iomem, reg + 4) << 32));
+}
+
+static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg)
+{
+ u32 lo, hi1, hi2;
+ do {
+ hi1 = gpu_read(iomem, reg + 4);
+ lo = gpu_read(iomem, reg);
+ hi2 = gpu_read(iomem, reg + 4);
+ } while (hi1 != hi2);
+ return lo | ((u64)hi2 << 32);
+}
+
+#define gpu_read_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \
+ read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
+ iomem, reg)
+
+#define gpu_read_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
+ timeout_us) \
+ read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
+ false, iomem, reg)
+
+#define gpu_read64_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \
+ read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
+ iomem, reg)
+
+#define gpu_read64_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
+ timeout_us) \
+ read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
+ false, iomem, reg)
+
+#define gpu_read_relaxed_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
+ timeout_us) \
+ read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \
+ timeout_us, false, iomem, reg)
+
+#define gpu_read64_relaxed_poll_timeout(iomem, reg, val, cond, delay_us, \
+ timeout_us) \
+ read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
+ false, iomem, reg)
+
#define INT_RAWSTAT 0x0
#define INT_CLEAR 0x4
#define INT_MASK 0x8
@@ -629,75 +700,4 @@ static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq
extern struct workqueue_struct *panthor_cleanup_wq;
-static inline void gpu_write(void __iomem *iomem, u32 reg, u32 data)
-{
- writel(data, iomem + reg);
-}
-
-static inline u32 gpu_read(void __iomem *iomem, u32 reg)
-{
- return readl(iomem + reg);
-}
-
-static inline u32 gpu_read_relaxed(void __iomem *iomem, u32 reg)
-{
- return readl_relaxed(iomem + reg);
-}
-
-static inline void gpu_write64(void __iomem *iomem, u32 reg, u64 data)
-{
- gpu_write(iomem, reg, lower_32_bits(data));
- gpu_write(iomem, reg + 4, upper_32_bits(data));
-}
-
-static inline u64 gpu_read64(void __iomem *iomem, u32 reg)
-{
- return (gpu_read(iomem, reg) | ((u64)gpu_read(iomem, reg + 4) << 32));
-}
-
-static inline u64 gpu_read64_relaxed(void __iomem *iomem, u32 reg)
-{
- return (gpu_read_relaxed(iomem, reg) |
- ((u64)gpu_read_relaxed(iomem, reg + 4) << 32));
-}
-
-static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg)
-{
- u32 lo, hi1, hi2;
- do {
- hi1 = gpu_read(iomem, reg + 4);
- lo = gpu_read(iomem, reg);
- hi2 = gpu_read(iomem, reg + 4);
- } while (hi1 != hi2);
- return lo | ((u64)hi2 << 32);
-}
-
-#define gpu_read_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \
- read_poll_timeout(gpu_read, val, cond, delay_us, timeout_us, false, \
- iomem, reg)
-
-#define gpu_read_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
- timeout_us) \
- read_poll_timeout_atomic(gpu_read, val, cond, delay_us, timeout_us, \
- false, iomem, reg)
-
-#define gpu_read64_poll_timeout(iomem, reg, val, cond, delay_us, timeout_us) \
- read_poll_timeout(gpu_read64, val, cond, delay_us, timeout_us, false, \
- iomem, reg)
-
-#define gpu_read64_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
- timeout_us) \
- read_poll_timeout_atomic(gpu_read64, val, cond, delay_us, timeout_us, \
- false, iomem, reg)
-
-#define gpu_read_relaxed_poll_timeout_atomic(iomem, reg, val, cond, delay_us, \
- timeout_us) \
- read_poll_timeout_atomic(gpu_read_relaxed, val, cond, delay_us, \
- timeout_us, false, iomem, reg)
-
-#define gpu_read64_relaxed_poll_timeout(iomem, reg, val, cond, delay_us, \
- timeout_us) \
- read_poll_timeout(gpu_read64_relaxed, val, cond, delay_us, timeout_us, \
- false, iomem, reg)
-
#endif
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 3/7] drm/panthor: Replace the panthor_irq macro machinery by inline helpers
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 1/7] drm/panthor: Make panthor_irq::state a non-atomic field Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 2/7] drm/panthor: Move the register accessors before the IRQ helpers Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 13:02 ` sashiko-bot
2026-06-23 12:51 ` [PATCH v3 4/7] drm/panthor: Protect events processing with a separate spinlock Boris Brezillon
` (3 subsequent siblings)
6 siblings, 1 reply; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
Now that panthor_irq contains the iomem region, there's no real need
for the macro-based panthor_irq helper generation logic. We can just
provide inline helpers that do the same and let the compiler optimize
indirect function calls. The only extra annoyance is the fact we have
to open-code the panthor_xxx_irq_threaded_handler() implementation, but
those are single-line functions, so it's acceptable.
While at it, change the prototype of the IRQ handlers to take a
panthor_irq instead of a panthor_device, since that's the object
the IRQ helpers pass around, and the panthor_device can be directly
extracted from it.
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_device.h | 245 +++++++++++++++----------------
drivers/gpu/drm/panthor/panthor_fw.c | 22 ++-
drivers/gpu/drm/panthor/panthor_gpu.c | 26 ++--
drivers/gpu/drm/panthor/panthor_mmu.c | 37 ++---
drivers/gpu/drm/panthor/panthor_pwr.c | 20 ++-
5 files changed, 183 insertions(+), 167 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index a7fcfc2e7899..34831d036328 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -571,131 +571,126 @@ static inline u64 gpu_read64_counter(void __iomem *iomem, u32 reg)
#define INT_MASK 0x8
#define INT_STAT 0xc
-/**
- * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
- * registration function.
- *
- * The boiler-plate to gracefully deal with shared interrupts is
- * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
- * just after the actual handler. The handler prototype is:
- *
- * void (*handler)(struct panthor_device *, u32 status);
- */
-#define PANTHOR_IRQ_HANDLER(__name, __handler) \
-static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \
-{ \
- struct panthor_irq *pirq = data; \
- \
- if (!gpu_read(pirq->iomem, INT_STAT)) \
- return IRQ_NONE; \
- \
- guard(spinlock_irqsave)(&pirq->lock); \
- if (pirq->state != PANTHOR_IRQ_STATE_ACTIVE) \
- return IRQ_NONE; \
- \
- pirq->state = PANTHOR_IRQ_STATE_PROCESSING; \
- gpu_write(pirq->iomem, INT_MASK, 0); \
- return IRQ_WAKE_THREAD; \
-} \
- \
-static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \
-{ \
- struct panthor_irq *pirq = data; \
- struct panthor_device *ptdev = pirq->ptdev; \
- irqreturn_t ret = IRQ_NONE; \
- \
- while (true) { \
- /* It's safe to access pirq->mask without the lock held here. If a new \
- * event gets added to the mask and the corresponding IRQ is pending, \
- * we'll process it right away instead of adding an extra raw -> threaded \
- * round trip. If an event is removed and the status bit is set, it will \
- * be ignored, just like it would have been if the mask had been adjusted \
- * right before the HW event kicks in. TLDR; it's all expected races we're \
- * covered for. \
- */ \
- u32 status = gpu_read(pirq->iomem, INT_RAWSTAT) & pirq->mask; \
- \
- if (!status) \
- break; \
- \
- __handler(ptdev, status); \
- ret = IRQ_HANDLED; \
- } \
- \
- scoped_guard(spinlock_irqsave, &pirq->lock) { \
- if (pirq->state == PANTHOR_IRQ_STATE_PROCESSING) { \
- pirq->state = PANTHOR_IRQ_STATE_ACTIVE; \
- gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
- } \
- } \
- \
- return ret; \
-} \
- \
-static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \
-{ \
- scoped_guard(spinlock_irqsave, &pirq->lock) { \
- pirq->state = PANTHOR_IRQ_STATE_SUSPENDING; \
- gpu_write(pirq->iomem, INT_MASK, 0); \
- } \
- synchronize_irq(pirq->irq); \
- scoped_guard(spinlock_irqsave, &pirq->lock) \
- pirq->state = PANTHOR_IRQ_STATE_SUSPENDED; \
-} \
- \
-static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq) \
-{ \
- guard(spinlock_irqsave)(&pirq->lock); \
- \
- pirq->state = PANTHOR_IRQ_STATE_ACTIVE; \
- gpu_write(pirq->iomem, INT_CLEAR, pirq->mask); \
- gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
-} \
- \
-static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
- struct panthor_irq *pirq, \
- int irq, u32 mask, void __iomem *iomem) \
-{ \
- pirq->ptdev = ptdev; \
- pirq->irq = irq; \
- pirq->mask = mask; \
- pirq->iomem = iomem; \
- spin_lock_init(&pirq->lock); \
- panthor_ ## __name ## _irq_resume(pirq); \
- \
- return devm_request_threaded_irq(ptdev->base.dev, irq, \
- panthor_ ## __name ## _irq_raw_handler, \
- panthor_ ## __name ## _irq_threaded_handler, \
- IRQF_SHARED, KBUILD_MODNAME "-" # __name, \
- pirq); \
-} \
- \
-static inline void panthor_ ## __name ## _irq_enable_events(struct panthor_irq *pirq, u32 mask) \
-{ \
- guard(spinlock_irqsave)(&pirq->lock); \
- pirq->mask |= mask; \
- \
- /* The only situation where we need to write the new mask is if the IRQ is active. \
- * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \
- * on the PROCESSING -> ACTIVE transition. \
- * If the IRQ is suspended/suspending, the mask is restored at resume time. \
- */ \
- if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE) \
- gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
-} \
- \
-static inline void panthor_ ## __name ## _irq_disable_events(struct panthor_irq *pirq, u32 mask)\
-{ \
- guard(spinlock_irqsave)(&pirq->lock); \
- pirq->mask &= ~mask; \
- \
- /* The only situation where we need to write the new mask is if the IRQ is active. \
- * If it's being processed, the mask will be restored for us in _irq_threaded_handler() \
- * on the PROCESSING -> ACTIVE transition. \
- * If the IRQ is suspended/suspending, the mask is restored at resume time. \
- */ \
- if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE) \
- gpu_write(pirq->iomem, INT_MASK, pirq->mask); \
+static inline irqreturn_t panthor_irq_default_raw_handler(int irq, void *data)
+{
+ struct panthor_irq *pirq = data;
+
+ if (!gpu_read(pirq->iomem, INT_STAT))
+ return IRQ_NONE;
+
+ guard(spinlock_irqsave)(&pirq->lock);
+ if (pirq->state != PANTHOR_IRQ_STATE_ACTIVE)
+ return IRQ_NONE;
+
+ pirq->state = PANTHOR_IRQ_STATE_PROCESSING;
+ gpu_write(pirq->iomem, INT_MASK, 0);
+ return IRQ_WAKE_THREAD;
+}
+
+static __always_inline irqreturn_t
+panthor_irq_default_threaded_handler(void *data,
+ void (*slow_handler)(struct panthor_irq *, u32))
+{
+ struct panthor_irq *pirq = data;
+ irqreturn_t ret = IRQ_NONE;
+
+ while (true) {
+ /* It's safe to access pirq->mask without the lock held here. If a new
+ * event gets added to the mask and the corresponding IRQ is pending,
+ * we'll process it right away instead of adding an extra raw -> threaded
+ * round trip. If an event is removed and the status bit is set, it will
+ * be ignored, just like it would have been if the mask had been adjusted
+ * right before the HW event kicks in. TLDR; it's all expected races we're
+ * covered for.
+ */
+ u32 status = gpu_read(pirq->iomem, INT_RAWSTAT) & pirq->mask;
+
+ if (!status)
+ break;
+
+ slow_handler(pirq, status);
+ ret = IRQ_HANDLED;
+ }
+
+ scoped_guard(spinlock_irqsave, &pirq->lock) {
+ if (pirq->state == PANTHOR_IRQ_STATE_PROCESSING) {
+ pirq->state = PANTHOR_IRQ_STATE_ACTIVE;
+ gpu_write(pirq->iomem, INT_MASK, pirq->mask);
+ }
+ }
+
+ return ret;
+}
+
+static inline void panthor_irq_suspend(struct panthor_irq *pirq)
+{
+ scoped_guard(spinlock_irqsave, &pirq->lock) {
+ pirq->state = PANTHOR_IRQ_STATE_SUSPENDING;
+ gpu_write(pirq->iomem, INT_MASK, 0);
+ }
+ synchronize_irq(pirq->irq);
+ scoped_guard(spinlock_irqsave, &pirq->lock)
+ pirq->state = PANTHOR_IRQ_STATE_SUSPENDED;
+}
+
+static inline void panthor_irq_resume(struct panthor_irq *pirq)
+{
+ guard(spinlock_irqsave)(&pirq->lock);
+ pirq->state = PANTHOR_IRQ_STATE_ACTIVE;
+ gpu_write(pirq->iomem, INT_CLEAR, pirq->mask);
+ gpu_write(pirq->iomem, INT_MASK, pirq->mask);
+}
+
+static inline void panthor_irq_enable_events(struct panthor_irq *pirq, u32 mask)
+{
+ guard(spinlock_irqsave)(&pirq->lock);
+ pirq->mask |= mask;
+
+ /* The only situation where we need to write the new mask is if the IRQ is active.
+ * If it's being processed, the mask will be restored for us in _irq_threaded_handler()
+ * on the PROCESSING -> ACTIVE transition.
+ * If the IRQ is suspended/suspending, the mask is restored at resume time.
+ */
+ if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE)
+ gpu_write(pirq->iomem, INT_MASK, pirq->mask);
+}
+
+static inline void panthor_irq_disable_events(struct panthor_irq *pirq, u32 mask)
+{
+ guard(spinlock_irqsave)(&pirq->lock);
+ pirq->mask &= ~mask;
+
+ /* The only situation where we need to write the new mask is if the IRQ is active.
+ * If it's being processed, the mask will be restored for us in _irq_threaded_handler()
+ * on the PROCESSING -> ACTIVE transition.
+ * If the IRQ is suspended/suspending, the mask is restored at resume time.
+ */
+ if (pirq->state == PANTHOR_IRQ_STATE_ACTIVE)
+ gpu_write(pirq->iomem, INT_MASK, pirq->mask);
+}
+
+static inline int
+panthor_irq_request(struct panthor_device *ptdev, struct panthor_irq *pirq,
+ int irq, u32 mask, void __iomem *iomem, const char *name,
+ irqreturn_t (*threaded_handler)(int, void *data))
+{
+ const char *full_name;
+
+ pirq->ptdev = ptdev;
+ pirq->irq = irq;
+ pirq->mask = mask;
+ pirq->iomem = iomem;
+ spin_lock_init(&pirq->lock);
+
+ full_name = devm_kasprintf(ptdev->base.dev, GFP_KERNEL, KBUILD_MODNAME "-%s", name);
+ if (!full_name)
+ return -ENOMEM;
+
+ panthor_irq_resume(pirq);
+ return devm_request_threaded_irq(ptdev->base.dev, irq,
+ panthor_irq_default_raw_handler,
+ threaded_handler,
+ IRQF_SHARED, full_name, pirq);
}
extern struct workqueue_struct *panthor_cleanup_wq;
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index 986151681b24..eaf599b0a887 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1064,8 +1064,9 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
msecs_to_jiffies(PING_INTERVAL_MS));
}
-static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
+static void panthor_job_irq_handler(struct panthor_irq *pirq, u32 status)
{
+ struct panthor_device *ptdev = pirq->ptdev;
u32 duration;
u64 start = 0;
@@ -1091,7 +1092,11 @@ static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
trace_gpu_job_irq(ptdev->base.dev, status, duration);
}
}
-PANTHOR_IRQ_HANDLER(job, panthor_job_irq_handler);
+
+static irqreturn_t panthor_job_irq_threaded_handler(int irq, void *data)
+{
+ return panthor_irq_default_threaded_handler(data, panthor_job_irq_handler);
+}
static int panthor_fw_start(struct panthor_device *ptdev)
{
@@ -1099,8 +1104,8 @@ static int panthor_fw_start(struct panthor_device *ptdev)
bool timedout = false;
ptdev->fw->booted = false;
- panthor_job_irq_enable_events(&ptdev->fw->irq, ~0);
- panthor_job_irq_resume(&ptdev->fw->irq);
+ panthor_irq_enable_events(&ptdev->fw->irq, ~0);
+ panthor_irq_resume(&ptdev->fw->irq);
gpu_write(fw->iomem, MCU_CONTROL, MCU_CONTROL_AUTO);
if (!wait_event_timeout(ptdev->fw->req_waitqueue,
@@ -1210,7 +1215,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
ptdev->reset.fast = true;
}
- panthor_job_irq_suspend(&ptdev->fw->irq);
+ panthor_irq_suspend(&ptdev->fw->irq);
panthor_fw_stop(ptdev);
}
@@ -1280,7 +1285,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev)
if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) {
/* Make sure the IRQ handler cannot be called after that point. */
if (ptdev->fw->irq.irq)
- panthor_job_irq_suspend(&ptdev->fw->irq);
+ panthor_irq_suspend(&ptdev->fw->irq);
panthor_fw_stop(ptdev);
}
@@ -1476,8 +1481,9 @@ int panthor_fw_init(struct panthor_device *ptdev)
if (irq <= 0)
return -ENODEV;
- ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0,
- ptdev->iomem + JOB_INT_BASE);
+ ret = panthor_irq_request(ptdev, &fw->irq, irq, 0,
+ ptdev->iomem + JOB_INT_BASE, "job",
+ panthor_job_irq_threaded_handler);
if (ret) {
drm_err(&ptdev->base, "failed to request job irq");
return ret;
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index e52c5675981f..ce208e384762 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -86,8 +86,9 @@ static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
gpu_write(gpu->iomem, GPU_L2_CONFIG, l2_config);
}
-static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
+static void panthor_gpu_irq_handler(struct panthor_irq *pirq, u32 status)
{
+ struct panthor_device *ptdev = pirq->ptdev;
struct panthor_gpu *gpu = ptdev->gpu;
gpu_write(gpu->irq.iomem, INT_CLEAR, status);
@@ -116,7 +117,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
}
spin_unlock(&ptdev->gpu->reqs_lock);
}
-PANTHOR_IRQ_HANDLER(gpu, panthor_gpu_irq_handler);
+
+static irqreturn_t panthor_gpu_irq_threaded_handler(int irq, void *data)
+{
+ return panthor_irq_default_threaded_handler(data, panthor_gpu_irq_handler);
+}
/**
* panthor_gpu_unplug() - Called when the GPU is unplugged.
@@ -128,7 +133,7 @@ void panthor_gpu_unplug(struct panthor_device *ptdev)
/* Make sure the IRQ handler is not running after that point. */
if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
- panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+ panthor_irq_suspend(&ptdev->gpu->irq);
/* Wake-up all waiters. */
spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
@@ -169,9 +174,10 @@ int panthor_gpu_init(struct panthor_device *ptdev)
if (irq < 0)
return irq;
- ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq,
- GPU_INTERRUPTS_MASK,
- ptdev->iomem + GPU_INT_BASE);
+ ret = panthor_irq_request(ptdev, &ptdev->gpu->irq, irq,
+ GPU_INTERRUPTS_MASK,
+ ptdev->iomem + GPU_INT_BASE, "gpu",
+ panthor_gpu_irq_threaded_handler);
if (ret)
return ret;
@@ -182,7 +188,7 @@ int panthor_gpu_power_changed_on(struct panthor_device *ptdev)
{
guard(pm_runtime_active)(ptdev->base.dev);
- panthor_gpu_irq_enable_events(&ptdev->gpu->irq, GPU_POWER_INTERRUPTS_MASK);
+ panthor_irq_enable_events(&ptdev->gpu->irq, GPU_POWER_INTERRUPTS_MASK);
return 0;
}
@@ -191,7 +197,7 @@ void panthor_gpu_power_changed_off(struct panthor_device *ptdev)
{
guard(pm_runtime_active)(ptdev->base.dev);
- panthor_gpu_irq_disable_events(&ptdev->gpu->irq, GPU_POWER_INTERRUPTS_MASK);
+ panthor_irq_disable_events(&ptdev->gpu->irq, GPU_POWER_INTERRUPTS_MASK);
}
/**
@@ -424,7 +430,7 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
else
panthor_hw_l2_power_off(ptdev);
- panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+ panthor_irq_suspend(&ptdev->gpu->irq);
}
/**
@@ -436,7 +442,7 @@ void panthor_gpu_suspend(struct panthor_device *ptdev)
*/
void panthor_gpu_resume(struct panthor_device *ptdev)
{
- panthor_gpu_irq_resume(&ptdev->gpu->irq);
+ panthor_irq_resume(&ptdev->gpu->irq);
panthor_hw_l2_power_on(ptdev);
}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 31cc57029c12..5e735995f80d 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -598,17 +598,13 @@ static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as)
return BIT(as);
}
-/* Forward declaration to call helpers within as_enable/disable */
-static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status);
-PANTHOR_IRQ_HANDLER(mmu, panthor_mmu_irq_handler);
-
static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
u64 transtab, u64 transcfg, u64 memattr)
{
struct panthor_mmu *mmu = ptdev->mmu;
- panthor_mmu_irq_enable_events(&ptdev->mmu->irq,
- panthor_mmu_as_fault_mask(ptdev, as_nr));
+ panthor_irq_enable_events(&ptdev->mmu->irq,
+ panthor_mmu_as_fault_mask(ptdev, as_nr));
gpu_write64(mmu->iomem, AS_TRANSTAB(as_nr), transtab);
gpu_write64(mmu->iomem, AS_MEMATTR(as_nr), memattr);
@@ -626,8 +622,8 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr,
lockdep_assert_held(&ptdev->mmu->as.slots_lock);
- panthor_mmu_irq_disable_events(&ptdev->mmu->irq,
- panthor_mmu_as_fault_mask(ptdev, as_nr));
+ panthor_irq_disable_events(&ptdev->mmu->irq,
+ panthor_mmu_as_fault_mask(ptdev, as_nr));
/* Flush+invalidate RW caches, invalidate RO ones. */
ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV,
@@ -1857,8 +1853,9 @@ static void panthor_vm_unlock_region(struct panthor_vm *vm)
mutex_unlock(&ptdev->mmu->as.slots_lock);
}
-static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
+static void panthor_mmu_irq_handler(struct panthor_irq *pirq, u32 status)
{
+ struct panthor_device *ptdev = pirq->ptdev;
struct panthor_mmu *mmu = ptdev->mmu;
bool has_unhandled_faults = false;
@@ -1921,6 +1918,11 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
panthor_sched_report_mmu_fault(ptdev);
}
+static irqreturn_t panthor_mmu_irq_threaded_handler(int irq, void *data)
+{
+ return panthor_irq_default_threaded_handler(data, panthor_mmu_irq_handler);
+}
+
/**
* panthor_mmu_suspend() - Suspend the MMU logic
* @ptdev: Device.
@@ -1945,7 +1947,7 @@ void panthor_mmu_suspend(struct panthor_device *ptdev)
}
mutex_unlock(&ptdev->mmu->as.slots_lock);
- panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+ panthor_irq_suspend(&ptdev->mmu->irq);
}
/**
@@ -1964,7 +1966,7 @@ void panthor_mmu_resume(struct panthor_device *ptdev)
ptdev->mmu->as.faulty_mask = 0;
mutex_unlock(&ptdev->mmu->as.slots_lock);
- panthor_mmu_irq_resume(&ptdev->mmu->irq);
+ panthor_irq_resume(&ptdev->mmu->irq);
}
/**
@@ -1981,7 +1983,7 @@ void panthor_mmu_pre_reset(struct panthor_device *ptdev)
{
struct panthor_vm *vm;
- panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+ panthor_irq_suspend(&ptdev->mmu->irq);
mutex_lock(&ptdev->mmu->vm.lock);
ptdev->mmu->vm.reset_in_progress = true;
@@ -2018,7 +2020,7 @@ void panthor_mmu_post_reset(struct panthor_device *ptdev)
mutex_unlock(&ptdev->mmu->as.slots_lock);
- panthor_mmu_irq_resume(&ptdev->mmu->irq);
+ panthor_irq_resume(&ptdev->mmu->irq);
/* Restart the VM_BIND queues. */
mutex_lock(&ptdev->mmu->vm.lock);
@@ -3344,7 +3346,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
void panthor_mmu_unplug(struct panthor_device *ptdev)
{
if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
- panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+ panthor_irq_suspend(&ptdev->mmu->irq);
mutex_lock(&ptdev->mmu->as.slots_lock);
for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
@@ -3405,9 +3407,10 @@ int panthor_mmu_init(struct panthor_device *ptdev)
if (irq <= 0)
return -ENODEV;
- ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq,
- panthor_mmu_fault_mask(ptdev, ~0),
- ptdev->iomem + MMU_INT_BASE);
+ ret = panthor_irq_request(ptdev, &mmu->irq, irq,
+ panthor_mmu_fault_mask(ptdev, ~0),
+ ptdev->iomem + MMU_INT_BASE, "mmu",
+ panthor_mmu_irq_threaded_handler);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c
index 7c7f424a1436..80cf78007896 100644
--- a/drivers/gpu/drm/panthor/panthor_pwr.c
+++ b/drivers/gpu/drm/panthor/panthor_pwr.c
@@ -56,8 +56,9 @@ struct panthor_pwr {
wait_queue_head_t reqs_acked;
};
-static void panthor_pwr_irq_handler(struct panthor_device *ptdev, u32 status)
+static void panthor_pwr_irq_handler(struct panthor_irq *pirq, u32 status)
{
+ struct panthor_device *ptdev = pirq->ptdev;
struct panthor_pwr *pwr = ptdev->pwr;
spin_lock(&ptdev->pwr->reqs_lock);
@@ -75,7 +76,11 @@ static void panthor_pwr_irq_handler(struct panthor_device *ptdev, u32 status)
}
spin_unlock(&ptdev->pwr->reqs_lock);
}
-PANTHOR_IRQ_HANDLER(pwr, panthor_pwr_irq_handler);
+
+static irqreturn_t panthor_pwr_irq_threaded_handler(int irq, void *data)
+{
+ return panthor_irq_default_threaded_handler(data, panthor_pwr_irq_handler);
+}
static void panthor_pwr_write_command(struct panthor_device *ptdev, u32 command, u64 args)
{
@@ -453,7 +458,7 @@ void panthor_pwr_unplug(struct panthor_device *ptdev)
return;
/* Make sure the IRQ handler is not running after that point. */
- panthor_pwr_irq_suspend(&ptdev->pwr->irq);
+ panthor_irq_suspend(&ptdev->pwr->irq);
/* Wake-up all waiters. */
spin_lock_irqsave(&ptdev->pwr->reqs_lock, flags);
@@ -483,9 +488,10 @@ int panthor_pwr_init(struct panthor_device *ptdev)
if (irq < 0)
return irq;
- err = panthor_request_pwr_irq(
+ err = panthor_irq_request(
ptdev, &pwr->irq, irq, PWR_INTERRUPTS_MASK,
- pwr->iomem + PWR_INT_BASE);
+ pwr->iomem + PWR_INT_BASE, "pwr",
+ panthor_pwr_irq_threaded_handler);
if (err)
return err;
@@ -564,7 +570,7 @@ void panthor_pwr_suspend(struct panthor_device *ptdev)
if (!ptdev->pwr)
return;
- panthor_pwr_irq_suspend(&ptdev->pwr->irq);
+ panthor_irq_suspend(&ptdev->pwr->irq);
}
void panthor_pwr_resume(struct panthor_device *ptdev)
@@ -572,5 +578,5 @@ void panthor_pwr_resume(struct panthor_device *ptdev)
if (!ptdev->pwr)
return;
- panthor_pwr_irq_resume(&ptdev->pwr->irq);
+ panthor_irq_resume(&ptdev->pwr->irq);
}
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 3/7] drm/panthor: Replace the panthor_irq macro machinery by inline helpers
2026-06-23 12:51 ` [PATCH v3 3/7] drm/panthor: Replace the panthor_irq macro machinery by inline helpers Boris Brezillon
@ 2026-06-23 13:02 ` sashiko-bot
0 siblings, 0 replies; 11+ messages in thread
From: sashiko-bot @ 2026-06-23 13:02 UTC (permalink / raw)
To: Boris Brezillon; +Cc: dri-devel
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
Pre-existing issues:
- [High] Hardware interrupts are unmasked before the OS interrupt handler is successfully registered, risking an unhandled IRQ storm.
--
commit 4e867b9890a5ecc61feefa09f80bbc3aaddcd675
Author: Boris Brezillon <boris.brezillon@collabora.com>
drm/panthor: Replace the panthor_irq macro machinery by inline helpers
This commit removes the panthor_irq macro generation logic and replaces it
with inline helpers to rely on compiler optimization. It also refactors the
prototype of IRQ handlers to take struct panthor_irq instead of struct
panthor_device.
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
[ ... ]
> +static inline int
> +panthor_irq_request(struct panthor_device *ptdev, struct panthor_irq *pirq,
> + int irq, u32 mask, void __iomem *iomem, const char *name,
> + irqreturn_t (*threaded_handler)(int, void *data))
> +{
[ ... ]
> + panthor_irq_resume(pirq);
> + return devm_request_threaded_irq(ptdev->base.dev, irq,
> + panthor_irq_default_raw_handler,
> + threaded_handler,
> + IRQF_SHARED, full_name, pirq);
> }
[Severity: High]
This is a pre-existing issue, but by calling panthor_irq_resume() before
devm_request_threaded_irq(), does this leave a window where the hardware
interrupt is unmasked before the OS handler is successfully registered?
If the device asserts an interrupt before the handler is fully registered,
or if devm_request_threaded_irq() fails entirely and returns without
subsequently masking the hardware interrupt, could this unhandled interrupt
trigger an IRQ storm? This might cause the kernel's irq core to permanently
disable the shared interrupt line.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260623-panthor-signal-from-irq-v3-0-2ece396f8ee0@collabora.com?part=3
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 4/7] drm/panthor: Protect events processing with a separate spinlock
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
` (2 preceding siblings ...)
2026-06-23 12:51 ` [PATCH v3 3/7] drm/panthor: Replace the panthor_irq macro machinery by inline helpers Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 13:07 ` sashiko-bot
2026-06-23 12:51 ` [PATCH v3 5/7] drm/panthor: Don't defer job completion checks Boris Brezillon
` (2 subsequent siblings)
6 siblings, 1 reply; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
Add a specific spinlock for events processing so we can selectively
move some event processing to the threaded IRQ handler. For events to be
processed, we need to have access to the group attached to the CSG slot
which also forces us to protect the csg_slots[] updates with this
lock.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 74 +++++++++++++++++++++------------
1 file changed, 47 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 5b34032deff8..4f5d874e5d46 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -254,8 +254,21 @@ struct panthor_scheduler {
struct list_head waiting;
} groups;
+ /**
+ * @events_lock: Lock taken when processing events.
+ *
+ * This also needs to be taken when csg_slots are updated, to make sure
+ * the event processing logic doesn't touch groups that have left the CSG
+ * slot.
+ */
+ spinlock_t events_lock;
+
/**
* @csg_slots: FW command stream group slots.
+ *
+ * Updates to these slots must happen with both panthor_scheduler::lock and
+ * panthor_scheduler::events_lock held. As a result, reads can happen with
+ * either of these locks held.
*/
struct panthor_csg_slot csg_slots[MAX_CSGS];
@@ -999,7 +1012,6 @@ static int
group_bind_locked(struct panthor_group *group, u32 csg_id)
{
struct panthor_device *ptdev = group->ptdev;
- struct panthor_csg_slot *csg_slot;
int ret;
lockdep_assert_held(&ptdev->scheduler->lock);
@@ -1012,9 +1024,7 @@ group_bind_locked(struct panthor_group *group, u32 csg_id)
if (ret)
return ret;
- csg_slot = &ptdev->scheduler->csg_slots[csg_id];
group_get(group);
- group->csg_id = csg_id;
/* Dummy doorbell allocation: doorbell is assigned to the group and
* all queues use the same doorbell.
@@ -1026,7 +1036,10 @@ group_bind_locked(struct panthor_group *group, u32 csg_id)
for (u32 i = 0; i < group->queue_count; i++)
group->queues[i]->doorbell_id = csg_id + 1;
- csg_slot->group = group;
+ scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
+ ptdev->scheduler->csg_slots[csg_id].group = group;
+ group->csg_id = csg_id;
+ }
return 0;
}
@@ -1041,7 +1054,6 @@ static int
group_unbind_locked(struct panthor_group *group)
{
struct panthor_device *ptdev = group->ptdev;
- struct panthor_csg_slot *slot;
lockdep_assert_held(&ptdev->scheduler->lock);
@@ -1051,9 +1063,12 @@ group_unbind_locked(struct panthor_group *group)
if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
return -EINVAL;
- slot = &ptdev->scheduler->csg_slots[group->csg_id];
+ scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
+ ptdev->scheduler->csg_slots[group->csg_id].group = NULL;
+ group->csg_id = -1;
+ }
+
panthor_vm_idle(group->vm);
- group->csg_id = -1;
/* Tiler OOM events will be re-issued next time the group is scheduled. */
atomic_set(&group->tiler_oom, 0);
@@ -1062,8 +1077,6 @@ group_unbind_locked(struct panthor_group *group)
for (u32 i = 0; i < group->queue_count; i++)
group->queues[i]->doorbell_id = -1;
- slot->group = NULL;
-
group_put(group);
return 0;
}
@@ -1484,7 +1497,7 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
u32 fatal;
u64 info;
- lockdep_assert_held(&sched->lock);
+ lockdep_assert_held(&sched->events_lock);
cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
fatal = cs_iface->output->fatal;
@@ -1532,7 +1545,7 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
u32 fault;
u64 info;
- lockdep_assert_held(&sched->lock);
+ lockdep_assert_held(&sched->events_lock);
cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
fault = cs_iface->output->fault;
@@ -1682,7 +1695,7 @@ cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
struct panthor_group *group = csg_slot->group;
- lockdep_assert_held(&sched->lock);
+ lockdep_assert_held(&sched->events_lock);
if (drm_WARN_ON(&ptdev->base, !group))
return;
@@ -1703,7 +1716,7 @@ static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
struct panthor_fw_cs_iface *cs_iface;
u32 req, ack, events;
- lockdep_assert_held(&ptdev->scheduler->lock);
+ lockdep_assert_held(&ptdev->scheduler->events_lock);
cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
req = cs_iface->input->req;
@@ -1731,7 +1744,7 @@ static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32
{
struct panthor_scheduler *sched = ptdev->scheduler;
- lockdep_assert_held(&sched->lock);
+ lockdep_assert_held(&sched->events_lock);
sched->might_have_idle_groups = true;
@@ -1748,7 +1761,7 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
struct panthor_group *group = csg_slot->group;
- lockdep_assert_held(&ptdev->scheduler->lock);
+ lockdep_assert_held(&ptdev->scheduler->events_lock);
if (group)
group_queue_work(group, sync_upd);
@@ -1763,7 +1776,7 @@ csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 c
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
struct panthor_group *group = csg_slot->group;
- lockdep_assert_held(&sched->lock);
+ lockdep_assert_held(&sched->events_lock);
group = csg_slot->group;
if (!drm_WARN_ON(&ptdev->base, !group)) {
@@ -1784,7 +1797,7 @@ static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_i
struct panthor_fw_csg_iface *csg_iface;
u32 ring_cs_db_mask = 0;
- lockdep_assert_held(&ptdev->scheduler->lock);
+ lockdep_assert_held(&ptdev->scheduler->events_lock);
if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
return;
@@ -1842,7 +1855,7 @@ static void sched_process_idle_event_locked(struct panthor_device *ptdev)
{
struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
- lockdep_assert_held(&ptdev->scheduler->lock);
+ lockdep_assert_held(&ptdev->scheduler->events_lock);
/* Acknowledge the idle event and schedule a tick. */
panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
@@ -1858,7 +1871,7 @@ static void sched_process_global_irq_locked(struct panthor_device *ptdev)
struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
u32 req, ack, evts;
- lockdep_assert_held(&ptdev->scheduler->lock);
+ lockdep_assert_held(&ptdev->scheduler->events_lock);
req = READ_ONCE(glb_iface->input->req);
ack = READ_ONCE(glb_iface->output->ack);
@@ -1875,7 +1888,7 @@ static void process_fw_events_work(struct work_struct *work)
u32 events = atomic_xchg(&sched->fw_events, 0);
struct panthor_device *ptdev = sched->ptdev;
- mutex_lock(&sched->lock);
+ guard(spinlock)(&sched->events_lock);
if (events & JOB_INT_GLOBAL_IF) {
sched_process_global_irq_locked(ptdev);
@@ -1888,8 +1901,6 @@ static void process_fw_events_work(struct work_struct *work)
sched_process_csg_irq_locked(ptdev, csg_id);
events &= ~BIT(csg_id);
}
-
- mutex_unlock(&sched->lock);
}
/**
@@ -2136,7 +2147,8 @@ tick_ctx_init(struct panthor_scheduler *sched,
* CSG IRQs, so we can flag the faulty queue.
*/
if (panthor_vm_has_unhandled_faults(group->vm)) {
- sched_process_csg_irq_locked(ptdev, i);
+ scoped_guard(spinlock, &sched->events_lock)
+ sched_process_csg_irq_locked(ptdev, i);
/* No fatal fault reported, flag all queues as faulty. */
if (!group->fatal_queues)
@@ -2336,8 +2348,10 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c
* any pending interrupts before we start the new
* group.
*/
- if (group->csg_id >= 0)
+ if (group->csg_id >= 0) {
+ guard(spinlock)(&sched->events_lock);
sched_process_csg_irq_locked(ptdev, group->csg_id);
+ }
group_unbind_locked(group);
}
@@ -2902,10 +2916,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
u32 csg_id = ffs(slot_mask) - 1;
struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
- if (flush_caches_failed)
+ if (flush_caches_failed) {
csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
- else
+ } else {
+ guard(spinlock)(&sched->events_lock);
csg_slot_sync_update_locked(ptdev, csg_id);
+ }
slot_mask &= ~BIT(csg_id);
}
@@ -2920,8 +2936,10 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
group_get(group);
- if (group->csg_id >= 0)
+ if (group->csg_id >= 0) {
+ guard(spinlock)(&sched->events_lock);
sched_process_csg_irq_locked(ptdev, group->csg_id);
+ }
group_unbind_locked(group);
@@ -4141,6 +4159,8 @@ int panthor_sched_init(struct panthor_device *ptdev)
INIT_WORK(&sched->sync_upd_work, sync_upd_work);
INIT_WORK(&sched->fw_events_work, process_fw_events_work);
+ spin_lock_init(&sched->events_lock);
+
ret = drmm_mutex_init(&ptdev->base, &sched->lock);
if (ret)
return ret;
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 4/7] drm/panthor: Protect events processing with a separate spinlock
2026-06-23 12:51 ` [PATCH v3 4/7] drm/panthor: Protect events processing with a separate spinlock Boris Brezillon
@ 2026-06-23 13:07 ` sashiko-bot
0 siblings, 0 replies; 11+ messages in thread
From: sashiko-bot @ 2026-06-23 13:07 UTC (permalink / raw)
To: Boris Brezillon; +Cc: dri-devel
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
- [High] Data race and lost updates on `group->fatal_queues` due to split locking.
- [Medium] Data race on `group->timedout` boolean flag due to split locking.
- [Medium] Data race on `sched->might_have_idle_groups` due to split locking.
--
commit db6f3f8002805cdf65efff6be24738f26e5f60a4
Author: Boris Brezillon <boris.brezillon@collabora.com>
drm/panthor: Protect events processing with a separate spinlock
This commit introduces a new events_lock spinlock in the panthor scheduler.
It aims to protect event processing and command stream group (CSG) slot
updates, allowing some event processing to be moved to a threaded IRQ
handler.
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 5b34032deff81..4f5d874e5d468 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
[ ... ]
> @@ -1484,7 +1497,7 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
> u32 fatal;
> u64 info;
>
> - lockdep_assert_held(&sched->lock);
> + lockdep_assert_held(&sched->events_lock);
>
> cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
> fatal = cs_iface->output->fatal;
[Severity: High]
With this change, cs_slot_process_fatal_event_locked() now executes with only
events_lock held, which means the subsequent update to group->fatal_queues
inside this function is no longer protected by sched->lock:
group->fatal_queues |= BIT(cs_id);
However, in tick_ctx_init(), we update this same field while holding
sched->lock, but after dropping the scoped events_lock:
if (!group->fatal_queues)
group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
Could this result in a data race and lost updates on group->fatal_queues if
these two paths execute concurrently on different CPUs? If the non-atomic
read-modify-write operations overlap, one of the updates might be lost,
potentially leaving a faulty queue unflagged.
This is a pre-existing issue, but it's worth noting that
group_process_tiler_oom() also performs a lockless read-modify-write on
group->fatal_queues:
group->fatal_queues |= BIT(cs_id);
[ ... ]
> @@ -1731,7 +1744,7 @@ static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32
> {
> struct panthor_scheduler *sched = ptdev->scheduler;
>
> - lockdep_assert_held(&sched->lock);
> + lockdep_assert_held(&sched->events_lock);
>
> sched->might_have_idle_groups = true;
>
[Severity: Medium]
Will this lead to a data race on sched->might_have_idle_groups?
csg_slot_process_idle_event_locked() sets this flag to true while holding
events_lock. But in tick_ctx_apply(), the flag is overwritten under
sched->lock without holding events_lock:
sched->might_have_idle_groups = ctx->idle_group_count > 0;
Because these paths do not share a common lock, they can execute
concurrently, which violates the C memory model and might trigger KCSAN
warnings.
[ ... ]
> @@ -1763,7 +1776,7 @@ csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 c
> struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
> struct panthor_group *group = csg_slot->group;
>
> - lockdep_assert_held(&sched->lock);
> + lockdep_assert_held(&sched->events_lock);
>
> group = csg_slot->group;
> if (!drm_WARN_ON(&ptdev->base, !group)) {
[Severity: Medium]
Since csg_slot_process_progress_timer_event_locked() now holds events_lock
instead of sched->lock, is it still safe to modify group->timedout here?
group->timedout = true;
Other paths like panthor_sched_suspend() and panthor_job_timedout() modify
and read group->timedout while holding only sched->lock:
csg_slot->group->timedout = true;
Concurrent writes from the event processing workqueue and the suspend/timeout
paths could cause a data race on this boolean flag.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260623-panthor-signal-from-irq-v3-0-2ece396f8ee0@collabora.com?part=4
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3 5/7] drm/panthor: Don't defer job completion checks
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
` (3 preceding siblings ...)
2026-06-23 12:51 ` [PATCH v3 4/7] drm/panthor: Protect events processing with a separate spinlock Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 6/7] drm/panthor: Don't defer FW event processing Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 7/7] drm/panthor: Automate CSG IRQ processing at group unbind time Boris Brezillon
6 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
Call group_check_job_completion() directly from
csg_slot_sync_update_locked() instead of deferring it.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 182 +++++++++++++++-----------------
1 file changed, 87 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 4f5d874e5d46..6b2d07110220 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -689,9 +689,6 @@ struct panthor_group {
*/
struct panthor_kernel_bo *protm_suspend_buf;
- /** @sync_upd_work: Work used to check/signal job fences. */
- struct work_struct sync_upd_work;
-
/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
struct work_struct tiler_oom_work;
@@ -1755,6 +1752,92 @@ static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32
sched_queue_delayed_work(sched, tick, 0);
}
+static void update_fdinfo_stats(struct panthor_job *job)
+{
+ struct panthor_group *group = job->group;
+ struct panthor_queue *queue = group->queues[job->queue_idx];
+ struct panthor_gpu_usage *fdinfo = &group->fdinfo.data;
+ struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap;
+ struct panthor_job_profiling_data *data = &slots[job->profiling.slot];
+
+ scoped_guard(spinlock_irqsave, &group->fdinfo.lock) {
+ if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
+ fdinfo->cycles += data->cycles.after - data->cycles.before;
+ if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
+ fdinfo->time += data->time.after - data->time.before;
+ }
+}
+
+static bool queue_check_job_completion(struct panthor_queue *queue)
+{
+ struct panthor_syncobj_64b *syncobj = NULL;
+ struct panthor_job *job, *job_tmp;
+ bool cookie, progress = false;
+ LIST_HEAD(done_jobs);
+
+ cookie = dma_fence_begin_signalling();
+ spin_lock(&queue->fence_ctx.lock);
+ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
+ if (!syncobj) {
+ struct panthor_group *group = job->group;
+
+ syncobj = group->syncobjs->kmap +
+ (job->queue_idx * sizeof(*syncobj));
+ }
+
+ if (syncobj->seqno < job->done_fence->seqno)
+ break;
+
+ list_move_tail(&job->node, &done_jobs);
+ dma_fence_signal_locked(job->done_fence);
+ }
+
+ if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
+ /* If we have no job left, we cancel the timer, and reset remaining
+ * time to its default so it can be restarted next time
+ * queue_resume_timeout() is called.
+ */
+ queue_suspend_timeout_locked(queue);
+
+ /* If there's no job pending, we consider it progress to avoid a
+ * spurious timeout if the timeout handler and the sync update
+ * handler raced.
+ */
+ progress = true;
+ } else if (!list_empty(&done_jobs)) {
+ queue_reset_timeout_locked(queue);
+ progress = true;
+ }
+ spin_unlock(&queue->fence_ctx.lock);
+ dma_fence_end_signalling(cookie);
+
+ list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
+ if (job->profiling.mask)
+ update_fdinfo_stats(job);
+ list_del_init(&job->node);
+ panthor_job_put(&job->base);
+ }
+
+ return progress;
+}
+
+static void group_check_job_completion(struct panthor_group *group)
+{
+ u32 queue_idx;
+ bool cookie;
+
+ cookie = dma_fence_begin_signalling();
+ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
+ struct panthor_queue *queue = group->queues[queue_idx];
+
+ if (!queue)
+ continue;
+
+ queue_check_job_completion(queue);
+ }
+ dma_fence_end_signalling(cookie);
+}
+
static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
u32 csg_id)
{
@@ -1764,7 +1847,7 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
lockdep_assert_held(&ptdev->scheduler->events_lock);
if (group)
- group_queue_work(group, sync_upd);
+ group_check_job_completion(group);
sched_queue_work(ptdev->scheduler, sync_upd);
}
@@ -3023,22 +3106,6 @@ void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
}
}
-static void update_fdinfo_stats(struct panthor_job *job)
-{
- struct panthor_group *group = job->group;
- struct panthor_queue *queue = group->queues[job->queue_idx];
- struct panthor_gpu_usage *fdinfo = &group->fdinfo.data;
- struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap;
- struct panthor_job_profiling_data *data = &slots[job->profiling.slot];
-
- scoped_guard(spinlock, &group->fdinfo.lock) {
- if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
- fdinfo->cycles += data->cycles.after - data->cycles.before;
- if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
- fdinfo->time += data->time.after - data->time.before;
- }
-}
-
void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
{
struct panthor_group_pool *gpool = pfile->groups;
@@ -3059,80 +3126,6 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
xa_unlock(&gpool->xa);
}
-static bool queue_check_job_completion(struct panthor_queue *queue)
-{
- struct panthor_syncobj_64b *syncobj = NULL;
- struct panthor_job *job, *job_tmp;
- bool cookie, progress = false;
- LIST_HEAD(done_jobs);
-
- cookie = dma_fence_begin_signalling();
- spin_lock(&queue->fence_ctx.lock);
- list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
- if (!syncobj) {
- struct panthor_group *group = job->group;
-
- syncobj = group->syncobjs->kmap +
- (job->queue_idx * sizeof(*syncobj));
- }
-
- if (syncobj->seqno < job->done_fence->seqno)
- break;
-
- list_move_tail(&job->node, &done_jobs);
- dma_fence_signal_locked(job->done_fence);
- }
-
- if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
- /* If we have no job left, we cancel the timer, and reset remaining
- * time to its default so it can be restarted next time
- * queue_resume_timeout() is called.
- */
- queue_suspend_timeout_locked(queue);
-
- /* If there's no job pending, we consider it progress to avoid a
- * spurious timeout if the timeout handler and the sync update
- * handler raced.
- */
- progress = true;
- } else if (!list_empty(&done_jobs)) {
- queue_reset_timeout_locked(queue);
- progress = true;
- }
- spin_unlock(&queue->fence_ctx.lock);
- dma_fence_end_signalling(cookie);
-
- list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
- if (job->profiling.mask)
- update_fdinfo_stats(job);
- list_del_init(&job->node);
- panthor_job_put(&job->base);
- }
-
- return progress;
-}
-
-static void group_sync_upd_work(struct work_struct *work)
-{
- struct panthor_group *group =
- container_of(work, struct panthor_group, sync_upd_work);
- u32 queue_idx;
- bool cookie;
-
- cookie = dma_fence_begin_signalling();
- for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
- struct panthor_queue *queue = group->queues[queue_idx];
-
- if (!queue)
- continue;
-
- queue_check_job_completion(queue);
- }
- dma_fence_end_signalling(cookie);
-
- group_put(group);
-}
-
struct panthor_job_ringbuf_instrs {
u64 buffer[MAX_INSTRS_PER_JOB];
u32 count;
@@ -3701,7 +3694,6 @@ int panthor_group_create(struct panthor_file *pfile,
INIT_LIST_HEAD(&group->wait_node);
INIT_LIST_HEAD(&group->run_node);
INIT_WORK(&group->term_work, group_term_work);
- INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
INIT_WORK(&group->release_work, group_release_work);
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 6/7] drm/panthor: Don't defer FW event processing
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
` (4 preceding siblings ...)
2026-06-23 12:51 ` [PATCH v3 5/7] drm/panthor: Don't defer job completion checks Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 12:51 ` [PATCH v3 7/7] drm/panthor: Automate CSG IRQ processing at group unbind time Boris Brezillon
6 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
Avoid a workqueue roundtrip and process things immediately from
panthor_sched_report_fw_events().
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 48 +++++++--------------------------
1 file changed, 9 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 6b2d07110220..c8818718d135 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -177,23 +177,6 @@ struct panthor_scheduler {
*/
struct work_struct sync_upd_work;
- /**
- * @fw_events_work: Work used to process FW events outside the interrupt path.
- *
- * Even if the interrupt is threaded, we need any event processing
- * that require taking the panthor_scheduler::lock to be processed
- * outside the interrupt path so we don't block the tick logic when
- * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
- * event processing requires taking this lock, we just delegate all
- * FW event processing to the scheduler workqueue.
- */
- struct work_struct fw_events_work;
-
- /**
- * @fw_events: Bitmask encoding pending FW events.
- */
- atomic_t fw_events;
-
/**
* @resched_target: When the next tick should occur.
*
@@ -1964,14 +1947,17 @@ static void sched_process_global_irq_locked(struct panthor_device *ptdev)
sched_process_idle_event_locked(ptdev);
}
-static void process_fw_events_work(struct work_struct *work)
+/**
+ * panthor_sched_report_fw_events() - Report FW events to the scheduler.
+ * @ptdev: Device.
+ * @events: Bitmask of pending FW events to report.
+ */
+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
{
- struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
- fw_events_work);
- u32 events = atomic_xchg(&sched->fw_events, 0);
- struct panthor_device *ptdev = sched->ptdev;
+ if (!ptdev->scheduler)
+ return;
- guard(spinlock)(&sched->events_lock);
+ guard(spinlock)(&ptdev->scheduler->events_lock);
if (events & JOB_INT_GLOBAL_IF) {
sched_process_global_irq_locked(ptdev);
@@ -1986,20 +1972,6 @@ static void process_fw_events_work(struct work_struct *work)
}
}
-/**
- * panthor_sched_report_fw_events() - Report FW events to the scheduler.
- * @ptdev: Device.
- * @events: Bitmask of pending FW events to report.
- */
-void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
-{
- if (!ptdev->scheduler)
- return;
-
- atomic_or(events, &ptdev->scheduler->fw_events);
- sched_queue_work(ptdev->scheduler, fw_events);
-}
-
static const char *fence_get_driver_name(struct dma_fence *fence)
{
return "panthor";
@@ -4064,7 +4036,6 @@ void panthor_sched_unplug(struct panthor_device *ptdev)
struct panthor_scheduler *sched = ptdev->scheduler;
disable_delayed_work_sync(&sched->tick_work);
- disable_work_sync(&sched->fw_events_work);
disable_work_sync(&sched->sync_upd_work);
mutex_lock(&sched->lock);
@@ -4149,7 +4120,6 @@ int panthor_sched_init(struct panthor_device *ptdev)
sched->tick_period = msecs_to_jiffies(10);
INIT_DELAYED_WORK(&sched->tick_work, tick_work);
INIT_WORK(&sched->sync_upd_work, sync_upd_work);
- INIT_WORK(&sched->fw_events_work, process_fw_events_work);
spin_lock_init(&sched->events_lock);
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v3 7/7] drm/panthor: Automate CSG IRQ processing at group unbind time
2026-06-23 12:51 [PATCH v3 0/7] drm/panthor: Reduce dma_fence signalling latency Boris Brezillon
` (5 preceding siblings ...)
2026-06-23 12:51 ` [PATCH v3 6/7] drm/panthor: Don't defer FW event processing Boris Brezillon
@ 2026-06-23 12:51 ` Boris Brezillon
2026-06-23 13:04 ` sashiko-bot
6 siblings, 1 reply; 11+ messages in thread
From: Boris Brezillon @ 2026-06-23 12:51 UTC (permalink / raw)
To: Steven Price, Liviu Dudau, Chia-I Wu
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, dri-devel, linux-kernel, Boris Brezillon
Make the sched_process_csg_irq_locked() call part of
group_unbind_locked() so we don't have to manually call it in
tick_ctx_apply()/panthor_sched_suspend().
This implies moving group_[un]bind_locked() around to avoid a
forward declaration.
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_sched.c | 180 +++++++++++++++-----------------
1 file changed, 83 insertions(+), 97 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index c8818718d135..124999297aec 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -981,86 +981,6 @@ group_get(struct panthor_group *group)
return group;
}
-/**
- * group_bind_locked() - Bind a group to a group slot
- * @group: Group.
- * @csg_id: Slot.
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int
-group_bind_locked(struct panthor_group *group, u32 csg_id)
-{
- struct panthor_device *ptdev = group->ptdev;
- int ret;
-
- lockdep_assert_held(&ptdev->scheduler->lock);
-
- if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
- ptdev->scheduler->csg_slots[csg_id].group))
- return -EINVAL;
-
- ret = panthor_vm_active(group->vm);
- if (ret)
- return ret;
-
- group_get(group);
-
- /* Dummy doorbell allocation: doorbell is assigned to the group and
- * all queues use the same doorbell.
- *
- * TODO: Implement LRU-based doorbell assignment, so the most often
- * updated queues get their own doorbell, thus avoiding useless checks
- * on queues belonging to the same group that are rarely updated.
- */
- for (u32 i = 0; i < group->queue_count; i++)
- group->queues[i]->doorbell_id = csg_id + 1;
-
- scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
- ptdev->scheduler->csg_slots[csg_id].group = group;
- group->csg_id = csg_id;
- }
-
- return 0;
-}
-
-/**
- * group_unbind_locked() - Unbind a group from a slot.
- * @group: Group to unbind.
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int
-group_unbind_locked(struct panthor_group *group)
-{
- struct panthor_device *ptdev = group->ptdev;
-
- lockdep_assert_held(&ptdev->scheduler->lock);
-
- if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
- return -EINVAL;
-
- if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
- return -EINVAL;
-
- scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
- ptdev->scheduler->csg_slots[group->csg_id].group = NULL;
- group->csg_id = -1;
- }
-
- panthor_vm_idle(group->vm);
-
- /* Tiler OOM events will be re-issued next time the group is scheduled. */
- atomic_set(&group->tiler_oom, 0);
- cancel_work(&group->tiler_oom_work);
-
- for (u32 i = 0; i < group->queue_count; i++)
- group->queues[i]->doorbell_id = -1;
-
- group_put(group);
- return 0;
-}
-
static bool
group_is_idle(struct panthor_group *group)
{
@@ -1972,6 +1892,88 @@ void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
}
}
+/**
+ * group_bind_locked() - Bind a group to a group slot
+ * @group: Group.
+ * @csg_id: Slot.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+group_bind_locked(struct panthor_group *group, u32 csg_id)
+{
+ struct panthor_device *ptdev = group->ptdev;
+ int ret;
+
+ lockdep_assert_held(&ptdev->scheduler->lock);
+
+ if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
+ ptdev->scheduler->csg_slots[csg_id].group))
+ return -EINVAL;
+
+ ret = panthor_vm_active(group->vm);
+ if (ret)
+ return ret;
+
+ group_get(group);
+
+ /* Dummy doorbell allocation: doorbell is assigned to the group and
+ * all queues use the same doorbell.
+ *
+ * TODO: Implement LRU-based doorbell assignment, so the most often
+ * updated queues get their own doorbell, thus avoiding useless checks
+ * on queues belonging to the same group that are rarely updated.
+ */
+ for (u32 i = 0; i < group->queue_count; i++)
+ group->queues[i]->doorbell_id = csg_id + 1;
+
+ scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
+ ptdev->scheduler->csg_slots[csg_id].group = group;
+ group->csg_id = csg_id;
+ }
+
+ return 0;
+}
+
+/**
+ * group_unbind_locked() - Unbind a group from a slot.
+ * @group: Group to unbind.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+group_unbind_locked(struct panthor_group *group)
+{
+ struct panthor_device *ptdev = group->ptdev;
+
+ lockdep_assert_held(&ptdev->scheduler->lock);
+
+ if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
+ return -EINVAL;
+
+ if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
+ return -EINVAL;
+
+ scoped_guard(spinlock, &ptdev->scheduler->events_lock) {
+ /* Process all pending IRQs before returning the slot. */
+ sched_process_csg_irq_locked(ptdev, group->csg_id);
+ ptdev->scheduler->csg_slots[group->csg_id].group = NULL;
+ group->csg_id = -1;
+ }
+
+ panthor_vm_idle(group->vm);
+
+ /* Tiler OOM events will be re-issued next time the group is scheduled. */
+ atomic_set(&group->tiler_oom, 0);
+ cancel_work(&group->tiler_oom_work);
+
+ for (u32 i = 0; i < group->queue_count; i++)
+ group->queues[i]->doorbell_id = -1;
+
+ group_put(group);
+ return 0;
+}
+
static const char *fence_get_driver_name(struct dma_fence *fence)
{
return "panthor";
@@ -2398,18 +2400,8 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c
/* Unbind evicted groups. */
for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
- list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
- /* This group is gone. Process interrupts to clear
- * any pending interrupts before we start the new
- * group.
- */
- if (group->csg_id >= 0) {
- guard(spinlock)(&sched->events_lock);
- sched_process_csg_irq_locked(ptdev, group->csg_id);
- }
-
+ list_for_each_entry(group, &ctx->old_groups[prio], run_node)
group_unbind_locked(group);
- }
}
for (i = 0; i < sched->csg_slot_count; i++) {
@@ -2990,12 +2982,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
continue;
group_get(group);
-
- if (group->csg_id >= 0) {
- guard(spinlock)(&sched->events_lock);
- sched_process_csg_irq_locked(ptdev, group->csg_id);
- }
-
group_unbind_locked(group);
drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
--
2.54.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v3 7/7] drm/panthor: Automate CSG IRQ processing at group unbind time
2026-06-23 12:51 ` [PATCH v3 7/7] drm/panthor: Automate CSG IRQ processing at group unbind time Boris Brezillon
@ 2026-06-23 13:04 ` sashiko-bot
0 siblings, 0 replies; 11+ messages in thread
From: sashiko-bot @ 2026-06-23 13:04 UTC (permalink / raw)
To: Boris Brezillon; +Cc: dri-devel
Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider:
Pre-existing issues:
- [High] Leaking `panthor_group` reference count when `tiler_oom_work` is successfully cancelled during group unbind.
- [High] Ignored return value from `group_bind_locked()` in `tick_ctx_apply()` can lead to firmware state corruption and hardware hangs.
--
commit 3008df4d374d0ccc21ff32d8af0ae14af924b9fd
Author: Boris Brezillon <boris.brezillon@collabora.com>
drm/panthor: Automate CSG IRQ processing at group unbind time
This commit moves the sched_process_csg_irq_locked() call into
group_unbind_locked() to automate CSG IRQ processing at group unbind time.
This eliminates the need to manually call it in tick_ctx_apply() and
panthor_sched_suspend(). The patch also relocates group_bind_locked() and
group_unbind_locked() to avoid forward declarations.
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index c8818718d1353..124999297aec9 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
[ ... ]
> @@ -1972,6 +1892,88 @@ void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
> }
> }
>
> +/**
> + * group_bind_locked() - Bind a group to a group slot
> + * @group: Group.
> + * @csg_id: Slot.
> + *
> + * Return: 0 on success, a negative error code otherwise.
> + */
> +static int
> +group_bind_locked(struct panthor_group *group, u32 csg_id)
[Severity: High]
This isn't a bug introduced by this patch, but could ignoring the return
value of group_bind_locked() in tick_ctx_apply() lead to firmware state
corruption?
If group_bind_locked() fails (e.g., panthor_vm_active() encounters an MMU
timeout), the error is ignored and csg_slot->group remains NULL.
This causes csg_slot_prog_locked() to return immediately without configuring
the hardware. However, tick_ctx_apply() then unconditionally queues a
CSG_STATE_START or CSG_STATE_RESUME command to the firmware for this slot,
despite it being unprogrammed.
Can this trigger firmware faults or hardware hangs?
> +{
> + struct panthor_device *ptdev = group->ptdev;
> + int ret;
> +
[ ... ]
> +static int
> +group_unbind_locked(struct panthor_group *group)
> +{
> + struct panthor_device *ptdev = group->ptdev;
> +
[ ... ]
> + panthor_vm_idle(group->vm);
> +
> + /* Tiler OOM events will be re-issued next time the group is scheduled. */
> + atomic_set(&group->tiler_oom, 0);
> + cancel_work(&group->tiler_oom_work);
[Severity: High]
This is a pre-existing issue, but does this leak the group reference if
cancel_work() returns true?
When a tiler out-of-memory (OOM) event occurs,
cs_slot_process_tiler_oom_event_locked() acquires a reference to the group
and queues tiler_oom_work.
If the group is unbound before the work executes, group_unbind_locked()
calls cancel_work(&group->tiler_oom_work). Because cancel_work() removes
the pending work from the queue without executing the work function, the
delegated group_put() is skipped.
Should we check the return value of cancel_work() and call group_put(group)
if it returns true?
> +
> + for (u32 i = 0; i < group->queue_count; i++)
> + group->queues[i]->doorbell_id = -1;
> +
> + group_put(group);
> + return 0;
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260623-panthor-signal-from-irq-v3-0-2ece396f8ee0@collabora.com?part=7
^ permalink raw reply [flat|nested] 11+ messages in thread