* [PATCH 1/6] drm/v3d: Store a pointer to `struct v3d_file_priv` inside each job
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-19 13:24 ` [PATCH 2/6] drm/v3d: Store the active job inside the queue's state Maíra Canal
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
Instead of storing a pointer to the DRM file private data (`struct drm_file`),
store a pointer directly to the driver-private V3D file struct
(`struct v3d_file_priv`), which is what the stats and reset-handling code
actually dereference.
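For illustration only, the change amounts to dropping one level of
indirection (a sketch mirroring the hunks below):

        /* before: reach the driver-private data through the DRM file */
        struct v3d_file_priv *file = job->file->driver_priv;

        /* after: the job stores the driver-private pointer directly */
        struct v3d_file_priv *file = job->file_priv;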
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_drv.h | 4 ++--
drivers/gpu/drm/v3d/v3d_sched.c | 10 +++++-----
drivers/gpu/drm/v3d/v3d_submit.c | 2 +-
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index dabda7aaf00074d8de42dcdb345d5f3331ac13b2..dd8d39bf2d948551c66d852aee7f9afa473df4f1 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -319,9 +319,9 @@ struct v3d_job {
struct v3d_perfmon *perfmon;
/* File descriptor of the process that submitted the job that could be used
- * for collecting stats by process of GPU usage.
+ * to collect per-process information about the GPU.
*/
- struct drm_file *file;
+ struct v3d_file_priv *file_priv;
/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 7d45664f03c714bbb754ea22902147a64e63d115..797aac34481c8a72cff3e16d9d7abebc570e4983 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -139,7 +139,7 @@ static void
v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
{
struct v3d_dev *v3d = job->v3d;
- struct v3d_file_priv *file = job->file->driver_priv;
+ struct v3d_file_priv *file = job->file_priv;
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
@@ -197,7 +197,7 @@ void
v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
{
struct v3d_dev *v3d = job->v3d;
- struct v3d_file_priv *file = job->file->driver_priv;
+ struct v3d_file_priv *file = job->file_priv;
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
@@ -570,7 +570,7 @@ static void
v3d_reset_performance_queries(struct v3d_cpu_job *job)
{
struct v3d_performance_query_info *performance_query = &job->performance_query;
- struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
+ struct v3d_file_priv *v3d_priv = job->base.file_priv;
struct v3d_dev *v3d = job->base.v3d;
struct v3d_perfmon *perfmon;
@@ -600,7 +600,7 @@ v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data,
{
struct v3d_performance_query_info *performance_query =
&job->performance_query;
- struct v3d_file_priv *v3d_priv = job->base.file->driver_priv;
+ struct v3d_file_priv *v3d_priv = job->base.file_priv;
struct v3d_performance_query *perf_query =
&performance_query->queries[query];
struct v3d_dev *v3d = job->base.v3d;
@@ -718,7 +718,7 @@ static enum drm_gpu_sched_stat
v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
- struct v3d_file_priv *v3d_priv = job->file->driver_priv;
+ struct v3d_file_priv *v3d_priv = job->file_priv;
enum v3d_queue q;
mutex_lock(&v3d->reset_lock);
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 5171ffe9012d4d0140d82d40af71ecbaf029a24a..f3652e90683c398f25d2ce306be1c0fdfe4d286f 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -166,7 +166,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
job->v3d = v3d;
job->free = free;
- job->file = file_priv;
+ job->file_priv = v3d_priv;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
1, v3d_priv, file_priv->client_id);
--
2.50.0
* [PATCH 2/6] drm/v3d: Store the active job inside the queue's state
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
2025-07-19 13:24 ` [PATCH 1/6] drm/v3d: Store a pointer to `struct v3d_file_priv` inside each job Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-19 13:24 ` [PATCH 3/6] drm/v3d: Replace a global spinlock with a per-queue spinlock Maíra Canal
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
Instead of storing the active job of each queue in four separate variables,
store the active job inside the queue's state. This way, it's possible
to access all active jobs using an index based on `enum v3d_queue`.
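As a quick sketch of what this enables (mirroring the v3d_gem_destroy()
hunk below), all queues can be walked with a single loop instead of
checking four separate pointers:

        enum v3d_queue q;

        for (q = 0; q < V3D_MAX_QUEUES; q++)
                WARN_ON(v3d->queue[q].active_job);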
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_drv.h | 8 ++----
drivers/gpu/drm/v3d/v3d_gem.c | 7 ++---
drivers/gpu/drm/v3d/v3d_irq.c | 62 ++++++++++++++++-------------------------
drivers/gpu/drm/v3d/v3d_sched.c | 16 +++++------
4 files changed, 38 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index dd8d39bf2d948551c66d852aee7f9afa473df4f1..b5d7e58c3696a04b1824ecfe31e1962901746856 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -58,6 +58,9 @@ struct v3d_queue_state {
/* Stores the GPU stats for this queue in the global context. */
struct v3d_stats stats;
+
+ /* Currently active job for this queue */
+ struct v3d_job *active_job;
};
/* Performance monitor object. The perform lifetime is controlled by userspace
@@ -151,11 +154,6 @@ struct v3d_dev {
struct work_struct overflow_mem_work;
- struct v3d_bin_job *bin_job;
- struct v3d_render_job *render_job;
- struct v3d_tfu_job *tfu_job;
- struct v3d_csd_job *csd_job;
-
struct v3d_queue_state queue[V3D_MAX_QUEUES];
/* Spinlock used to synchronize the overflow memory
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index d7d16da78db328f004d1d702731d1a1b5437a394..7e0409a154497ecf500c4260e6aff0589b5932a1 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -322,6 +322,7 @@ void
v3d_gem_destroy(struct drm_device *dev)
{
struct v3d_dev *v3d = to_v3d_dev(dev);
+ enum v3d_queue q;
v3d_sched_fini(v3d);
v3d_gemfs_fini(v3d);
@@ -329,10 +330,8 @@ v3d_gem_destroy(struct drm_device *dev)
/* Waiting for jobs to finish would need to be done before
* unregistering V3D.
*/
- WARN_ON(v3d->bin_job);
- WARN_ON(v3d->render_job);
- WARN_ON(v3d->tfu_job);
- WARN_ON(v3d->csd_job);
+ for (q = 0; q < V3D_MAX_QUEUES; q++)
+ WARN_ON(v3d->queue[q].active_job);
drm_mm_takedown(&v3d->mm);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 2cca5d3a26a22cf03d2c4e9ae0688ab57e0eedcd..e96c7fd65a147177993bbc45d573453974ac2e39 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -42,6 +42,8 @@ v3d_overflow_mem_work(struct work_struct *work)
container_of(work, struct v3d_dev, overflow_mem_work);
struct drm_device *dev = &v3d->drm;
struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024);
+ struct v3d_queue_state *queue = &v3d->queue[V3D_BIN];
+ struct v3d_bin_job *bin_job;
struct drm_gem_object *obj;
unsigned long irqflags;
@@ -61,13 +63,15 @@ v3d_overflow_mem_work(struct work_struct *work)
* some binner pool anyway.
*/
spin_lock_irqsave(&v3d->job_lock, irqflags);
- if (!v3d->bin_job) {
+ bin_job = (struct v3d_bin_job *)queue->active_job;
+
+ if (!bin_job) {
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
goto out;
}
drm_gem_object_get(obj);
- list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
+ list_add_tail(&bo->unref_head, &bin_job->render->unref_list);
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
v3d_mmu_flush_all(v3d);
@@ -79,6 +83,20 @@ v3d_overflow_mem_work(struct work_struct *work)
drm_gem_object_put(obj);
}
+static void
+v3d_irq_signal_fence(struct v3d_dev *v3d, enum v3d_queue q,
+ void (*trace_irq)(struct drm_device *, uint64_t))
+{
+ struct v3d_queue_state *queue = &v3d->queue[q];
+ struct v3d_fence *fence = to_v3d_fence(queue->active_job->irq_fence);
+
+ v3d_job_update_stats(queue->active_job, q);
+ trace_irq(&v3d->drm, fence->seqno);
+
+ queue->active_job = NULL;
+ dma_fence_signal(&fence->base);
+}
+
static irqreturn_t
v3d_irq(int irq, void *arg)
{
@@ -102,41 +120,17 @@ v3d_irq(int irq, void *arg)
}
if (intsts & V3D_INT_FLDONE) {
- struct v3d_fence *fence =
- to_v3d_fence(v3d->bin_job->base.irq_fence);
-
- v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN);
- trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
-
- v3d->bin_job = NULL;
- dma_fence_signal(&fence->base);
-
+ v3d_irq_signal_fence(v3d, V3D_BIN, trace_v3d_bcl_irq);
status = IRQ_HANDLED;
}
if (intsts & V3D_INT_FRDONE) {
- struct v3d_fence *fence =
- to_v3d_fence(v3d->render_job->base.irq_fence);
-
- v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER);
- trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
-
- v3d->render_job = NULL;
- dma_fence_signal(&fence->base);
-
+ v3d_irq_signal_fence(v3d, V3D_RENDER, trace_v3d_rcl_irq);
status = IRQ_HANDLED;
}
if (intsts & V3D_INT_CSDDONE(v3d->ver)) {
- struct v3d_fence *fence =
- to_v3d_fence(v3d->csd_job->base.irq_fence);
-
- v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD);
- trace_v3d_csd_irq(&v3d->drm, fence->seqno);
-
- v3d->csd_job = NULL;
- dma_fence_signal(&fence->base);
-
+ v3d_irq_signal_fence(v3d, V3D_CSD, trace_v3d_csd_irq);
status = IRQ_HANDLED;
}
@@ -168,15 +162,7 @@ v3d_hub_irq(int irq, void *arg)
V3D_WRITE(V3D_HUB_INT_CLR, intsts);
if (intsts & V3D_HUB_INT_TFUC) {
- struct v3d_fence *fence =
- to_v3d_fence(v3d->tfu_job->base.irq_fence);
-
- v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU);
- trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
-
- v3d->tfu_job = NULL;
- dma_fence_signal(&fence->base);
-
+ v3d_irq_signal_fence(v3d, V3D_TFU, trace_v3d_tfu_irq);
status = IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 797aac34481c8a72cff3e16d9d7abebc570e4983..4fbcd662b4c4e69d5680d77171ed85fb869959bd 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -228,7 +228,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
if (unlikely(job->base.base.s_fence->finished.error)) {
spin_lock_irqsave(&v3d->job_lock, irqflags);
- v3d->bin_job = NULL;
+ v3d->queue[V3D_BIN].active_job = NULL;
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
return NULL;
}
@@ -237,7 +237,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
* v3d_overflow_mem_work().
*/
spin_lock_irqsave(&v3d->job_lock, irqflags);
- v3d->bin_job = job;
+ v3d->queue[V3D_BIN].active_job = to_v3d_job(sched_job);
/* Clear out the overflow allocation, so we don't
* reuse the overflow attached to a previous job.
*/
@@ -286,11 +286,11 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
if (unlikely(job->base.base.s_fence->finished.error)) {
- v3d->render_job = NULL;
+ v3d->queue[V3D_RENDER].active_job = NULL;
return NULL;
}
- v3d->render_job = job;
+ v3d->queue[V3D_RENDER].active_job = to_v3d_job(sched_job);
/* Can we avoid this flush? We need to be careful of
* scheduling, though -- imagine job0 rendering to texture and
@@ -334,11 +334,11 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
struct dma_fence *fence;
if (unlikely(job->base.base.s_fence->finished.error)) {
- v3d->tfu_job = NULL;
+ v3d->queue[V3D_TFU].active_job = NULL;
return NULL;
}
- v3d->tfu_job = job;
+ v3d->queue[V3D_TFU].active_job = to_v3d_job(sched_job);
fence = v3d_fence_create(v3d, V3D_TFU);
if (IS_ERR(fence))
@@ -382,11 +382,11 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
int i, csd_cfg0_reg;
if (unlikely(job->base.base.s_fence->finished.error)) {
- v3d->csd_job = NULL;
+ v3d->queue[V3D_CSD].active_job = NULL;
return NULL;
}
- v3d->csd_job = job;
+ v3d->queue[V3D_CSD].active_job = to_v3d_job(sched_job);
v3d_invalidate_caches(v3d);
--
2.50.0
* [PATCH 3/6] drm/v3d: Replace a global spinlock with a per-queue spinlock
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
2025-07-19 13:24 ` [PATCH 1/6] drm/v3d: Store a pointer to `struct v3d_file_priv` inside each job Maíra Canal
2025-07-19 13:24 ` [PATCH 2/6] drm/v3d: Store the active job inside the queue's state Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-19 13:24 ` [PATCH 4/6] drm/v3d: Address race-condition between per-fd GPU stats and fd release Maíra Canal
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
Each V3D queue works independently and all the dependencies between jobs
are handled through the DRM scheduler. Therefore, there is no need to use
a single lock for all queues; doing so creates unnecessary contention
between queues that can operate independently.
Replace the global spinlock with per-queue locks to improve parallelism
and reduce contention between different V3D queues (BIN, RENDER, TFU,
CSD). This allows independent queues to operate concurrently while
maintaining proper synchronization within each queue.
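As a rough sketch (assuming the `queue_lock` field introduced by this
patch), each queue now serializes only its own active-job updates:

        struct v3d_queue_state *queue = &v3d->queue[V3D_BIN];
        unsigned long irqflags;

        spin_lock_irqsave(&queue->queue_lock, irqflags);
        queue->active_job = to_v3d_job(sched_job);
        spin_unlock_irqrestore(&queue->queue_lock, irqflags);

Two different queues taking their own locks no longer serialize against
each other.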
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_drv.h | 8 ++------
drivers/gpu/drm/v3d/v3d_fence.c | 11 ++++++-----
drivers/gpu/drm/v3d/v3d_gem.c | 3 ++-
drivers/gpu/drm/v3d/v3d_irq.c | 6 +++---
drivers/gpu/drm/v3d/v3d_sched.c | 13 +++++++------
5 files changed, 20 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b5d7e58c3696a04b1824ecfe31e1962901746856..fb1a3d010fd5a61099f413629e9bfeeed3c45c59 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -61,6 +61,7 @@ struct v3d_queue_state {
/* Currently active job for this queue */
struct v3d_job *active_job;
+ spinlock_t queue_lock;
};
/* Performance monitor object. The perform lifetime is controlled by userspace
@@ -156,11 +157,6 @@ struct v3d_dev {
struct v3d_queue_state queue[V3D_MAX_QUEUES];
- /* Spinlock used to synchronize the overflow memory
- * management against bin job submission.
- */
- spinlock_t job_lock;
-
/* Used to track the active perfmon if any. */
struct v3d_perfmon *active_perfmon;
@@ -560,7 +556,7 @@ void v3d_get_stats(const struct v3d_stats *stats, u64 timestamp,
/* v3d_fence.c */
extern const struct dma_fence_ops v3d_fence_ops;
-struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q);
/* v3d_gem.c */
int v3d_gem_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 89840ed212c06036e5b9ecef91852a490538ba89..8f8471adae34af7a444f5eeca4ef08d66ac1b7b5 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -3,8 +3,9 @@
#include "v3d_drv.h"
-struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue)
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q)
{
+ struct v3d_queue_state *queue = &v3d->queue[q];
struct v3d_fence *fence;
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
@@ -12,10 +13,10 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue)
return ERR_PTR(-ENOMEM);
fence->dev = &v3d->drm;
- fence->queue = queue;
- fence->seqno = ++v3d->queue[queue].emit_seqno;
- dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock,
- v3d->queue[queue].fence_context, fence->seqno);
+ fence->queue = q;
+ fence->seqno = ++queue->emit_seqno;
+ dma_fence_init(&fence->base, &v3d_fence_ops, &queue->queue_lock,
+ queue->fence_context, fence->seqno);
return &fence->base;
}
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 7e0409a154497ecf500c4260e6aff0589b5932a1..c1593b5b2729eb6333197ef6a332b52ea567d474 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -269,10 +269,11 @@ v3d_gem_init(struct drm_device *dev)
queue->fence_context = dma_fence_context_alloc(1);
memset(&queue->stats, 0, sizeof(queue->stats));
seqcount_init(&queue->stats.lock);
+
+ spin_lock_init(&queue->queue_lock);
}
spin_lock_init(&v3d->mm_lock);
- spin_lock_init(&v3d->job_lock);
ret = drmm_mutex_init(dev, &v3d->bo_lock);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e96c7fd65a147177993bbc45d573453974ac2e39..74c84e0bd1bb5aa01c8c8e0e24310dac872fc85c 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -62,17 +62,17 @@ v3d_overflow_mem_work(struct work_struct *work)
* bin job got scheduled, that's fine. We'll just give them
* some binner pool anyway.
*/
- spin_lock_irqsave(&v3d->job_lock, irqflags);
+ spin_lock_irqsave(&queue->queue_lock, irqflags);
bin_job = (struct v3d_bin_job *)queue->active_job;
if (!bin_job) {
- spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
goto out;
}
drm_gem_object_get(obj);
list_add_tail(&bo->unref_head, &bin_job->render->unref_list);
- spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
v3d_mmu_flush_all(v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 4fbcd662b4c4e69d5680d77171ed85fb869959bd..85f6fdb470ae0270c0b4d421fb5e214a13c2e03a 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -222,27 +222,28 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_queue_state *queue = &v3d->queue[V3D_BIN];
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
unsigned long irqflags;
if (unlikely(job->base.base.s_fence->finished.error)) {
- spin_lock_irqsave(&v3d->job_lock, irqflags);
- v3d->queue[V3D_BIN].active_job = NULL;
- spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ spin_lock_irqsave(&queue->queue_lock, irqflags);
+ queue->active_job = NULL;
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
return NULL;
}
/* Lock required around bin_job update vs
* v3d_overflow_mem_work().
*/
- spin_lock_irqsave(&v3d->job_lock, irqflags);
- v3d->queue[V3D_BIN].active_job = to_v3d_job(sched_job);
+ spin_lock_irqsave(&queue->queue_lock, irqflags);
+ queue->active_job = to_v3d_job(sched_job);
/* Clear out the overflow allocation, so we don't
* reuse the overflow attached to a previous job.
*/
V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
- spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
v3d_invalidate_caches(v3d);
--
2.50.0
* [PATCH 4/6] drm/v3d: Address race-condition between per-fd GPU stats and fd release
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
` (2 preceding siblings ...)
2025-07-19 13:24 ` [PATCH 3/6] drm/v3d: Replace a global spinlock with a per-queue spinlock Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-19 13:24 ` [PATCH 5/6] drm/v3d: Synchronous operations can't timeout Maíra Canal
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
When the file descriptor is closed while a job is still running,
there's a race condition between the job completion callback and the
file descriptor cleanup. This can lead to the driver accessing freed
memory when updating the per-fd GPU stats, as in the following example:
[56120.512903] Unable to handle kernel paging request at virtual address 0000330a92b9688a
[56120.520881] Mem abort info:
[56120.523687] ESR = 0x0000000096000005
[56120.527454] EC = 0x25: DABT (current EL), IL = 32 bits
[56120.532785] SET = 0, FnV = 0
[56120.535847] EA = 0, S1PTW = 0
[56120.538995] FSC = 0x05: level 1 translation fault
[56120.543891] Data abort info:
[56120.546778] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
[56120.552289] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[56120.557362] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[56120.562690] user pgtable: 16k pages, 47-bit VAs, pgdp=0000000023f54000
[56120.569239] [0000330a92b9688a] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
[56120.577975] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
CPU: 0 UID: 1000 PID: 1497409 Comm: mpv Not tainted 6.12.37-ncvm5+ #1
Hardware name: Raspberry Pi 5 Model B Rev 1.0 (DT)
pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : v3d_job_update_stats+0x64/0x168 [v3d]
lr : v3d_job_update_stats+0x40/0x168 [v3d]
sp : ffffc00080003e60
x29: ffffc00080003e60 x28: ffff800002860000 x27: 0000000000000000
x26: 0000000000000000 x25: ffff800002860000 x24: ffff800002630800
x23: ffff800060786000 x22: 0000330a933c31fb x21: 0000000000000001
x20: 0000330a92b96302 x19: ffff800060786b10 x18: 0000000000000000
x17: ffffaf90506a0000 x16: ffffd06fce57c360 x15: 0000000000000000
x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
x11: 0000000000000000 x10: 0000000000000000 x9 : ffffd06f5d0fec40
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000002978dbd535a
x5 : 00ffffffffffffff x4 : 0000000000000015 x3 : 0000300001fddf88
x2 : 0000000000000020 x1 : 0000000000010001 x0 : 0000330a92b96872
Call trace:
v3d_job_update_stats+0x64/0x168 [v3d]
v3d_irq+0x118/0x2e0 [v3d]
__handle_irq_event_percpu+0x60/0x220
Fix this issue by protecting all accesses to `job->file_priv` with
the queue's lock. With that, we can clear `job->file_priv` before the
V3D per-fd structure is freed and ensure that `job->file_priv` exists
while the per-fd GPU stats are updated.
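In rough terms (lock and field names as introduced earlier in this
series), the stats update path becomes:

        spin_lock(&queue->queue_lock);
        if (job->file_priv)     /* NULL once the fd has been released */
                v3d_stats_update(&job->file_priv->stats[q], now);
        spin_unlock(&queue->queue_lock);

while v3d_postclose() clears `job->file_priv` under the same lock before
the per-fd structure is freed.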
Fixes: e1bc3a13bd77 ("drm/v3d: Avoid NULL pointer dereference in `v3d_job_update_stats()`")
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_drv.c | 14 +++++++++++++-
drivers/gpu/drm/v3d/v3d_drv.h | 2 +-
drivers/gpu/drm/v3d/v3d_sched.c | 16 +++++++++++-----
3 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 2def155ce496ec5f159f6bda9929aeaae141d1a6..c5a3bbbc74c5c6c9a34a6e6111b4e4e21bee0b34 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -157,12 +157,24 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
static void
v3d_postclose(struct drm_device *dev, struct drm_file *file)
{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_file_priv *v3d_priv = file->driver_priv;
+ unsigned long irqflags;
enum v3d_queue q;
- for (q = 0; q < V3D_MAX_QUEUES; q++)
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ struct v3d_queue_state *queue = &v3d->queue[q];
+ struct v3d_job *job = queue->active_job;
+
drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
+ if (job && job->base.entity == &v3d_priv->sched_entity[q]) {
+ spin_lock_irqsave(&queue->queue_lock, irqflags);
+ job->file_priv = NULL;
+ spin_unlock_irqrestore(&queue->queue_lock, irqflags);
+ }
+ }
+
v3d_perfmon_close_file(v3d_priv);
kfree(v3d_priv);
}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index fb1a3d010fd5a61099f413629e9bfeeed3c45c59..a1ad2c0cbe8ba991766f8d0777a8a1a5e78b4520 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -600,7 +600,7 @@ void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
unsigned int count);
void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
unsigned int count);
-void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue);
+void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q);
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 85f6fdb470ae0270c0b4d421fb5e214a13c2e03a..4a226ea3bc96865894e360b4115eb836da12d550 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -194,12 +194,11 @@ v3d_stats_update(struct v3d_stats *stats, u64 now)
}
void
-v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
+v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q)
{
struct v3d_dev *v3d = job->v3d;
- struct v3d_file_priv *file = job->file_priv;
- struct v3d_stats *global_stats = &v3d->queue[queue].stats;
- struct v3d_stats *local_stats = &file->stats[queue];
+ struct v3d_queue_state *queue = &v3d->queue[q];
+ struct v3d_stats *global_stats = &queue->stats;
u64 now = local_clock();
unsigned long flags;
@@ -209,7 +208,14 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
else
preempt_disable();
- v3d_stats_update(local_stats, now);
+ /* Don't update the local stats if the file context has already closed */
+ spin_lock(&queue->queue_lock);
+ if (job->file_priv)
+ v3d_stats_update(&job->file_priv->stats[q], now);
+ else
+ drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n");
+ spin_unlock(&queue->queue_lock);
+
v3d_stats_update(global_stats, now);
if (IS_ENABLED(CONFIG_LOCKDEP))
--
2.50.0
* [PATCH 5/6] drm/v3d: Synchronous operations can't timeout
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
` (3 preceding siblings ...)
2025-07-19 13:24 ` [PATCH 4/6] drm/v3d: Address race-condition between per-fd GPU stats and fd release Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-19 13:24 ` [PATCH 6/6] drm/v3d: Protect per-fd reset counter against fd release Maíra Canal
2025-07-21 6:38 ` [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Iago Toral
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
CPU jobs and CACHE CLEAN jobs execute synchronously once the DRM
scheduler starts running them. Therefore, there is no fence to wait on,
nor can those jobs time out.
Hence, remove the `timedout_job` hook from the CPU and CACHE CLEAN
scheduler ops.
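The resulting ops tables (sketched from the hunks below) simply leave
`.timedout_job` unset:

        static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
                .run_job = v3d_cpu_job_run,
                .free_job = v3d_cpu_job_free,
                /* no .timedout_job: the job finishes within run_job itself */
        };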
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_sched.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 4a226ea3bc96865894e360b4115eb836da12d550..715cdc3ada62a271e6dbd0584cdbac24e23c63d6 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -703,6 +703,7 @@ v3d_cpu_job_run(struct drm_sched_job *sched_job)
trace_v3d_cpu_job_end(&v3d->drm, job->job_type);
v3d_job_update_stats(&job->base, V3D_CPU);
+ /* Synchronous operation, so no fence to wait on. */
return NULL;
}
@@ -718,6 +719,7 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
v3d_job_update_stats(job, V3D_CACHE_CLEAN);
+ /* Synchronous operation, so no fence to wait on. */
return NULL;
}
@@ -799,7 +801,7 @@ v3d_render_job_timedout(struct drm_sched_job *sched_job)
}
static enum drm_gpu_sched_stat
-v3d_generic_job_timedout(struct drm_sched_job *sched_job)
+v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
@@ -839,7 +841,7 @@ static const struct drm_sched_backend_ops v3d_render_sched_ops = {
static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
.run_job = v3d_tfu_job_run,
- .timedout_job = v3d_generic_job_timedout,
+ .timedout_job = v3d_tfu_job_timedout,
.free_job = v3d_sched_job_free,
};
@@ -851,13 +853,11 @@ static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
.run_job = v3d_cache_clean_job_run,
- .timedout_job = v3d_generic_job_timedout,
.free_job = v3d_sched_job_free
};
static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
.run_job = v3d_cpu_job_run,
- .timedout_job = v3d_generic_job_timedout,
.free_job = v3d_cpu_job_free
};
--
2.50.0
* [PATCH 6/6] drm/v3d: Protect per-fd reset counter against fd release
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
` (4 preceding siblings ...)
2025-07-19 13:24 ` [PATCH 5/6] drm/v3d: Synchronous operations can't timeout Maíra Canal
@ 2025-07-19 13:24 ` Maíra Canal
2025-07-21 6:38 ` [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Iago Toral
6 siblings, 0 replies; 8+ messages in thread
From: Maíra Canal @ 2025-07-19 13:24 UTC (permalink / raw)
To: Melissa Wen, Iago Toral Quiroga, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel, Maíra Canal
The per-fd reset counter tracks GPU resets caused by jobs submitted
through a specific file descriptor. However, there's a race condition
where the file descriptor can be closed while jobs are still running,
leading to potential access to freed memory when updating the reset
counter.
Ensure that the per-fd reset counter is only updated when the file
descriptor is still valid, preventing use-after-free scenarios during
GPU reset handling.
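A minimal sketch of the guarded update (lock and fields as introduced
earlier in this series):

        spin_lock_irqsave(&v3d->queue[q].queue_lock, irqflags);
        if (v3d_priv)   /* cleared when the fd is released */
                v3d_priv->reset_counter++;
        spin_unlock_irqrestore(&v3d->queue[q].queue_lock, irqflags);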
Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
drivers/gpu/drm/v3d/v3d_sched.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 715cdc3ada62a271e6dbd0584cdbac24e23c63d6..9f57ab41a7980ab097ea4cfed613f0cf6a1e6359 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -724,17 +724,19 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
}
static enum drm_gpu_sched_stat
-v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
+v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job,
+ enum v3d_queue q)
{
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_file_priv *v3d_priv = job->file_priv;
- enum v3d_queue q;
+ unsigned long irqflags;
+ enum v3d_queue i;
mutex_lock(&v3d->reset_lock);
/* block scheduler */
- for (q = 0; q < V3D_MAX_QUEUES; q++)
- drm_sched_stop(&v3d->queue[q].sched, sched_job);
+ for (i = 0; i < V3D_MAX_QUEUES; i++)
+ drm_sched_stop(&v3d->queue[i].sched, sched_job);
if (sched_job)
drm_sched_increase_karma(sched_job);
@@ -743,15 +745,17 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
v3d_reset(v3d);
v3d->reset_counter++;
- v3d_priv->reset_counter++;
+ spin_lock_irqsave(&v3d->queue[q].queue_lock, irqflags);
+ if (v3d_priv)
+ v3d_priv->reset_counter++;
+ spin_unlock_irqrestore(&v3d->queue[q].queue_lock, irqflags);
- for (q = 0; q < V3D_MAX_QUEUES; q++)
+ for (i = 0; i < V3D_MAX_QUEUES; i++)
drm_sched_resubmit_jobs(&v3d->queue[q].sched);
/* Unblock schedulers and restart their jobs. */
- for (q = 0; q < V3D_MAX_QUEUES; q++) {
- drm_sched_start(&v3d->queue[q].sched, 0);
- }
+ for (i = 0; i < V3D_MAX_QUEUES; i++)
+ drm_sched_start(&v3d->queue[i].sched, 0);
mutex_unlock(&v3d->reset_lock);
@@ -779,7 +783,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
return DRM_GPU_SCHED_STAT_NO_HANG;
}
- return v3d_gpu_reset_for_timeout(v3d, sched_job);
+ return v3d_gpu_reset_for_timeout(v3d, sched_job, q);
}
static enum drm_gpu_sched_stat
@@ -805,7 +809,7 @@ v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
- return v3d_gpu_reset_for_timeout(job->v3d, sched_job);
+ return v3d_gpu_reset_for_timeout(job->v3d, sched_job, V3D_TFU);
}
static enum drm_gpu_sched_stat
@@ -824,7 +828,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
return DRM_GPU_SCHED_STAT_NO_HANG;
}
- return v3d_gpu_reset_for_timeout(v3d, sched_job);
+ return v3d_gpu_reset_for_timeout(v3d, sched_job, V3D_CSD);
}
static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
--
2.50.0
* Re: [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes
2025-07-19 13:24 [PATCH 0/6] drm/v3d: General job locking improvements + race-condition fixes Maíra Canal
` (5 preceding siblings ...)
2025-07-19 13:24 ` [PATCH 6/6] drm/v3d: Protect per-fd reset counter against fd release Maíra Canal
@ 2025-07-21 6:38 ` Iago Toral
6 siblings, 0 replies; 8+ messages in thread
From: Iago Toral @ 2025-07-21 6:38 UTC (permalink / raw)
To: Maíra Canal, Melissa Wen, Jose Maria Casanova Crespo,
David Airlie, Simona Vetter
Cc: kernel-dev, dri-devel
Thanks Maíra, series is:
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Iago
On Sat, 2025-07-19 at 10:24 -0300, Maíra Canal wrote:
> This patch series was initially motivated by a race condition
> (exposed
> in PATCH 4/6) where we lacked synchronization for `job->file` access.
> This led to use-after-free issues when a file descriptor was closed
> while a job was still running.
>
> However, beyond fixing this specific race, the series introduces
> broader improvements to active job management and locking. While
> PATCH
> 1/6, 2/6, and 5/6 are primarily code refactors, PATCH 3/6 brings a
> significant change to the locking scheme. Previously, all queues
> shared
> the same spinlock, which caused unnecessary contention during high
> GPU
> usage across different queues. PATCH 3/6 allows queues to operate
> more
> independently.
>
> Finally, PATCH 6/6 addresses a similar race condition to PATCH 4/6,
> but
> this time, on the per-file descriptor reset counter.
>
> Best Regards,
> - Maíra
>
> ---
> Maíra Canal (6):
> drm/v3d: Store a pointer to `struct v3d_file_priv` inside each
> job
> drm/v3d: Store the active job inside the queue's state
> drm/v3d: Replace a global spinlock with a per-queue spinlock
> drm/v3d: Address race-condition between per-fd GPU stats and fd
> release
> drm/v3d: Synchronous operations can't timeout
> drm/v3d: Protect per-fd reset counter against fd release
>
> drivers/gpu/drm/v3d/v3d_drv.c | 14 ++++++-
> drivers/gpu/drm/v3d/v3d_drv.h | 22 ++++-------
> drivers/gpu/drm/v3d/v3d_fence.c | 11 +++---
> drivers/gpu/drm/v3d/v3d_gem.c | 10 ++---
> drivers/gpu/drm/v3d/v3d_irq.c | 68 +++++++++++++-----------------
> --
> drivers/gpu/drm/v3d/v3d_sched.c | 85 +++++++++++++++++++++++-------
> ----------
> drivers/gpu/drm/v3d/v3d_submit.c | 2 +-
> 7 files changed, 108 insertions(+), 104 deletions(-)
> ---
> base-commit: ca2a6abdaee43808034cdb218428d2ed85fd3db8
> change-id: 20250718-v3d-queue-lock-59babfb548bc
>
>