* [PATCH v3 2/2] drm/panthor: Avoid sleep locking in the internal BO size path
2025-02-27 23:16 [PATCH v3 1/2] drm/panthor: Replace sleep locks with spinlocks in fdinfo path Adrián Larumbe
@ 2025-02-27 23:16 ` Adrián Larumbe
2025-02-28 8:18 ` Boris Brezillon
2025-02-28 16:53 ` [PATCH v3 1/2] drm/panthor: Replace sleep locks with spinlocks in fdinfo path Steven Price
1 sibling, 1 reply; 4+ messages in thread
From: Adrián Larumbe @ 2025-02-27 23:16 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Mihail Atanassov, Adrián Larumbe
Cc: kernel, dri-devel, linux-kernel
Commit 434e5ca5b5d7 ("drm/panthor: Expose size of driver internal BO's over
fdinfo") locks the VMS xarray, to avoid UAF errors when the same VM is
being concurrently destroyed by another thread. However, that puts the
current thread in atomic context, which means taking the VMS' heap locks
will trigger a warning as the thread is no longer allowed to sleep.
Because in this case replacing the heap mutex with a spinlock isn't
feasible, the fdinfo handler no longer traverses the list of heaps for
every single VM associated with an open DRM file. Instead, when a new heap
chunk is allocated, its size is accumulated into a pool-wide tally, which
also makes the atomic context code path somewhat faster.
Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Fixes: 3e2c8c718567 ("drm/panthor: Expose size of driver internal BO's over fdinfo")
---
drivers/gpu/drm/panthor/panthor_heap.c | 59 +++++++++++++-------------
drivers/gpu/drm/panthor/panthor_mmu.c | 8 +---
2 files changed, 31 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
index db0285ce5812..7cd05c7ee342 100644
--- a/drivers/gpu/drm/panthor/panthor_heap.c
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -97,6 +97,9 @@ struct panthor_heap_pool {
/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
struct panthor_kernel_bo *gpu_contexts;
+
+ /** @size: Size of all chunks across all heaps in the pool. */
+ atomic_t size;
};
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
@@ -118,7 +121,7 @@ static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
panthor_get_heap_ctx_offset(pool, id);
}
-static void panthor_free_heap_chunk(struct panthor_vm *vm,
+static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
struct panthor_heap *heap,
struct panthor_heap_chunk *chunk)
{
@@ -127,11 +130,13 @@ static void panthor_free_heap_chunk(struct panthor_vm *vm,
heap->chunk_count--;
mutex_unlock(&heap->lock);
+ atomic_sub(heap->chunk_size, &pool->size);
+
panthor_kernel_bo_destroy(chunk->bo);
kfree(chunk);
}
-static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
+static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
struct panthor_vm *vm,
struct panthor_heap *heap,
bool initial_chunk)
@@ -144,7 +149,7 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
if (!chunk)
return -ENOMEM;
- chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
+ chunk->bo = panthor_kernel_bo_create(pool->ptdev, vm, heap->chunk_size,
DRM_PANTHOR_BO_NO_MMAP,
DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
PANTHOR_VM_KERNEL_AUTO_VA);
@@ -180,6 +185,8 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
heap->chunk_count++;
mutex_unlock(&heap->lock);
+ atomic_add(heap->chunk_size, &pool->size);
+
return 0;
err_destroy_bo:
@@ -191,16 +198,16 @@ static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
return ret;
}
-static void panthor_free_heap_chunks(struct panthor_vm *vm,
+static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
struct panthor_heap *heap)
{
struct panthor_heap_chunk *chunk, *tmp;
list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
- panthor_free_heap_chunk(vm, heap, chunk);
+ panthor_free_heap_chunk(pool, heap, chunk);
}
-static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
+static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
struct panthor_vm *vm,
struct panthor_heap *heap,
u32 chunk_count)
@@ -209,7 +216,7 @@ static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
u32 i;
for (i = 0; i < chunk_count; i++) {
- ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
+ ret = panthor_alloc_heap_chunk(pool, vm, heap, true);
if (ret)
return ret;
}
@@ -226,7 +233,7 @@ panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
if (!heap)
return -EINVAL;
- panthor_free_heap_chunks(pool->vm, heap);
+ panthor_free_heap_chunks(pool, heap);
mutex_destroy(&heap->lock);
kfree(heap);
return 0;
@@ -308,7 +315,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
heap->max_chunks = max_chunks;
heap->target_in_flight = target_in_flight;
- ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
+ ret = panthor_alloc_heap_chunks(pool, vm, heap,
initial_chunk_count);
if (ret)
goto err_free_heap;
@@ -342,7 +349,7 @@ int panthor_heap_create(struct panthor_heap_pool *pool,
return id;
err_free_heap:
- panthor_free_heap_chunks(pool->vm, heap);
+ panthor_free_heap_chunks(pool, heap);
mutex_destroy(&heap->lock);
kfree(heap);
@@ -389,6 +396,7 @@ int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
removed = chunk;
list_del(&chunk->node);
heap->chunk_count--;
+ atomic_sub(heap->chunk_size, &pool->size);
break;
}
}
@@ -466,7 +474,7 @@ int panthor_heap_grow(struct panthor_heap_pool *pool,
* further jobs in this queue fail immediately instead of having to
* wait for the job timeout.
*/
- ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
+ ret = panthor_alloc_heap_chunk(pool, pool->vm, heap, false);
if (ret)
goto out_unlock;
@@ -560,6 +568,8 @@ panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
if (ret)
goto err_destroy_pool;
+ atomic_add(pool->gpu_contexts->obj->size, &pool->size);
+
return pool;
err_destroy_pool:
@@ -594,8 +604,10 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
xa_for_each(&pool->xa, i, heap)
drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
- if (!IS_ERR_OR_NULL(pool->gpu_contexts))
+ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
+ atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
panthor_kernel_bo_destroy(pool->gpu_contexts);
+ }
/* Reflects the fact the pool has been destroyed. */
pool->vm = NULL;
@@ -605,27 +617,16 @@ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
}
/**
- * panthor_heap_pool_size() - Calculate size of all chunks across all heaps in a pool
- * @pool: Pool whose total chunk size to calculate.
+ * panthor_heap_pool_size() - Get a heap pool's total size
+ * @pool: Pool whose total chunks size to return
*
- * This function adds the size of all heap chunks across all heaps in the
- * argument pool. It also adds the size of the gpu contexts kernel bo.
- * It is meant to be used by fdinfo for displaying the size of internal
- * driver BO's that aren't exposed to userspace through a GEM handle.
+ * Returns the aggregated size of all chunks for all heaps in the pool
*
*/
size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
{
- struct panthor_heap *heap;
- unsigned long i;
- size_t size = 0;
-
- down_read(&pool->lock);
- xa_for_each(&pool->xa, i, heap)
- size += heap->chunk_size * heap->chunk_count;
- up_read(&pool->lock);
-
- size += pool->gpu_contexts->obj->size;
+ if (!pool)
+ return 0;
- return size;
+ return atomic_read(&pool->size);
}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 8c6fc587ddc3..12a02e28f50f 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1963,13 +1963,7 @@ void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats
xa_lock(&pfile->vms->xa);
xa_for_each(&pfile->vms->xa, i, vm) {
- size_t size = 0;
-
- mutex_lock(&vm->heaps.lock);
- if (vm->heaps.pool)
- size = panthor_heap_pool_size(vm->heaps.pool);
- mutex_unlock(&vm->heaps.lock);
-
+ size_t size = panthor_heap_pool_size(vm->heaps.pool);
stats->resident += size;
if (vm->as.id >= 0)
stats->active += size;
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH v3 1/2] drm/panthor: Replace sleep locks with spinlocks in fdinfo path
2025-02-27 23:16 [PATCH v3 1/2] drm/panthor: Replace sleep locks with spinlocks in fdinfo path Adrián Larumbe
2025-02-27 23:16 ` [PATCH v3 2/2] drm/panthor: Avoid sleep locking in the internal BO size path Adrián Larumbe
@ 2025-02-28 16:53 ` Steven Price
1 sibling, 0 replies; 4+ messages in thread
From: Steven Price @ 2025-02-28 16:53 UTC (permalink / raw)
To: Adrián Larumbe, Boris Brezillon, Liviu Dudau,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter
Cc: kernel, dri-devel, linux-kernel
On 27/02/2025 23:16, Adrián Larumbe wrote:
> Commit 0590c94c3596 ("drm/panthor: Fix race condition when gathering fdinfo
> group samples") introduced an xarray lock to deal with potential
> use-after-free errors when accessing groups fdinfo figures. However, this
> toggles the kernel's atomic context status, so the next nested mutex lock
> will raise a warning when the kernel is compiled with mutex debug options:
>
> CONFIG_DEBUG_RT_MUTEXES=y
> CONFIG_DEBUG_MUTEXES=y
>
> Replace Panthor's group fdinfo data mutex with a guarded spinlock.
>
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> 0590c94c3596 ("drm/panthor: Fix race condition when gathering fdinfo group samples")
Missing "Fixes:" prefix
> Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Otherwise
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panthor/panthor_sched.c | 26 ++++++++++++-------------
> 1 file changed, 12 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 1a276db095ff..4d31d1967716 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -9,6 +9,7 @@
> #include <drm/panthor_drm.h>
>
> #include <linux/build_bug.h>
> +#include <linux/cleanup.h>
> #include <linux/clk.h>
> #include <linux/delay.h>
> #include <linux/dma-mapping.h>
> @@ -631,10 +632,10 @@ struct panthor_group {
> struct panthor_gpu_usage data;
>
> /**
> - * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
> - * and job post-completion processing function
> + * @fdinfo.lock: Spinlock to govern concurrent access from drm file's fdinfo
> + * callback and job post-completion processing function
> */
> - struct mutex lock;
> + spinlock_t lock;
>
> /** @fdinfo.kbo_sizes: Aggregate size of private kernel BO's held by the group. */
> size_t kbo_sizes;
> @@ -910,8 +911,6 @@ static void group_release_work(struct work_struct *work)
> release_work);
> u32 i;
>
> - mutex_destroy(&group->fdinfo.lock);
> -
> for (i = 0; i < group->queue_count; i++)
> group_free_queue(group, group->queues[i]);
>
> @@ -2861,12 +2860,12 @@ static void update_fdinfo_stats(struct panthor_job *job)
> struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap;
> struct panthor_job_profiling_data *data = &slots[job->profiling.slot];
>
> - mutex_lock(&group->fdinfo.lock);
> - if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
> - fdinfo->cycles += data->cycles.after - data->cycles.before;
> - if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
> - fdinfo->time += data->time.after - data->time.before;
> - mutex_unlock(&group->fdinfo.lock);
> + scoped_guard(spinlock, &group->fdinfo.lock) {
> + if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
> + fdinfo->cycles += data->cycles.after - data->cycles.before;
> + if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
> + fdinfo->time += data->time.after - data->time.before;
> + }
> }
>
> void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
> @@ -2880,12 +2879,11 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
>
> xa_lock(&gpool->xa);
> xa_for_each(&gpool->xa, i, group) {
> - mutex_lock(&group->fdinfo.lock);
> + guard(spinlock)(&group->fdinfo.lock);
> pfile->stats.cycles += group->fdinfo.data.cycles;
> pfile->stats.time += group->fdinfo.data.time;
> group->fdinfo.data.cycles = 0;
> group->fdinfo.data.time = 0;
> - mutex_unlock(&group->fdinfo.lock);
> }
> xa_unlock(&gpool->xa);
> }
> @@ -3537,7 +3535,7 @@ int panthor_group_create(struct panthor_file *pfile,
> mutex_unlock(&sched->reset.lock);
>
> add_group_kbo_sizes(group->ptdev, group);
> - mutex_init(&group->fdinfo.lock);
> + spin_lock_init(&group->fdinfo.lock);
>
> return gid;
>
^ permalink raw reply [flat|nested] 4+ messages in thread