From: "Khatri, Sunil" <sukhatri@amd.com>
To: "Christian König" <ckoenig.leichtzumerken@gmail.com>,
alexander.deucher@amd.com, Prike.Liang@amd.com,
amd-gfx@lists.freedesktop.org
Cc: christian.koenig@amd.com
Subject: Re: [PATCH 09/11] drm/amdgpu: revert to old status lock handling v4
Date: Thu, 23 Apr 2026 16:15:37 +0530 [thread overview]
Message-ID: <3cd5634b-e715-4e92-8e21-26758f790458@amd.com> (raw)
In-Reply-To: <20260421125513.4545-9-christian.koenig@amd.com>
Not sure if anything changed in this version; if not, it's already reviewed.
regards
Sunil Khatri
On 21-04-2026 06:25 pm, Christian König wrote:
> It turned out that protecting the status of each bo_va with a
> spinlock was just hiding problems instead of solving them.
>
> Revert the whole approach, add a separate stats_lock and lockdep
> assertions that the correct reservation lock is held all over the place.
>
> This not only allows for better checks if a state transition is properly
> protected by a lock, but also switching back to using list macros to
> iterate over the state of lists protected by the dma_resv lock of the
> root PD.
>
> v2: re-add missing check
> v3: split into two patches
> v4: re-apply by fixing holding the VM lock at the right places.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 8 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 146 ++++++++--------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 4 -
> 4 files changed, 68 insertions(+), 105 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index ad6dac17dd21..7fc733ba962e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -1048,12 +1048,12 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
> struct amdgpu_bo *bo;
> int ret;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> while (!list_empty(&vm->invalidated)) {
> bo_va = list_first_entry(&vm->invalidated,
> struct amdgpu_bo_va,
> base.vm_status);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> bo = bo_va->base.bo;
> ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
> @@ -1070,9 +1070,9 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
> if (ret)
> return ret;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> }
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 63156289ae7f..e2a21a66b28f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -167,12 +167,10 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
>
> vm_bo->moved = true;
> amdgpu_vm_assert_locked(vm);
> - spin_lock(&vm_bo->vm->status_lock);
> if (bo->tbo.type == ttm_bo_type_kernel)
> list_move(&vm_bo->vm_status, &vm->evicted);
> else
> list_move_tail(&vm_bo->vm_status, &vm->evicted);
> - spin_unlock(&vm_bo->vm->status_lock);
> }
> /**
> * amdgpu_vm_bo_moved - vm_bo is moved
> @@ -185,9 +183,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
> static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
> {
> amdgpu_vm_assert_locked(vm_bo->vm);
> - spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
> - spin_unlock(&vm_bo->vm->status_lock);
> }
>
> /**
> @@ -201,9 +197,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
> static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> {
> amdgpu_vm_assert_locked(vm_bo->vm);
> - spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
> - spin_unlock(&vm_bo->vm->status_lock);
> vm_bo->moved = false;
> }
>
> @@ -217,9 +211,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> */
> static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
> {
> - spin_lock(&vm_bo->vm->status_lock);
> + spin_lock(&vm_bo->vm->invalidated_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
> - spin_unlock(&vm_bo->vm->status_lock);
> + spin_unlock(&vm_bo->vm->invalidated_lock);
> }
>
> /**
> @@ -232,10 +226,9 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
> */
> static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
> {
> + amdgpu_vm_assert_locked(vm_bo->vm);
> vm_bo->moved = true;
> - spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
> - spin_unlock(&vm_bo->vm->status_lock);
> }
>
> /**
> @@ -249,13 +242,10 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
> static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> {
> amdgpu_vm_assert_locked(vm_bo->vm);
> - if (vm_bo->bo->parent) {
> - spin_lock(&vm_bo->vm->status_lock);
> + if (vm_bo->bo->parent)
> list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
> - spin_unlock(&vm_bo->vm->status_lock);
> - } else {
> + else
> amdgpu_vm_bo_idle(vm_bo);
> - }
> }
>
> /**
> @@ -269,9 +259,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
> {
> amdgpu_vm_assert_locked(vm_bo->vm);
> - spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->done);
> - spin_unlock(&vm_bo->vm->status_lock);
> }
>
> /**
> @@ -285,13 +273,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
> {
> struct amdgpu_vm_bo_base *vm_bo, *tmp;
>
> - amdgpu_vm_assert_locked(vm);
> -
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> list_splice_init(&vm->done, &vm->invalidated);
> list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
> vm_bo->moved = true;
> + spin_unlock(&vm->invalidated_lock);
>
> + amdgpu_vm_assert_locked(vm);
> list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
> struct amdgpu_bo *bo = vm_bo->bo;
>
> @@ -301,14 +289,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
> else if (bo->parent)
> list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
> }
> - spin_unlock(&vm->status_lock);
> }
>
> /**
> * amdgpu_vm_update_shared - helper to update shared memory stat
> * @base: base structure for tracking BO usage in a VM
> *
> - * Takes the vm status_lock and updates the shared memory stat. If the basic
> + * Takes the vm stats_lock and updates the shared memory stat. If the basic
> * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
> * as well.
> */
> @@ -321,7 +308,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
> bool shared;
>
> dma_resv_assert_held(bo->tbo.base.resv);
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->stats_lock);
> shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
> if (base->shared != shared) {
> base->shared = shared;
> @@ -333,7 +320,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
> vm->stats[bo_memtype].drm.private += size;
> }
> }
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->stats_lock);
> }
>
> /**
> @@ -358,11 +345,11 @@ void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
> * be bo->tbo.resource
> * @sign: if we should add (+1) or subtract (-1) from the stat
> *
> - * Caller need to have the vm status_lock held. Useful for when multiple update
> + * Caller need to have the vm stats_lock held. Useful for when multiple update
> * need to happen at the same time.
> */
> static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
> - struct ttm_resource *res, int sign)
> + struct ttm_resource *res, int sign)
> {
> struct amdgpu_vm *vm = base->vm;
> struct amdgpu_bo *bo = base->bo;
> @@ -386,7 +373,8 @@ static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
> */
> if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
> vm->stats[res_memtype].drm.purgeable += size;
> - if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
> + if (!(bo->preferred_domains &
> + amdgpu_mem_type_to_domain(res_memtype)))
> vm->stats[bo_memtype].evicted += size;
> }
> }
> @@ -405,9 +393,9 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
> {
> struct amdgpu_vm *vm = base->vm;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->stats_lock);
> amdgpu_vm_update_stats_locked(base, res, sign);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->stats_lock);
> }
>
> /**
> @@ -433,10 +421,10 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
> base->next = bo->vm_bo;
> bo->vm_bo = base;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->stats_lock);
> base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
> amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->stats_lock);
>
> if (!amdgpu_vm_is_bo_always_valid(vm, bo))
> return;
> @@ -495,25 +483,25 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
> int ret;
>
> /* We can only trust prev->next while holding the lock */
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> while (!list_is_head(prev->next, &vm->done)) {
> bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
>
> bo = bo_va->base.bo;
> if (bo) {
> amdgpu_bo_ref(bo);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
> amdgpu_bo_unref(&bo);
> if (unlikely(ret))
> return ret;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> }
> prev = prev->next;
> }
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> return 0;
> }
> @@ -609,7 +597,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> void *param)
> {
> uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
> - struct amdgpu_vm_bo_base *bo_base;
> + struct amdgpu_vm_bo_base *bo_base, *tmp;
> struct amdgpu_bo *bo;
> int r;
>
> @@ -622,13 +610,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> return r;
> }
>
> - spin_lock(&vm->status_lock);
> - while (!list_empty(&vm->evicted)) {
> - bo_base = list_first_entry(&vm->evicted,
> - struct amdgpu_vm_bo_base,
> - vm_status);
> - spin_unlock(&vm->status_lock);
> -
> + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
> bo = bo_base->bo;
>
> r = validate(param, bo);
> @@ -641,26 +623,21 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
> amdgpu_vm_bo_relocated(bo_base);
> }
> - spin_lock(&vm->status_lock);
> }
> - while (ticket && !list_empty(&vm->evicted_user)) {
> - bo_base = list_first_entry(&vm->evicted_user,
> - struct amdgpu_vm_bo_base,
> - vm_status);
> - spin_unlock(&vm->status_lock);
>
> - bo = bo_base->bo;
> - dma_resv_assert_held(bo->tbo.base.resv);
> + if (ticket) {
> + list_for_each_entry_safe(bo_base, tmp, &vm->evicted_user,
> + vm_status) {
> + bo = bo_base->bo;
> + dma_resv_assert_held(bo->tbo.base.resv);
>
> - r = validate(param, bo);
> - if (r)
> - return r;
> -
> - amdgpu_vm_bo_invalidated(bo_base);
> + r = validate(param, bo);
> + if (r)
> + return r;
>
> - spin_lock(&vm->status_lock);
> + amdgpu_vm_bo_invalidated(bo_base);
> + }
> }
> - spin_unlock(&vm->status_lock);
>
> amdgpu_vm_eviction_lock(vm);
> vm->evicting = false;
> @@ -689,9 +666,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
> ret = !vm->evicting;
> amdgpu_vm_eviction_unlock(vm);
>
> - spin_lock(&vm->status_lock);
> ret &= list_empty(&vm->evicted);
> - spin_unlock(&vm->status_lock);
>
> spin_lock(&vm->immediate.lock);
> ret &= !vm->immediate.stopped;
> @@ -985,18 +960,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> struct amdgpu_vm *vm, bool immediate)
> {
> struct amdgpu_vm_update_params params;
> - struct amdgpu_vm_bo_base *entry;
> + struct amdgpu_vm_bo_base *entry, *tmp;
> bool flush_tlb_needed = false;
> - LIST_HEAD(relocated);
> int r, idx;
>
> amdgpu_vm_assert_locked(vm);
>
> - spin_lock(&vm->status_lock);
> - list_splice_init(&vm->relocated, &relocated);
> - spin_unlock(&vm->status_lock);
> -
> - if (list_empty(&relocated))
> + if (list_empty(&vm->relocated))
> return 0;
>
> if (!drm_dev_enter(adev_to_drm(adev), &idx))
> @@ -1012,7 +982,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> if (r)
> goto error;
>
> - list_for_each_entry(entry, &relocated, vm_status) {
> + list_for_each_entry(entry, &vm->relocated, vm_status) {
> /* vm_flush_needed after updating moved PDEs */
> flush_tlb_needed |= entry->moved;
>
> @@ -1028,9 +998,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> if (flush_tlb_needed)
> atomic64_inc(&vm->tlb_seq);
>
> - while (!list_empty(&relocated)) {
> - entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
> - vm_status);
> + list_for_each_entry_safe(entry, tmp, &vm->relocated, vm_status) {
> amdgpu_vm_bo_idle(entry);
> }
>
> @@ -1260,9 +1228,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
> struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
> {
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->stats_lock);
> memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->stats_lock);
> }
>
> /**
> @@ -1629,29 +1597,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> struct ww_acquire_ctx *ticket)
> {
> - struct amdgpu_bo_va *bo_va;
> + struct amdgpu_bo_va *bo_va, *tmp;
> struct dma_resv *resv;
> bool clear, unlock;
> int r;
>
> - spin_lock(&vm->status_lock);
> - while (!list_empty(&vm->moved)) {
> - bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
> - base.vm_status);
> - spin_unlock(&vm->status_lock);
> -
> + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
> /* Per VM BOs never need to bo cleared in the page tables */
> r = amdgpu_vm_bo_update(adev, bo_va, false);
> if (r)
> return r;
> - spin_lock(&vm->status_lock);
> }
>
> + spin_lock(&vm->invalidated_lock);
> while (!list_empty(&vm->invalidated)) {
> bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
> base.vm_status);
> resv = bo_va->base.bo->tbo.base.resv;
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> /* Try to reserve the BO to avoid clearing its ptes */
> if (!adev->debug_vm && dma_resv_trylock(resv)) {
> @@ -1683,9 +1646,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
> amdgpu_vm_bo_evicted_user(&bo_va->base);
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> }
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> return 0;
> }
> @@ -2223,9 +2186,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> }
> }
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->invalidated_lock);
> list_del(&bo_va->base.vm_status);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->invalidated_lock);
>
> list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
> list_del(&mapping->list);
> @@ -2333,10 +2296,10 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
> for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
> struct amdgpu_vm *vm = bo_base->vm;
>
> - spin_lock(&vm->status_lock);
> + spin_lock(&vm->stats_lock);
> amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
> amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
> - spin_unlock(&vm->status_lock);
> + spin_unlock(&vm->stats_lock);
> }
>
> amdgpu_vm_bo_invalidate(bo, evicted);
> @@ -2608,11 +2571,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> INIT_LIST_HEAD(&vm->relocated);
> INIT_LIST_HEAD(&vm->moved);
> INIT_LIST_HEAD(&vm->idle);
> + spin_lock_init(&vm->invalidated_lock);
> INIT_LIST_HEAD(&vm->invalidated);
> - spin_lock_init(&vm->status_lock);
> INIT_LIST_HEAD(&vm->freed);
> INIT_LIST_HEAD(&vm->done);
> INIT_KFIFO(vm->faults);
> + spin_lock_init(&vm->stats_lock);
>
> r = amdgpu_vm_init_entities(adev, vm);
> if (r)
> @@ -3105,7 +3069,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>
> amdgpu_vm_assert_locked(vm);
>
> - spin_lock(&vm->status_lock);
> seq_puts(m, "\tIdle BOs:\n");
> list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
> if (!bo_va->base.bo)
> @@ -3143,11 +3106,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
> id = 0;
>
> seq_puts(m, "\tInvalidated BOs:\n");
> + spin_lock(&vm->invalidated_lock);
> list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
> if (!bo_va->base.bo)
> continue;
> total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> }
> + spin_unlock(&vm->invalidated_lock);
> total_invalidated_objs = id;
> id = 0;
>
> @@ -3157,7 +3122,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
> continue;
> total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> }
> - spin_unlock(&vm->status_lock);
> total_done_objs = id;
>
> seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index f33ea7f8509b..b5216bc1292f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -205,11 +205,11 @@ struct amdgpu_vm_bo_base {
> /* protected by bo being reserved */
> struct amdgpu_vm_bo_base *next;
>
> - /* protected by vm status_lock */
> + /* protected by vm reservation and invalidated_lock */
> struct list_head vm_status;
>
> /* if the bo is counted as shared in mem stats
> - * protected by vm status_lock */
> + * protected by vm BO being reserved */
> bool shared;
>
> /* protected by the BO being reserved */
> @@ -345,10 +345,8 @@ struct amdgpu_vm {
> bool evicting;
> unsigned int saved_flags;
>
> - /* Lock to protect vm_bo add/del/move on all lists of vm */
> - spinlock_t status_lock;
> -
> - /* Memory statistics for this vm, protected by status_lock */
> + /* Memory statistics for this vm, protected by stats_lock */
> + spinlock_t stats_lock;
> struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
>
> /*
> @@ -356,6 +354,8 @@ struct amdgpu_vm {
> * PDs, PTs or per VM BOs. The state transits are:
> *
> * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
> + *
> + * Lists are protected by the root PD dma_resv lock.
> */
>
> /* Per-VM and PT BOs who needs a validation */
> @@ -376,7 +376,10 @@ struct amdgpu_vm {
> * state transits are:
> *
> * evicted_user or invalidated -> done
> + *
> + * Lists are protected by the invalidated_lock.
> */
> + spinlock_t invalidated_lock;
>
> /* BOs for user mode queues that need a validation */
> struct list_head evicted_user;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 31a437ce9570..7bdd664f0770 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -544,9 +544,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> entry->bo->vm_bo = NULL;
> ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
>
> - spin_lock(&entry->vm->status_lock);
> list_del(&entry->vm_status);
> - spin_unlock(&entry->vm->status_lock);
> amdgpu_bo_unref(&entry->bo);
> }
>
> @@ -590,7 +588,6 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
> struct amdgpu_vm_pt_cursor seek;
> struct amdgpu_vm_bo_base *entry;
>
> - spin_lock(&params->vm->status_lock);
> for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
> if (entry && entry->bo)
> list_move(&entry->vm_status, &params->tlb_flush_waitlist);
> @@ -598,7 +595,6 @@ static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
>
> /* enter start node now */
> list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
> - spin_unlock(&params->vm->status_lock);
> }
>
> /**
next prev parent reply other threads:[~2026-04-23 10:45 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-21 12:55 [PATCH 01/11] drm/amdgpu: fix AMDGPU_INFO_READ_MMR_REG Christian König
2026-04-21 12:55 ` [PATCH 02/11] drm/amdgpu: remove deadlocks from amdgpu_userq_pre_reset Christian König
2026-04-22 4:53 ` Khatri, Sunil
2026-04-22 7:13 ` Christian König
2026-04-22 7:19 ` Khatri, Sunil
2026-04-22 7:24 ` Christian König
2026-04-22 7:29 ` Khatri, Sunil
2026-04-27 8:45 ` Liang, Prike
2026-04-21 12:55 ` [PATCH 03/11] drm/amdgpu: nuke amdgpu_userq_fence_free Christian König
2026-04-22 8:29 ` Khatri, Sunil
2026-04-22 9:26 ` Christian König
2026-04-22 9:40 ` Khatri, Sunil
2026-04-22 10:12 ` Christian König
2026-04-22 14:32 ` Khatri, Sunil
2026-04-27 6:21 ` Liang, Prike
2026-04-21 12:55 ` [PATCH 04/11] drm/amdgpu: rework amdgpu_userq_signal_ioctl Christian König
2026-04-22 10:08 ` Khatri, Sunil
2026-04-22 10:14 ` Christian König
2026-04-22 15:14 ` Khatri, Sunil
2026-04-23 9:58 ` Liang, Prike
2026-04-23 10:47 ` Christian König
2026-04-23 10:54 ` Khatri, Sunil
2026-04-24 8:01 ` Liang, Prike
2026-04-24 13:02 ` Christian König
2026-04-21 12:55 ` [PATCH 05/11] drm/amdgpu: rework userq fence signal processing Christian König
2026-04-22 10:16 ` Khatri, Sunil
2026-04-21 12:55 ` [PATCH 06/11] drm/amdgpu: remove almost all calls to amdgpu_userq_detect_and_reset_queues Christian König
2026-04-22 10:20 ` Khatri, Sunil
2026-04-21 12:55 ` [PATCH 07/11] drm/amdgpu: fix userq hang detection and reset Christian König
2026-04-22 10:35 ` Khatri, Sunil
2026-04-21 12:55 ` [PATCH 08/11] drm/amdgpu: rework userq reset work handling Christian König
2026-04-23 10:43 ` Khatri, Sunil
2026-04-21 12:55 ` [PATCH 09/11] drm/amdgpu: revert to old status lock handling v4 Christian König
2026-04-23 10:45 ` Khatri, Sunil [this message]
2026-04-21 12:55 ` [PATCH 10/11] drm/amdgpu: restructure VM state machine v2 Christian König
2026-04-23 10:46 ` Khatri, Sunil
2026-04-21 12:55 ` [PATCH 11/11] drm/amdgpu: WIP sync amdgpu_ttm_fill_mem only to kernel fences Christian König
2026-04-23 10:47 ` Khatri, Sunil
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3cd5634b-e715-4e92-8e21-26758f790458@amd.com \
--to=sukhatri@amd.com \
--cc=Prike.Liang@amd.com \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=ckoenig.leichtzumerken@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox