AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Khatri, Sunil" <sukhatri@amd.com>
To: "Christian König" <christian.koenig@amd.com>,
	"Sunil Khatri" <sunil.khatri@amd.com>,
	"Alex Deucher" <alexander.deucher@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu/userq: fix access to stale wptr mapping
Date: Tue, 5 May 2026 12:59:37 +0530	[thread overview]
Message-ID: <e9078a36-09fb-4a99-ae56-d513fa787b40@amd.com> (raw)
In-Reply-To: <ff00838c-128b-40be-b215-4f21012376cd@amd.com>


On 04-05-2026 06:35 pm, Christian König wrote:
> On 5/4/26 14:56, Sunil Khatri wrote:
>> Use drm_exec to take both locks i.e vm root bo and
>> wptr_obj bo to access the mapping data properly.
>>
>> This fixes the security issue of unmap the wptr_obj while
>> a queue creation is in progress and passing other
>> bo at same address.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 122 ++++++++++-----------
>>   1 file changed, 57 insertions(+), 65 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> index 501e2e10b4a6..3d4f83015488 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> @@ -30,34 +30,6 @@
>>   #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>   #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>   
>> -static int
>> -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
>> -{
>> -	int ret;
>> -
>> -	ret = amdgpu_bo_reserve(bo, true);
>> -	if (ret) {
>> -		DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
>> -		goto err_reserve_bo_failed;
>> -	}
>> -
>> -	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
>> -	if (ret) {
>> -		DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
>> -		goto err_map_bo_gart_failed;
>> -	}
>> -
>> -	amdgpu_bo_unreserve(bo);
>> -	bo = amdgpu_bo_ref(bo);
>> -
>> -	return 0;
>> -
>> -err_map_bo_gart_failed:
>> -	amdgpu_bo_unreserve(bo);
>> -err_reserve_bo_failed:
>> -	return ret;
>> -}
>> -
>>   static int
>>   mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>>   			      struct amdgpu_userq_mgr *uq_mgr,
>> @@ -65,55 +37,75 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>>   			      uint64_t wptr)
>>   {
>>   	struct amdgpu_bo_va_mapping *wptr_mapping;
>> -	struct amdgpu_vm *wptr_vm;
>>   	struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
>> +	struct amdgpu_bo *obj;
>> +	struct amdgpu_vm *vm = queue->vm;
>> +	struct drm_exec exec;
>>   	int ret;
>>   
>> -	wptr_vm = queue->vm;
>> -	ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
>> -	if (ret)
>> -		return ret;
>> -
>>   	wptr &= AMDGPU_GMC_HOLE_MASK;
>> -	wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
>> -	amdgpu_bo_unreserve(wptr_vm->root.bo);
>> -	if (!wptr_mapping) {
>> -		DRM_ERROR("Failed to lookup wptr bo\n");
>> -		return -EINVAL;
>> -	}
>>   
>> -	wptr_obj->obj = wptr_mapping->bo_va->base.bo;
>> -	if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
>> -		DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
>> -		return -EINVAL;
>> -	}
>> +	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> This should probably be 2 instead of 0.
Noted
>
>> +	drm_exec_until_all_locked(&exec) {
>> +		ret = amdgpu_vm_lock_pd(vm, &exec, 1);
>> +		drm_exec_retry_on_contention(&exec);
>> +		if (unlikely(ret))
>> +			goto fail_lock;
>> +
>> +		wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
>> +		if (!wptr_mapping) {
>> +			DRM_ERROR("Failed to lock up wptr bo\n");
> Please drop that error message. It can spam the logs when userspace intentionally gives incorrect values.
Noted
>
>> +			ret = -EINVAL;
>> +			goto fail_lock;
>> +		}
>>   
>> -	ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
>> -	if (ret) {
>> -		DRM_ERROR("Failed to map wptr bo to GART\n");
>> -		return ret;
>> -	}
>> +		obj = wptr_mapping->bo_va->base.bo;
>> +		ret = drm_exec_prepare_obj(&exec, &obj->tbo.base, 1);
> Using drm_exec_lock_obj() should be sufficient.
>
> We don't need a fence slot for this use case.
Sure
>
>> +		drm_exec_retry_on_contention(&exec);
>> +		if (unlikely(ret)) {
>> +			DRM_ERROR("Failed to prepare wptr bo\n");
> Same here, that this can fail is normal handling. The worst case is OOM and that is already printed in the logs.
Noted
>
>> +			goto fail_lock;
>> +		}
> We got all the locks now, so the drm_exec_until_all_locked() loop can be closed here.
True, Noted
>
>>   
>> -	ret = amdgpu_bo_reserve(wptr_obj->obj, true);
>> -	if (ret) {
>> -		DRM_ERROR("Failed to reserve wptr bo\n");
>> -		return ret;
>> -	}
>
>> +		/* mapping now should be stable since both the locks are held */
>> +		wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
>> +		if (!wptr_mapping) {
>> +			DRM_ERROR("Failed to lock up wptr bo\n");
>> +			ret = -EINVAL;
>> +			goto fail_lock;
>> +		}
> Doing that again is unecessary. We are holding the VM lock above while doing the lockup.

First time we had mapping at time we did not had wptr object lock. Isnt 
it possible that either mapping is changed/updated or the bo is freed. 
At this moment we have both the locks and we

should be having correct mapping.

>
>>   
>> -	/* TODO use eviction fence instead of pinning. */
>> -	ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
>> -	if (ret) {
>> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n");
>> -		goto unresv_bo;
>> -	}
>> +		wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo);
> That is now unecessary as well.

We are holding the reference in original code and freeing it in destroy. 
Dont we want to hold any reference now, how do we make sure that its not 
being freed in between?

If we dont need a reference then we need to remove from the destroy path 
too. In amdgpu_userq_destroy we are doing unpin and unref both db_obj 
and wptr_obj.

>
>> +
>> +		if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
>> +			DRM_ERROR("Requested wptr bo size is larger than one page\n");
> Same, please drop that error message.
Noted
>
>> +			ret = -EINVAL;
>> +			goto fail_map;
>> +		}
>> +
>> +		ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
>> +		if (ret) {
>> +			DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> That error message is useful.
Got it.
>
>> +			goto fail_map;
>> +		}
>> +
>> +		/* TODO use eviction fence instead of pinning. */
>> +		ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
> Oh! The piun needs to come before the alloc_gart! That's wrong in the existing code as well.
Got it.
>
>> +		if (ret) {
>> +			DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret);
> That error message makes sense.
regards
Sunil Khatri

>
> Regards,
> Christian.
>
>> +			goto fail_map;
>> +		}
>>   
>> -	queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
>> -	amdgpu_bo_unreserve(wptr_obj->obj);
>> +		queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
>> +	}
>>   
>> +	drm_exec_fini(&exec);
>>   	return 0;
>>   
>> -unresv_bo:
>> -	amdgpu_bo_unreserve(wptr_obj->obj);
>> +fail_map:
>> +	amdgpu_bo_unref(&wptr_obj->obj);
>> +fail_lock:
>> +	drm_exec_fini(&exec);
>>   	return ret;
>>   
>>   }

  reply	other threads:[~2026-05-05  7:29 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-04 12:56 [PATCH] drm/amdgpu/userq: fix access to stale wptr mapping Sunil Khatri
2026-05-04 13:05 ` Christian König
2026-05-05  7:29   ` Khatri, Sunil [this message]
2026-05-05  7:54     ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e9078a36-09fb-4a99-ae56-d513fa787b40@amd.com \
    --to=sukhatri@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=sunil.khatri@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox