AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: Sunil Khatri <sunil.khatri@amd.com>,
	Alex Deucher <alexander.deucher@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu/userq: fix access to stale wptr mapping
Date: Mon, 4 May 2026 15:05:44 +0200	[thread overview]
Message-ID: <ff00838c-128b-40be-b215-4f21012376cd@amd.com> (raw)
In-Reply-To: <20260504125645.91836-1-sunil.khatri@amd.com>

On 5/4/26 14:56, Sunil Khatri wrote:
> Use drm_exec to take both locks i.e vm root bo and
> wptr_obj bo to access the mapping data properly.
> 
> This fixes the security issue of unmap the wptr_obj while
> a queue creation is in progress and passing other
> bo at same address.
> 
> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 122 ++++++++++-----------
>  1 file changed, 57 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index 501e2e10b4a6..3d4f83015488 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -30,34 +30,6 @@
>  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>  
> -static int
> -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
> -{
> -	int ret;
> -
> -	ret = amdgpu_bo_reserve(bo, true);
> -	if (ret) {
> -		DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
> -		goto err_reserve_bo_failed;
> -	}
> -
> -	ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> -	if (ret) {
> -		DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> -		goto err_map_bo_gart_failed;
> -	}
> -
> -	amdgpu_bo_unreserve(bo);
> -	bo = amdgpu_bo_ref(bo);
> -
> -	return 0;
> -
> -err_map_bo_gart_failed:
> -	amdgpu_bo_unreserve(bo);
> -err_reserve_bo_failed:
> -	return ret;
> -}
> -
>  static int
>  mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>  			      struct amdgpu_userq_mgr *uq_mgr,
> @@ -65,55 +37,75 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>  			      uint64_t wptr)
>  {
>  	struct amdgpu_bo_va_mapping *wptr_mapping;
> -	struct amdgpu_vm *wptr_vm;
>  	struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
> +	struct amdgpu_bo *obj;
> +	struct amdgpu_vm *vm = queue->vm;
> +	struct drm_exec exec;
>  	int ret;
>  
> -	wptr_vm = queue->vm;
> -	ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> -	if (ret)
> -		return ret;
> -
>  	wptr &= AMDGPU_GMC_HOLE_MASK;
> -	wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
> -	amdgpu_bo_unreserve(wptr_vm->root.bo);
> -	if (!wptr_mapping) {
> -		DRM_ERROR("Failed to lookup wptr bo\n");
> -		return -EINVAL;
> -	}
>  
> -	wptr_obj->obj = wptr_mapping->bo_va->base.bo;
> -	if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> -		DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
> -		return -EINVAL;
> -	}
> +	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);

This should probably be 2 instead of 0.

> +	drm_exec_until_all_locked(&exec) {
> +		ret = amdgpu_vm_lock_pd(vm, &exec, 1);
> +		drm_exec_retry_on_contention(&exec);
> +		if (unlikely(ret))
> +			goto fail_lock;
> +
> +		wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
> +		if (!wptr_mapping) {
> +			DRM_ERROR("Failed to lock up wptr bo\n");

Please drop that error message. It can spam the logs when userspace intentionally gives incorrect values.

> +			ret = -EINVAL;
> +			goto fail_lock;
> +		}
>  
> -	ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
> -	if (ret) {
> -		DRM_ERROR("Failed to map wptr bo to GART\n");
> -		return ret;
> -	}
> +		obj = wptr_mapping->bo_va->base.bo;
> +		ret = drm_exec_prepare_obj(&exec, &obj->tbo.base, 1);

Using drm_exec_lock_obj() should be sufficient.

We don't need a fence slot for this use case.

> +		drm_exec_retry_on_contention(&exec);
> +		if (unlikely(ret)) {
> +			DRM_ERROR("Failed to prepare wptr bo\n");

Same here, that this can fail is normal handling. The worst case is OOM and that is already printed in the logs.

> +			goto fail_lock;
> +		}

We got all the locks now, so the drm_exec_until_all_locked() loop can be closed here.

>  
> -	ret = amdgpu_bo_reserve(wptr_obj->obj, true);
> -	if (ret) {
> -		DRM_ERROR("Failed to reserve wptr bo\n");
> -		return ret;
> -	}


> +		/* mapping now should be stable since both the locks are held */
> +		wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
> +		if (!wptr_mapping) {
> +			DRM_ERROR("Failed to lock up wptr bo\n");
> +			ret = -EINVAL;
> +			goto fail_lock;
> +		}

Doing that again is unecessary. We are holding the VM lock above while doing the lockup.

>  
> -	/* TODO use eviction fence instead of pinning. */
> -	ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
> -	if (ret) {
> -		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n");
> -		goto unresv_bo;
> -	}
> +		wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo);

That is now unecessary as well.

> +
> +		if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> +			DRM_ERROR("Requested wptr bo size is larger than one page\n");

Same, please drop that error message.

> +			ret = -EINVAL;
> +			goto fail_map;
> +		}
> +
> +		ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
> +		if (ret) {
> +			DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);

That error message is useful.

> +			goto fail_map;
> +		}
> +
> +		/* TODO use eviction fence instead of pinning. */
> +		ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);

Oh! The piun needs to come before the alloc_gart! That's wrong in the existing code as well.

> +		if (ret) {
> +			DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret);

That error message makes sense.

Regards,
Christian.

> +			goto fail_map;
> +		}
>  
> -	queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
> -	amdgpu_bo_unreserve(wptr_obj->obj);
> +		queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
> +	}
>  
> +	drm_exec_fini(&exec);
>  	return 0;
>  
> -unresv_bo:
> -	amdgpu_bo_unreserve(wptr_obj->obj);
> +fail_map:
> +	amdgpu_bo_unref(&wptr_obj->obj);
> +fail_lock:
> +	drm_exec_fini(&exec);
>  	return ret;
>  
>  }


  reply	other threads:[~2026-05-04 13:05 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-04 12:56 [PATCH] drm/amdgpu/userq: fix access to stale wptr mapping Sunil Khatri
2026-05-04 13:05 ` Christian König [this message]
2026-05-05  7:29   ` Khatri, Sunil
2026-05-05  7:54     ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ff00838c-128b-40be-b215-4f21012376cd@amd.com \
    --to=christian.koenig@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=sunil.khatri@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox