AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages
@ 2026-05-12 16:27 Christian König
  2026-05-12 16:27 ` [PATCH 2/4] drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx Christian König
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Christian König @ 2026-05-12 16:27 UTC (permalink / raw)
  To: Alexander.Deucher, amd-gfx; +Cc: vprosyak

The notifier sequence must be read only once; otherwise we could end up
working with invalid pages.

While at it, also fix the coding style, e.g. drop the pre-initialized
return value and use the common define for the 2G range.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 90d26d820bac..eb470e252399 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -51,8 +51,6 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_hmm.h"
 
-#define MAX_WALK_BYTE	(2UL << 30)
-
 /**
  * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
  *
@@ -170,11 +168,13 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 			       void *owner,
 			       struct amdgpu_hmm_range *range)
 {
-	unsigned long end;
+	const u64 max_bytes = SZ_2G;
+
+	struct hmm_range *hmm_range = &range->hmm_range;
 	unsigned long timeout;
 	unsigned long *pfns;
-	int r = 0;
-	struct hmm_range *hmm_range = &range->hmm_range;
+	unsigned long end;
+	int r;
 
 	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
 	if (unlikely(!pfns)) {
@@ -191,8 +191,9 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 	end = start + npages * PAGE_SIZE;
 	hmm_range->dev_private_owner = owner;
 
+	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
 	do {
-		hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+		hmm_range->end = min(hmm_range->start + max_bytes, end);
 
 		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
 			hmm_range->start, hmm_range->end);
@@ -200,7 +201,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 		timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 
 retry:
-		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
 		r = hmm_range_fault(hmm_range);
 		if (unlikely(r)) {
 			if (r == -EBUSY && !time_after(jiffies, timeout))
@@ -210,7 +210,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 
 		if (hmm_range->end == end)
 			break;
-		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+		hmm_range->hmm_pfns += max_bytes >> PAGE_SHIFT;
 		hmm_range->start = hmm_range->end;
 	} while (hmm_range->end < end);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/4] drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx
  2026-05-12 16:27 [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages Christian König
@ 2026-05-12 16:27 ` Christian König
  2026-05-12 16:27 ` [PATCH 3/4] drm/amdgpu: fix waiting for all submissions for userptrs Christian König
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Christian König @ 2026-05-12 16:27 UTC (permalink / raw)
  To: Alexander.Deucher, amd-gfx; +Cc: vprosyak

Otherwise we don't invalidate the page tables on the next CS.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index eb470e252399..5d72878c8fe9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -76,6 +76,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
 
 	mmu_interval_set_seq(mni, cur_seq);
 
+	amdgpu_vm_bo_invalidate(bo, false);
 	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
 				  false, MAX_SCHEDULE_TIMEOUT);
 	mutex_unlock(&adev->notifier_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 82a1c19350ee..cd82541ad5d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1631,6 +1631,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va *bo_va;
 	struct dma_resv *resv;
+	struct amdgpu_bo *bo;
 	bool clear, unlock;
 	int r;
 
@@ -1650,11 +1651,13 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	while (!list_empty(&vm->invalidated)) {
 		bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
 					 base.vm_status);
-		resv = bo_va->base.bo->tbo.base.resv;
+		bo = bo_va->base.bo;
+		resv = bo->tbo.base.resv;
 		spin_unlock(&vm->status_lock);
 
 		/* Try to reserve the BO to avoid clearing its ptes */
-		if (!adev->debug_vm && dma_resv_trylock(resv)) {
+		if (!adev->debug_vm && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+		    dma_resv_trylock(resv)) {
 			clear = false;
 			unlock = true;
 		/* The caller is already holding the reservation lock */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/4] drm/amdgpu: fix waiting for all submissions for userptrs
  2026-05-12 16:27 [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages Christian König
  2026-05-12 16:27 ` [PATCH 2/4] drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx Christian König
@ 2026-05-12 16:27 ` Christian König
  2026-05-12 16:27 ` [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated Christian König
  2026-05-12 18:26 ` [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages vitaly prosyak
  3 siblings, 0 replies; 6+ messages in thread
From: Christian König @ 2026-05-12 16:27 UTC (permalink / raw)
  To: Alexander.Deucher, amd-gfx; +Cc: vprosyak

Wait for all submissions when userptrs need to be invalidated by the MMU
notifier, not just the ones the userptr was involved in.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 5d72878c8fe9..642ce358950c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -67,6 +67,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
 {
 	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct amdgpu_bo *vm_root = bo->vm_bo->vm->root.bo;
 	long r;
 
 	if (!mmu_notifier_range_blockable(range))
@@ -77,8 +78,9 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
 	mmu_interval_set_seq(mni, cur_seq);
 
 	amdgpu_vm_bo_invalidate(bo, false);
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
-				  false, MAX_SCHEDULE_TIMEOUT);
+	r = dma_resv_wait_timeout(vm_root->tbo.base.resv,
+				  DMA_RESV_USAGE_BOOKKEEP, false,
+				  MAX_SCHEDULE_TIMEOUT);
 	mutex_unlock(&adev->notifier_lock);
 	if (r <= 0)
 		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated
  2026-05-12 16:27 [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages Christian König
  2026-05-12 16:27 ` [PATCH 2/4] drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx Christian König
  2026-05-12 16:27 ` [PATCH 3/4] drm/amdgpu: fix waiting for all submissions for userptrs Christian König
@ 2026-05-12 16:27 ` Christian König
  2026-05-12 18:29   ` Alex Deucher
  2026-05-12 18:26 ` [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages vitaly prosyak
  3 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2026-05-12 16:27 UTC (permalink / raw)
  To: Alexander.Deucher, amd-gfx; +Cc: vprosyak

Make sure that we only submit work with fully up-to-date VM page tables.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 10d8dcc3a972..b26f681527f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1280,6 +1280,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_job *leader = p->gang_leader;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_list_entry *e;
 	struct drm_gem_object *gobj;
 	unsigned long index;
@@ -1325,7 +1326,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		amdgpu_hmm_range_free(e->range);
 		e->range = NULL;
 	}
-	if (r) {
+
+	if (r || !list_empty(&vm->invalidated)) {
 		r = -EAGAIN;
 		mutex_unlock(&p->adev->notifier_lock);
 		return r;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages
  2026-05-12 16:27 [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages Christian König
                   ` (2 preceding siblings ...)
  2026-05-12 16:27 ` [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated Christian König
@ 2026-05-12 18:26 ` vitaly prosyak
  3 siblings, 0 replies; 6+ messages in thread
From: vitaly prosyak @ 2026-05-12 18:26 UTC (permalink / raw)
  To: christian.koenig, Alexander.Deucher, amd-gfx

[-- Attachment #1: Type: text/plain, Size: 2708 bytes --]

Series is:
Reviewed-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Tested-by: Vitaly Prosyak <vitaly.prosyak@amd.com>

On 2026-05-12 12:27, Christian König wrote:
> The notifier sequence must only be read once or otherwise we could work
> with invalid pages.
>
> While at it also fix the coding style, e.g. drop the pre-initialized
> return value and use the common define for 2G range.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> index 90d26d820bac..eb470e252399 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
> @@ -51,8 +51,6 @@
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_hmm.h"
>  
> -#define MAX_WALK_BYTE	(2UL << 30)
> -
>  /**
>   * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
>   *
> @@ -170,11 +168,13 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  			       void *owner,
>  			       struct amdgpu_hmm_range *range)
>  {
> -	unsigned long end;
> +	const u64 max_bytes = SZ_2G;
> +
> +	struct hmm_range *hmm_range = &range->hmm_range;
>  	unsigned long timeout;
>  	unsigned long *pfns;
> -	int r = 0;
> -	struct hmm_range *hmm_range = &range->hmm_range;
> +	unsigned long end;
> +	int r;
>  
>  	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
>  	if (unlikely(!pfns)) {
> @@ -191,8 +191,9 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  	end = start + npages * PAGE_SIZE;
>  	hmm_range->dev_private_owner = owner;
>  
> +	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
>  	do {
> -		hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
> +		hmm_range->end = min(hmm_range->start + max_bytes, end);
>  
>  		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
>  			hmm_range->start, hmm_range->end);
> @@ -200,7 +201,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  		timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>  
>  retry:
> -		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
>  		r = hmm_range_fault(hmm_range);
>  		if (unlikely(r)) {
>  			if (r == -EBUSY && !time_after(jiffies, timeout))
> @@ -210,7 +210,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
>  
>  		if (hmm_range->end == end)
>  			break;
> -		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
> +		hmm_range->hmm_pfns += max_bytes >> PAGE_SHIFT;
>  		hmm_range->start = hmm_range->end;
>  	} while (hmm_range->end < end);
>  

[-- Attachment #2: Type: text/html, Size: 5040 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated
  2026-05-12 16:27 ` [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated Christian König
@ 2026-05-12 18:29   ` Alex Deucher
  0 siblings, 0 replies; 6+ messages in thread
From: Alex Deucher @ 2026-05-12 18:29 UTC (permalink / raw)
  To: christian.koenig; +Cc: Alexander.Deucher, amd-gfx, vprosyak

Series is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

On Tue, May 12, 2026 at 12:27 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Make sure that we only submit work with full up to date VM page tables.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 10d8dcc3a972..b26f681527f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1280,6 +1280,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  {
>         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>         struct amdgpu_job *leader = p->gang_leader;
> +       struct amdgpu_vm *vm = &fpriv->vm;
>         struct amdgpu_bo_list_entry *e;
>         struct drm_gem_object *gobj;
>         unsigned long index;
> @@ -1325,7 +1326,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>                 amdgpu_hmm_range_free(e->range);
>                 e->range = NULL;
>         }
> -       if (r) {
> +
> +       if (r || !list_empty(&vm->invalidated)) {
>                 r = -EAGAIN;
>                 mutex_unlock(&p->adev->notifier_lock);
>                 return r;
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-05-12 18:30 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-05-12 16:27 [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages Christian König
2026-05-12 16:27 ` [PATCH 2/4] drm/amdgpu: fix calling VM invalidation in amdgpu_hmm_invalidate_gfx Christian König
2026-05-12 16:27 ` [PATCH 3/4] drm/amdgpu: fix waiting for all submissions for userptrs Christian König
2026-05-12 16:27 ` [PATCH 4/4] drm/amdgpu: restart the CS if some parts of the VM are still invalidated Christian König
2026-05-12 18:29   ` Alex Deucher
2026-05-12 18:26 ` [PATCH 1/4] drm/amdgpu: fix amdgpu_hmm_range_get_pages vitaly prosyak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox