* [PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2
From: Yang, Philip @ 2018-10-18 21:52 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: Yang, Philip

Use the HMM helper function hmm_vma_fault() to get the physical pages
backing a userptr and start tracking CPU page table updates on those
pages. Then use hmm_vma_range_done() to check whether those pages were
updated before amdgpu_cs_submit for gfx, or before user queues are
resumed for kfd.

If the userptr pages were updated, amdgpu_cs_ioctl restarts from
scratch for gfx; for kfd, the restore worker is rescheduled to retry.

To avoid a circular lock dependency, there is no nested locking between
mmap_sem and bo::reserve. The locking order is:
bo::reserve -> amdgpu_mn_lock(p->mn)

HMM simplifies the concurrent CPU page table update check, so the
guptasklock, mmu_invalidations and last_set_pages fields are removed
from the amdgpu_ttm_tt struct.

HMM does not pin pages (it does not increase the page reference count),
so related operations like release_pages(), put_page() and
mark_page_dirty() are removed.
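
For reference, the get-pages/range-done pairing this patch builds on
looks roughly like the sketch below. This is a minimal illustration
against the 4.20-era HMM API, not the exact driver code: the my_*
names are made up, error handling is trimmed, and range->pfns,
range->flags and range->values are assumed to be pre-initialized the
way amdgpu_hmm_init_range() does.

static int my_get_user_pages(struct mm_struct *mm, struct hmm_range *range,
			     unsigned long start, unsigned long npages,
			     struct page **pages)
{
	unsigned long i;
	int r;

	range->start = start;
	range->end = start + npages * PAGE_SIZE;
	for (i = 0; i < npages; i++)
		range->pfns[i] = range->flags[HMM_PFN_VALID] |
				 range->flags[HMM_PFN_WRITE];

	down_read(&mm->mmap_sem);
	range->vma = find_vma(mm, start);
	/* Faults in missing pages and starts HMM tracking of the range */
	r = hmm_vma_fault(range, true);
	if (!r) {
		for (i = 0; i < npages; i++)
			pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
	}
	up_read(&mm->mmap_sem);
	return r;
}

static bool my_user_pages_done(struct hmm_range *range)
{
	/* Must be called under a lock that serializes against the
	 * invalidate callback. Returns false if the CPU page table
	 * changed since hmm_vma_fault(); the caller then restarts.
	 */
	return hmm_vma_range_done(range);
}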

v2:
* Remove nested locking between mmap_sem and bo::reserve
* Change locking order to bo::reserve -> amdgpu_mn_lock()
* Use dynamic allocation to replace VLA in kernel stack

Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   | 101 ++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c        |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h        |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c             | 188 ++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c            |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c             |  34 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h             |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c            | 164 +++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h            |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c             |   1 -
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  67 ++++----
 11 files changed, 312 insertions(+), 272 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index df0a059..3fd0340 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -615,8 +615,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 free_out:
 	kvfree(mem->user_pages);
 	mem->user_pages = NULL;
@@ -678,7 +677,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -742,7 +740,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -1311,9 +1308,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free user pages if necessary */
 	if (mem->user_pages) {
 		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
 		kvfree(mem->user_pages);
 	}
 
@@ -1739,8 +1733,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 				       __func__);
 				return -ENOMEM;
 			}
-		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 		}
 
 		/* Get updated user pages */
@@ -1756,12 +1748,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
@@ -1854,14 +1840,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 		}
 
 		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
+		 * our copy of the pointer array.
 		 */
 		kvfree(mem->user_pages);
 		mem->user_pages = NULL;
-		list_move_tail(&mem->validate_list.head,
-			       &process_info->userptr_valid_list);
 
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
@@ -1902,6 +1884,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	return ret;
 }
 
+/* user_pages_invalidated - check whether the CPU page table was updated
+ * after getting the user pages
+ *
+ * The HMM mirror callback lock amn is held to prevent concurrent CPU
+ * page table updates while resuming user queues.
+ *
+ * Returns: true if the CPU page table was updated, false otherwise
+ */
+static int user_pages_invalidated(struct mm_struct *mm,
+			struct amdkfd_process_info *process_info,
+			struct amdgpu_mn **amn)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct amdgpu_device *adev;
+	int invalid, r = 0;
+
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		/* Get HMM mirror callback lock */
+		if (!*amn) {
+			adev = amdgpu_ttm_adev(bo->tbo.bdev);
+			*amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
+			if (IS_ERR(*amn)) {
+				r = true;
+				*amn = NULL;
+				goto out;
+			}
+
+			amdgpu_mn_lock(*amn);
+		}
+
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
+		/* Put this BO back on the userptr_valid_list. If we need to
+		 * revalidate it, we need to start from scratch.
+		 */
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
+			r = true;
+			goto out;
+		}
+	}
+
+out:
+	return r;
+}
+
 /* Worker callback to restore evicted userptr BOs
  *
  * Tries to update and validate all userptr BOs. If successful and no
@@ -1917,6 +1963,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	struct task_struct *usertask;
 	struct mm_struct *mm;
 	int evicted_bos;
+	struct amdgpu_mn *amn = NULL;
 
 	evicted_bos = atomic_read(&process_info->evicted_bos);
 	if (!evicted_bos)
@@ -1955,13 +2002,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
 	    evicted_bos)
 		goto unlock_out;
+
+	/* If the CPU page table was updated again after getting the user
+	 * pages, reschedule to restart the restore process.
+	 *
+	 * amn is also locked to prevent CPU page table updates while resuming
+	 * user queues. amn is unlocked after the user queues are resumed.
+	 */
+	if (user_pages_invalidated(mm, process_info, &amn))
+		goto unlock_mn_out;
+
 	evicted_bos = 0;
+
 	if (kgd2kfd->resume_mm(mm)) {
 		pr_err("%s: Failed to resume KFD\n", __func__);
 		/* No recovery from this failure. Probably the CP is
 		 * hanging. No point trying again.
 		 */
 	}
+
+unlock_mn_out:
+	amdgpu_mn_unlock(amn);
 unlock_out:
 	mutex_unlock(&process_info->lock);
 	mmput(mm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 14d2982..2716c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -201,8 +201,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
-
-		e->user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1..4beab2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
-	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8836186..69c6b41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -51,7 +51,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -531,24 +530,19 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
-		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     lobj->user_pages);
-			binding_userptr = true;
 		}
 
 		if (p->evictable == lobj)
@@ -557,11 +551,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
-
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
 	}
 	return 0;
 }
@@ -576,7 +565,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
+	struct list_head userpage_list;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -585,7 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (cs->in.bo_list_handle) {
 		if (p->bo_list)
 			return -EINVAL;
-
 		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 				       &p->bo_list);
 		if (r)
@@ -600,7 +588,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+		p->mn = amdgpu_mn_get(current->mm, p->adev,
+					AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -608,79 +597,72 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
+	}
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
+	INIT_LIST_HEAD(&userpage_list);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		list_del(&e->tv.head);
+		list_add(&e->tv.head, &userpage_list);
+		amdgpu_bo_unreserve(bo);
+	}
 
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
+	/* Get userptr backing pages. If pages were updated after being
+	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
+	 * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages.
+	 */
+	if (!list_empty(&userpage_list)) {
+		/* Unreserve everything again, to avoid the circular locking
+		 * case bo::reserve -> mmap_sem
+		 */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
+		list_for_each_entry(e, &userpage_list, tv.head) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+			bool userpage_invalidated = false;
+			struct page **pages;
+			int i;
+
+			pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+						sizeof(*pages),
+						GFP_KERNEL | __GFP_ZERO);
+			if (!pages) {
+				DRM_ERROR("calloc failure\n");
+				return -ENOMEM;
 			}
 
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
+			r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, pages);
+			if (r) {
+				kvfree(pages);
+				return r;
 			}
-		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+			for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+				if (bo->tbo.ttm->pages[i] != pages[i]) {
+					bo->tbo.ttm->pages[i] = pages[i];
+					userpage_invalidated = true;
+				}
+			}
+			e->user_invalidated = userpage_invalidated;
+			kvfree(pages);
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-							 sizeof(struct page*),
-							 GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
+		list_splice(&userpage_list, &p->validated);
 
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
-			}
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0)) {
+			if (r != -ERESTARTSYS)
+				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+			goto out;
 		}
-
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -743,17 +725,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1206,8 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
-	int r;
+	int r = 0;
 
 	job = p->job;
 	p->job = NULL;
@@ -1216,15 +1187,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is held until amdgpu_cs_submit() is finished and the fence
+	 * is added to the BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptrs were updated after amdgpu_cs_parser_bos(), restart the cs */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -ERESTARTSYS;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1272,14 +1249,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser = {};
-	bool reserved_buffers = false;
+	union drm_amdgpu_cs *cs;
+	struct amdgpu_cs_parser parser;
+	bool reserved_buffers;
+	int tries = 10;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
+restart:
+	memset(&parser, 0, sizeof(parser));
+	cs = data;
+	reserved_buffers = false;
+
 	parser.adev = adev;
 	parser.filp = filp;
 
@@ -1321,6 +1304,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+	if (r == -ERESTARTSYS) {
+		if (!--tries) {
+			DRM_ERROR("Possible deadlock? Retried too many times\n");
+			return -EDEADLK;
+		}
+		goto restart;
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b3d1eb..ff9a8fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 56595b3..6b6becd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
@@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 /**
  * amdgpu_mn_get - create HMM mirror context
  *
+ * @mm: the mm struct
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a HMM mirror context for current->mm.
+ * Creates a HMM mirror context for mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
 {
-	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *amn;
 	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
@@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	amn = amdgpu_mn_get(adev, type);
+	amn = amdgpu_mn_get(current->mm, adev, type);
 	if (IS_ERR(amn))
 		return PTR_ERR(amn);
 
@@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* flags used internally by HMM, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+		(1 << 0), /* HMM_PFN_VALID */
+		(1 << 1), /* HMM_PFN_WRITE */
+		0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+		0, /* HMM_PFN_NONE */
+		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0e27526..59ea30e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
  */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
@@ -37,10 +38,12 @@ enum amdgpu_mn_type {
 #if defined(CONFIG_HMM)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3a68028..b0537d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -799,11 +800,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
@@ -812,85 +808,91 @@ struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+	struct hmm_range	range;
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking of CPU page table updates
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once
+ * and only once afterwards to stop HMM tracking
  */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+	struct hmm_range *range = &gtt->range;
+	int r, i;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	amdgpu_hmm_init_range(range);
 
 	down_read(&mm->mmap_sem);
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	range->vma = find_vma(mm, gtt->userptr);
+	if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {
+		r = -EPERM;
+		goto out;
+	}
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+				GFP_KERNEL | __GFP_ZERO);
+	if (range->pfns == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+	range->start = gtt->userptr;
+	range->end = end;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+	for (i = 0; i < ttm->num_pages; i++) {
+		range->pfns[i] = range->flags[HMM_PFN_VALID];
+		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+					0 : range->flags[HMM_PFN_WRITE];
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	/* This may trigger a CPU page table update */
+	r = hmm_vma_fault(range, true);
+	if (r)
+		goto out_free_pfns;
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	up_read(&mm->mmap_sem);
+	return 0;
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+out_free_pfns:
+	kvfree(range->pfns);
+	range->pfns = NULL;
+out:
+	up_read(&mm->mmap_sem);
+	return r;
+}
 
-		if (r < 0)
-			goto release_pages;
+/**
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking the CPU page table
+ * change and check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if the pages were invalidated since the last time they were
+ * set
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
 
-		pinned += r;
+	if (gtt == NULL || !gtt->userptr)
+		return false;
 
-	} while (pinned < ttm->num_pages);
+	r = !hmm_vma_range_done(&gtt->range);
 
-	up_read(&mm->mmap_sem);
-	return 0;
+	if (gtt->range.pfns) {
+		kvfree(gtt->range.pfns);
+		gtt->range.pfns = NULL;
+	}
 
-release_pages:
-	release_pages(pages, pinned);
-	up_read(&mm->mmap_sem);
 	return r;
 }
 
@@ -903,16 +905,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
  */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
-
+	for (i = 0; i < ttm->num_pages; ++i)
 		ttm->pages[i] = pages ? pages[i] : NULL;
-	}
 }
 
 /**
@@ -997,9 +993,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
 }
 
@@ -1352,8 +1345,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
 
 	return 0;
 }
@@ -1383,7 +1374,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1396,48 +1386,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
  */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index fe8f276..aeeea77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -104,6 +104,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
@@ -114,7 +115,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6904d79..fa5bf45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -617,7 +617,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->root.base.bo->tbo;
 	entry->tv.shared = true;
-	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a3b9339..85be762 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1164,6 +1164,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	retval = 0;
 
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
+	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+
+	if (!mqd_mgr) {
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Eviction state logic: we only mark active queues as evicted
+	 * to avoid the overhead of restoring inactive queues later
+	 */
+	if (qpd->evicted)
+		q->properties.is_evicted = (q->properties.queue_size > 0 &&
+					    q->properties.queue_percent > 0 &&
+					    q->properties.queue_address != 0);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval)
+		goto out;
+
 	dqm_lock(dqm);
 
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -1187,30 +1214,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_deallocate_doorbell;
-	}
-	/*
-	 * Eviction state logic: we only mark active queues as evicted
-	 * to avoid the overhead of restoring inactive queues later
-	 */
-	if (qpd->evicted)
-		q->properties.is_evicted = (q->properties.queue_size > 0 &&
-					    q->properties.queue_percent > 0 &&
-					    q->properties.queue_address != 0);
-
-	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
-	q->properties.tba_addr = qpd->tba_addr;
-	q->properties.tma_addr = qpd->tma_addr;
-	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-				&q->gart_mqd_addr, &q->properties);
-	if (retval)
-		goto out_deallocate_doorbell;
 
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
@@ -1234,14 +1237,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	dqm_unlock(dqm);
 	return retval;
 
-out_deallocate_doorbell:
-	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
 out_unlock:
 	dqm_unlock(dqm);
-
+out:
 	return retval;
 }
 
@@ -1404,8 +1405,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 			qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
@@ -1416,6 +1415,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
 failed:
@@ -1637,7 +1639,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1651,7 +1657,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	}
 
 out:
-	dqm_unlock(dqm);
 	return retval;
 }
 
-- 
2.7.4


* [PATCH 1/2] drm/amdgpu: use HMM mirror callback to replace mmu notifier v5
From: Yang, Philip @ 2018-12-03 20:19 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: Yang, Philip

Replace our MMU notifier with the
hmm_mirror_ops.sync_cpu_device_pagetables callback. Enable CONFIG_HMM
and CONFIG_HMM_MIRROR as a dependency in the DRM_AMDGPU_USERPTR
Kconfig.

It supports both the KFD userptr and the gfx userptr paths.

The dependent HMM patchsets from Jérôme Glisse are all merged into the
4.20 kernel now.
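
For reference, wiring an HMM mirror to a process address space follows
roughly the pattern below. This is a minimal sketch against the
4.20-era API, not the driver code; the my_* names are illustrative.

#include <linux/hmm.h>

struct my_mirror {
	struct hmm_mirror mirror;
	/* per-process driver state goes here */
};

static int my_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
					 const struct hmm_update *update)
{
	/* container_of(mirror, struct my_mirror, mirror) recovers the
	 * driver state. Stop device access to the range
	 * [update->start, update->end) here; if update->blockable is
	 * false, return -EAGAIN instead of sleeping.
	 */
	return 0;
}

static void my_release(struct hmm_mirror *mirror)
{
	/* The mm is going away; schedule lazy teardown of driver state */
}

static const struct hmm_mirror_ops my_mirror_ops = {
	.sync_cpu_device_pagetables = my_sync_cpu_device_pagetables,
	.release = my_release,
};

static int my_mirror_init(struct my_mirror *m, struct mm_struct *mm)
{
	m->mirror.ops = &my_mirror_ops;
	return hmm_mirror_register(&m->mirror, mm); /* 0 on success */
}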

Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
 drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 124 +++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
 4 files changed, 57 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
+	  aren't already selected, to enable full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..05ca6dc381e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..56595b3d90d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
@@ -58,7 +58,6 @@
  *
  * @adev: amdgpu device pointer
  * @mm: process address space
- * @mn: MMU notifier structure
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
  * @objects: interval tree containing amdgpu_mn_nodes
  * @read_lock: mutex for recursive locking of @lock
  * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +73,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
 	struct rb_root_cached	objects;
 	struct mutex		read_lock;
 	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Schedule a work item to lazily destroy the HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 /**
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) that is about to be updated
+ * @update: the update start and end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) that is about to be updated
+ * @update: the update start and end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, mm);
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables =
+				amdgpu_mn_invalidate_range_start_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables =
+				amdgpu_mn_invalidate_range_start_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -408,12 +388,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
 	mutex_init(&amn->read_lock);
 	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -439,7 +419,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +475,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0e2752646e6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-- 
2.17.1


* [PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2
From: Yang, Philip @ 2018-12-03 20:19 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: Yang, Philip

Use the HMM helper function hmm_vma_fault() to get the physical pages
backing a userptr and start tracking CPU page table updates on those
pages. Then use hmm_vma_range_done() to check whether those pages were
updated before amdgpu_cs_submit for gfx, or before user queues are
resumed for kfd.

If the userptr pages were updated, amdgpu_cs_ioctl restarts from
scratch for gfx; for kfd, the restore worker is rescheduled to retry.

To avoid a circular lock dependency, there is no nested locking between
mmap_sem and bo::reserve. The locking order is:
bo::reserve -> amdgpu_mn_lock(p->mn)

HMM simplifies the concurrent CPU page table update check, so the
guptasklock, mmu_invalidations and last_set_pages fields are removed
from the amdgpu_ttm_tt struct.

HMM does not pin pages (it does not increase the page reference count),
so related operations like release_pages(), put_page() and
mark_page_dirty() are removed.
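
For the gfx path, the recheck before submission boils down to the
pattern below. This is an illustrative sketch distilled from the
amdgpu_cs_submit() hunk in this patch (my_recheck_userptrs() is a
made-up name), not verbatim driver code.

static int my_recheck_userptrs(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	bool invalidated = false;

	/* BOs are already reserved; per the locking order above, take
	 * the mirror lock next. No memory allocation is allowed while
	 * amdgpu_mn_lock() is held.
	 */
	amdgpu_mn_lock(p->mn);
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list)
		invalidated |=
			amdgpu_ttm_tt_get_user_pages_done(e->tv.bo->ttm);
	if (invalidated) {
		amdgpu_mn_unlock(p->mn);
		return -ERESTARTSYS;	/* the ioctl restarts from scratch */
	}
	/* ... push the job and add fences to the BOs ... */
	amdgpu_mn_unlock(p->mn);
	return 0;
}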

v2:
* Remove nested locking between mmap_sem and bo::reserve
* Change locking order to bo::reserve -> amdgpu_mn_lock()
* Use dynamic allocation to replace VLA in kernel stack

Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 ++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 164 ++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  67 ++++---
 11 files changed, 312 insertions(+), 272 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f3129b912714..5ce6ba24fc72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 free_out:
 	kvfree(mem->user_pages);
 	mem->user_pages = NULL;
@@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free user pages if necessary */
 	if (mem->user_pages) {
 		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
 		kvfree(mem->user_pages);
 	}
 
@@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 				       __func__);
 				return -ENOMEM;
 			}
-		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 		}
 
 		/* Get updated user pages */
@@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
@@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 		}
 
 		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
+		 * our copy of the pointer array.
 		 */
 		kvfree(mem->user_pages);
 		mem->user_pages = NULL;
-		list_move_tail(&mem->validate_list.head,
-			       &process_info->userptr_valid_list);
 
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
@@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	return ret;
 }
 
+/* user_pages_invalidated - check whether the CPU page table was updated
+ * after getting the user pages
+ *
+ * The HMM mirror callback lock amn is held to prevent concurrent CPU
+ * page table updates while resuming user queues.
+ *
+ * Returns: true if the CPU page table was updated, false otherwise
+ */
+static int user_pages_invalidated(struct mm_struct *mm,
+			struct amdkfd_process_info *process_info,
+			struct amdgpu_mn **amn)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct amdgpu_device *adev;
+	int invalid, r = 0;
+
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		/* Get HMM mirror callback lock */
+		if (!*amn) {
+			adev = amdgpu_ttm_adev(bo->tbo.bdev);
+			*amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
+			if (IS_ERR(*amn)) {
+				r = true;
+				*amn = NULL;
+				goto out;
+			}
+
+			amdgpu_mn_lock(*amn);
+		}
+
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
+		/* Put this BO back on the userptr_valid_list. If we need to
+		 * revalidate it, we need to start from scratch.
+		 */
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
+			r = true;
+			goto out;
+		}
+	}
+
+out:
+	return r;
+}
+
 /* Worker callback to restore evicted userptr BOs
  *
  * Tries to update and validate all userptr BOs. If successful and no
@@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	struct task_struct *usertask;
 	struct mm_struct *mm;
 	int evicted_bos;
+	struct amdgpu_mn *amn = NULL;
 
 	evicted_bos = atomic_read(&process_info->evicted_bos);
 	if (!evicted_bos)
@@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
 	    evicted_bos)
 		goto unlock_out;
+
+	/* If the CPU page table was updated again after getting the user
+	 * pages, reschedule to restart the restore process.
+	 *
+	 * amn is also locked to prevent CPU page table updates while resuming
+	 * user queues. amn is unlocked after the user queues are resumed.
+	 */
+	if (user_pages_invalidated(mm, process_info, &amn))
+		goto unlock_mn_out;
+
 	evicted_bos = 0;
+
 	if (kgd2kfd->resume_mm(mm)) {
 		pr_err("%s: Failed to resume KFD\n", __func__);
 		/* No recovery from this failure. Probably the CP is
 		 * hanging. No point trying again.
 		 */
 	}
+
+unlock_mn_out:
+	amdgpu_mn_unlock(amn);
 unlock_out:
 	mutex_unlock(&process_info->lock);
 	mmput(mm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 14d2982a47cc..2716c241f57e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -201,8 +201,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
-
-		e->user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..4beab2de09b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
-	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 024dfbd87f11..cee7d398916c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -51,7 +51,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -531,24 +530,19 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
-		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     lobj->user_pages);
-			binding_userptr = true;
 		}
 
 		if (p->evictable == lobj)
@@ -557,11 +551,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
-
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
 	}
 	return 0;
 }
@@ -576,7 +565,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
+	struct list_head userpage_list;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -585,7 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (cs->in.bo_list_handle) {
 		if (p->bo_list)
 			return -EINVAL;
-
 		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 				       &p->bo_list);
 		if (r)
@@ -600,7 +588,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+		p->mn = amdgpu_mn_get(current->mm, p->adev,
+					AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -608,79 +597,72 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
+	}
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
+	INIT_LIST_HEAD(&userpage_list);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		list_del(&e->tv.head);
+		list_add(&e->tv.head, &userpage_list);
+		amdgpu_bo_unreserve(bo);
+	}
 
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
+	/* Get userptr backing pages. If pages are updated after being
+	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
+	 * will do amdgpu_ttm_backend_bind() to flush and invalidate the new
+	 * pages.
+	 */
+	if (!list_empty(&userpage_list)) {
+		/* Unreserve everything again to avoid the circular locking
+		 * dependency bo::reserve -> mmap_sem
+		 */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
+		list_for_each_entry(e, &userpage_list, tv.head) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+			bool userpage_invalidated = false;
+			struct page **pages;
+			int i;
+
+			pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+						sizeof(*pages),
+						GFP_KERNEL | __GFP_ZERO);
+			if (!pages) {
+				DRM_ERROR("calloc failure\n");
+				return -ENOMEM;
 			}
 
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
+			r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, pages);
+			if (r) {
+				kvfree(pages);
+				return r;
 			}
-		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+			for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+				if (bo->tbo.ttm->pages[i] != pages[i]) {
+					bo->tbo.ttm->pages[i] = pages[i];
+					userpage_invalidated = true;
+				}
+			}
+			e->user_invalidated = userpage_invalidated;
+			kvfree(pages);
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-							 sizeof(struct page*),
-							 GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
+		list_splice(&userpage_list, &p->validated);
 
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
-			}
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0)) {
+			if (r != -ERESTARTSYS)
+				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+			goto out;
 		}
-
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -743,17 +725,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1206,8 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
-	int r;
+	int r = 0;
 
 	job = p->job;
 	p->job = NULL;
@@ -1216,15 +1187,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is held until amdgpu_cs_submit is finished and the fence is
+	 * added to BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptr pages were updated after amdgpu_cs_parser_bos(), restart cs */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -ERESTARTSYS;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1271,14 +1248,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser = {};
-	bool reserved_buffers = false;
+	union drm_amdgpu_cs *cs;
+	struct amdgpu_cs_parser parser;
+	bool reserved_buffers;
+	int tries = 10;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
+restart:
+	memset(&parser, 0, sizeof(parser));
+	cs = data;
+	reserved_buffers = false;
+
 	parser.adev = adev;
 	parser.filp = filp;
 
@@ -1320,6 +1303,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+	if (r == -ERESTARTSYS) {
+		if (!--tries) {
+			DRM_ERROR("Possible deadlock? Retry too many times\n");
+			return -EDEADLK;
+		}
+		goto restart;
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b3d1ebda9df..ff9a8fdab20f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 56595b3d90d2..6b6becdb725b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
@@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 /**
  * amdgpu_mn_get - create HMM mirror context
  *
+ * @mm: the mm struct
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a HMM mirror context for current->mm.
+ * Creates a HMM mirror context for mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
 {
-	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *amn;
 	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
@@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	amn = amdgpu_mn_get(adev, type);
+	amn = amdgpu_mn_get(current->mm, adev, type);
 	if (IS_ERR(amn))
 		return PTR_ERR(amn);
 
@@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* Flags used internally by HMM, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+		(1 << 0), /* HMM_PFN_VALID */
+		(1 << 1), /* HMM_PFN_WRITE */
+		0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+		0, /* HMM_PFN_NONE */
+		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0e2752646e6f..59ea30e149bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
  */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
@@ -37,10 +38,12 @@ enum amdgpu_mn_type {
 #if defined(CONFIG_HMM)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c91ec3101d00..4a1cb4d15dfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
@@ -718,85 +714,91 @@ struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+	struct hmm_range	range;
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking of CPU page table updates
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * The caller must call amdgpu_ttm_tt_get_user_pages_done() once and only
+ * once afterwards to stop HMM tracking
  */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+	struct hmm_range *range = &gtt->range;
+	int r, i;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	amdgpu_hmm_init_range(range);
 
 	down_read(&mm->mmap_sem);
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	range->vma = find_vma(mm, gtt->userptr);
+	if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {
+		r = -EPERM;
+		goto out;
+	}
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+				GFP_KERNEL | __GFP_ZERO);
+	if (range->pfns == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+	range->start = gtt->userptr;
+	range->end = end;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+	for (i = 0; i < ttm->num_pages; i++) {
+		range->pfns[i] = range->flags[HMM_PFN_VALID];
+		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+					0 : range->flags[HMM_PFN_WRITE];
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	/* This may trigger a CPU page table update */
+	r = hmm_vma_fault(range, true);
+	if (r)
+		goto out_free_pfns;
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	up_read(&mm->mmap_sem);
+	return 0;
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+out_free_pfns:
+	kvfree(range->pfns);
+	range->pfns = NULL;
+out:
+	up_read(&mm->mmap_sem);
+	return r;
+}
 
-		if (r < 0)
-			goto release_pages;
+/**
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
+ * updates and check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if pages are invalidated since the last time they've been set
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
 
-		pinned += r;
+	if (gtt == NULL || !gtt->userptr)
+		return false;
 
-	} while (pinned < ttm->num_pages);
+	r = !hmm_vma_range_done(&gtt->range);
 
-	up_read(&mm->mmap_sem);
-	return 0;
+	if (gtt->range.pfns) {
+		kvfree(gtt->range.pfns);
+		gtt->range.pfns = NULL;
+	}
 
-release_pages:
-	release_pages(pages, pinned);
-	up_read(&mm->mmap_sem);
 	return r;
 }
 
@@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
  */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
-
+	for (i = 0; i < ttm->num_pages; ++i)
 		ttm->pages[i] = pages ? pages[i] : NULL;
-	}
 }
 
 /**
@@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
 }
 
@@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
 
 	return 0;
 }
@@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
  */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..8988c87fff9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
@@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2acb9838913e..4a67a88acea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->root.base.bo->tbo;
 	entry->tv.shared = true;
-	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..fe120cc0930c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	retval = 0;
 
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
+	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+
+	if (!mqd_mgr) {
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Eviction state logic: we only mark active queues as evicted
+	 * to avoid the overhead of restoring inactive queues later
+	 */
+	if (qpd->evicted)
+		q->properties.is_evicted = (q->properties.queue_size > 0 &&
+					    q->properties.queue_percent > 0 &&
+					    q->properties.queue_address != 0);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval)
+		goto out;
+
 	dqm_lock(dqm);
 
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_deallocate_doorbell;
-	}
-	/*
-	 * Eviction state logic: we only mark active queues as evicted
-	 * to avoid the overhead of restoring inactive queues later
-	 */
-	if (qpd->evicted)
-		q->properties.is_evicted = (q->properties.queue_size > 0 &&
-					    q->properties.queue_percent > 0 &&
-					    q->properties.queue_address != 0);
-
-	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
-	q->properties.tba_addr = qpd->tba_addr;
-	q->properties.tma_addr = qpd->tma_addr;
-	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-				&q->gart_mqd_addr, &q->properties);
-	if (retval)
-		goto out_deallocate_doorbell;
 
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
@@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	dqm_unlock(dqm);
 	return retval;
 
-out_deallocate_doorbell:
-	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
 out_unlock:
 	dqm_unlock(dqm);
-
+out:
 	return retval;
 }
 
@@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 			qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
@@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
 failed:
@@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	}
 
 out:
-	dqm_unlock(dqm);
 	return retval;
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2
       [not found]     ` <20181203201840.25059-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-04  1:52       ` Kuehling, Felix
       [not found]         ` <f95aee13-7579-6f1b-c6a4-887802140702-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 7+ messages in thread
From: Kuehling, Felix @ 2018-12-04  1:52 UTC (permalink / raw)
  To: Yang, Philip,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

See comments inline. I didn't review the amdgpu_cs and amdgpu_gem parts
as I don't know them very well.

On 2018-12-03 3:19 p.m., Yang, Philip wrote:
> Use HMM helper function hmm_vma_fault() to get physical pages backing
> userptr and start CPU page table update track of those pages. Then use
> hmm_vma_range_done() to check if those pages are updated before
> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>
> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
> from scratch, for kfd, restore worker is rescheduled to retry.
>
> To avoid circular lock dependency, no nested locking between mmap_sem
> and bo::reserve. The locking order is:
> bo::reserve -> amdgpu_mn_lock(p->mn)
>
> HMM simplify the CPU page table concurrent update check, so remove
> guptasklock, mmu_invalidations, last_set_pages fields from
> amdgpu_ttm_tt struct.
>
> HMM does not pin the page (increase page ref count), so remove related
> operations like release_pages(), put_page(), mark_page_dirty().
>
> v2:
> * Remove nested locking between mmap_sem and bo::reserve
> * Change locking order to bo::reserve -> amdgpu_mn_lock()
> * Use dynamic allocation to replace VLA in kernel stack
>
> Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 ++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 164 ++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  67 ++++---
>  11 files changed, 312 insertions(+), 272 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f3129b912714..5ce6ba24fc72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>  	amdgpu_bo_unreserve(bo);
>  
>  release_out:
> -	if (ret)
> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>  free_out:
>  	kvfree(mem->user_pages);
>  	mem->user_pages = NULL;
> @@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.shared = true;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
> @@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.shared = true;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	i = 0;
> @@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>  	/* Free user pages if necessary */
>  	if (mem->user_pages) {
>  		pr_debug("%s: Freeing user_pages array\n", __func__);
> -		if (mem->user_pages[0])
> -			release_pages(mem->user_pages,
> -					mem->bo->tbo.ttm->num_pages);
>  		kvfree(mem->user_pages);
>  	}
>  
> @@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  				       __func__);
>  				return -ENOMEM;
>  			}
> -		} else if (mem->user_pages[0]) {
> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>  		}
>  
>  		/* Get updated user pages */
> @@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  			 * stalled user mode queues.
>  			 */
>  		}
> -
> -		/* Mark the BO as valid unless it was invalidated
> -		 * again concurrently
> -		 */
> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
> -			return -EAGAIN;
>  	}
>  
>  	return 0;
> @@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  		}
>  
>  		/* Validate succeeded, now the BO owns the pages, free
> -		 * our copy of the pointer array. Put this BO back on
> -		 * the userptr_valid_list. If we need to revalidate
> -		 * it, we need to start from scratch.
> +		 * our copy of the pointer array.
>  		 */
>  		kvfree(mem->user_pages);
>  		mem->user_pages = NULL;
> -		list_move_tail(&mem->validate_list.head,
> -			       &process_info->userptr_valid_list);
>  
>  		/* Update mapping. If the BO was not validated
>  		 * (because we couldn't get user pages), this will
> @@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  	return ret;
>  }
>  
> +/* user_pages_invalidated - if CPU page table is updated after getting user
> + * pages
> + *
> + * HMM mirror callback lock amn is hold to prevent the concurrent CPU
> + * page table update while resuming user queues.
> + *
> + * Returns: true if CPU page table is updated, false otherwise
> + */
> +static int user_pages_invalidated(struct mm_struct *mm,
> +			struct amdkfd_process_info *process_info,
> +			struct amdgpu_mn **amn)
> +{
> +	struct kgd_mem *mem, *tmp_mem;
> +	struct amdgpu_bo *bo;
> +	struct amdgpu_device *adev;
> +	int invalid, r = 0;
> +
> +	list_for_each_entry_safe(mem, tmp_mem,
> +				 &process_info->userptr_inval_list,
> +				 validate_list.head) {
> +
> +		invalid = atomic_read(&mem->invalid);
> +		if (!invalid)
> +			/* BO hasn't been invalidated since the last
> +			 * revalidation attempt. Keep its BO list.
> +			 */
> +			continue;
> +
> +		bo = mem->bo;
> +
> +		/* Get HMM mirror callback lock */
> +		if (!*amn) {
> +			adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +			*amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
> +			if (IS_ERR(*amn)) {
> +				r = true;
> +				*amn = NULL;
> +				goto out;
> +			}
> +
> +			amdgpu_mn_lock(*amn);

There can be multiple GPUs involved here. So you'd need more than one
amn lock. I think you're trying to use the amn lock to serialize page
table updates. But that's probably not the right lock. What we should be
using for this is the page directory reservation locks. We're already
taking them for all affected GPUs in validate_invalid_user_pages. And
that's where you should also be calling amdgpu_ttm_tt_get_user_pages_done.


> +		}
> +
> +		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);

This needs to be called while holding the "driver lock that serializes
device page table updates". I think in our case that's the page
directory reservation. You're not holding that here.


> +
> +		/* Put this BO back on the userptr_valid_list. If we need to
> +		 * revalidate it, we need to start from scratch.
> +		 */
> +		list_move_tail(&mem->validate_list.head,
> +			       &process_info->userptr_valid_list);

Doing this unconditionally here looks wrong. Also, this was already done
in validate_invalid_user_pages. So I'm wondering what's the point of
this function. In most cases the process_info->userptr_inval_list will
be empty here to begin with.

I think you should probably call amdgpu_ttm_tt_get_user_pages_done in
validate_invalid_user_pages while holding the PD reservation lock and
where you call amdgpu_ttm_tt_set_user_pages.
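
Something like this, maybe (untested sketch; "unreserve_out" and the
exact spot in validate_invalid_user_pages are placeholders, everything
else uses the existing names):

	/* With the BO and the page directories reserved: */
	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);

	/* Stop HMM tracking while the PD reservation is still held, so a
	 * concurrent invalidation can't race with the page table update.
	 */
	if (amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm) ||
	    atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
		ret = -EAGAIN;	/* reschedules the restore worker */
		goto unreserve_out;
	}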


> +
> +		/* Mark the BO as valid unless it was invalidated
> +		 * again concurrently
> +		 */
> +		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
> +			r = true;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	return r;
> +}
> +
>  /* Worker callback to restore evicted userptr BOs
>   *
>   * Tries to update and validate all userptr BOs. If successful and no
> @@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>  	struct task_struct *usertask;
>  	struct mm_struct *mm;
>  	int evicted_bos;
> +	struct amdgpu_mn *amn = NULL;
>  
>  	evicted_bos = atomic_read(&process_info->evicted_bos);
>  	if (!evicted_bos)
> @@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>  	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
>  	    evicted_bos)
>  		goto unlock_out;
> +
> +	/* If CPU page table is updated again after getting user pages,
> +	 * schedule to restart the restore process again.
> +	 *
> +	 * amn is also locked to prevent CPU page table update while resuming
> +	 * user queues. amn is unlocked after user queues are resumed.
> +	 */
> +	if (user_pages_invalidated(mm, process_info, &amn))
> +		goto unlock_mn_out;
> +
>  	evicted_bos = 0;
> +
>  	if (kgd2kfd->resume_mm(mm)) {
>  		pr_err("%s: Failed to resume KFD\n", __func__);
>  		/* No recovery from this failure. Probably the CP is
>  		 * hanging. No point trying again.
>  		 */
>  	}
> +
> +unlock_mn_out:
> +	amdgpu_mn_unlock(amn);
>  unlock_out:
>  	mutex_unlock(&process_info->lock);
>  	mmput(mm);

[snip]

> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 56595b3d90d2..6b6becdb725b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>  			true, false, MAX_SCHEDULE_TIMEOUT);
>  		if (r <= 0)
>  			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> -
> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>  	}
>  }
>  
> @@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  /**
>   * amdgpu_mn_get - create HMM mirror context
>   *
> + * @mm: the mm struct
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a HMM mirror context for current->mm.
> + * Creates a HMM mirror context for mm.
>   */
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
>  {
> -	struct mm_struct *mm = current->mm;
>  	struct amdgpu_mn *amn;
>  	unsigned long key = AMDGPU_MN_KEY(mm, type);
>  	int r;
> @@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  	struct list_head bos;
>  	struct interval_tree_node *it;
>  
> -	amn = amdgpu_mn_get(adev, type);
> +	amn = amdgpu_mn_get(current->mm, adev, type);
>  	if (IS_ERR(amn))
>  		return PTR_ERR(amn);
>  
> @@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  	mutex_unlock(&adev->mn_lock);
>  }
>  
> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> +		(1 << 0), /* HMM_PFN_VALID */
> +		(1 << 1), /* HMM_PFN_WRITE */
> +		0 /* HMM_PFN_DEVICE_PRIVATE */
> +};
> +
> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
> +		0, /* HMM_PFN_NONE */
> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
> +};
> +
> +void amdgpu_hmm_init_range(struct hmm_range *range)
> +{
> +	if (range) {
> +		range->flags = hmm_range_flags;
> +		range->values = hmm_range_values;
> +		range->pfn_shift = PAGE_SHIFT;
> +		range->pfns = NULL;
> +		INIT_LIST_HEAD(&range->list);
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index 0e2752646e6f..59ea30e149bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -25,9 +25,10 @@
>  #define __AMDGPU_MN_H__
>  
>  /*
> - * MMU Notifier
> + * HMM mirror
>   */
>  struct amdgpu_mn;
> +struct hmm_range;
>  
>  enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_GFX,
> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>  #if defined(CONFIG_HMM)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type);
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo);
> +void amdgpu_hmm_init_range(struct hmm_range *range);
>  #else
>  static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>  static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c91ec3101d00..4a1cb4d15dfb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/debugfs.h>
>  #include <linux/iommu.h>
> +#include <linux/hmm.h>
>  #include "amdgpu.h"
>  #include "amdgpu_object.h"
>  #include "amdgpu_trace.h"
> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>  /*
>   * TTM backend functions.
>   */
> -struct amdgpu_ttm_gup_task_list {
> -	struct list_head	list;
> -	struct task_struct	*task;
> -};
> -
>  struct amdgpu_ttm_tt {
>  	struct ttm_dma_tt	ttm;
>  	u64			offset;
> @@ -718,85 +714,91 @@ struct amdgpu_ttm_tt {
>  	uint32_t		userflags;
>  	spinlock_t              guptasklock;
>  	struct list_head        guptasks;
> -	atomic_t		mmu_invalidations;
> -	uint32_t		last_set_pages;
> +	struct hmm_range	range;
>  };
>  
>  /**
> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
> - * pointer to memory
> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
> + * memory and start HMM tracking CPU page table update
>   *
> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
> - * This provides a wrapper around the get_user_pages() call to provide
> - * device accessible pages that back user memory.
> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
> + * once afterwards to stop HMM tracking
>   */
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  	struct mm_struct *mm = gtt->usertask->mm;
> -	unsigned int flags = 0;
> -	unsigned pinned = 0;
> -	int r;
> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> +	struct hmm_range *range = &gtt->range;
> +	int r, i;
>  
>  	if (!mm) /* Happens during process shutdown */
>  		return -ESRCH;
>  
> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> -		flags |= FOLL_WRITE;
> +	amdgpu_hmm_init_range(range);
>  
>  	down_read(&mm->mmap_sem);
>  
> -	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
> -		/*
> -		 * check that we only use anonymous memory to prevent problems
> -		 * with writeback
> -		 */
> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> -		struct vm_area_struct *vma;
> +	range->vma = find_vma(mm, gtt->userptr);
> +	if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {

I think this breaks file-backed mappings and also adds a new limitation
that the address range has to be a single VMA. This was previously only
required if gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY is set.
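
Keeping the old behaviour could look roughly like this (sketch; the
-EFAULT choice is mine, and this still leaves the single-VMA
limitation, which may be inherent to hmm_vma_fault() taking a single
range->vma):

	struct vm_area_struct *vma;

	vma = find_vma(mm, gtt->userptr);
	if (!vma || vma->vm_end < end) {
		r = -EFAULT;
		goto out;
	}
	/* Only reject file-backed memory for ANONONLY BOs, as before */
	if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && vma->vm_file) {
		r = -EPERM;
		goto out;
	}
	range->vma = vma;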


> +		r = -EPERM;
> +		goto out;
> +	}
> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> +				GFP_KERNEL | __GFP_ZERO);

Is GFP_ZERO really needed here? The loop below initializes all pfns
explicitly.


> +	if (range->pfns == NULL) {
> +		r = -ENOMEM;
> +		goto out;
> +	}
> +	range->start = gtt->userptr;
> +	range->end = end;
>  
> -		vma = find_vma(mm, gtt->userptr);
> -		if (!vma || vma->vm_file || vma->vm_end < end) {
> -			up_read(&mm->mmap_sem);
> -			return -EPERM;
> -		}
> +	for (i = 0; i < ttm->num_pages; i++) {
> +		range->pfns[i] = range->flags[HMM_PFN_VALID];
> +		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +					0 : range->flags[HMM_PFN_WRITE];
>  	}
>  
> -	/* loop enough times using contiguous pages of memory */
> -	do {
> -		unsigned num_pages = ttm->num_pages - pinned;
> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
> -		struct page **p = pages + pinned;
> -		struct amdgpu_ttm_gup_task_list guptask;
> +	/* This may triggles page table update */

Do you mean "trigger"?


> +	r = hmm_vma_fault(range, true);
> +	if (r)
> +		goto out_free_pfns;
>  
> -		guptask.task = current;
> -		spin_lock(&gtt->guptasklock);
> -		list_add(&guptask.list, &gtt->guptasks);
> -		spin_unlock(&gtt->guptasklock);
> +	for (i = 0; i < ttm->num_pages; i++)
> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>  
> -		if (mm == current->mm)
> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
> -		else
> -			r = get_user_pages_remote(gtt->usertask,
> -					mm, userptr, num_pages,
> -					flags, p, NULL, NULL);
> +	up_read(&mm->mmap_sem);

You can probably up_read before the loop.

Also, why do you defer freeing of range->pfns to
amdgpu_ttm_tt_get_user_pages_done? The array isn't used for anything
else later, so you could free it here.

Or instead, don't free it and use it to replace the pages array. It
seems wasteful to maintain two pages arrays and constantly allocate and
deallocate one of them.
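
For the first option, the success path could become something like this
(sketch; it assumes hmm_vma_range_done() really doesn't look at
range->pfns any more, which is worth double-checking):

	up_read(&mm->mmap_sem);

	for (i = 0; i < ttm->num_pages; i++)
		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);

	/* The pfn array isn't needed past this point */
	kvfree(range->pfns);
	range->pfns = NULL;
	return 0;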


> +	return 0;
>  
> -		spin_lock(&gtt->guptasklock);
> -		list_del(&guptask.list);
> -		spin_unlock(&gtt->guptasklock);
> +out_free_pfns:
> +	kvfree(range->pfns);
> +	range->pfns = NULL;

Setting range->pfns = NULL is not needed. This is a local variable
that's about to go out of scope anyway.


> +out:
> +	up_read(&mm->mmap_sem);
> +	return r;
> +}
>  
> -		if (r < 0)
> -			goto release_pages;
> +/**
> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
> + * Check if the pages backing this ttm range have been invalidated
> + *
> + * Returns: true if pages are invalidated since the last time they've been set
> + */
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
> +{
> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +	int r;
>  
> -		pinned += r;
> +	if (gtt == NULL || !gtt->userptr)
> +		return false;
>  
> -	} while (pinned < ttm->num_pages);
> +	r = !hmm_vma_range_done(&gtt->range);
>  
> -	up_read(&mm->mmap_sem);
> -	return 0;
> +	if (gtt->range.pfns) {
> +		kvfree(gtt->range.pfns);
> +		gtt->range.pfns = NULL;
> +	}
>  
> -release_pages:
> -	release_pages(pages, pinned);
> -	up_read(&mm->mmap_sem);
>  	return r;
>  }
>  
> @@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   */
>  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)

Does this function still make sense? It just copies a pages array. Could
it instead use gtt->range->pfns directly? And why does this still have
to be separate from amdgpu_ttm_tt_get_user_pages? I think originally it
was separate because we needed to keep track of the original pages for
unpinning them. But we're no longer pinning and unpinning pages.


>  {
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  	unsigned i;
>  
> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
> -	for (i = 0; i < ttm->num_pages; ++i) {
> -		if (ttm->pages[i])
> -			put_page(ttm->pages[i]);
> -
> +	for (i = 0; i < ttm->num_pages; ++i)
>  		ttm->pages[i] = pages ? pages[i] : NULL;

If this is still needed, it could just be a memcpy now.


> -	}
>  }
>  
>  /**
> @@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>  	/* unmap the pages mapped to the device */
>  	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>  
> -	/* mark the pages as dirty */
> -	amdgpu_ttm_tt_mark_user_pages(ttm);
> -
>  	sg_free_table(ttm->sg);
>  }
>  
> @@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  
>  	spin_lock_init(&gtt->guptasklock);
>  	INIT_LIST_HEAD(&gtt->guptasks);
> -	atomic_set(&gtt->mmu_invalidations, 0);
> -	gtt->last_set_pages = 0;
>  
>  	return 0;
>  }
> @@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	struct amdgpu_ttm_gup_task_list *entry;
>  	unsigned long size;
>  
>  	if (gtt == NULL || !gtt->userptr)
> @@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  	if (gtt->userptr > end || gtt->userptr + size <= start)
>  		return false;
>  
> -	/* Search the lists of tasks that hold this mapping and see
> -	 * if current is one of them.  If it is return false.
> -	 */
> -	spin_lock(&gtt->guptasklock);
> -	list_for_each_entry(entry, &gtt->guptasks, list) {
> -		if (entry->task == current) {
> -			spin_unlock(&gtt->guptasklock);
> -			return false;
> -		}
> -	}
> -	spin_unlock(&gtt->guptasklock);
> -
> -	atomic_inc(&gtt->mmu_invalidations);
> -
>  	return true;
>  }
>  
>  /**
> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>   */
> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> -				       int *last_invalidated)
> -{
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	int prev_invalidated = *last_invalidated;
> -
> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
> -	return prev_invalidated != *last_invalidated;
> -}
> -
> -/**
> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
> - * been invalidated since the last time they've been set?
> - */
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  
>  	if (gtt == NULL || !gtt->userptr)
>  		return false;
>  
> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
> +	return true;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b5b2d101f7db..8988c87fff9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>  void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end);
>  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>  				       int *last_invalidated);
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 2acb9838913e..4a67a88acea1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>  	entry->priority = 0;
>  	entry->tv.bo = &vm->root.base.bo->tbo;
>  	entry->tv.shared = true;
> -	entry->user_pages = NULL;
>  	list_add(&entry->tv.head, validated);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 8372556b52eb..fe120cc0930c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Please make everything below a separate commit.

Regards,
  Felix


> @@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>  
>  	retval = 0;
>  
> +	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
> +	 * lock(dqm) -> bo::reserve
> +	 */
> +	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
> +			get_mqd_type_from_queue_type(q->properties.type));
> +
> +	if (!mqd_mgr) {
> +		retval = -ENOMEM;
> +		goto out;
> +	}
> +
> +	/*
> +	 * Eviction state logic: we only mark active queues as evicted
> +	 * to avoid the overhead of restoring inactive queues later
> +	 */
> +	if (qpd->evicted)
> +		q->properties.is_evicted = (q->properties.queue_size > 0 &&
> +					    q->properties.queue_percent > 0 &&
> +					    q->properties.queue_address != 0);
> +	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> +	q->properties.tba_addr = qpd->tba_addr;
> +	q->properties.tma_addr = qpd->tma_addr;
> +	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
> +				&q->gart_mqd_addr, &q->properties);
> +	if (retval)
> +		goto out;
> +
>  	dqm_lock(dqm);
>  
>  	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
> @@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>  	if (retval)
>  		goto out_deallocate_sdma_queue;
>  
> -	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
> -			get_mqd_type_from_queue_type(q->properties.type));
> -
> -	if (!mqd_mgr) {
> -		retval = -ENOMEM;
> -		goto out_deallocate_doorbell;
> -	}
> -	/*
> -	 * Eviction state logic: we only mark active queues as evicted
> -	 * to avoid the overhead of restoring inactive queues later
> -	 */
> -	if (qpd->evicted)
> -		q->properties.is_evicted = (q->properties.queue_size > 0 &&
> -					    q->properties.queue_percent > 0 &&
> -					    q->properties.queue_address != 0);
> -
> -	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> -
> -	q->properties.tba_addr = qpd->tba_addr;
> -	q->properties.tma_addr = qpd->tma_addr;
> -	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
> -				&q->gart_mqd_addr, &q->properties);
> -	if (retval)
> -		goto out_deallocate_doorbell;
>  
>  	list_add(&q->list, &qpd->queues_list);
>  	qpd->queue_count++;
> @@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>  	dqm_unlock(dqm);
>  	return retval;
>  
> -out_deallocate_doorbell:
> -	deallocate_doorbell(qpd, q);
>  out_deallocate_sdma_queue:
>  	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>  		deallocate_sdma_queue(dqm, q->sdma_id);
>  out_unlock:
>  	dqm_unlock(dqm);
> -
> +out:
>  	return retval;
>  }
>  
> @@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>  			qpd->reset_wavefronts = true;
>  	}
>  
> -	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
> -
>  	/*
>  	 * Unconditionally decrement this counter, regardless of the queue's
>  	 * type
> @@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>  
>  	dqm_unlock(dqm);
>  
> +	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
> +	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
> +
>  	return retval;
>  
>  failed:
> @@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>  		qpd->reset_wavefronts = false;
>  	}
>  
> -	/* lastly, free mqd resources */
> +	dqm_unlock(dqm);
> +
> +	/* Lastly, free mqd resources.
> +	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
> +	 */
>  	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
>  		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>  			get_mqd_type_from_queue_type(q->properties.type));
> @@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>  	}
>  
>  out:
> -	dqm_unlock(dqm);
>  	return retval;
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: use HMM mirror callback to replace mmu notifier v5
       [not found] ` <20181203201840.25059-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  2018-12-03 20:19   ` [PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2 Yang, Philip
@ 2018-12-04  9:06   ` Christian König
       [not found]     ` <0c6817a1-d033-3691-a73c-d88e740297b2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 7+ messages in thread
From: Christian König @ 2018-12-04  9:06 UTC (permalink / raw)
  To: Yang, Philip,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

On 03.12.18 at 21:19, Yang, Philip wrote:
> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
> DRM_AMDGPU_USERPTR Kconfig.
>
> It supports both KFD userptr and gfx userptr paths.
>
> The dependent HMM patchsets from Jérôme Glisse are all merged into the 4.20.0
> kernel now.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 124 +++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>   4 files changed, 57 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e5489069..960a63355705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>   config DRM_AMDGPU_USERPTR
>   	bool "Always enable userptr write support"
>   	depends on DRM_AMDGPU
> -	select MMU_NOTIFIER
> +	select HMM_MIRROR
>   	help
> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
> -	  selected to enabled full userptr support.
> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> +	  isn't already selected to enabled full userptr support.
>   
>   config DRM_AMDGPU_GART_DEBUGFS
>   	bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index f76bcb9c45e4..05ca6dc381e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -172,7 +172,7 @@ endif
>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o

This probably needs to be CONFIG_HMM_MIRROR.
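
I.e. something like:

	amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o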

>   
>   include $(FULL_AMD_PATH)/powerplay/Makefile
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e55508b39496..56595b3d90d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>   
>   #include <linux/firmware.h>
>   #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>   #include <linux/interval_tree.h>
>   #include <drm/drmP.h>
>   #include <drm/drm.h>
> @@ -58,7 +58,6 @@
>    *
>    * @adev: amdgpu device pointer
>    * @mm: process address space
> - * @mn: MMU notifier structure
>    * @type: type of MMU notifier
>    * @work: destruction work item
>    * @node: hash table node to find structure by adev and mn
> @@ -66,6 +65,7 @@
>    * @objects: interval tree containing amdgpu_mn_nodes
>    * @read_lock: mutex for recursive locking of @lock
>    * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
>    *
>    * Data for each amdgpu device and process address space.
>    */
> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>   	/* constant after initialisation */
>   	struct amdgpu_device	*adev;
>   	struct mm_struct	*mm;
> -	struct mmu_notifier	mn;
>   	enum amdgpu_mn_type	type;
>   
>   	/* only used on destruction */
> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>   	struct rb_root_cached	objects;
>   	struct mutex		read_lock;
>   	atomic_t		recursion;
> +
> +	/* HMM mirror */
> +	struct hmm_mirror	mirror;
>   };
>   
>   /**
> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>   };
>   
>   /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
>    *
>    * @work: previously sheduled work item
>    *
> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>   	}
>   	up_write(&amn->lock);
>   	mutex_unlock(&adev->mn_lock);
> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +
> +	hmm_mirror_unregister(&amn->mirror);
>   	kfree(amn);
>   }
>   
>   /**
> - * amdgpu_mn_release - callback to notify about mm destruction
> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>    *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
>    *
> - * Shedule a work item to lazy destroy our notifier.
> + * Schedule a work item to lazily destroy the HMM mirror.
>    */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> -			      struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>   
>   	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>   	schedule_work(&amn->work);
>   }
>   
> -
>   /**
>    * amdgpu_mn_lock - take the write side lock for this notifier
>    *
> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>   /**
>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>    *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>    *
>    * Block for operations on BOs to finish and mark pages as accessed and
>    * potentially dirty.
>    */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>   	struct interval_tree_node *it;
>   
>   	/* notification is exclusive, but interval is inclusive */
> @@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>   		amdgpu_mn_invalidate_node(node, start, end);
>   	}
>   
> +	amdgpu_mn_read_unlock(amn);
> +
>   	return 0;
>   }
>   
>   /**
>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>    *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>    *
>    * We temporarily evict all BOs between start and end. This
>    * necessitates evicting all user-mode queues of the process. The BOs
>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>    */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>   	struct interval_tree_node *it;
>   
>   	/* notification is exclusive, but interval is inclusive */
> @@ -326,59 +324,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>   
>   			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>   							 start, end))
> -				amdgpu_amdkfd_evict_userptr(mem, mm);
> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>   		}
>   	}
>   
> +	amdgpu_mn_read_unlock(amn);
> +
>   	return 0;
>   }
>   
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
>    */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> -					   struct mm_struct *mm,
> -					   unsigned long start,
> -					   unsigned long end)
> -{
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> -
> -	amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>   
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>   	[AMDGPU_MN_TYPE_GFX] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +				amdgpu_mn_invalidate_range_start_gfx,

Please rename the function amdgpu_mn_invalidate_range_start_gfx to
something like amdgpu_mn_sync_pagetables_gfx.
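
E.g. just a sketch with the suggested name, everything else staying as
in your patch:

	static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
			const struct hmm_update *update);

	[AMDGPU_MN_TYPE_GFX] = {
		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
		.release = amdgpu_hmm_mirror_release
	},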

> +		.release = amdgpu_hmm_mirror_release
>   	},
>   	[AMDGPU_MN_TYPE_HSA] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +				amdgpu_mn_invalidate_range_start_hsa,

Ditto for HSA.

> +		.release = amdgpu_hmm_mirror_release
>   	},
>   };
>   
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
>   /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
>    *
>    * @adev: amdgpu device pointer
>    * @type: type of MMU notifier context
>    *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
>    */
>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   				enum amdgpu_mn_type type)
> @@ -408,12 +388,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   	amn->mm = mm;
>   	init_rwsem(&amn->lock);
>   	amn->type = type;
> -	amn->mn.ops = &amdgpu_mn_ops[type];
>   	amn->objects = RB_ROOT_CACHED;
>   	mutex_init(&amn->read_lock);
>   	atomic_set(&amn->recursion, 0);
>   
> -	r = __mmu_notifier_register(&amn->mn, mm);
> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> +	r = hmm_mirror_register(&amn->mirror, mm);
>   	if (r)
>   		goto free_amn;
>   
> @@ -439,7 +419,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>    * @bo: amdgpu buffer object
>    * @addr: userptr addr we should monitor
>    *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
>    * Returns 0 on success, -ERRNO if anything goes wrong.
>    */
>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -495,11 +475,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>   }
>   
>   /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>    *
>    * @bo: amdgpu buffer object
>    *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
>    */
>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432f78fe..0e2752646e6f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>   	AMDGPU_MN_TYPE_HSA,
>   };
>   
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM)

I think that needs to be CONFIG_HMM_MIRROR as well.
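
I.e.:

	#if defined(CONFIG_HMM_MIRROR)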

Christian.

>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,


* Re: [PATCH 1/2] drm/amdgpu: use HMM mirror callback to replace mmu notifier v5
       [not found]     ` <0c6817a1-d033-3691-a73c-d88e740297b2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-12-04 19:36       ` Yang, Philip
  0 siblings, 0 replies; 7+ messages in thread
From: Yang, Philip @ 2018-12-04 19:36 UTC (permalink / raw)
  To: Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

On 2018-12-04 4:06 a.m., Christian König wrote:
> Am 03.12.18 um 21:19 schrieb Yang, Philip:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The dependent HMM patchsets from Jérôme Glisse are all merged into the
>> 4.20.0 kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 124 +++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 57 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>   config DRM_AMDGPU_USERPTR
>>       bool "Always enable userptr write support"
>>       depends on DRM_AMDGPU
>> -    select MMU_NOTIFIER
>> +    select HMM_MIRROR
>>       help
>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -      selected to enabled full userptr support.
>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +      isn't already selected to enable full userptr support.
>>     config DRM_AMDGPU_GART_DEBUGFS
>>       bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index f76bcb9c45e4..05ca6dc381e0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>
> This probably needs to be CONFIG_HMM_MIRROR.
Yes, CONFIG_HMM_MIRROR selects CONFIG_HMM, but the reverse is not true.
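The relevant mm Kconfig entry looks roughly like this (quoting from
memory, just to show the dependency direction):

	config HMM_MIRROR
		bool "HMM mirror CPU page table into a device page table"
		select HMM
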
>
>>     include $(FULL_AMD_PATH)/powerplay/Makefile
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b39496..56595b3d90d2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>     #include <linux/firmware.h>
>>   #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>>   #include <linux/interval_tree.h>
>>   #include <drm/drmP.h>
>>   #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>>    *
>>    * @adev: amdgpu device pointer
>>    * @mm: process address space
>> - * @mn: MMU notifier structure
>>    * @type: type of MMU notifier
>>    * @work: destruction work item
>>    * @node: hash table node to find structure by adev and mn
>> @@ -66,6 +65,7 @@
>>    * @objects: interval tree containing amdgpu_mn_nodes
>>    * @read_lock: mutex for recursive locking of @lock
>>    * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>    *
>>    * Data for each amdgpu device and process address space.
>>    */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>       /* constant after initialisation */
>>       struct amdgpu_device    *adev;
>>       struct mm_struct    *mm;
>> -    struct mmu_notifier    mn;
>>       enum amdgpu_mn_type    type;
>>         /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>       struct rb_root_cached    objects;
>>       struct mutex        read_lock;
>>       atomic_t        recursion;
>> +
>> +    /* HMM mirror */
>> +    struct hmm_mirror    mirror;
>>   };
>>     /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>>   };
>>     /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>    *
>>    * @work: previously sheduled work item
>>    *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct 
>> work_struct *work)
>>       }
>>       up_write(&amn->lock);
>>       mutex_unlock(&adev->mn_lock);
>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +    hmm_mirror_unregister(&amn->mirror);
>>       kfree(amn);
>>   }
>>     /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>    *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Schedule a work item to lazily destroy the HMM mirror.
>>    */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -                  struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>>         INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>       schedule_work(&amn->work);
>>   }
>>   -
>>   /**
>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>    *
>> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct 
>> amdgpu_mn_node *node,
>>   /**
>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * Block for operations on BOs to finish and mark pages as accessed 
>> and
>>    * potentially dirty.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier 
>> *mn,
>> -                         struct mm_struct *mm,
>> -                         unsigned long start,
>> -                         unsigned long end,
>> -                         bool blockable)
>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror 
>> *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>>         /* notification is exclusive, but interval is inclusive */
>> @@ -278,28 +276,28 @@ static int 
>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>           amdgpu_mn_invalidate_node(node, start, end);
>>       }
>>   +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>>     /**
>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * We temporarily evict all BOs between start and end. This
>>    * necessitates evicting all user-mode queues of the process. The BOs
>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier 
>> *mn,
>> -                         struct mm_struct *mm,
>> -                         unsigned long start,
>> -                         unsigned long end,
>> -                         bool blockable)
>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror 
>> *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>>         /* notification is exclusive, but interval is inclusive */
>> @@ -326,59 +324,41 @@ static int 
>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>                 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>                                start, end))
>> -                amdgpu_amdkfd_evict_userptr(mem, mm);
>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>           }
>>       }
>>   +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>>   -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>>    */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> -                       struct mm_struct *mm,
>> -                       unsigned long start,
>> -                       unsigned long end)
>> -{
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> -
>> -    amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>   -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>       [AMDGPU_MN_TYPE_GFX] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables =
>> +                amdgpu_mn_invalidate_range_start_gfx,
>
> Please rename the function amdgpu_mn_invalidate_range_start_gfx to
> something like amdgpu_mn_sync_pagetables_gfx.
>
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>       [AMDGPU_MN_TYPE_HSA] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables =
>> +                amdgpu_mn_invalidate_range_start_hsa,
>
> Ditto for HSA.
Done
>
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>   };
>>   -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>>   /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>>    *
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>    */
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>                   enum amdgpu_mn_type type)
>> @@ -408,12 +388,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>       amn->mm = mm;
>>       init_rwsem(&amn->lock);
>>       amn->type = type;
>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>       amn->objects = RB_ROOT_CACHED;
>>       mutex_init(&amn->read_lock);
>>       atomic_set(&amn->recursion, 0);
>>   -    r = __mmu_notifier_register(&amn->mn, mm);
>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>       if (r)
>>           goto free_amn;
>>   @@ -439,7 +419,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>    * @bo: amdgpu buffer object
>>    * @addr: userptr addr we should monitor
>>    *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>    */
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -495,11 +475,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, 
>> unsigned long addr)
>>   }
>>     /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>    *
>>    * @bo: amdgpu buffer object
>>    *
>> - * Remove any registration of MMU notifier updates from the buffer 
>> object.
>> + * Remove any registration of HMM mirror updates from the buffer 
>> object.
>>    */
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0e2752646e6f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>       AMDGPU_MN_TYPE_HSA,
>>   };
>>   -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM)
>
> I think that needs to be CONFIG_HMM_MIRROR as well.
>
Right.
> Christian.
>
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>


* Re: [PATCH 2/2] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v2
       [not found]         ` <f95aee13-7579-6f1b-c6a4-887802140702-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 21:01           ` Yang, Philip
  0 siblings, 0 replies; 7+ messages in thread
From: Yang, Philip @ 2018-12-06 21:01 UTC (permalink / raw)
  To: Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

On 2018-12-03 8:52 p.m., Kuehling, Felix wrote:
> See comments inline. I didn't review the amdgpu_cs and amdgpu_gem parts
> as I don't know them very well.
>
> On 2018-12-03 3:19 p.m., Yang, Philip wrote:
>> Use HMM helper function hmm_vma_fault() to get physical pages backing
>> userptr and start CPU page table update track of those pages. Then use
>> hmm_vma_range_done() to check if those pages are updated before
>> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>>
>> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
>> from scratch, for kfd, restore worker is rescheduled to retry.
>>
>> To avoid circular lock dependency, no nested locking between mmap_sem
>> and bo::reserve. The locking order is:
>> bo::reserve -> amdgpu_mn_lock(p->mn)
>>
>> HMM simplify the CPU page table concurrent update check, so remove
>> guptasklock, mmu_invalidations, last_set_pages fields from
>> amdgpu_ttm_tt struct.
>>
>> HMM does not pin the page (increase page ref count), so remove related
>> operations like release_pages(), put_page(), mark_page_dirty().
>>
>> v2:
>> * Remove nested locking between mmap_sem and bo::reserve
>> * Change locking order to bo::reserve -> amdgpu_mn_lock()
>> * Use dynamic allocation to replace VLA in kernel stack
>>
>> Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 ++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 164 ++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
>>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  67 ++++---
>>   11 files changed, 312 insertions(+), 272 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index f3129b912714..5ce6ba24fc72 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>>   	amdgpu_bo_unreserve(bo);
>>   
>>   release_out:
>> -	if (ret)
>> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>   free_out:
>>   	kvfree(mem->user_pages);
>>   	mem->user_pages = NULL;
>> @@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>>   	ctx->kfd_bo.priority = 0;
>>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>>   	ctx->kfd_bo.tv.shared = true;
>> -	ctx->kfd_bo.user_pages = NULL;
>>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>   	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
>> @@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>>   	ctx->kfd_bo.priority = 0;
>>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>>   	ctx->kfd_bo.tv.shared = true;
>> -	ctx->kfd_bo.user_pages = NULL;
>>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>   	i = 0;
>> @@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>   	/* Free user pages if necessary */
>>   	if (mem->user_pages) {
>>   		pr_debug("%s: Freeing user_pages array\n", __func__);
>> -		if (mem->user_pages[0])
>> -			release_pages(mem->user_pages,
>> -					mem->bo->tbo.ttm->num_pages);
>>   		kvfree(mem->user_pages);
>>   	}
>>   
>> @@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>   				       __func__);
>>   				return -ENOMEM;
>>   			}
>> -		} else if (mem->user_pages[0]) {
>> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>>   		}
>>   
>>   		/* Get updated user pages */
>> @@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>   			 * stalled user mode queues.
>>   			 */
>>   		}
>> -
>> -		/* Mark the BO as valid unless it was invalidated
>> -		 * again concurrently
>> -		 */
>> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
>> -			return -EAGAIN;
>>   	}
>>   
>>   	return 0;
>> @@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>   		}
>>   
>>   		/* Validate succeeded, now the BO owns the pages, free
>> -		 * our copy of the pointer array. Put this BO back on
>> -		 * the userptr_valid_list. If we need to revalidate
>> -		 * it, we need to start from scratch.
>> +		 * our copy of the pointer array.
>>   		 */
>>   		kvfree(mem->user_pages);
>>   		mem->user_pages = NULL;
>> -		list_move_tail(&mem->validate_list.head,
>> -			       &process_info->userptr_valid_list);
>>   
>>   		/* Update mapping. If the BO was not validated
>>   		 * (because we couldn't get user pages), this will
>> @@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>   	return ret;
>>   }
>>   
>> +/* user_pages_invalidated - if CPU page table is updated after getting user
>> + * pages
>> + *
>> + * HMM mirror callback lock amn is held to prevent concurrent CPU
>> + * page table updates while resuming user queues.
>> + *
>> + * Returns: true if CPU page table is updated, false otherwise
>> + */
>> +static int user_pages_invalidated(struct mm_struct *mm,
>> +			struct amdkfd_process_info *process_info,
>> +			struct amdgpu_mn **amn)
>> +{
>> +	struct kgd_mem *mem, *tmp_mem;
>> +	struct amdgpu_bo *bo;
>> +	struct amdgpu_device *adev;
>> +	int invalid, r = 0;
>> +
>> +	list_for_each_entry_safe(mem, tmp_mem,
>> +				 &process_info->userptr_inval_list,
>> +				 validate_list.head) {
>> +
>> +		invalid = atomic_read(&mem->invalid);
>> +		if (!invalid)
>> +			/* BO hasn't been invalidated since the last
>> +			 * revalidation attempt. Keep its BO list.
>> +			 */
>> +			continue;
>> +
>> +		bo = mem->bo;
>> +
>> +		/* Get HMM mirror callback lock */
>> +		if (!*amn) {
>> +			adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +			*amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
>> +			if (IS_ERR(*amn)) {
>> +				r = true;
>> +				*amn = NULL;
>> +				goto out;
>> +			}
>> +
>> +			amdgpu_mn_lock(*amn);
> There can be multiple GPUs involved here. So you'd need more than one
> amn lock. I think you're trying to use the amn lock to serialize page
> table updates. But that's probably not the right lock. What we should be
> using for this is the page directory reservation locks. We're already
> taking them for all affected GPUs in validate_invalid_user_pages. And
> that's where you should also be calling amdgpu_ttm_tt_get_user_pages_done.
>
Yes, this is incorrect for mGPU. I will remove this function completely and
instead call amdgpu_ttm_tt_get_user_pages_done inside
validate_invalid_user_pages while holding the reservation locks.
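Roughly what I have in mind for v3 (untested sketch, error handling
simplified; returning -EAGAIN just reschedules the restore worker):

	/* in validate_invalid_user_pages, PD reservations still held */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
		struct amdgpu_bo *bo = mem->bo;

		/* Stop HMM tracking; returns true if the CPU page table
		 * changed since amdgpu_ttm_tt_get_user_pages.
		 */
		if (amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
			ret = -EAGAIN;
	}
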
>> +		}
>> +
>> +		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> This needs to be called while holding the "driver lock that serializes
> device page table updates". I think in our case that's the page
> directory reservation. You're not holding that here.
>
>
>> +
>> +		/* Put this BO back on the userptr_valid_list. If we need to
>> +		 * revalidate it, we need to start from scratch.
>> +		 */
>> +		list_move_tail(&mem->validate_list.head,
>> +			       &process_info->userptr_valid_list);
> Doing this unconditionally here looks wrong. Also, this was already done
> in validate_invalid_user_pages. So I'm wondering what's the point of
> this function. In most cases the process_info->userptr_inval_list will
> be empty here to begin with.
>
> I think you should probably call amdgpu_ttm_tt_get_user_pages_done in
> validate_invalid_user_pages while holding the PD reservation lock and
> where you call amdgpu_ttm_tt_set_user_pages.
>
>
>> +
>> +		/* Mark the BO as valid unless it was invalidated
>> +		 * again concurrently
>> +		 */
>> +		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
>> +			r = true;
>> +			goto out;
>> +		}
>> +	}
>> +
>> +out:
>> +	return r;
>> +}
>> +
>>   /* Worker callback to restore evicted userptr BOs
>>    *
>>    * Tries to update and validate all userptr BOs. If successful and no
>> @@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>>   	struct task_struct *usertask;
>>   	struct mm_struct *mm;
>>   	int evicted_bos;
>> +	struct amdgpu_mn *amn = NULL;
>>   
>>   	evicted_bos = atomic_read(&process_info->evicted_bos);
>>   	if (!evicted_bos)
>> @@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>>   	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
>>   	    evicted_bos)
>>   		goto unlock_out;
>> +
>> +	/* If CPU page table is updated again after getting user pages,
>> +	 * schedule to restart the restore process again.
>> +	 *
>> +	 * amn is also locked to prevent CPU page table update while resuming
>> +	 * user queues. amn is unlocked after user queues are resumed.
>> +	 */
>> +	if (user_pages_invalidated(mm, process_info, &amn))
>> +		goto unlock_mn_out;
>> +
>>   	evicted_bos = 0;
>> +
>>   	if (kgd2kfd->resume_mm(mm)) {
>>   		pr_err("%s: Failed to resume KFD\n", __func__);
>>   		/* No recovery from this failure. Probably the CP is
>>   		 * hanging. No point trying again.
>>   		 */
>>   	}
>> +
>> +unlock_mn_out:
>> +	amdgpu_mn_unlock(amn);
>>   unlock_out:
>>   	mutex_unlock(&process_info->lock);
>>   	mmput(mm);
> [snip]
>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 56595b3d90d2..6b6becdb725b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>   			true, false, MAX_SCHEDULE_TIMEOUT);
>>   		if (r <= 0)
>>   			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>> -
>> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>>   	}
>>   }
>>   
>> @@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>   /**
>>    * amdgpu_mn_get - create HMM mirror context
>>    *
>> + * @mm: the mm struct
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a HMM mirror context for current->mm.
>> + * Creates a HMM mirror context for mm.
>>    */
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
>> +				struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type)
>>   {
>> -	struct mm_struct *mm = current->mm;
>>   	struct amdgpu_mn *amn;
>>   	unsigned long key = AMDGPU_MN_KEY(mm, type);
>>   	int r;
>> @@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>   	struct list_head bos;
>>   	struct interval_tree_node *it;
>>   
>> -	amn = amdgpu_mn_get(adev, type);
>> +	amn = amdgpu_mn_get(current->mm, adev, type);
>>   	if (IS_ERR(amn))
>>   		return PTR_ERR(amn);
>>   
>> @@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   	mutex_unlock(&adev->mn_lock);
>>   }
>>   
>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
>> +		(1 << 0), /* HMM_PFN_VALID */
>> +		(1 << 1), /* HMM_PFN_WRITE */
>> +		0 /* HMM_PFN_DEVICE_PRIVATE */
>> +};
>> +
>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
>> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
>> +		0, /* HMM_PFN_NONE */
>> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
>> +};
>> +
>> +void amdgpu_hmm_init_range(struct hmm_range *range)
>> +{
>> +	if (range) {
>> +		range->flags = hmm_range_flags;
>> +		range->values = hmm_range_values;
>> +		range->pfn_shift = PAGE_SHIFT;
>> +		range->pfns = NULL;
>> +		INIT_LIST_HEAD(&range->list);
>> +	}
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index 0e2752646e6f..59ea30e149bd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -25,9 +25,10 @@
>>   #define __AMDGPU_MN_H__
>>   
>>   /*
>> - * MMU Notifier
>> + * HMM mirror
>>    */
>>   struct amdgpu_mn;
>> +struct hmm_range;
>>   
>>   enum amdgpu_mn_type {
>>   	AMDGPU_MN_TYPE_GFX,
>> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>>   #if defined(CONFIG_HMM)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
>> +				struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type);
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo);
>> +void amdgpu_hmm_init_range(struct hmm_range *range);
>>   #else
>>   static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>>   static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c91ec3101d00..4a1cb4d15dfb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -43,6 +43,7 @@
>>   #include <linux/pagemap.h>
>>   #include <linux/debugfs.h>
>>   #include <linux/iommu.h>
>> +#include <linux/hmm.h>
>>   #include "amdgpu.h"
>>   #include "amdgpu_object.h"
>>   #include "amdgpu_trace.h"
>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>>   /*
>>    * TTM backend functions.
>>    */
>> -struct amdgpu_ttm_gup_task_list {
>> -	struct list_head	list;
>> -	struct task_struct	*task;
>> -};
>> -
>>   struct amdgpu_ttm_tt {
>>   	struct ttm_dma_tt	ttm;
>>   	u64			offset;
>> @@ -718,85 +714,91 @@ struct amdgpu_ttm_tt {
>>   	uint32_t		userflags;
>>   	spinlock_t              guptasklock;
>>   	struct list_head        guptasks;
>> -	atomic_t		mmu_invalidations;
>> -	uint32_t		last_set_pages;
>> +	struct hmm_range	range;
>>   };
>>   
>>   /**
>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
>> - * pointer to memory
>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
>> + * memory and start HMM tracking CPU page table update
>>    *
>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
>> - * This provides a wrapper around the get_user_pages() call to provide
>> - * device accessible pages that back user memory.
>> + * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once
>> + * and only once afterwards to stop HMM tracking
>>    */
>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>   	struct mm_struct *mm = gtt->usertask->mm;
>> -	unsigned int flags = 0;
>> -	unsigned pinned = 0;
>> -	int r;
>> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> +	struct hmm_range *range = &gtt->range;
>> +	int r, i;
>>   
>>   	if (!mm) /* Happens during process shutdown */
>>   		return -ESRCH;
>>   
>> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>> -		flags |= FOLL_WRITE;
>> +	amdgpu_hmm_init_range(range);
>>   
>>   	down_read(&mm->mmap_sem);
>>   
>> -	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>> -		/*
>> -		 * check that we only use anonymous memory to prevent problems
>> -		 * with writeback
>> -		 */
>> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> -		struct vm_area_struct *vma;
>> +	range->vma = find_vma(mm, gtt->userptr);
>> +	if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {
> I think this breaks file-backed mappings and also adds a new limitation
> that the address range has to be a single VMA. This was previously only
> required if gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY.
Previously this vma was only used for a sanity check, because
get_user_pages does not need a vma passed in. Now hmm_vma_fault expects a
valid vma in the range struct, so I will change this to keep the previous
check. Thanks.
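
Something like this for v3 (sketch; the vm_file/vm_end restriction is
kept only for the ANONONLY case):

	range->vma = find_vma(mm, gtt->userptr);
	if (!range->vma || range->vma->vm_start > gtt->userptr) {
		r = -EPERM;
		goto out;
	}
	if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
	    (range->vma->vm_file || range->vma->vm_end < end)) {
		r = -EPERM;
		goto out;
	}
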
>> +		r = -EPERM;
>> +		goto out;
>> +	}
>> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> +				GFP_KERNEL | __GFP_ZERO);
> Is GFP_ZERO really needed here? The loop below initializes all pfns
> explicitly.
GFP_ZERO is not needed.
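
I.e. the allocation becomes just:

	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
				     GFP_KERNEL);
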
>
>> +	if (range->pfns == NULL) {
>> +		r = -ENOMEM;
>> +		goto out;
>> +	}
>> +	range->start = gtt->userptr;
>> +	range->end = end;
>>   
>> -		vma = find_vma(mm, gtt->userptr);
>> -		if (!vma || vma->vm_file || vma->vm_end < end) {
>> -			up_read(&mm->mmap_sem);
>> -			return -EPERM;
>> -		}
>> +	for (i = 0; i < ttm->num_pages; i++) {
>> +		range->pfns[i] = range->flags[HMM_PFN_VALID];
>> +		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> +					0 : range->flags[HMM_PFN_WRITE];
>>   	}
>>   
>> -	/* loop enough times using contiguous pages of memory */
>> -	do {
>> -		unsigned num_pages = ttm->num_pages - pinned;
>> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
>> -		struct page **p = pages + pinned;
>> -		struct amdgpu_ttm_gup_task_list guptask;
>> +	/* This may triggles page table update */
> Do you mean "trigger"?
>
>
>> +	r = hmm_vma_fault(range, true);
>> +	if (r)
>> +		goto out_free_pfns;
>>   
>> -		guptask.task = current;
>> -		spin_lock(&gtt->guptasklock);
>> -		list_add(&guptask.list, &gtt->guptasks);
>> -		spin_unlock(&gtt->guptasklock);
>> +	for (i = 0; i < ttm->num_pages; i++)
>> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>>   
>> -		if (mm == current->mm)
>> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> -		else
>> -			r = get_user_pages_remote(gtt->usertask,
>> -					mm, userptr, num_pages,
>> -					flags, p, NULL, NULL);
>> +	up_read(&mm->mmap_sem);
> You can probably up_read before the loop.
>
> Also, why do you defer freeing of range->pfns to
> amdgpu_ttm_tt_get_user_pages_done? The array isn't used for anything
> else later, so you could free it here.
>
> Or instead, don't free it and use it to replace the pages array. It
> seems wasteful to maintain two pages arrays and constantly allocate and
> deallocate one of them.
range->pfns will be used later by hmm_vma_range_done() in 
amdgpu_ttm_tt_get_user_pages_done
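
The intended pairing on the caller side is (sketch):

	/* grab the pages and start HMM tracking */
	r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	...
	/* stop tracking; true means the CPU page table changed and we
	 * have to start over
	 */
	if (amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm))
		goto restart;

hmm_vma_range_done() consumes range->pfns, so the array has to stay
allocated until then.
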
>
>> +	return 0;
>>   
>> -		spin_lock(&gtt->guptasklock);
>> -		list_del(&guptask.list);
>> -		spin_unlock(&gtt->guptasklock);
>> +out_free_pfns:
>> +	kvfree(range->pfns);
>> +	range->pfns = NULL;
> Setting range->pfns = NULL is not needed. This is a local variable
> that's about to go out of scope anyway.
range is a local pointer to gtt->range; range->pfns will be freed in
amdgpu_ttm_tt_get_user_pages_done, so it is assigned to NULL here to avoid
a double free.

>
>> +out:
>> +	up_read(&mm->mmap_sem);
>> +	return r;
>> +}
>>   
>> -		if (r < 0)
>> -			goto release_pages;
>> +/**
>> + * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table change
>> + * Check if the pages backing this ttm range have been invalidated
>> + *
>> + * Returns: true if pages are invalidated since the last time they've been set
>> + */
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>> +{
>> +	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +	int r;
>>   
>> -		pinned += r;
>> +	if (gtt == NULL || !gtt->userptr)
>> +		return false;
>>   
>> -	} while (pinned < ttm->num_pages);
>> +	r = !hmm_vma_range_done(&gtt->range);
>>   
>> -	up_read(&mm->mmap_sem);
>> -	return 0;
>> +	if (gtt->range.pfns) {
>> +		kvfree(gtt->range.pfns);
>> +		gtt->range.pfns = NULL;
>> +	}
>>   
>> -release_pages:
>> -	release_pages(pages, pinned);
>> -	up_read(&mm->mmap_sem);
>>   	return r;
>>   }
>>   
>> @@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>    */
>>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> Does this function still make sense? It just copies a pages array. Could
> it instead use gtt->range->pfns directly? And why does this still have
> to be separate from amdgpu_ttm_tt_get_user_pages? I think originally it
> was separate because we needed to keep track of the original pages for
> unpinning them. But we're no longer pinning and unpinning pages.
>
Yes, I will remove mem->user_pages completely and use bo->tbo.ttm->pages
instead.
>>   {
>> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>   	unsigned i;
>>   
>> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
>> -	for (i = 0; i < ttm->num_pages; ++i) {
>> -		if (ttm->pages[i])
>> -			put_page(ttm->pages[i]);
>> -
>> +	for (i = 0; i < ttm->num_pages; ++i)
>>   		ttm->pages[i] = pages ? pages[i] : NULL;
> If this is still needed, it could just be a memcpy now.
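
If it survives at all, it would indeed reduce to something like
(untested):

	if (pages)
		memcpy(ttm->pages, pages,
		       ttm->num_pages * sizeof(struct page *));
	else
		memset(ttm->pages, 0,
		       ttm->num_pages * sizeof(struct page *));
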
>
>> -	}
>>   }
>>   
>>   /**
>> @@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>>   	/* unmap the pages mapped to the device */
>>   	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>>   
>> -	/* mark the pages as dirty */
>> -	amdgpu_ttm_tt_mark_user_pages(ttm);
>> -
>>   	sg_free_table(ttm->sg);
>>   }
>>   
>> @@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>   
>>   	spin_lock_init(&gtt->guptasklock);
>>   	INIT_LIST_HEAD(&gtt->guptasks);
>> -	atomic_set(&gtt->mmu_invalidations, 0);
>> -	gtt->last_set_pages = 0;
>>   
>>   	return 0;
>>   }
>> @@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   				  unsigned long end)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> -	struct amdgpu_ttm_gup_task_list *entry;
>>   	unsigned long size;
>>   
>>   	if (gtt == NULL || !gtt->userptr)
>> @@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   	if (gtt->userptr > end || gtt->userptr + size <= start)
>>   		return false;
>>   
>> -	/* Search the lists of tasks that hold this mapping and see
>> -	 * if current is one of them.  If it is return false.
>> -	 */
>> -	spin_lock(&gtt->guptasklock);
>> -	list_for_each_entry(entry, &gtt->guptasks, list) {
>> -		if (entry->task == current) {
>> -			spin_unlock(&gtt->guptasklock);
>> -			return false;
>> -		}
>> -	}
>> -	spin_unlock(&gtt->guptasklock);
>> -
>> -	atomic_inc(&gtt->mmu_invalidations);
>> -
>>   	return true;
>>   }
>>   
>>   /**
>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
>> + * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
>>    */
>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>> -				       int *last_invalidated)
>> -{
>> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> -	int prev_invalidated = *last_invalidated;
>> -
>> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
>> -	return prev_invalidated != *last_invalidated;
>> -}
>> -
>> -/**
>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
>> - * been invalidated since the last time they've been set?
>> - */
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>   
>>   	if (gtt == NULL || !gtt->userptr)
>>   		return false;
>>   
>> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
>> +	return true;
>>   }
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index b5b2d101f7db..8988c87fff9d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>>   int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>   
>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>>   void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>>   void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>>   int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   				  unsigned long end);
>>   bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>>   				       int *last_invalidated);
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>>   bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>>   uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>>   uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 2acb9838913e..4a67a88acea1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>>   	entry->priority = 0;
>>   	entry->tv.bo = &vm->root.base.bo->tbo;
>>   	entry->tv.shared = true;
>> -	entry->user_pages = NULL;
>>   	list_add(&entry->tv.head, validated);
>>   }
>>   
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 8372556b52eb..fe120cc0930c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> Please make everything below a separate commit.
Ok, I will submit a separate patch for the changes below.
> Regards,
>    Felix
>
>
>> @@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>>   
>>   	retval = 0;
>>   
>> +	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
>> +	 * lock(dqm) -> bo::reserve
>> +	 */
>> +	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>> +			get_mqd_type_from_queue_type(q->properties.type));
>> +
>> +	if (!mqd_mgr) {
>> +		retval = -ENOMEM;
>> +		goto out;
>> +	}
>> +
>> +	/*
>> +	 * Eviction state logic: we only mark active queues as evicted
>> +	 * to avoid the overhead of restoring inactive queues later
>> +	 */
>> +	if (qpd->evicted)
>> +		q->properties.is_evicted = (q->properties.queue_size > 0 &&
>> +					    q->properties.queue_percent > 0 &&
>> +					    q->properties.queue_address != 0);
>> +	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
>> +	q->properties.tba_addr = qpd->tba_addr;
>> +	q->properties.tma_addr = qpd->tma_addr;
>> +	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
>> +				&q->gart_mqd_addr, &q->properties);
>> +	if (retval)
>> +		goto out;
>> +
>>   	dqm_lock(dqm);
>>   
>>   	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
>> @@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>>   	if (retval)
>>   		goto out_deallocate_sdma_queue;
>>   
>> -	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>> -			get_mqd_type_from_queue_type(q->properties.type));
>> -
>> -	if (!mqd_mgr) {
>> -		retval = -ENOMEM;
>> -		goto out_deallocate_doorbell;
>> -	}
>> -	/*
>> -	 * Eviction state logic: we only mark active queues as evicted
>> -	 * to avoid the overhead of restoring inactive queues later
>> -	 */
>> -	if (qpd->evicted)
>> -		q->properties.is_evicted = (q->properties.queue_size > 0 &&
>> -					    q->properties.queue_percent > 0 &&
>> -					    q->properties.queue_address != 0);
>> -
>> -	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
>> -
>> -	q->properties.tba_addr = qpd->tba_addr;
>> -	q->properties.tma_addr = qpd->tma_addr;
>> -	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
>> -				&q->gart_mqd_addr, &q->properties);
>> -	if (retval)
>> -		goto out_deallocate_doorbell;
>>   
>>   	list_add(&q->list, &qpd->queues_list);
>>   	qpd->queue_count++;
>> @@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>>   	dqm_unlock(dqm);
>>   	return retval;
>>   
>> -out_deallocate_doorbell:
>> -	deallocate_doorbell(qpd, q);
>>   out_deallocate_sdma_queue:
>>   	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>>   		deallocate_sdma_queue(dqm, q->sdma_id);
>>   out_unlock:
>>   	dqm_unlock(dqm);
>> -
>> +out:
>>   	return retval;
>>   }
>>   
>> @@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>>   			qpd->reset_wavefronts = true;
>>   	}
>>   
>> -	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>> -
>>   	/*
>>   	 * Unconditionally decrement this counter, regardless of the queue's
>>   	 * type
>> @@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>>   
>>   	dqm_unlock(dqm);
>>   
>> +	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
>> +	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>> +
>>   	return retval;
>>   
>>   failed:
>> @@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>>   		qpd->reset_wavefronts = false;
>>   	}
>>   
>> -	/* lastly, free mqd resources */
>> +	dqm_unlock(dqm);
>> +
>> +	/* Lastly, free mqd resources.
>> +	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
>> +	 */
>>   	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
>>   		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>>   			get_mqd_type_from_queue_type(q->properties.type));
>> @@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>>   	}
>>   
>>   out:
>> -	dqm_unlock(dqm);
>>   	return retval;
>>   }
>>   

