From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: apopple@nvidia.com, airlied@gmail.com, christian.koenig@amd.com,
thomas.hellstrom@linux.intel.com, simona.vetter@ffwll.ch,
felix.kuehling@amd.com, dakr@kernel.org
Subject: [PATCH v3 14/30] drm/xe: Add SVM garbage collector
Date: Tue, 17 Dec 2024 15:33:32 -0800 [thread overview]
Message-ID: <20241217233348.3519726-15-matthew.brost@intel.com> (raw)
In-Reply-To: <20241217233348.3519726-1-matthew.brost@intel.com>
Add a basic SVM garbage collector which destroys an SVM range upon an MMU
UNMAP event. The garbage collector runs on a worker or in the GPU fault
handler and is required because locks in the reclaim path are needed and
cannot be taken in the notifier.
v2:
- Flush garbage collector in xe_svm_close
v3:
- Better commit message (Thomas)
- Kernel doc (Thomas)
- Use list_first_entry_or_null for garbage collector loop (Thomas)
- Don't add to garbage collector if VM is closed (Thomas)
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_svm.c | 90 +++++++++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_svm.h | 5 ++
drivers/gpu/drm/xe/xe_vm.c | 4 ++
drivers/gpu/drm/xe/xe_vm_types.h | 18 +++++++
4 files changed, 115 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index ace8c32f3428..32b1581b1ad0 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -28,6 +28,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
if (!range)
return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&range->garbage_collector_link);
xe_vm_get(gpusvm_to_vm(gpusvm));
return &range->base;
@@ -44,6 +45,24 @@ static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
return container_of(r, struct xe_svm_range, base);
}
+static void
+xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
+ const struct mmu_notifier_range *mmu_range)
+{
+ struct xe_device *xe = vm->xe;
+
+ drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
+
+ spin_lock(&vm->svm.garbage_collector.lock);
+ if (list_empty(&range->garbage_collector_link))
+ list_add_tail(&range->garbage_collector_link,
+ &vm->svm.garbage_collector.range_list);
+ spin_unlock(&vm->svm.garbage_collector.lock);
+
+ queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
+ &vm->svm.garbage_collector.work);
+}
+
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
const struct mmu_notifier_range *mmu_range,
@@ -90,7 +109,9 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
xe_svm_assert_in_notifier(vm);
drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
- /* TODO: Add range to garbage collector if VM is not closed */
+ if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
+ xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
+ mmu_range);
}
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
@@ -192,6 +213,62 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
xe_svm_range_notifier_event_end(vm, r, mmu_range);
}
+static int __xe_svm_garbage_collector(struct xe_vm *vm,
+ struct xe_svm_range *range)
+{
+ /* TODO: Do unbind */
+
+ drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);
+
+ return 0;
+}
+
+static int xe_svm_garbage_collector(struct xe_vm *vm)
+{
+ struct xe_svm_range *range;
+ int err;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ if (xe_vm_is_closed_or_banned(vm))
+ return -ENOENT;
+
+ spin_lock(&vm->svm.garbage_collector.lock);
+ for (;;) {
+ range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
+ typeof(*range),
+ garbage_collector_link);
+ if (!range)
+ break;
+
+ list_del(&range->garbage_collector_link);
+ spin_unlock(&vm->svm.garbage_collector.lock);
+
+ err = __xe_svm_garbage_collector(vm, range);
+ if (err) {
+ drm_warn(&vm->xe->drm,
+ "Garbage collection failed: %d\n", err);
+ xe_vm_kill(vm, true);
+ return err;
+ }
+
+ spin_lock(&vm->svm.garbage_collector.lock);
+ }
+ spin_unlock(&vm->svm.garbage_collector.lock);
+
+ return 0;
+}
+
+static void xe_svm_garbage_collector_work_func(struct work_struct *w)
+{
+ struct xe_vm *vm = container_of(w, struct xe_vm,
+ svm.garbage_collector.work);
+
+ down_write(&vm->lock);
+ xe_svm_garbage_collector(vm);
+ up_write(&vm->lock);
+}
+
static const struct drm_gpusvm_ops gpusvm_ops = {
.range_alloc = xe_svm_range_alloc,
.range_free = xe_svm_range_free,
@@ -216,6 +293,11 @@ int xe_svm_init(struct xe_vm *vm)
{
int err;
+ spin_lock_init(&vm->svm.garbage_collector.lock);
+ INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
+ INIT_WORK(&vm->svm.garbage_collector.work,
+ xe_svm_garbage_collector_work_func);
+
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
current->mm, NULL, 0, vm->size,
SZ_512M, &gpusvm_ops, fault_chunk_sizes,
@@ -237,6 +319,7 @@ int xe_svm_init(struct xe_vm *vm)
void xe_svm_close(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
+ flush_work(&vm->svm.garbage_collector.work);
}
/**
@@ -286,7 +369,10 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
retry:
- /* TODO: Run garbage collector */
+ /* Always process UNMAPs first so the view of SVM ranges is current */
+ err = xe_svm_garbage_collector(vm);
+ if (err)
+ return err;
r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
xe_vma_start(vma), xe_vma_end(vma),
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 6cd57382509d..5531868cdd6f 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -19,6 +19,11 @@ struct xe_vma;
struct xe_svm_range {
/** @base: base drm_gpusvm_range */
struct drm_gpusvm_range base;
+ /**
+ * @garbage_collector_link: Link into VM's garbage collect SVM range
+ * list. Protected by VM's garbage collect lock.
+ */
+ struct list_head garbage_collector_link;
/**
* @tile_present: Tile mask of binding is present for this range.
* Protected by GPU SVM notifier lock.
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 67859e9c8595..12e5e562c5e1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3123,6 +3123,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
}
+ /* Ensure all UNMAPs are visible */
+ if (xe_vm_in_fault_mode(vm))
+ flush_work(&vm->svm.garbage_collector.work);
+
err = down_write_killable(&vm->lock);
if (err)
goto put_exec_queue;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 983f724c911b..576316729249 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -146,6 +146,24 @@ struct xe_vm {
struct {
/** @svm.gpusvm: base GPUSVM used to track fault allocations */
struct drm_gpusvm gpusvm;
+ /**
+ * @svm.garbage_collector: Garbage collector which is used to unmap
+ * SVM ranges' GPU bindings and destroy the ranges.
+ */
+ struct {
+ /** @svm.garbage_collector.lock: Protects the range list */
+ spinlock_t lock;
+ /**
+ * @svm.garbage_collector.range_list: List of SVM ranges
+ * in the garbage collector.
+ */
+ struct list_head range_list;
+ /**
+ * @svm.garbage_collector.work: Worker which the
+ * garbage collector runs on.
+ */
+ struct work_struct work;
+ } garbage_collector;
} svm;
struct xe_device *xe;
--
2.34.1
next prev parent reply other threads:[~2024-12-17 23:33 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-17 23:33 [PATCH v3 00/30] Introduce GPU SVM and Xe SVM implementation Matthew Brost
2024-12-17 23:33 ` [PATCH v3 01/30] drm/xe: Retry BO allocation Matthew Brost
2025-01-07 12:28 ` Gwan-gyeong Mun
2025-01-07 21:56 ` Summers, Stuart
2024-12-17 23:33 ` [PATCH v3 02/30] mm/migrate: Add migrate_device_pfns Matthew Brost
2024-12-17 23:33 ` [PATCH v3 03/30] mm/migrate: Trylock device page in do_swap_page Matthew Brost
2025-01-28 17:26 ` Thomas Hellström
2025-01-28 19:46 ` Matthew Brost
2024-12-17 23:33 ` [PATCH v3 04/30] drm/pagemap: Add DRM pagemap Matthew Brost
2025-01-24 7:19 ` Gwan-gyeong Mun
2025-01-29 17:42 ` Matthew Brost
2024-12-17 23:33 ` [PATCH v3 05/30] drm/gpusvm: Add support for GPU Shared Virtual Memory Matthew Brost
2024-12-20 19:04 ` Matthew Brost
2025-01-08 1:30 ` Matthew Brost
2025-01-10 21:17 ` Matthew Brost
2025-01-17 8:26 ` Gwan-gyeong Mun
2025-01-17 18:53 ` Matthew Brost
2025-01-24 7:17 ` Gwan-gyeong Mun
2024-12-17 23:33 ` [PATCH v3 06/30] drm/xe: Select DRM_GPUSVM Kconfig Matthew Brost
2024-12-17 23:33 ` [PATCH v3 07/30] drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR flag Matthew Brost
2024-12-17 23:33 ` [PATCH v3 08/30] drm/xe: Add SVM init / close / fini to faulting VMs Matthew Brost
2024-12-17 23:33 ` [PATCH v3 09/30] drm/xe: Add dma_addr res cursor Matthew Brost
2024-12-17 23:33 ` [PATCH v3 10/30] drm/xe: Nuke VM's mapping upon close Matthew Brost
2024-12-17 23:33 ` [PATCH v3 11/30] drm/xe: Add SVM range invalidation and page fault handler Matthew Brost
2024-12-17 23:33 ` [PATCH v3 12/30] drm/gpuvm: Add DRM_GPUVA_OP_DRIVER Matthew Brost
2024-12-17 23:33 ` [PATCH v3 13/30] drm/xe: Add (re)bind to SVM page fault handler Matthew Brost
2024-12-17 23:33 ` Matthew Brost [this message]
2024-12-17 23:33 ` [PATCH v3 15/30] drm/xe: Add unbind to SVM garbage collector Matthew Brost
2024-12-20 18:50 ` Ghimiray, Himal Prasad
2024-12-20 18:54 ` Matthew Brost
2024-12-17 23:33 ` [PATCH v3 16/30] drm/xe: Do not allow CPU address mirror VMA unbind if the GPU has bindings Matthew Brost
2024-12-17 23:33 ` [PATCH v3 17/30] drm/xe: Enable CPU address mirror uAPI Matthew Brost
2024-12-17 23:33 ` [PATCH v3 18/30] drm/xe: Add migrate layer functions for SVM support Matthew Brost
2024-12-17 23:33 ` [PATCH v3 19/30] drm/xe: Add SVM device memory mirroring Matthew Brost
2024-12-20 18:39 ` Ghimiray, Himal Prasad
2024-12-20 18:45 ` Matthew Brost
2024-12-17 23:33 ` [PATCH v3 20/30] drm/xe: Add drm_gpusvm_devmem to xe_bo Matthew Brost
2024-12-17 23:33 ` [PATCH v3 21/30] drm/xe: Add drm_pagemap ops to SVM Matthew Brost
2024-12-17 23:33 ` [PATCH v3 22/30] drm/xe: Add GPUSVM device memory copy vfunc functions Matthew Brost
2024-12-17 23:33 ` [PATCH v3 23/30] drm/xe: Add Xe SVM populate_devmem_pfn GPU SVM vfunc Matthew Brost
2024-12-17 23:33 ` [PATCH v3 24/30] drm/xe: Add Xe SVM devmem_release " Matthew Brost
2024-12-17 23:33 ` [PATCH v3 25/30] drm/xe: Add BO flags required for SVM Matthew Brost
2024-12-17 23:33 ` [PATCH v3 26/30] drm/xe: Add SVM VRAM migration Matthew Brost
2024-12-17 23:33 ` [PATCH v3 27/30] drm/xe: Basic SVM BO eviction Matthew Brost
2024-12-19 3:42 ` Matthew Brost
2024-12-17 23:33 ` [PATCH v3 28/30] drm/xe: Add SVM debug Matthew Brost
2024-12-17 23:33 ` [PATCH v3 29/30] drm/xe: Add modparam for SVM notifier size Matthew Brost
2024-12-17 23:33 ` [PATCH v3 30/30] drm/xe: Add always_migrate_to_vram modparam Matthew Brost
2024-12-18 4:07 ` ✓ CI.Patch_applied: success for Introduce GPU SVM and Xe SVM implementation (rev3) Patchwork
2024-12-18 4:08 ` ✗ CI.checkpatch: warning " Patchwork
2024-12-18 4:09 ` ✗ CI.KUnit: failure " Patchwork
2025-01-07 12:19 ` [PATCH v3 00/30] Introduce GPU SVM and Xe SVM implementation Gwan-gyeong Mun
2025-01-17 9:47 ` Gwan-gyeong Mun
2025-01-21 21:14 ` Matthew Brost
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241217233348.3519726-15-matthew.brost@intel.com \
--to=matthew.brost@intel.com \
--cc=airlied@gmail.com \
--cc=apopple@nvidia.com \
--cc=christian.koenig@amd.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=felix.kuehling@amd.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=simona.vetter@ffwll.ch \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox