All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	himal.prasad.ghimiray@intel.com,
	"Matthew Brost" <matthew.brost@intel.com>,
	"Matthew Auld" <matthew.auld@intel.com>
Subject: [PATCH 3/5] drm/xe/bo: Add a bo remove callback
Date: Mon, 17 Mar 2025 11:41:30 +0100	[thread overview]
Message-ID: <20250317104132.8200-4-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20250317104132.8200-1-thomas.hellstrom@linux.intel.com>

On device unbind, migrate exported bos, including pagemap bos, to
system. This allows importers to take proper action without
disruption. In particular, SVM clients on remote devices may
continue as if nothing happened, and can choose a different
placement.

The evict_flags() placement is chosen in such a way that bos that
aren't exported are purged.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c     | 96 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_bo.h     |  2 +
 drivers/gpu/drm/xe/xe_device.c |  2 +
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 64f9c936eea0..c7c206041632 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -55,6 +55,8 @@ static struct ttm_placement sys_placement = {
 	.placement = &sys_placement_flags,
 };
 
+static struct ttm_placement purge_placement;
+
 static const struct ttm_place tt_placement_flags[] = {
 	{
 		.fpfn = 0,
@@ -281,6 +283,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 static void xe_evict_flags(struct ttm_buffer_object *tbo,
 			   struct ttm_placement *placement)
 {
+	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
+	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
 	struct xe_bo *bo;
 
 	if (!xe_bo_is_xe_bo(tbo)) {
@@ -290,7 +294,7 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 			return;
 		}
 
-		*placement = sys_placement;
+		*placement = device_unplugged ? purge_placement : sys_placement;
 		return;
 	}
 
@@ -300,6 +304,11 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 		return;
 	}
 
+	if (device_unplugged && !tbo->base.dma_buf) {
+		*placement = purge_placement;
+		return;
+	}
+
 	/*
 	 * For xe, sg bos that are evicted to system just triggers a
 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
@@ -657,11 +666,20 @@ static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
 					       ttm);
 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
 	struct sg_table *sg;
 
 	xe_assert(xe, attach);
 	xe_assert(xe, ttm_bo->ttm);
 
+	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
+	    ttm_bo->sg) {
+		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
+				      false, MAX_SCHEDULE_TIMEOUT);
+		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
+		ttm_bo->sg = NULL;
+	}
+
 	if (new_res->mem_type == XE_PL_SYSTEM)
 		goto out;
 
@@ -2945,6 +2963,82 @@ void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
 	list_del_init(&bo->vram_userfault_link);
 }
 
+static void xe_bo_dma_unmap_pinned(struct xe_device *xe)
+{
+	struct list_head still_in_list;
+
+	INIT_LIST_HEAD(&still_in_list);
+	spin_lock(&xe->pinned.lock);
+	for (;;) {
+		struct xe_bo *bo = list_first_entry_or_null(&xe->pinned.kernel_bo_present,
+							    typeof(*bo), pinned_link);
+		struct ttm_buffer_object *ttm_bo;
+		struct ttm_tt *tt;
+		struct xe_ttm_tt *xe_tt;
+
+		if (!bo)
+			break;
+
+		list_move_tail(&bo->pinned_link, &still_in_list);
+		xe_bo_get(bo);
+		spin_unlock(&xe->pinned.lock);
+
+		xe_bo_lock(bo, false);
+		ttm_bo = &bo->ttm;
+		tt = ttm_bo->ttm;
+		if (tt) {
+			xe_ttm_bo_delete_mem_notify(ttm_bo);
+			xe_tt = container_of(tt, typeof(*xe_tt), ttm);
+			if (xe_tt->sg) {
+				dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
+						  DMA_BIDIRECTIONAL, 0);
+				sg_free_table(xe_tt->sg);
+				xe_tt->sg = NULL;
+			}
+		}
+
+		xe_bo_unlock(bo);
+		xe_bo_put(bo);
+		spin_lock(&xe->pinned.lock);
+	}
+	list_splice_tail(&still_in_list, &xe->pinned.kernel_bo_present);
+	spin_unlock(&xe->pinned.lock);
+}
+
+/**
+ * xe_bo_remove() - Handle bos when the pci_device is about to be removed
+ * @xe: The xe device.
+ *
+ * On pci_device removal we need to drop all dma mappings and move
+ * the data of exported bos out to system. This includes SVM bos and
+ * exported dma-buf bos. This is done by evicting all bos, but
+ * the evict placement in xe_evict_flags() is chosen such that all
+ * bos except those mentioned are purged, and thus their memory
+ * is released.
+ *
+ * For pinned bos, we're unmapping dma.
+ */
+void xe_bo_remove(struct xe_device *xe)
+{
+	unsigned int mem_type;
+
+	/*
+	 * Move pagemap bos and exported dma-buf to system.
+	 */
+	for (mem_type = XE_PL_VRAM1; mem_type >= XE_PL_TT; --mem_type) {
+		struct ttm_resource_manager *man =
+			ttm_manager_type(&xe->ttm, mem_type);
+
+		if (man) {
+			int ret = ttm_resource_manager_evict_all(&xe->ttm, man);
+
+			drm_WARN_ON(&xe->drm, ret);
+		}
+	}
+
+	xe_bo_dma_unmap_pinned(xe);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_bo.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index bda3fdd408da..22b1c63f9311 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -405,6 +405,8 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
 		  const struct xe_bo_shrink_flags flags,
 		  unsigned long *scanned);
 
+void xe_bo_remove(struct xe_device *xe);
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 /**
  * xe_bo_is_mem_type - Whether the bo currently resides in the given
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5d79b439dd62..bf487f5f369f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -919,6 +919,8 @@ void xe_device_remove(struct xe_device *xe)
 	xe_display_unregister(xe);
 
 	drm_dev_unplug(&xe->drm);
+
+	xe_bo_remove(xe);
 }
 
 void xe_device_shutdown(struct xe_device *xe)
-- 
2.48.1


  parent reply	other threads:[~2025-03-17 10:41 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-17 10:41 [PATCH 0/5] drm/xe: xe-only patches from the multi-device GPUSVM series Thomas Hellström
2025-03-17 10:41 ` [PATCH 1/5] drm/xe: Introduce CONFIG_DRM_XE_GPUSVM Thomas Hellström
2025-03-20 21:24   ` Matthew Brost
2025-03-17 10:41 ` [PATCH 2/5] drm/xe/svm: Fix a potential bo UAF Thomas Hellström
2025-03-20 21:25   ` Matthew Brost
2025-03-17 10:41 ` Thomas Hellström [this message]
2025-03-17 15:58   ` [PATCH 3/5] drm/xe/bo: Add a bo remove callback Matthew Auld
2025-03-17 16:16     ` Thomas Hellström
2025-03-17 16:50       ` Matthew Auld
2025-03-17 17:07         ` Thomas Hellström
2025-03-17 17:35           ` Matthew Auld
2025-03-17 10:41 ` [PATCH 4/5] drm/xe/migrate: Allow xe_migrate_vram() also on non-pagefault capable devices Thomas Hellström
2025-03-20 21:26   ` Matthew Brost
2025-03-17 10:41 ` [PATCH 5/5] drm/xe/uapi, drm/xe: Make the PT code handle placement per PTE rather than per vma / range Thomas Hellström
2025-03-17 15:19 ` ✗ CI.Patch_applied: failure for drm/xe: xe-only patches from the multi-device GPUSVM series Patchwork
2025-03-19 10:50 ` ✓ CI.Patch_applied: success for drm/xe: xe-only patches from the multi-device GPUSVM series (rev2) Patchwork
2025-03-19 10:50 ` ✗ CI.checkpatch: warning " Patchwork
2025-03-19 10:51 ` ✓ CI.KUnit: success " Patchwork
2025-03-19 11:08 ` ✓ CI.Build: " Patchwork
2025-03-19 11:10 ` ✗ CI.Hooks: failure " Patchwork
2025-03-19 11:11 ` ✓ CI.checksparse: success " Patchwork
2025-03-19 11:32 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-03-19 12:30 ` ✗ Xe.CI.Full: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250317104132.8200-4-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.