[RFC 7/7] drm/xe/svm: Correct memory attribute reset for partial unmap

public inbox for intel-xe@lists.freedesktop.org
 help / color / mirror / Atom feed

From: Arvind Yadav <arvind.yadav@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: matthew.brost@intel.com, himal.prasad.ghimiray@intel.com,
	thomas.hellstrom@linux.intel.com
Subject: [RFC 7/7] drm/xe/svm: Correct memory attribute reset for partial unmap
Date: Thu, 19 Feb 2026 14:43:12 +0530	[thread overview]
Message-ID: <20260219091312.796749-8-arvind.yadav@intel.com> (raw)
In-Reply-To: <20260219091312.796749-1-arvind.yadav@intel.com>

From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>

When performing a partial unmap of an SVM range, the memory attributes
were being reset for the entire range instead of just the portion
being unmapped. This could reset attributes on addresses that remain
mapped, leading to unintended behaviour.

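Fix this by recording the unmapped subrange when the range is marked
unmapped and restricting the attribute reset to that subrange.

Also retry xe_svm_range_set_default_attr() on -EAGAIN, up to
XE_SVM_ATTR_RETRY_MAX times, instead of recording -EAGAIN and moving on
to the next range; if the limit is exceeded, kill the VM and return -EIO.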

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Signed-off-by: Arvind Yadav <arvind.yadav@intel.com>
---
 drivers/gpu/drm/xe/xe_svm.c | 56 +++++++++++++++++++++++++++----------
 drivers/gpu/drm/xe/xe_svm.h | 10 +++++++
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 8335fdc976b5..3c833e6d6b2c 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -57,6 +57,8 @@ void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
 	return force_smem ? NULL : vm->svm.peer.owner;
 }
 
+#define XE_SVM_ATTR_RETRY_MAX 3
+
 static bool xe_svm_range_in_vram(struct xe_svm_range *range)
 {
 	/*
@@ -126,15 +128,23 @@ static void xe_svm_range_free(struct drm_gpusvm_range *range)
 	kfree(range);
 }
 
+static void xe_svm_range_set_unmapped(struct xe_svm_range *range,
+				      const struct mmu_notifier_range *mmu_range)
+{
+	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
+	if (range->base.pages.flags.partial_unmap) {
+		range->partial_unmap.start = max(xe_svm_range_start(range), mmu_range->start);
+		range->partial_unmap.end = min(xe_svm_range_end(range), mmu_range->end);
+	}
+}
+
 static void
 xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
 				   const struct mmu_notifier_range *mmu_range)
 {
 	struct xe_device *xe = vm->xe;
 
-	range_debug(range, "GARBAGE COLLECTOR ADD");
-
-	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
+	xe_svm_range_set_unmapped(range, mmu_range);
 
 	spin_lock(&vm->svm.garbage_collector.lock);
 	if (list_empty(&range->garbage_collector_link))
@@ -375,9 +385,10 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
 static int xe_svm_garbage_collector(struct xe_vm *vm)
 {
 	struct xe_svm_range *range;
-	u64 range_start;
-	u64 range_end;
+	u64 unmap_start;
+	u64 unmap_end;
 	int err, ret = 0;
+	int retry_count;
 
 	lockdep_assert_held_write(&vm->lock);
 
@@ -392,8 +403,13 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
 		if (!range)
 			break;
 
-		range_start = xe_svm_range_start(range);
-		range_end = xe_svm_range_end(range);
+		if (range->base.pages.flags.partial_unmap) {
+			unmap_start = range->partial_unmap.start;
+			unmap_end = range->partial_unmap.end;
+		} else {
+			unmap_start = xe_svm_range_start(range);
+			unmap_end = xe_svm_range_end(range);
+		}
 
 		list_del(&range->garbage_collector_link);
 		spin_unlock(&vm->svm.garbage_collector.lock);
@@ -407,13 +423,25 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
 			return err;
 		}
 
-		err = xe_svm_range_set_default_attr(vm, range_start, range_end);
-		if (err) {
-			if (err == -EAGAIN)
-				ret = -EAGAIN;
-			else
-				return err;
-		}
+		/*
+		 * Retry set_default_attr on -EAGAIN (VMA was recreated).
+		 * Limit retries to prevent infinite loop.
+		 */
+		retry_count = 0;
+
+		do {
+			err = xe_svm_range_set_default_attr(vm, unmap_start, unmap_end);
+			if (err == -EAGAIN && ++retry_count > XE_SVM_ATTR_RETRY_MAX) {
+				drm_err(&vm->xe->drm,
+					"SET_ATTR retry limit exceeded for [0x%llx-0x%llx]\n",
+					unmap_start, unmap_end);
+				xe_vm_kill(vm, true);
+				return -EIO;
+			}
+		} while (err == -EAGAIN);
+
+		if (err)
+			return err;
 	}
 	spin_unlock(&vm->svm.garbage_collector.lock);
 
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index b7b8eeacf196..4651e044cf53 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -46,6 +46,16 @@ struct xe_svm_range {
 	 * range. Protected by GPU SVM notifier lock.
 	 */
 	u8 tile_invalidated;
+	/**
+	 * @partial_unmap: Structure to hold partial unmap range info.
+	 * Valid only if partial unmap is in effect.
+	 */
+	struct {
+		/** @start: Start address of the partial unmap range */
+		u64 start;
+		/** @end: End address of the partial unmap range */
+		u64 end;
+	} partial_unmap;
 };
 
 /**
-- 
2.43.0

next prev parent reply	other threads:[~2026-02-19  9:13 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-19  9:13 [RFC 0/7] drm/xe/svm: Add MMU notifier-based madvise autoreset on munmap Arvind Yadav
2026-02-19  9:13 ` [RFC 1/7] drm/xe/vm: Add CPU_AUTORESET_ACTIVE VMA flag Arvind Yadav
2026-02-19  9:13 ` [RFC 2/7] drm/xe/vm: Preserve CPU_AUTORESET_ACTIVE across GPUVA operations Arvind Yadav
2026-02-19  9:13 ` [RFC 3/7] drm/xe/svm: Clear CPU_AUTORESET_ACTIVE on first GPU fault Arvind Yadav
2026-02-20 20:12   ` Matthew Brost
2026-02-20 22:33     ` Matthew Brost
2026-03-05  3:38       ` Yadav, Arvind
2026-02-19  9:13 ` [RFC 4/7] drm/xe/vm: Add madvise autoreset interval notifier worker infrastructure Arvind Yadav
2026-02-25 23:34   ` Matthew Brost
2026-03-09  7:07     ` Yadav, Arvind
2026-03-09  9:32       ` Thomas Hellström
2026-03-11  6:34         ` Yadav, Arvind
2026-02-19  9:13 ` [RFC 5/7] drm/xe/vm: Deactivate madvise notifier on GPU touch Arvind Yadav
2026-02-19  9:13 ` [RFC 6/7] drm/xe/vm: Wire MADVISE_AUTORESET notifiers into VM lifecycle Arvind Yadav
2026-02-19  9:13 ` Arvind Yadav [this message]
2026-02-19  9:40 ` ✗ CI.checkpatch: warning for drm/xe/svm: Add MMU notifier-based madvise autoreset on munmap Patchwork
2026-02-19  9:42 ` ✓ CI.KUnit: success " Patchwork
2026-02-19 10:40 ` ✓ Xe.CI.BAT: " Patchwork
2026-02-19 13:04 ` ✗ Xe.CI.FULL: failure " Patchwork

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8335fdc976b dfblob:3c833e6d6b2 dfblob:b7b8eeacf19
dfblob:4651e044cf5 )
 OR (
bs:"[RFC 7/7] drm/xe/svm: Correct memory attribute reset for partial unmap" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260219091312.796749-8-arvind.yadav@intel.com \
    --to=arvind.yadav@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox