From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-xe@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Jason Gunthorpe" <jgg@ziepe.ca>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
"Dave Airlie" <airlied@gmail.com>,
dri-devel@lists.freedesktop.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
"Matthew Brost" <matthew.brost@intel.com>,
"Christian König" <christian.koenig@amd.com>
Subject: [RFC PATCH 1/6] mm/mmu_notifier: Allow multiple struct mmu_interval_notifier passes
Date: Sat, 9 Aug 2025 15:51:32 +0200 [thread overview]
Message-ID: <20250809135137.259427-2-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20250809135137.259427-1-thomas.hellstrom@linux.intel.com>
GPU use-cases for mmu_interval_notifiers with hmm often involve
starting a GPU operation and then waiting for it to complete.
These operations are typically context preemption or TLB flushing.
With single-pass notifiers per GPU this doesn't scale in
multi-GPU scenarios. In those scenarios we'd want to first start
preemption or TLB flushing on all GPUs and, as a second pass, wait
for them to complete on all GPUs.
One can do this on a per-driver basis by multiplexing per-driver
notifiers, but that would mean sharing the notifier "user" lock
across all GPUs, and that doesn't scale well either, so adding support
for multi-pass in the core appears to be the right choice.
Implement multi-pass capability in the mmu_interval_notifier. Use a
linked list for the additional passes to minimize the impact for
use-cases that don't need the multi-pass functionality.
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Simona Vetter <simona.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@gmail.com>
Cc: <dri-devel@lists.freedesktop.org>
Cc: <linux-mm@kvack.org>
Cc: <linux-kernel@vger.kernel.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
include/linux/mmu_notifier.h | 30 ++++++++++++++++
mm/mmu_notifier.c | 67 +++++++++++++++++++++++++++++++-----
2 files changed, 88 insertions(+), 9 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d1094c2d5fb6..1107a8eafd8a 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -233,6 +233,32 @@ struct mmu_notifier {
unsigned int users;
};
+/**
+ * struct mmu_interval_notifier_pass - mmu_interval_notifier multi-pass abstraction
+ * @link: List link used to queue this pass on the list of pending passes
+ *
+ * Allocate, typically using GFP_NOWAIT in the interval notifier's first pass.
+ * If allocation fails (which is not unlikely under memory pressure), fall back
+ * to single-pass operation.
+ */
+struct mmu_interval_notifier_pass {
+ struct list_head link;
+ /**
+ * @pass: Driver callback for an additional pass.
+ * @additional_pass: Pointer to the mmu_interval_notifier_pass structure.
+ * @range: The mmu_notifier_range.
+ * @cur_seq: The current sequence set by the first pass.
+ *
+ * Return: Either a pointer to a valid mmu_interval_notifier_pass for
+ * another pass to be called, or %NULL if processing is complete for this
+ * notifier. There is no error reporting mechanism for additional passes.
+ */
+ struct mmu_interval_notifier_pass *
+ (*pass) (struct mmu_interval_notifier_pass *additional_pass,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
+};
+
+
/**
* struct mmu_interval_notifier_ops
* @invalidate: Upon return the caller must stop using any SPTEs within this
@@ -243,6 +269,10 @@ struct mmu_interval_notifier_ops {
bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
+ bool (*invalidate_multipass)(struct mmu_interval_notifier *interval_sub,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq,
+ struct mmu_interval_notifier_pass **pass);
};
struct mmu_interval_notifier {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 8e0125dc0522..dd6af87db103 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -260,6 +260,22 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
}
EXPORT_SYMBOL_GPL(mmu_interval_read_begin);
+static void mn_itree_additional_passes(struct list_head *additional_passes,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct mmu_interval_notifier_pass *p, *next;
+
+ while (!list_empty(additional_passes)) { /* repeat until no pass is pending */
+ list_for_each_entry_safe(p, next, additional_passes, link) {
+ list_del_init(&p->link); /* take p off the pending list before invoking it */
+ p = p->pass(p, range, cur_seq); /* non-NULL return requests another pass */
+ if (p)
+ list_add_tail(&p->link, additional_passes); /* re-queue at the tail */
+ }
+ }
+}
+
static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
struct mm_struct *mm)
{
@@ -272,17 +288,32 @@ static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
};
struct mmu_interval_notifier *interval_sub;
unsigned long cur_seq;
+ LIST_HEAD(additional_passes);
bool ret;
for (interval_sub =
mn_itree_inv_start_range(subscriptions, &range, &cur_seq);
interval_sub;
interval_sub = mn_itree_inv_next(interval_sub, &range)) {
- ret = interval_sub->ops->invalidate(interval_sub, &range,
- cur_seq);
+ if (interval_sub->ops->invalidate_multipass) {
+ struct mmu_interval_notifier_pass *second = NULL;
+
+ ret = interval_sub->ops->invalidate_multipass(interval_sub,
+ &range,
+ cur_seq,
+ &second);
+ if (ret && second)
+ list_add_tail(&second->link, &additional_passes);
+
+ } else {
+ ret = interval_sub->ops->invalidate(interval_sub,
+ &range,
+ cur_seq);
+ }
WARN_ON(!ret);
}
+ mn_itree_additional_passes(&additional_passes, &range, cur_seq);
mn_itree_inv_end(subscriptions);
}
@@ -431,6 +462,8 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
{
struct mmu_interval_notifier *interval_sub;
unsigned long cur_seq;
+ LIST_HEAD(additional_passes);
+ int err = 0;
for (interval_sub =
mn_itree_inv_start_range(subscriptions, range, &cur_seq);
@@ -438,23 +471,39 @@ static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
interval_sub = mn_itree_inv_next(interval_sub, range)) {
bool ret;
- ret = interval_sub->ops->invalidate(interval_sub, range,
- cur_seq);
+ if (interval_sub->ops->invalidate_multipass) {
+ struct mmu_interval_notifier_pass *second = NULL;
+
+ ret = interval_sub->ops->invalidate_multipass(interval_sub,
+ range,
+ cur_seq,
+ &second);
+ if (ret && second)
+ list_add_tail(&second->link, &additional_passes);
+
+ } else {
+ ret = interval_sub->ops->invalidate(interval_sub,
+ range,
+ cur_seq);
+ }
if (!ret) {
if (WARN_ON(mmu_notifier_range_blockable(range)))
continue;
- goto out_would_block;
+ err = -EAGAIN;
+ break;
}
}
- return 0;
-out_would_block:
+ mn_itree_additional_passes(&additional_passes, range, cur_seq);
+
/*
* On -EAGAIN the non-blocking caller is not allowed to call
* invalidate_range_end()
*/
- mn_itree_inv_end(subscriptions);
- return -EAGAIN;
+ if (err)
+ mn_itree_inv_end(subscriptions);
+
+ return err;
}
static int mn_hlist_invalidate_range_start(
--
2.50.1
next prev parent reply other threads:[~2025-08-09 13:52 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-09 13:51 [RFC PATCH 0/6] Multi-pass MMU interval notifiers Thomas Hellström
2025-08-09 13:51 ` Thomas Hellström [this message]
2025-08-18 16:07 ` [RFC PATCH 1/6] mm/mmu_notifier: Allow multiple struct mmu_interval_notifier passes Jason Gunthorpe
2025-08-18 16:25 ` Matthew Brost
2025-08-18 16:36 ` Jason Gunthorpe
2025-08-18 16:42 ` Thomas Hellström
2025-08-18 16:45 ` Matthew Brost
2025-08-18 16:44 ` Matthew Brost
2025-08-18 16:46 ` Jason Gunthorpe
2025-08-19 9:55 ` Alistair Popple
2025-08-19 11:33 ` Thomas Hellström
2025-08-19 15:35 ` Matthew Brost
2025-08-21 9:34 ` Thomas Hellström
2025-08-19 10:03 ` Alistair Popple
2025-08-19 11:35 ` Thomas Hellström
2025-08-09 13:51 ` [RFC PATCH 2/6] drm/gpusvm: Update GPU SVM / Xe to twopass MMU notifier Thomas Hellström
2025-08-09 13:51 ` [RFC PATCH 3/6] drm/gpusvm: Add drm_gpusvm_in_notifier_* helpers Thomas Hellström
2025-08-09 13:51 ` [RFC PATCH 4/6] drm/xe: Skip waiting on unarmed fences in xe_gt_tlb_invalidation_fence_wait Thomas Hellström
2025-08-09 13:51 ` [RFC PATCH 5/6] drm/xe: Add fences argument to xe_vm_range_tilemask_tlb_invalidation Thomas Hellström
2025-08-09 13:51 ` [RFC PATCH 6/6] drm/xe: Implement two pass MMU notifiers for SVM Thomas Hellström
2025-08-11 20:46 ` Matthew Brost
2025-08-12 9:06 ` Thomas Hellström
2025-08-09 14:00 ` ✓ CI.KUnit: success for Multi-pass MMU interval notifiers Patchwork
2025-08-09 14:15 ` ✗ CI.checksparse: warning " Patchwork
2025-08-09 14:35 ` ✓ Xe.CI.BAT: success " Patchwork
2025-08-09 16:20 ` ✓ Xe.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250809135137.259427-2-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=jgg@ziepe.ca \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=matthew.brost@intel.com \
--cc=simona.vetter@ffwll.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.