From: Matthew Brost <matthew.brost@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: stuart.summers@intel.com, arvind.yadav@intel.com,
himal.prasad.ghimiray@intel.com,
thomas.hellstrom@linux.intel.com, francois.dugast@intel.com
Subject: [PATCH v3 04/12] drm/xe: Use a single page-fault queue with multiple workers
Date: Wed, 25 Feb 2026 12:27:28 -0800
Message-ID: <20260225202736.2723250-5-matthew.brost@intel.com>
In-Reply-To: <20260225202736.2723250-1-matthew.brost@intel.com>
With fine-grained page-fault locking, it no longer makes sense to
maintain multiple page-fault queues, as queues are no longer hashed by
the VM's ASID. Multiple workers can pull page faults from a single
queue, eliminating head-of-line blocking. Refactor the structures and
code to use a single shared queue.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
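For reviewers, below is a minimal, hypothetical sketch of the
single-queue / multiple-worker scheme this patch moves to. All names
(fault_ring, fault_worker, NUM_WORKERS, ENTRY_SZ) are illustrative
stand-ins, not the driver's actual types; setup of the workqueue and
worker array is elided:

/* Hypothetical illustration only -- not the driver's real API. */
#include <linux/container_of.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#define NUM_WORKERS	4
#define ENTRY_SZ	64	/* stand-in for xe_pagefault_entry_size() */

struct fault_ring {
	spinlock_t lock;	/* protects head, tail, next_worker */
	u32 head, tail, size;	/* byte offsets into / capacity of @data */
	u8 *data;
	u32 next_worker;	/* round-robin cursor, advanced under @lock */
};

struct fault_worker {
	struct work_struct work;
	struct fault_ring *ring;	/* all workers drain the same ring */
};

static struct workqueue_struct *fault_wq;	/* init elided */
static struct fault_worker workers[NUM_WORKERS];

/* Consumer: any worker may pop the next entry; no per-queue affinity. */
static bool fault_consume(struct fault_ring *ring, void *entry)
{
	bool found;

	spin_lock_irq(&ring->lock);
	found = ring->tail != ring->head;
	if (found) {
		memcpy(entry, ring->data + ring->tail, ENTRY_SZ);
		ring->tail = (ring->tail + ENTRY_SZ) % ring->size;
	}
	spin_unlock_irq(&ring->lock);

	return found;
}

static void fault_work_fn(struct work_struct *w)
{
	struct fault_worker *worker = container_of(w, typeof(*worker), work);
	u8 entry[ENTRY_SZ];

	while (fault_consume(worker->ring, entry))
		;	/* process the fault here */
}

/* Producer: copy one entry in, then kick the next worker round-robin. */
static void fault_produce(struct fault_ring *ring, const void *entry)
{
	unsigned long flags;
	u32 idx;

	spin_lock_irqsave(&ring->lock, flags);
	idx = ring->next_worker++ % NUM_WORKERS;
	memcpy(ring->data + ring->head, entry, ENTRY_SZ);
	ring->head = (ring->head + ENTRY_SZ) % ring->size; /* full check elided */
	spin_unlock_irqrestore(&ring->lock, flags);

	queue_work(fault_wq, &workers[idx].work);
}

As in the patch, the round-robin cursor advances under the queue lock
(see xe_pagefault_work_index()), replacing the previously racy
READ_ONCE()/WRITE_ONCE() increment, and it only selects which worker to
kick -- every worker drains the same shared queue.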
drivers/gpu/drm/xe/xe_device_types.h | 12 +++---
drivers/gpu/drm/xe/xe_pagefault.c | 52 +++++++++++++------------
drivers/gpu/drm/xe/xe_pagefault_types.h | 17 +++++++-
3 files changed, 50 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 1eb0fe118940..0558dfd52541 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -304,8 +304,8 @@ struct xe_device {
struct xarray asid_to_vm;
/** @usm.next_asid: next ASID, used to cyclical alloc asids */
u32 next_asid;
- /** @usm.current_pf_queue: current page fault queue */
- u32 current_pf_queue;
+ /** @usm.current_pf_work: current page fault work item */
+ u32 current_pf_work;
/** @usm.lock: protects UM state */
struct rw_semaphore lock;
/** @usm.pf_wq: page fault work queue, unbound, high priority */
@@ -315,9 +315,11 @@ struct xe_device {
* yields the best bandwidth utilization of the kernel paging
* engine.
*/
-#define XE_PAGEFAULT_QUEUE_COUNT 4
- /** @usm.pf_queue: Page fault queues */
- struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
+#define XE_PAGEFAULT_WORK_COUNT 4
+ /** @usm.pf_workers: Page fault workers */
+ struct xe_pagefault_work pf_workers[XE_PAGEFAULT_WORK_COUNT];
+ /** @usm.pf_queue: Page fault queue */
+ struct xe_pagefault_queue pf_queue;
#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
/** @usm.pagemap_shrinker: Shrinker for unused pagemaps */
struct drm_pagemap_shrinker *dpagemap_shrinker;
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
index a372db7cd839..7880fc7e7eb4 100644
--- a/drivers/gpu/drm/xe/xe_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -222,6 +222,7 @@ static void xe_pagefault_queue_retry(struct xe_pagefault_queue *pf_queue,
pf_queue->tail = pf_queue->size - xe_pagefault_entry_size();
else
pf_queue->tail -= xe_pagefault_entry_size();
+ memcpy(pf_queue->data + pf_queue->tail, pf, sizeof(*pf));
spin_unlock_irq(&pf_queue->lock);
}
@@ -267,8 +268,10 @@ static void xe_pagefault_print(struct xe_pagefault *pf)
static void xe_pagefault_queue_work(struct work_struct *w)
{
- struct xe_pagefault_queue *pf_queue =
- container_of(w, typeof(*pf_queue), worker);
+ struct xe_pagefault_work *pf_work =
+ container_of(w, typeof(*pf_work), work);
+ struct xe_device *xe = pf_work->xe;
+ struct xe_pagefault_queue *pf_queue = &xe->usm.pf_queue;
struct xe_pagefault pf;
unsigned long threshold;
@@ -285,7 +288,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
if (err == -EAGAIN) {
xe_pagefault_queue_retry(pf_queue, &pf);
- queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w);
+ queue_work(xe->usm.pf_wq, w);
break;
} else if (err) {
if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) {
@@ -302,7 +305,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
pf.producer.ops->ack_fault(&pf, err);
if (time_after(jiffies, threshold)) {
- queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w);
+ queue_work(xe->usm.pf_wq, w);
break;
}
}
@@ -348,7 +351,6 @@ static int xe_pagefault_queue_init(struct xe_device *xe,
xe_pagefault_entry_size(), total_num_eus, pf_queue->size);
spin_lock_init(&pf_queue->lock);
- INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work);
pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL);
if (!pf_queue->data)
@@ -381,14 +383,20 @@ int xe_pagefault_init(struct xe_device *xe)
xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
WQ_UNBOUND | WQ_HIGHPRI,
- XE_PAGEFAULT_QUEUE_COUNT);
+ XE_PAGEFAULT_WORK_COUNT);
if (!xe->usm.pf_wq)
return -ENOMEM;
- for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
- err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i);
- if (err)
- goto err_out;
+ err = xe_pagefault_queue_init(xe, &xe->usm.pf_queue);
+ if (err)
+ goto err_out;
+
+ for (i = 0; i < XE_PAGEFAULT_WORK_COUNT; ++i) {
+ struct xe_pagefault_work *pf_work = xe->usm.pf_workers + i;
+
+ pf_work->xe = xe;
+ pf_work->id = i;
+ INIT_WORK(&pf_work->work, xe_pagefault_queue_work);
}
return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe);
@@ -430,10 +438,7 @@ static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt,
*/
void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
{
- int i;
-
- for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i)
- xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i);
+ xe_pagefault_queue_reset(xe, gt, &xe->usm.pf_queue);
}
static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue)
@@ -448,13 +453,11 @@ static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue)
* This function can race with multiple page fault producers, but worst case we
* stick a page fault on the same queue for consumption.
*/
-static int xe_pagefault_queue_index(struct xe_device *xe)
+static int xe_pagefault_work_index(struct xe_device *xe)
{
- u32 old_pf_queue = READ_ONCE(xe->usm.current_pf_queue);
-
- WRITE_ONCE(xe->usm.current_pf_queue, (old_pf_queue + 1));
+ lockdep_assert_held(&xe->usm.pf_queue.lock);
- return old_pf_queue % XE_PAGEFAULT_QUEUE_COUNT;
+ return xe->usm.current_pf_work++ % XE_PAGEFAULT_WORK_COUNT;
}
/**
@@ -469,22 +472,23 @@ static int xe_pagefault_queue_index(struct xe_device *xe)
*/
int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf)
{
- int queue_index = xe_pagefault_queue_index(xe);
- struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + queue_index;
+ struct xe_pagefault_queue *pf_queue = &xe->usm.pf_queue;
unsigned long flags;
+ int work_index;
bool full;
spin_lock_irqsave(&pf_queue->lock, flags);
+ work_index = xe_pagefault_work_index(xe);
full = xe_pagefault_queue_full(pf_queue);
if (!full) {
memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf));
pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) %
pf_queue->size;
- queue_work(xe->usm.pf_wq, &pf_queue->worker);
+ queue_work(xe->usm.pf_wq,
+ &xe->usm.pf_workers[work_index].work);
} else {
drm_warn(&xe->drm,
- "PageFault Queue (%d) full, shouldn't be possible\n",
- queue_index);
+ "PageFault Queue full, shouldn't be possible\n");
}
spin_unlock_irqrestore(&pf_queue->lock, flags);
diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h
index b3289219b1be..45065c25c25f 100644
--- a/drivers/gpu/drm/xe/xe_pagefault_types.h
+++ b/drivers/gpu/drm/xe/xe_pagefault_types.h
@@ -131,8 +131,21 @@ struct xe_pagefault_queue {
u32 tail;
/** @lock: protects page fault queue */
spinlock_t lock;
- /** @worker: to process page faults */
- struct work_struct worker;
+};
+
+/**
+ * struct xe_pagefault_work - Xe page fault work item (consumer)
+ *
+ * Represents a worker that pops a &struct xe_pagefault from the page fault
+ * queue and processes it.
+ */
+struct xe_pagefault_work {
+ /** @xe: Back-pointer to the Xe device */
+ struct xe_device *xe;
+ /** @id: Identifier for this work item */
+ int id;
+ /** @work: Work item used to process the page fault */
+ struct work_struct work;
};
#endif
--
2.34.1