From: Matthew Brost <matthew.brost@intel.com>
To: Stuart Summers <stuart.summers@intel.com>
Cc: <michal.wajdeczko@intel.com>, <ilia.levi@intel.com>,
<x.wang@intel.com>, <rodrigo.vivi@intel.com>,
<intel-xe@lists.freedesktop.org>,
<alan.previn.teres.alexis@intel.com>
Subject: Re: [PATCH 07/11] drm/xe: Add per-exec-queue user fence wait queue
Date: Wed, 10 Jun 2026 16:35:19 -0700 [thread overview]
Message-ID: <ain0t5LRdOGS1KM/@gsse-cloud1.jf.intel.com> (raw)
In-Reply-To: <20260610212833.153366-20-stuart.summers@intel.com>
On Wed, Jun 10, 2026 at 09:28:39PM +0000, Stuart Summers wrote:
> Add a new ufence_wq wait queue to struct xe_exec_queue and initialize it
> at queue allocation time. Also introduce xe_wait_user_fence_wake() to
> centralize the ufence wake call sites.
>
> This patch just adds the infrastructure in place to do this and doesn't
> actually start using it. The reason being that we need to be careful
> when handling a case where a user does a VM bind without a bind queue
> and mistakenly passes a non-bind queue to the wait user fence. There
> are a couple of IGT cases at least that are doing this today and to
> avoid regressing any user code around this, we'll add some additional
> handling in a subsequent patch before connecting this to the actual
> wait user fence code.
>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Assisted-by: Copilot:claude-sonnet-4.6
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 1 +
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 3 +++
> drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++----
> drivers/gpu/drm/xe/xe_hw_engine.c | 6 ++++--
> drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++-
> drivers/gpu/drm/xe/xe_irq.c | 2 +-
> drivers/gpu/drm/xe/xe_memirq.c | 2 +-
> drivers/gpu/drm/xe/xe_sync.c | 3 ++-
> drivers/gpu/drm/xe/xe_wait_user_fence.c | 16 +++++++++++++++-
> drivers/gpu/drm/xe/xe_wait_user_fence.h | 4 ++++
> 10 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 6c101b4f6488..aa49400b67ba 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -260,6 +260,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
> INIT_LIST_HEAD(&q->multi_gt_link);
> INIT_LIST_HEAD(&q->hw_engine_group_link);
> INIT_LIST_HEAD(&q->pxp.link);
> + init_waitqueue_head(&q->ufence_wq);
Would it be better to make this a pointer and malloc/initialize a
wait_queue if it has a unique MSIX vector; otherwise, point it to
xe->ufence_wq? Likewise, if a platform doesn’t support MSIX, could we
just point it to xe->ufence_wq?
This ties into a suggestion below.
> spin_lock_init(&q->multi_queue.lock);
> spin_lock_init(&q->lrc_lookup_lock);
> q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index d27ce24daae5..fdc7baaa952e 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -231,6 +231,9 @@ struct xe_exec_queue {
> struct list_head link;
> } pxp;
>
> + /** @ufence_wq: per-queue user fence wait queue */
> + wait_queue_head_t ufence_wq;
> +
> /** @ufence_syncobj: User fence syncobj */
> struct drm_syncobj *ufence_syncobj;
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index b29cc08e6291..16d609e7b40f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -21,6 +21,7 @@
> #include "xe_device.h"
> #include "xe_exec_queue.h"
> #include "xe_force_wake.h"
> +#include "xe_wait_user_fence.h"
> #include "xe_gpu_scheduler.h"
> #include "xe_gt.h"
> #include "xe_gt_clock.h"
> @@ -555,11 +556,8 @@ static bool vf_recovery(struct xe_guc *guc)
>
> static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
> {
> - struct xe_guc *guc = exec_queue_to_guc(q);
> - struct xe_device *xe = guc_to_xe(guc);
> -
> /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
> - wake_up_all(&xe->ufence_wq);
> + xe_wait_user_fence_wake(gt_to_xe(q->gt), q);
>
> xe_sched_tdr_queue_imm(&q->guc->sched);
> }
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 98265293f2dc..05780bd5beba 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -42,6 +42,7 @@
> #include "xe_tuning.h"
> #include "xe_uc_fw.h"
> #include "xe_wa.h"
> +#include "xe_wait_user_fence.h"
>
> #define MAX_MMIO_BASES 3
> struct engine_info {
> @@ -894,9 +895,10 @@ int xe_hw_engines_init(struct xe_gt *gt)
> return 0;
> }
>
> -void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
> +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec,
> + struct xe_exec_queue *q)
> {
> - wake_up_all(>_to_xe(hwe->gt)->ufence_wq);
> + xe_wait_user_fence_wake(gt_to_xe(hwe->gt), q);
>
> if (hwe->irq_handler)
> hwe->irq_handler(hwe, intr_vec);
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
> index c3ee37f8cfc0..7501c9051a71 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.h
> @@ -51,7 +51,8 @@ struct xe_exec_queue;
>
> int xe_hw_engines_init_early(struct xe_gt *gt);
> int xe_hw_engines_init(struct xe_gt *gt);
> -void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
> +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec,
> + struct xe_exec_queue *q);
> void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
> u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
> enum xe_engine_class engine_class);
> diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
> index 40d3d43e492f..fc99d021405f 100644
> --- a/drivers/gpu/drm/xe/xe_irq.c
> +++ b/drivers/gpu/drm/xe/xe_irq.c
> @@ -385,7 +385,7 @@ static void gt_irq_handler(struct xe_tile *tile,
>
> hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
> if (hwe) {
> - xe_hw_engine_handle_irq(hwe, intr_vec);
> + xe_hw_engine_handle_irq(hwe, intr_vec, NULL);
> continue;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
> index 96ab2c59c5d7..318ef7c72eba 100644
> --- a/drivers/gpu/drm/xe/xe_memirq.c
> +++ b/drivers/gpu/drm/xe/xe_memirq.c
> @@ -493,7 +493,7 @@ void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
> * is opportunistic, unconditionally pass MI_USER_INTERRUPT to issue
> * that check.
> */
> - xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
> + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT, NULL);
> }
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 37866768d64c..652341f22460 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -18,6 +18,7 @@
> #include "xe_exec_queue.h"
> #include "xe_macros.h"
> #include "xe_sched_job_types.h"
> +#include "xe_wait_user_fence.h"
>
> struct xe_user_fence {
> struct xe_device *xe;
> @@ -92,7 +93,7 @@ static void user_fence_worker(struct work_struct *w)
> * Wake up waiters only after updating the ufence state, allowing the UMD
> * to safely reuse the same ufence without encountering -EBUSY errors.
> */
> - wake_up_all(&ufence->xe->ufence_wq);
> + xe_wait_user_fence_wake(ufence->xe, NULL);
Is this the only call site where q is NULL? I’m certain we can obtain q
from the exec IOCTL and VM bind IOCTLs and pass into the sync layer.
OA also uses syncs and appears to have a q as well; however, I’m not
that familiar with that code, though I’m sure we can work out any
OA-related details.
Given this, starting with the first suggestion above, I would wire in q
from the exec IOCTL, VM bind IOCTLs, and OA IOCTLs. Next, I would
completely drop ufence_list, as it doesn’t appear to be needed.
Finally, I would rewrite xe_wait_user_fence_wake in patch #10 as
follows:
void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q)
{
/* Wakeup non-attached queue waiters */
wake_up_all(&xe->ufence_wq);
/*
* XXX: Maybe even bail before xe_exec_queue_get_unless_zero if
* MSIX isn't supported.
*/
q = xe_exec_queue_get_unless_zero(q);
if (q) {
if (q->ufence_wq != &xe->ufence_wq)
wake_up_all(&q->ufence_wq);
xe_exec_queue_put(q);
}
}
I’m fairly certain this will work and be a bit simpler.
Also, I believe patch #10 contains a bug: if xe_wait_user_fence_wake is
passed a valid q and a wait IOCTL omits q, then xe->ufence_wq isn’t
woken. I’m pretty sure my suggestion here resolves that as well.
What do you think?
Matt
> user_fence_put(ufence);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> index 12ceb3efa8ea..7c9d52b50580 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> @@ -54,6 +54,20 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
> #define VALID_FLAGS DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
> #define MAX_OP DRM_XE_UFENCE_WAIT_OP_LTE
>
> +/**
> + * xe_wait_user_fence_wake() - Wake user fence waiters
> + * @xe: the xe device
> + * @q: exec queue (reserved; per-queue wake-up is enabled in a later patch)
> + *
> + * Wakes all user fence waiters on the device-level wait queue.
> + * Per-exec-queue and ufence_list broadcast support are introduced in
> + * subsequent patches once the full infrastructure is in place.
> + */
> +void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q)
> +{
> + wake_up_all(&xe->ufence_wq);
> +}
> +
> static long to_jiffies_timeout(struct xe_device *xe,
> struct drm_xe_wait_user_fence *args)
> {
> @@ -110,7 +124,7 @@ static long to_jiffies_timeout(struct xe_device *xe,
> *
> * If an exec queue ID is provided, the wait is aborted early if the
> * queue enters a reset state. The device-level wait queue is used for
> - * wakeups.
> + * wakeups in all cases.
> *
> * On return, @timeout is updated to reflect the remaining time (or zero
> * on expiry), unless %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME is set.
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
> index 0e268978f9e6..64e5000eabb4 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.h
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
> @@ -8,6 +8,10 @@
>
> struct drm_device;
> struct drm_file;
> +struct xe_device;
> +struct xe_exec_queue;
> +
> +void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q);
>
> int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> --
> 2.43.0
>
next prev parent reply other threads:[~2026-06-10 23:35 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-10 21:28 [PATCH 00/11] Enable per exec queue MSI-X vector assignment Stuart Summers
2026-06-10 21:28 ` [PATCH 01/11] drm/xe: Add kerneldoc to xe_wait_user_fence_ioctl() Stuart Summers
2026-06-10 21:28 ` [PATCH 02/11] drm/xe: Handle NULL in xe_exec_queue_get_unless_zero() Stuart Summers
2026-06-10 21:28 ` [PATCH 03/11] drm/xe: Cap MSI-X vector count to XE_MSIX_MAX_VECS Stuart Summers
2026-06-11 10:47 ` Maarten Lankhorst
2026-06-11 22:49 ` Summers, Stuart
2026-06-12 8:50 ` Maarten Lankhorst
2026-06-12 15:32 ` Summers, Stuart
2026-06-10 21:28 ` [PATCH 04/11] drm/xe: Assign dedicated MSI-X vectors to exec queues Stuart Summers
2026-06-10 21:28 ` [PATCH 05/11] drm/xe: Add configfs max_msix_vecs attribute Stuart Summers
2026-06-10 21:28 ` [PATCH 06/11] drm/xe: Remove memirq status and source checks for engine interrupts Stuart Summers
2026-06-10 21:28 ` [PATCH 07/11] drm/xe: Add per-exec-queue user fence wait queue Stuart Summers
2026-06-10 23:35 ` Matthew Brost [this message]
2026-06-11 22:50 ` Summers, Stuart
2026-06-10 21:28 ` [PATCH 08/11] drm/xe: Track all exec queues in a device-level ufence list Stuart Summers
2026-06-10 21:28 ` [PATCH 09/11] drm/xe: Hook up per queue thread wake to the unique MSI-X vector allocation Stuart Summers
2026-06-10 21:28 ` [PATCH 10/11] drm/xe: Enable per-queue ufence wake in ioctl and wake function Stuart Summers
2026-06-10 21:28 ` [PATCH 11/11] drm/xe/memirq: Enable compute walker post-sync interrupt Stuart Summers
2026-06-10 22:06 ` ✗ CI.checkpatch: warning for Enable per exec queue MSI-X vector assignment (rev2) Patchwork
2026-06-10 22:07 ` ✓ CI.KUnit: success " Patchwork
2026-06-10 22:59 ` ✓ Xe.CI.BAT: " Patchwork
2026-06-10 23:51 ` [PATCH 00/11] Enable per exec queue MSI-X vector assignment Matthew Brost
2026-06-11 23:08 ` Summers, Stuart
2026-06-12 0:17 ` Matthew Brost
2026-06-11 6:58 ` ✓ Xe.CI.FULL: success for Enable per exec queue MSI-X vector assignment (rev2) Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ain0t5LRdOGS1KM/@gsse-cloud1.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=alan.previn.teres.alexis@intel.com \
--cc=ilia.levi@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=michal.wajdeczko@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=stuart.summers@intel.com \
--cc=x.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox