Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Stuart Summers <stuart.summers@intel.com>
Cc: <michal.wajdeczko@intel.com>, <ilia.levi@intel.com>,
	<x.wang@intel.com>, <rodrigo.vivi@intel.com>,
	<intel-xe@lists.freedesktop.org>,
	<alan.previn.teres.alexis@intel.com>
Subject: Re: [PATCH 07/11] drm/xe: Add per-exec-queue user fence wait queue
Date: Wed, 10 Jun 2026 16:35:19 -0700	[thread overview]
Message-ID: <ain0t5LRdOGS1KM/@gsse-cloud1.jf.intel.com> (raw)
In-Reply-To: <20260610212833.153366-20-stuart.summers@intel.com>

On Wed, Jun 10, 2026 at 09:28:39PM +0000, Stuart Summers wrote:
> Add a new ufence_wq wait queue to struct xe_exec_queue and initialize it
> at queue allocation time. Also introduce xe_wait_user_fence_wake() to
> centralize the ufence wake call sites.
> 
> This patch just adds the infrastructure in place to do this and doesn't
> actually start using it. The reason being that we need to be careful
> when handling a case where a user does a VM bind without a bind queue
> and mistakenly passes a non-bind queue to the wait user fence. There
> are a couple of IGT cases at least that are doing this today and to
> avoid regressing any user code around this, we'll add some additional
> handling in a subsequent patch before connecting this to the actual
> wait user fence code.
> 
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Assisted-by: Copilot:claude-sonnet-4.6
> ---
>  drivers/gpu/drm/xe/xe_exec_queue.c       |  1 +
>  drivers/gpu/drm/xe/xe_exec_queue_types.h |  3 +++
>  drivers/gpu/drm/xe/xe_guc_submit.c       |  6 ++----
>  drivers/gpu/drm/xe/xe_hw_engine.c        |  6 ++++--
>  drivers/gpu/drm/xe/xe_hw_engine.h        |  3 ++-
>  drivers/gpu/drm/xe/xe_irq.c              |  2 +-
>  drivers/gpu/drm/xe/xe_memirq.c           |  2 +-
>  drivers/gpu/drm/xe/xe_sync.c             |  3 ++-
>  drivers/gpu/drm/xe/xe_wait_user_fence.c  | 16 +++++++++++++++-
>  drivers/gpu/drm/xe/xe_wait_user_fence.h  |  4 ++++
>  10 files changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 6c101b4f6488..aa49400b67ba 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -260,6 +260,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>  	INIT_LIST_HEAD(&q->multi_gt_link);
>  	INIT_LIST_HEAD(&q->hw_engine_group_link);
>  	INIT_LIST_HEAD(&q->pxp.link);
> +	init_waitqueue_head(&q->ufence_wq);

Would it be better to make this a pointer and malloc/initialize a
wait_queue if it has a unique MSIX vector; otherwise, point it to
xe->ufence_wq? Likewise, if a platform doesn’t support MSIX, could we
just point it to xe->ufence_wq?

This ties into a suggestion below.

>  	spin_lock_init(&q->multi_queue.lock);
>  	spin_lock_init(&q->lrc_lookup_lock);
>  	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index d27ce24daae5..fdc7baaa952e 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -231,6 +231,9 @@ struct xe_exec_queue {
>  		struct list_head link;
>  	} pxp;
>  
> +	/** @ufence_wq: per-queue user fence wait queue */
> +	wait_queue_head_t ufence_wq;
> +
>  	/** @ufence_syncobj: User fence syncobj */
>  	struct drm_syncobj *ufence_syncobj;
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index b29cc08e6291..16d609e7b40f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -21,6 +21,7 @@
>  #include "xe_device.h"
>  #include "xe_exec_queue.h"
>  #include "xe_force_wake.h"
> +#include "xe_wait_user_fence.h"
>  #include "xe_gpu_scheduler.h"
>  #include "xe_gt.h"
>  #include "xe_gt_clock.h"
> @@ -555,11 +556,8 @@ static bool vf_recovery(struct xe_guc *guc)
>  
>  static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
>  {
> -	struct xe_guc *guc = exec_queue_to_guc(q);
> -	struct xe_device *xe = guc_to_xe(guc);
> -
>  	/** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
> -	wake_up_all(&xe->ufence_wq);
> +	xe_wait_user_fence_wake(gt_to_xe(q->gt), q);
>  
>  	xe_sched_tdr_queue_imm(&q->guc->sched);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 98265293f2dc..05780bd5beba 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -42,6 +42,7 @@
>  #include "xe_tuning.h"
>  #include "xe_uc_fw.h"
>  #include "xe_wa.h"
> +#include "xe_wait_user_fence.h"
>  
>  #define MAX_MMIO_BASES 3
>  struct engine_info {
> @@ -894,9 +895,10 @@ int xe_hw_engines_init(struct xe_gt *gt)
>  	return 0;
>  }
>  
> -void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
> +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec,
> +			     struct xe_exec_queue *q)
>  {
> -	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
> +	xe_wait_user_fence_wake(gt_to_xe(hwe->gt), q);
>  
>  	if (hwe->irq_handler)
>  		hwe->irq_handler(hwe, intr_vec);
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h
> index c3ee37f8cfc0..7501c9051a71 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.h
> @@ -51,7 +51,8 @@ struct xe_exec_queue;
>  
>  int xe_hw_engines_init_early(struct xe_gt *gt);
>  int xe_hw_engines_init(struct xe_gt *gt);
> -void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec);
> +void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec,
> +			     struct xe_exec_queue *q);
>  void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe);
>  u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
>  				enum xe_engine_class engine_class);
> diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
> index 40d3d43e492f..fc99d021405f 100644
> --- a/drivers/gpu/drm/xe/xe_irq.c
> +++ b/drivers/gpu/drm/xe/xe_irq.c
> @@ -385,7 +385,7 @@ static void gt_irq_handler(struct xe_tile *tile,
>  
>  			hwe = xe_gt_hw_engine(engine_gt, class, instance, false);
>  			if (hwe) {
> -				xe_hw_engine_handle_irq(hwe, intr_vec);
> +				xe_hw_engine_handle_irq(hwe, intr_vec, NULL);
>  				continue;
>  			}
>  
> diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
> index 96ab2c59c5d7..318ef7c72eba 100644
> --- a/drivers/gpu/drm/xe/xe_memirq.c
> +++ b/drivers/gpu/drm/xe/xe_memirq.c
> @@ -493,7 +493,7 @@ void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
>  	 * is opportunistic, unconditionally pass MI_USER_INTERRUPT to issue
>  	 * that check.
>  	 */
> -	xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
> +	xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT, NULL);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
> index 37866768d64c..652341f22460 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -18,6 +18,7 @@
>  #include "xe_exec_queue.h"
>  #include "xe_macros.h"
>  #include "xe_sched_job_types.h"
> +#include "xe_wait_user_fence.h"
>  
>  struct xe_user_fence {
>  	struct xe_device *xe;
> @@ -92,7 +93,7 @@ static void user_fence_worker(struct work_struct *w)
>  	 * Wake up waiters only after updating the ufence state, allowing the UMD
>  	 * to safely reuse the same ufence without encountering -EBUSY errors.
>  	 */
> -	wake_up_all(&ufence->xe->ufence_wq);
> +	xe_wait_user_fence_wake(ufence->xe, NULL);

Is this the only call site where q is NULL? I’m certain we can obtain q
from the exec IOCTL and VM bind IOCTLs and pass into the sync layer.

OA also uses syncs and appears to have a q as well; however, I’m not
that familiar with that code, though I’m sure we can work out any
OA-related details.

Given this, starting with the first suggestion above, I would wire in q
from the exec IOCTL, VM bind IOCTLs, and OA IOCTLs. Next, I would
completely drop ufence_list, as it doesn’t appear to be needed.

Finally, I would rewrite xe_wait_user_fence_wake in patch #10 as
follows:

void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q)
{
	/* Wakeup non-attached queue waiters */
	wake_up_all(&xe->ufence_wq);

	/*
	 * XXX: Maybe even bail before xe_exec_queue_get_unless_zero if
	 * MSIX isn't supported.
	 */

	q = xe_exec_queue_get_unless_zero(q);
	if (q) {
		if (q->ufence_wq != &xe->ufence_wq)
			wake_up_all(&q->ufence_wq);
		xe_exec_queue_put(q);
	}
}

I’m fairly certain this will work and be a bit simpler.

Also, I believe patch #10 contains a bug: if xe_wait_user_fence_wake is
passed a valid q and a wait IOCTL omits q, then xe->ufence_wq isn’t
woken. I’m pretty sure my suggestion here resolves that as well.

What do you think?

Matt

>  	user_fence_put(ufence);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> index 12ceb3efa8ea..7c9d52b50580 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> @@ -54,6 +54,20 @@ static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
>  #define VALID_FLAGS	DRM_XE_UFENCE_WAIT_FLAG_ABSTIME
>  #define MAX_OP		DRM_XE_UFENCE_WAIT_OP_LTE
>  
> +/**
> + * xe_wait_user_fence_wake() - Wake user fence waiters
> + * @xe: the xe device
> + * @q: exec queue (reserved; per-queue wake-up is enabled in a later patch)
> + *
> + * Wakes all user fence waiters on the device-level wait queue.
> + * Per-exec-queue and ufence_list broadcast support are introduced in
> + * subsequent patches once the full infrastructure is in place.
> + */
> +void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q)
> +{
> +	wake_up_all(&xe->ufence_wq);
> +}
> +
>  static long to_jiffies_timeout(struct xe_device *xe,
>  			       struct drm_xe_wait_user_fence *args)
>  {
> @@ -110,7 +124,7 @@ static long to_jiffies_timeout(struct xe_device *xe,
>   *
>   * If an exec queue ID is provided, the wait is aborted early if the
>   * queue enters a reset state. The device-level wait queue is used for
> - * wakeups.
> + * wakeups in all cases.
>   *
>   * On return, @timeout is updated to reflect the remaining time (or zero
>   * on expiry), unless %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME is set.
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.h b/drivers/gpu/drm/xe/xe_wait_user_fence.h
> index 0e268978f9e6..64e5000eabb4 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.h
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.h
> @@ -8,6 +8,10 @@
>  
>  struct drm_device;
>  struct drm_file;
> +struct xe_device;
> +struct xe_exec_queue;
> +
> +void xe_wait_user_fence_wake(struct xe_device *xe, struct xe_exec_queue *q);
>  
>  int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
>  			     struct drm_file *file);
> -- 
> 2.43.0
> 

  reply	other threads:[~2026-06-10 23:35 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-10 21:28 [PATCH 00/11] Enable per exec queue MSI-X vector assignment Stuart Summers
2026-06-10 21:28 ` [PATCH 01/11] drm/xe: Add kerneldoc to xe_wait_user_fence_ioctl() Stuart Summers
2026-06-10 21:28 ` [PATCH 02/11] drm/xe: Handle NULL in xe_exec_queue_get_unless_zero() Stuart Summers
2026-06-10 21:28 ` [PATCH 03/11] drm/xe: Cap MSI-X vector count to XE_MSIX_MAX_VECS Stuart Summers
2026-06-11 10:47   ` Maarten Lankhorst
2026-06-11 22:49     ` Summers, Stuart
2026-06-12  8:50       ` Maarten Lankhorst
2026-06-12 15:32         ` Summers, Stuart
2026-06-10 21:28 ` [PATCH 04/11] drm/xe: Assign dedicated MSI-X vectors to exec queues Stuart Summers
2026-06-10 21:28 ` [PATCH 05/11] drm/xe: Add configfs max_msix_vecs attribute Stuart Summers
2026-06-10 21:28 ` [PATCH 06/11] drm/xe: Remove memirq status and source checks for engine interrupts Stuart Summers
2026-06-10 21:28 ` [PATCH 07/11] drm/xe: Add per-exec-queue user fence wait queue Stuart Summers
2026-06-10 23:35   ` Matthew Brost [this message]
2026-06-11 22:50     ` Summers, Stuart
2026-06-10 21:28 ` [PATCH 08/11] drm/xe: Track all exec queues in a device-level ufence list Stuart Summers
2026-06-10 21:28 ` [PATCH 09/11] drm/xe: Hook up per queue thread wake to the unique MSI-X vector allocation Stuart Summers
2026-06-10 21:28 ` [PATCH 10/11] drm/xe: Enable per-queue ufence wake in ioctl and wake function Stuart Summers
2026-06-10 21:28 ` [PATCH 11/11] drm/xe/memirq: Enable compute walker post-sync interrupt Stuart Summers
2026-06-10 22:06 ` ✗ CI.checkpatch: warning for Enable per exec queue MSI-X vector assignment (rev2) Patchwork
2026-06-10 22:07 ` ✓ CI.KUnit: success " Patchwork
2026-06-10 22:59 ` ✓ Xe.CI.BAT: " Patchwork
2026-06-10 23:51 ` [PATCH 00/11] Enable per exec queue MSI-X vector assignment Matthew Brost
2026-06-11 23:08   ` Summers, Stuart
2026-06-12  0:17     ` Matthew Brost
2026-06-11  6:58 ` ✓ Xe.CI.FULL: success for Enable per exec queue MSI-X vector assignment (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ain0t5LRdOGS1KM/@gsse-cloud1.jf.intel.com \
    --to=matthew.brost@intel.com \
    --cc=alan.previn.teres.alexis@intel.com \
    --cc=ilia.levi@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=michal.wajdeczko@intel.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=stuart.summers@intel.com \
    --cc=x.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox