From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Cc: francois.dugast@intel.com, michal.mrozek@intel.com
Subject: Re: [PATCH 5/6] drm/xe: Wait on in-syncs when swicthing to dma-fence mode
Date: Fri, 12 Dec 2025 10:22:54 +0100 [thread overview]
Message-ID: <97f524d334045e0819cee861e6e7d21598509424.camel@linux.intel.com> (raw)
In-Reply-To: <20251211210032.1520113-6-matthew.brost@intel.com>
On Thu, 2025-12-11 at 13:00 -0800, Matthew Brost wrote:
> If a dma-fence submission has in-fences and pagefault queues are
> running
> work, there is little incentive to kick the pagefault queues off the
> hardware until the dma-fence submission is ready to run. Therefore,
> wait
> on the in-fences of the dma-fence submission before removing the
> pagefault queues from the hardware.
>
> Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_exec.c | 3 ++-
> drivers/gpu/drm/xe/xe_hw_engine_group.c | 34 +++++++++++++++++++----
> --
> drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++-
> drivers/gpu/drm/xe/xe_sync.c | 14 ++++++++++
> drivers/gpu/drm/xe/xe_sync.h | 1 +
> 5 files changed, 46 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec.c
> b/drivers/gpu/drm/xe/xe_exec.c
> index 4d81210e41f5..5bc598ba6afa 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -202,7 +202,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
> *data, struct drm_file *file)
> mode = xe_hw_engine_group_find_exec_mode(q);
>
> if (mode == EXEC_MODE_DMA_FENCE) {
> - err = xe_hw_engine_group_get_mode(group, mode,
> &previous_mode);
> + err = xe_hw_engine_group_get_mode(group, mode,
> &previous_mode,
> + syncs, num_syncs);
> if (err)
> goto err_syncs;
> }
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> index 4d9263a1a208..35966889c776 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> @@ -11,6 +11,7 @@
> #include "xe_gt.h"
> #include "xe_gt_stats.h"
> #include "xe_hw_engine_group.h"
> +#include "xe_sync.h"
> #include "xe_vm.h"
>
> static void
> @@ -21,7 +22,8 @@ hw_engine_group_resume_lr_jobs_func(struct
> work_struct *w)
> int err;
> enum xe_hw_engine_group_execution_mode previous_mode;
>
> - err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR,
> &previous_mode);
> + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR,
> &previous_mode,
> + NULL, 0);
> if (err)
> return;
>
> @@ -192,20 +194,32 @@ void
> xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group
> *group
> *
> * Return: 0 on success, negative error code on error.
> */
> -static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct
> xe_hw_engine_group *group)
> +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct
> xe_hw_engine_group *group,
> + struct
> xe_sync_entry *syncs,
> + int
> num_syncs)
> {
> - int err;
> + int err, i;
> struct xe_exec_queue *q;
> bool need_resume = false;
>
> lockdep_assert_held_write(&group->mode_sem);
>
> list_for_each_entry(q, &group->exec_queue_list,
> hw_engine_group_link) {
> + bool idle_skip_suspend;
> +
> if (!xe_vm_in_fault_mode(q->vm))
> continue;
>
> xe_gt_stats_incr(q->gt,
> XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1);
> - need_resume |= !xe_exec_queue_idle_skip_suspend(q);
> +
> + idle_skip_suspend =
> xe_exec_queue_idle_skip_suspend(q);
> +
> + if (!need_resume && !idle_skip_suspend && num_syncs)
> {
> + for (i = 0; i < num_syncs; ++i)
> + xe_sync_entry_wait(syncs + i);
Here we're in the write-locked mode, right? Ideally I think we should
just check for idle here and return a retry if not idle, and do the
wait outside of the locks. Perhaps we should also always wait if there
are pagefault jobs running on the engine group. We might also want to
include the other implicit dependencies, like kernel fences.
Although perhaps this is good enough for now, and this would be more
easily implemented with a scheduler?
/Thomas
> + }
> +
> + need_resume |= !idle_skip_suspend;
> q->ops->suspend(q);
> }
>
> @@ -258,7 +272,8 @@ static int
> xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group
> return 0;
> }
>
> -static int switch_mode(struct xe_hw_engine_group *group)
> +static int switch_mode(struct xe_hw_engine_group *group,
> + struct xe_sync_entry *syncs, int num_syncs)
> {
> int err = 0;
> enum xe_hw_engine_group_execution_mode new_mode;
> @@ -268,7 +283,9 @@ static int switch_mode(struct xe_hw_engine_group
> *group)
> switch (group->cur_mode) {
> case EXEC_MODE_LR:
> new_mode = EXEC_MODE_DMA_FENCE;
> - err =
> xe_hw_engine_group_suspend_faulting_lr_jobs(group);
> + err =
> xe_hw_engine_group_suspend_faulting_lr_jobs(group,
> +
> syncs,
> +
> num_syncs);
> break;
> case EXEC_MODE_DMA_FENCE:
> new_mode = EXEC_MODE_LR;
> @@ -294,7 +311,8 @@ static int switch_mode(struct xe_hw_engine_group
> *group)
> */
> int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
> enum
> xe_hw_engine_group_execution_mode new_mode,
> - enum
> xe_hw_engine_group_execution_mode *previous_mode)
> + enum
> xe_hw_engine_group_execution_mode *previous_mode,
> + struct xe_sync_entry *syncs, int
> num_syncs)
> __acquires(&group->mode_sem)
> {
> int err = down_read_interruptible(&group->mode_sem);
> @@ -311,7 +329,7 @@ __acquires(&group->mode_sem)
> return err;
>
> if (new_mode != group->cur_mode) {
> - err = switch_mode(group);
> + err = switch_mode(group, syncs, num_syncs);
> if (err) {
> up_write(&group->mode_sem);
> return err;
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> index 797ee81acbf2..8b17ccd30b70 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> @@ -11,6 +11,7 @@
> struct drm_device;
> struct xe_exec_queue;
> struct xe_gt;
> +struct xe_sync_entry;
>
> int xe_hw_engine_setup_groups(struct xe_gt *gt);
>
> @@ -19,7 +20,8 @@ void xe_hw_engine_group_del_exec_queue(struct
> xe_hw_engine_group *group, struct
>
> int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
> enum
> xe_hw_engine_group_execution_mode new_mode,
> - enum
> xe_hw_engine_group_execution_mode *previous_mode);
> + enum
> xe_hw_engine_group_execution_mode *previous_mode,
> + struct xe_sync_entry *syncs, int
> num_syncs);
> void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
>
> enum xe_hw_engine_group_execution_mode
> diff --git a/drivers/gpu/drm/xe/xe_sync.c
> b/drivers/gpu/drm/xe/xe_sync.c
> index 1fc4fa278b78..127e26129933 100644
> --- a/drivers/gpu/drm/xe/xe_sync.c
> +++ b/drivers/gpu/drm/xe/xe_sync.c
> @@ -228,6 +228,20 @@ int xe_sync_entry_add_deps(struct xe_sync_entry
> *sync, struct xe_sched_job *job)
> return 0;
> }
>
> +/**
> + * xe_sync_entry_wait() - Wait on in-sync
> + * @sync: Sync object
> + *
> + * If the sync is in an in-sync, wait on the sync to signal.
> + */
> +void xe_sync_entry_wait(struct xe_sync_entry *sync)
> +{
> + if (sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)
> + return;
> +
> + dma_fence_wait(sync->fence, false);
> +}
> +
> void xe_sync_entry_signal(struct xe_sync_entry *sync, struct
> dma_fence *fence)
> {
> if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
> diff --git a/drivers/gpu/drm/xe/xe_sync.h
> b/drivers/gpu/drm/xe/xe_sync.h
> index 51f2d803e977..1c08f9ed9001 100644
> --- a/drivers/gpu/drm/xe/xe_sync.h
> +++ b/drivers/gpu/drm/xe/xe_sync.h
> @@ -29,6 +29,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry
> *sync,
> struct xe_sched_job *job);
> void xe_sync_entry_signal(struct xe_sync_entry *sync,
> struct dma_fence *fence);
> +void xe_sync_entry_wait(struct xe_sync_entry *sync);
> void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
> struct dma_fence *
> xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
next prev parent reply other threads:[~2025-12-12 9:23 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-11 21:00 [PATCH 0/6] Fix performance when pagefaults and 3d/display share resources Matthew Brost
2025-12-11 21:00 ` [PATCH 1/6] drm/xe: Adjust long-running workload timeslices to reasonable values Matthew Brost
2025-12-11 21:00 ` [PATCH 2/6] drm/xe: Use usleep_range for accurate long-running workload timeslicing Matthew Brost
2025-12-11 21:00 ` [PATCH 3/6] drm/xe: Add debugfs knobs to control long running " Matthew Brost
2025-12-11 21:00 ` [PATCH 4/6] drm/xe: Skip exec queue schedule toggle if queue is idle during suspend Matthew Brost
2025-12-11 21:00 ` [PATCH 5/6] drm/xe: Wait on in-syncs when swicthing to dma-fence mode Matthew Brost
2025-12-12 9:22 ` Thomas Hellström [this message]
2025-12-12 16:33 ` Matthew Brost
2025-12-12 16:38 ` Matthew Brost
2025-12-12 18:41 ` Thomas Hellström
2025-12-12 20:20 ` Matthew Brost
2025-12-11 21:00 ` [PATCH 6/6] drm/xe: Add more GT stats around pagefault mode switch flows Matthew Brost
2025-12-12 16:07 ` Francois Dugast
2025-12-12 16:18 ` Matthew Brost
2025-12-11 21:29 ` ✓ CI.KUnit: success for Fix performance when pagefaults and 3d/display share resources Patchwork
2025-12-11 22:34 ` ✗ Xe.CI.BAT: failure " Patchwork
2025-12-12 13:46 ` ✗ Xe.CI.Full: " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=97f524d334045e0819cee861e6e7d21598509424.camel@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=francois.dugast@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=michal.mrozek@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox