All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>
Cc: intel-xe@lists.freedesktop.org, francois.dugast@intel.com,
	 michal.mrozek@intel.com
Subject: Re: [PATCH v2 5/7] drm/xe: Wait on in-syncs when swicthing to dma-fence mode
Date: Mon, 15 Dec 2025 22:48:59 +0100	[thread overview]
Message-ID: <236818068ac38d0fbfe0603a84cd056d2961bfb8.camel@linux.intel.com> (raw)
In-Reply-To: <aUCBrn8nGPSp4lBR@lstrano-desk.jf.intel.com>

On Mon, 2025-12-15 at 13:46 -0800, Matthew Brost wrote:
> On Mon, Dec 15, 2025 at 11:32:23AM +0100, Thomas Hellström wrote:
> > On Fri, 2025-12-12 at 10:28 -0800, Matthew Brost wrote:
> > > If a dma-fence submission has in-fences and pagefault queues are
> > > running
> > > work, there is little incentive to kick the pagefault queues off
> > > the
> > > hardware until the dma-fence submission is ready to run.
> > > Therefore,
> > > wait
> > > on the in-fences of the dma-fence submission before removing the
> > > pagefault queues from the hardware.
> > > 
> > > v2:
> > >  - Fix kernel doc (CI)
> > >  - Don't wait under lock (Thomas)
> > >  - Make wait interruptable
> > > 
> > > Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_exec.c            |  9 +++--
> > >  drivers/gpu/drm/xe/xe_hw_engine_group.c | 44
> > > +++++++++++++++++++++--
> > > --
> > >  drivers/gpu/drm/xe/xe_hw_engine_group.h |  4 ++-
> > >  drivers/gpu/drm/xe/xe_sync.c            | 29 ++++++++++++++++
> > >  drivers/gpu/drm/xe/xe_sync.h            |  2 ++
> > >  5 files changed, 78 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_exec.c
> > > b/drivers/gpu/drm/xe/xe_exec.c
> > > index 4d81210e41f5..d462add2d005 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec.c
> > > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > > @@ -121,7 +121,7 @@ int xe_exec_ioctl(struct drm_device *dev,
> > > void
> > > *data, struct drm_file *file)
> > >  	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
> > >  	struct drm_gpuvm_exec vm_exec = {.extra.fn =
> > > xe_exec_fn};
> > >  	struct drm_exec *exec = &vm_exec.exec;
> > > -	u32 i, num_syncs, num_ufence = 0;
> > > +	u32 i, num_syncs, num_in_sync = 0, num_ufence = 0;
> > >  	struct xe_validation_ctx ctx;
> > >  	struct xe_sched_job *job;
> > >  	struct xe_vm *vm;
> > > @@ -182,6 +182,9 @@ int xe_exec_ioctl(struct drm_device *dev,
> > > void
> > > *data, struct drm_file *file)
> > >  
> > >  		if (xe_sync_is_ufence(&syncs[num_syncs]))
> > >  			num_ufence++;
> > > +
> > > +		if (!num_in_sync &&
> > > xe_sync_needs_wait(&syncs[num_syncs]))
> > > +			num_in_sync++;
> > >  	}
> > >  
> > >  	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
> > > @@ -202,7 +205,9 @@ int xe_exec_ioctl(struct drm_device *dev,
> > > void
> > > *data, struct drm_file *file)
> > >  	mode = xe_hw_engine_group_find_exec_mode(q);
> > >  
> > >  	if (mode == EXEC_MODE_DMA_FENCE) {
> > > -		err = xe_hw_engine_group_get_mode(group, mode,
> > > &previous_mode);
> > > +		err = xe_hw_engine_group_get_mode(group, mode,
> > > &previous_mode,
> > > +						  syncs,
> > > num_in_sync
> > > ?
> > > +						  num_syncs :
> > > 0);
> > >  		if (err)
> > >  			goto err_syncs;
> > >  	}
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> > > b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> > > index 4d9263a1a208..022fc0c30d38 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
> > > @@ -11,6 +11,7 @@
> > >  #include "xe_gt.h"
> > >  #include "xe_gt_stats.h"
> > >  #include "xe_hw_engine_group.h"
> > > +#include "xe_sync.h"
> > >  #include "xe_vm.h"
> > >  
> > >  static void
> > > @@ -21,7 +22,8 @@ hw_engine_group_resume_lr_jobs_func(struct
> > > work_struct *w)
> > >  	int err;
> > >  	enum xe_hw_engine_group_execution_mode previous_mode;
> > >  
> > > -	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR,
> > > &previous_mode);
> > > +	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR,
> > > &previous_mode,
> > > +					  NULL, 0);
> > >  	if (err)
> > >  		return;
> > >  
> > > @@ -189,10 +191,12 @@ void
> > > xe_hw_engine_group_resume_faulting_lr_jobs(struct
> > > xe_hw_engine_group
> > > *group
> > >  /**
> > >   * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the
> > > faulting LR jobs of this group
> > >   * @group: The hw engine group
> > > + * @has_deps: dma-fence job triggering suspend has dependencies
> > >   *
> > >   * Return: 0 on success, negative error code on error.
> > >   */
> > > -static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct
> > > xe_hw_engine_group *group)
> > > +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct
> > > xe_hw_engine_group *group,
> > > +						       bool
> > > has_deps)
> > >  {
> > >  	int err;
> > >  	struct xe_exec_queue *q;
> > > @@ -201,11 +205,19 @@ static int
> > > xe_hw_engine_group_suspend_faulting_lr_jobs(struct
> > > xe_hw_engine_group
> > >  	lockdep_assert_held_write(&group->mode_sem);
> > >  
> > >  	list_for_each_entry(q, &group->exec_queue_list,
> > > hw_engine_group_link) {
> > > +		bool idle_skip_suspend;
> > > +
> > >  		if (!xe_vm_in_fault_mode(q->vm))
> > >  			continue;
> > >  
> > > +		idle_skip_suspend =
> > > xe_exec_queue_idle_skip_suspend(q);
> > > +		if (!idle_skip_suspend && has_deps)
> > > +			return -EAGAIN;
> > > +
> > >  		xe_gt_stats_incr(q->gt,
> > > XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1);
> > > -		need_resume |=
> > > !xe_exec_queue_idle_skip_suspend(q);
> > > +
> > > +
> > > +		need_resume |= !idle_skip_suspend;
> > >  		q->ops->suspend(q);
> > >  	}
> > >  
> > > @@ -258,7 +270,7 @@ static int
> > > xe_hw_engine_group_wait_for_dma_fence_jobs(struct
> > > xe_hw_engine_group
> > >  	return 0;
> > >  }
> > >  
> > > -static int switch_mode(struct xe_hw_engine_group *group)
> > > +static int switch_mode(struct xe_hw_engine_group *group, bool
> > > has_deps)
> > >  {
> > >  	int err = 0;
> > >  	enum xe_hw_engine_group_execution_mode new_mode;
> > > @@ -268,7 +280,8 @@ static int switch_mode(struct
> > > xe_hw_engine_group
> > > *group)
> > >  	switch (group->cur_mode) {
> > >  	case EXEC_MODE_LR:
> > >  		new_mode = EXEC_MODE_DMA_FENCE;
> > > -		err =
> > > xe_hw_engine_group_suspend_faulting_lr_jobs(group);
> > > +		err =
> > > xe_hw_engine_group_suspend_faulting_lr_jobs(group,
> > > +								 
> > > has_deps);
> > >  		break;
> > >  	case EXEC_MODE_DMA_FENCE:
> > >  		new_mode = EXEC_MODE_LR;
> > > @@ -289,14 +302,18 @@ static int switch_mode(struct
> > > xe_hw_engine_group *group)
> > >   * @group: The hw engine group
> > >   * @new_mode: The new execution mode
> > >   * @previous_mode: Pointer to the previous mode provided for use
> > > by
> > > caller
> > > + * @syncs: Syncs from exec IOCTL
> > > + * @num_syncs: Number of syncs from exec IOCTL
> > >   *
> > >   * Return: 0 if successful, -EINTR if locking failed.
> > >   */
> > >  int xe_hw_engine_group_get_mode(struct xe_hw_engine_group
> > > *group,
> > >  				enum
> > > xe_hw_engine_group_execution_mode new_mode,
> > > -				enum
> > > xe_hw_engine_group_execution_mode *previous_mode)
> > > +				enum
> > > xe_hw_engine_group_execution_mode *previous_mode,
> > > +				struct xe_sync_entry *syncs, int
> > > num_syncs)
> > >  __acquires(&group->mode_sem)
> > >  {
> > > +	bool has_deps = !!num_syncs;
> > >  	int err = down_read_interruptible(&group->mode_sem);
> > >  
> > >  	if (err)
> > > @@ -306,14 +323,27 @@ __acquires(&group->mode_sem)
> > >  
> > >  	if (new_mode != group->cur_mode) {
> > >  		up_read(&group->mode_sem);
> > > +retry:
> > >  		err = down_write_killable(&group->mode_sem);
> > >  		if (err)
> > >  			return err;
> > >  
> > >  		if (new_mode != group->cur_mode) {
> > > -			err = switch_mode(group);
> > > +			err = switch_mode(group, has_deps);
> > >  			if (err) {
> > >  				up_write(&group->mode_sem);
> > > +				if (err == -EAGAIN) {
> > > +					int i;
> > > +
> > > +					for (i = 0; i <
> > > num_syncs;
> > > ++i) {
> > > +						err =
> > > xe_sync_entry_wait(syncs + i);
> > > +						if (err)
> > > +							return
> > > err;
> > > +					}
> > > +
> > > +					has_deps = false;
> > > +					goto retry;
> > > +				}
> > >  				return err;
> > >  			}
> > >  		}
> > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> > > b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> > > index 797ee81acbf2..8b17ccd30b70 100644
> > > --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
> > > +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
> > > @@ -11,6 +11,7 @@
> > >  struct drm_device;
> > >  struct xe_exec_queue;
> > >  struct xe_gt;
> > > +struct xe_sync_entry;
> > >  
> > >  int xe_hw_engine_setup_groups(struct xe_gt *gt);
> > >  
> > > @@ -19,7 +20,8 @@ void xe_hw_engine_group_del_exec_queue(struct
> > > xe_hw_engine_group *group, struct
> > >  
> > >  int xe_hw_engine_group_get_mode(struct xe_hw_engine_group
> > > *group,
> > >  				enum
> > > xe_hw_engine_group_execution_mode new_mode,
> > > -				enum
> > > xe_hw_engine_group_execution_mode *previous_mode);
> > > +				enum
> > > xe_hw_engine_group_execution_mode *previous_mode,
> > > +				struct xe_sync_entry *syncs, int
> > > num_syncs);
> > >  void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
> > >  
> > >  enum xe_hw_engine_group_execution_mode
> > > diff --git a/drivers/gpu/drm/xe/xe_sync.c
> > > b/drivers/gpu/drm/xe/xe_sync.c
> > > index 1fc4fa278b78..d970e11962ff 100644
> > > --- a/drivers/gpu/drm/xe/xe_sync.c
> > > +++ b/drivers/gpu/drm/xe/xe_sync.c
> > > @@ -228,6 +228,35 @@ int xe_sync_entry_add_deps(struct
> > > xe_sync_entry
> > > *sync, struct xe_sched_job *job)
> > >  	return 0;
> > >  }
> > >  
> > > +/**
> > > + * xe_sync_entry_wait() - Wait on in-sync
> > > + * @sync: Sync object
> > > + *
> > > + * If the sync is in an in-sync, wait on the sync to signal.
> > > + *
> > > + * Return: 0 on success, -ERESTARTSYS on failure (interruption)
> > > + */
> > > +int xe_sync_entry_wait(struct xe_sync_entry *sync)
> > > +{
> > > +	if (sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)
> > > +		return 0;
> > > +
> > > +	return dma_fence_wait(sync->fence, true);
> > > +}
> > > +
> > > +/**
> > > + * xe_sync_needs_wait() - Sync needs a wait (input dma-fence not
> > > signaled)
> > > + * @sync: Sync object
> > > + *
> > > + * Return: True if sync needs a wait, False otherwise
> > > + */
> > > +bool xe_sync_needs_wait(struct xe_sync_entry *sync)
> > > +{
> > > +
> > > +	return !(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL) &&
> > > +		!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &sync-
> > > >fence-
> > > > flags);
> > 
> > dma_fence_is_signaled() ?
> > 
> 
> I don't want to signal the fence here, Phillip Stanner merged a
> dma-fence helper that does this check to drm-misc-next but this
> change
> hasn't made it to dma-xe-next yet. I have patch built on top of his
> series to to convert Xe to use these helpers, when I rebase that
> patch
> I'll fixup this code too.

OK. Just out of interest, why not signal the fence here?
/Thomas


> 
> Matt 
> 
> > Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > 
> > > +}
> > > +
> > >  void xe_sync_entry_signal(struct xe_sync_entry *sync, struct
> > > dma_fence *fence)
> > >  {
> > >  	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
> > > diff --git a/drivers/gpu/drm/xe/xe_sync.h
> > > b/drivers/gpu/drm/xe/xe_sync.h
> > > index 51f2d803e977..6b949194acff 100644
> > > --- a/drivers/gpu/drm/xe/xe_sync.h
> > > +++ b/drivers/gpu/drm/xe/xe_sync.h
> > > @@ -29,6 +29,8 @@ int xe_sync_entry_add_deps(struct xe_sync_entry
> > > *sync,
> > >  			   struct xe_sched_job *job);
> > >  void xe_sync_entry_signal(struct xe_sync_entry *sync,
> > >  			  struct dma_fence *fence);
> > > +int xe_sync_entry_wait(struct xe_sync_entry *sync);
> > > +bool xe_sync_needs_wait(struct xe_sync_entry *sync);
> > >  void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
> > >  struct dma_fence *
> > >  xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
> > 


  reply	other threads:[~2025-12-15 21:49 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-12 18:28 [PATCH v2 0/7] Fix performance when pagefaults and 3d/display share resources Matthew Brost
2025-12-12 18:28 ` [PATCH v2 1/7] drm/xe: Adjust long-running workload timeslices to reasonable values Matthew Brost
2025-12-15 10:08   ` Thomas Hellström
2025-12-15 21:48     ` Matthew Brost
2025-12-12 18:28 ` [PATCH v2 2/7] drm/xe: Use usleep_range for accurate long-running workload timeslicing Matthew Brost
2025-12-15 10:10   ` Thomas Hellström
2025-12-12 18:28 ` [PATCH v2 3/7] drm/xe: Add debugfs knobs to control long running " Matthew Brost
2025-12-15 10:11   ` Thomas Hellström
2025-12-12 18:28 ` [PATCH v2 4/7] drm/xe: Skip exec queue schedule toggle if queue is idle during suspend Matthew Brost
2025-12-15 12:08   ` Thomas Hellström
2025-12-12 18:28 ` [PATCH v2 5/7] drm/xe: Wait on in-syncs when swicthing to dma-fence mode Matthew Brost
2025-12-15 10:32   ` Thomas Hellström
2025-12-15 21:46     ` Matthew Brost
2025-12-15 21:48       ` Thomas Hellström [this message]
2025-12-16  1:12         ` Matthew Brost
2025-12-12 18:28 ` [PATCH v2 6/7] drm/xe: Add GT stats ktime helpers Matthew Brost
2025-12-15 10:17   ` Thomas Hellström
2025-12-12 18:28 ` [PATCH v2 7/7] drm/xe: Add more GT stats around pagefault mode switch flows Matthew Brost
2025-12-15 11:00   ` Thomas Hellström
2025-12-15 13:05   ` Francois Dugast
2025-12-12 22:37 ` ✗ CI.checkpatch: warning for Fix performance when pagefaults and 3d/display share resources (rev2) Patchwork
2025-12-12 22:38 ` ✓ CI.KUnit: success " Patchwork
2025-12-12 23:33 ` ✓ Xe.CI.BAT: " Patchwork
2025-12-13 19:27 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=236818068ac38d0fbfe0603a84cd056d2961bfb8.camel@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=francois.dugast@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=michal.mrozek@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.