From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
"Brost, Matthew" <matthew.brost@intel.com>
Cc: "Ghimiray, Himal Prasad" <himal.prasad.ghimiray@intel.com>,
"Yadav, Arvind" <arvind.yadav@intel.com>,
"thomas.hellstrom@linux.intel.com"
<thomas.hellstrom@linux.intel.com>,
"Dugast, Francois" <francois.dugast@intel.com>
Subject: Re: [PATCH v3 07/25] drm/xe: Update scheduler job layer to support PT jobs
Date: Tue, 3 Mar 2026 22:50:58 +0000
Message-ID: <f766b45a9c633aba750334406b1c7a657d186a98.camel@intel.com>
In-Reply-To: <20260228013501.106680-8-matthew.brost@intel.com>
On Fri, 2026-02-27 at 17:34 -0800, Matthew Brost wrote:
> Update the scheduler job layer to support PT jobs. PT jobs are executed
> entirely on the CPU and do not require LRC fences or a batch address.
> Repurpose the LRC fence storage to hold PT-job arguments and update the
> scheduler job layer to distinguish between PT jobs and jobs that require
> an LRC.
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_sched_job.c | 92 ++++++++++++++---------
> drivers/gpu/drm/xe/xe_sched_job_types.h | 31 ++++++++-
> 2 files changed, 88 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index ae5b38b2a884..a8ba7f90368f 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -26,19 +26,22 @@ static struct kmem_cache *xe_sched_job_parallel_slab;
>
> int __init xe_sched_job_module_init(void)
> {
> + struct xe_sched_job *job;
> + size_t size;
> +
> + size = struct_size(job, ptrs, 1);
Nice..
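For anyone following along: struct_size() from <linux/overflow.h> computes
the size of a struct with a trailing flexible array and saturates to
SIZE_MAX on overflow instead of wrapping, so it's strictly safer than the
open-coded sizeof arithmetic it replaces. As a rough illustration only:

	/*
	 * struct_size(job, ptrs, 1) is roughly:
	 *   sizeof(*job) + 1 * sizeof(job->ptrs[0])
	 * but saturating to SIZE_MAX on overflow.
	 */
	size = sizeof(struct xe_sched_job) + sizeof(struct xe_job_ptrs);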
> xe_sched_job_slab =
> - kmem_cache_create("xe_sched_job",
> - sizeof(struct xe_sched_job) +
> - sizeof(struct xe_job_ptrs), 0,
> + kmem_cache_create("xe_sched_job", size, 0,
> SLAB_HWCACHE_ALIGN, NULL);
> if (!xe_sched_job_slab)
> return -ENOMEM;
>
> + size = max_t(size_t,
> + struct_size(job, ptrs,
> + XE_HW_ENGINE_MAX_INSTANCE),
> + struct_size(job, pt_update, 1));
> xe_sched_job_parallel_slab =
> - kmem_cache_create("xe_sched_job_parallel",
> - sizeof(struct xe_sched_job) +
> - sizeof(struct xe_job_ptrs) *
> - XE_HW_ENGINE_MAX_INSTANCE, 0,
> + kmem_cache_create("xe_sched_job_parallel", size, 0,
> SLAB_HWCACHE_ALIGN, NULL);
> if (!xe_sched_job_parallel_slab) {
> kmem_cache_destroy(xe_sched_job_slab);
> @@ -84,7 +87,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job)
> {
> int i;
>
> - for (i = 0; i < job->q->width; ++i) {
> + for (i = 0; !job->is_pt_job && i < job->q->width; ++i) {
> struct xe_job_ptrs *ptrs = &job->ptrs[i];
>
> if (ptrs->lrc_fence)
> @@ -93,10 +96,23 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job)
> }
> }
>
> +/**
> + * xe_sched_job_create() - Create a scheduler job
> + * @q: exec queue to create the scheduler job for
> + * @batch_addr: array of batch addresses for the job; must match the width of @q,
> + *	or NULL to indicate a PT job that does not require a batch address
> + *
> + * Create a scheduler job for submission.
> + *
> + * Context: Reclaim
> + *
> + * Return: a &xe_sched_job object on success, or an ERR_PTR on failure.
> + */
> struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> u64 *batch_addr)
> {
> bool is_migration = xe_sched_job_is_migration(q);
> + struct xe_device *xe = gt_to_xe(q->gt);
> struct xe_sched_job *job;
> int err;
> int i;
> @@ -105,6 +121,9 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> /* only a kernel context can submit a vm-less job */
> XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
>
> + xe_assert(xe, batch_addr ||
> +	  q->flags & (EXEC_QUEUE_FLAG_VM | EXEC_QUEUE_FLAG_MIGRATE));
Ok..
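And this makes the contract clear: only VM-bind / migrate queues may submit
a batch-less job. A caller creating a PT job would then look roughly like
this (hypothetical sketch, not code from this series):

	/* NULL batch_addr marks the job as a CPU-executed PT job */
	job = xe_sched_job_create(q, NULL);
	if (IS_ERR(job))
		return PTR_ERR(job);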
> +
> job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
> if (!job)
> return ERR_PTR(-ENOMEM);
> @@ -119,34 +138,39 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> if (err)
> goto err_free;
>
> - for (i = 0; i < q->width; ++i) {
> - struct dma_fence *fence = xe_lrc_alloc_seqno_fence();
> - struct dma_fence_chain *chain;
> -
> - if (IS_ERR(fence)) {
> - err = PTR_ERR(fence);
> - goto err_sched_job;
> + if (!batch_addr) {
> + job->fence = dma_fence_get_stub();
> + job->is_pt_job = true;
> + } else {
> + for (i = 0; i < q->width; ++i) {
> + struct dma_fence *fence = xe_lrc_alloc_seqno_fence();
> + struct dma_fence_chain *chain;
> +
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto err_sched_job;
> + }
> + job->ptrs[i].lrc_fence = fence;
> +
> + if (i + 1 == q->width)
> + continue;
> +
> + chain = dma_fence_chain_alloc();
> + if (!chain) {
> + err = -ENOMEM;
> + goto err_sched_job;
> + }
> + job->ptrs[i].chain_fence = chain;
> }
> - job->ptrs[i].lrc_fence = fence;
>
> - if (i + 1 == q->width)
> - continue;
> + width = q->width;
> + if (is_migration)
> + width = 2;
>
> - chain = dma_fence_chain_alloc();
> - if (!chain) {
> - err = -ENOMEM;
> - goto err_sched_job;
> - }
> - job->ptrs[i].chain_fence = chain;
> + for (i = 0; i < width; ++i)
> + job->ptrs[i].batch_addr = batch_addr[i];
> }
>
> - width = q->width;
> - if (is_migration)
> - width = 2;
> -
> - for (i = 0; i < width; ++i)
> - job->ptrs[i].batch_addr = batch_addr[i];
> -
> atomic_inc(&q->job_cnt);
> xe_pm_runtime_get_noresume(job_to_xe(job));
> trace_xe_sched_job_create(job);
> @@ -246,7 +270,7 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
> void xe_sched_job_arm(struct xe_sched_job *job)
> {
> struct xe_exec_queue *q = job->q;
> - struct dma_fence *fence, *prev;
> + struct dma_fence *fence = job->fence, *prev;
Looks like this fixes a bug where prev would be uninitialized on the first
iteration of the queue-width loop below? It would be nice for that fix to
go in as a separate patch.
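For reference (quoting from the hunks in this patch), the function
previously started with both pointers uninitialized and only assigned prev
at the end of each iteration:

	struct dma_fence *fence, *prev;
	...
	/* prev is indeterminate on the first pass through this loop */
	for (i = 0; i < q->width; prev = fence, ++i) {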
> struct xe_vm *vm = q->vm;
> u64 seqno = 0;
> int i;
> @@ -266,6 +290,9 @@ void xe_sched_job_arm(struct xe_sched_job *job)
> job->ring_ops_flush_tlb = true;
> }
>
> + if (job->is_pt_job)
> + goto arm;
> +
> /* Arm the pre-allocated fences */
> for (i = 0; i < q->width; prev = fence, ++i) {
Can we either use the goto above or change this to align with what you
had earlier, something like:
for (i = 0; !job->is_pt_job && i < q->width; prev = fence, ++i) {
Just for consistency...
> struct dma_fence_chain *chain;
> @@ -286,6 +313,7 @@ void xe_sched_job_arm(struct xe_sched_job *job)
> fence = &chain->base;
> }
>
> +arm:
> job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
> drm_sched_job_arm(&job->drm);
> }
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index 13c2970e81a8..9be4e2c5989d 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -10,10 +10,29 @@
>
> #include <drm/gpu_scheduler.h>
>
> -struct xe_exec_queue;
> struct dma_fence;
> struct dma_fence_chain;
>
> +struct xe_exec_queue;
> +struct xe_migrate_pt_update_ops;
> +struct xe_pt_job_ops;
> +struct xe_tile;
> +struct xe_vm;
> +
> +/**
> + * struct xe_pt_update_args - PT update arguments
> + */
> +struct xe_pt_update_args {
> + /** @vm: VM which is being bound */
> + struct xe_vm *vm;
> + /** @tile: Tile which page tables belong to */
> + struct xe_tile *tile;
> + /** @ops: Migrate PT update ops */
> + const struct xe_migrate_pt_update_ops *ops;
> + /** @pt_job_ops: PT job ops state */
> + struct xe_pt_job_ops *pt_job_ops;
> +};
> +
> /**
> * struct xe_job_ptrs - Per hw engine instance data
> */
> @@ -69,8 +88,14 @@ struct xe_sched_job {
> bool restore_replay;
> /** @last_replay: last job being replayed */
> bool last_replay;
> - /** @ptrs: per instance pointers. */
> - struct xe_job_ptrs ptrs[];
> + /** @is_pt_job: is a PT job */
> + bool is_pt_job;
> + union {
> + /** @ptrs: per instance pointers. */
> + DECLARE_FLEX_ARRAY(struct xe_job_ptrs, ptrs);
Nice..
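For reference, C doesn't allow a bare flexible array member inside a union,
so DECLARE_FLEX_ARRAY() (from <linux/stddef.h>) wraps each one in an
anonymous struct to make the overlay legal. Conceptually it expands to
something like:

	union {
		struct {
			struct { } __empty_ptrs;	/* zero-sized placeholder */
			struct xe_job_ptrs ptrs[];
		};
		struct {
			struct { } __empty_pt_update;
			struct xe_pt_update_args pt_update[];
		};
	};

so job->ptrs and job->pt_update share storage and sizeof(struct
xe_sched_job) is unchanged.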
Thanks,
Stuart
> + /** @pt_update: PT update arguments */
> + DECLARE_FLEX_ARRAY(struct xe_pt_update_args, pt_update);
> + };
> };
>
> struct xe_sched_job_snapshot {