Re: [PATCH v4 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
	"Brost,  Matthew" <matthew.brost@intel.com>
Subject: Re: [PATCH v4 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend
Date: Tue, 13 Jan 2026 21:23:57 +0000	[thread overview]
Message-ID: <ba1c6cb4a6f171d4faef8f00baaedd02ae2189b4.camel@intel.com> (raw)
In-Reply-To: <20260113025232.3504648-12-matthew.brost@intel.com>

On Mon, 2026-01-12 at 18:52 -0800, Matthew Brost wrote:
> Introduce context-based invalidation support to the GuC TLB
> invalidation
> backend. This is implemented by iterating over each exec queue per GT
> within a VM, skipping inactive queues, and issuing a context-based
> (GuC
> ID) H2G TLB invalidation. All H2G messages, except the final one, are
> sent with an invalid seqno, which the G2H handler drops to ensure the
> TLB invalidation fence is only signaled once all H2G messages are
> completed.
> 
> A watermark mechanism is also added to switch between context-based
> TLB
> invalidations and full device-wide invalidations, as the return on
> investment for context-based invalidation diminishes when many exec
> queues are mapped.
> 
> v2:
>  - Fix checkpatch warnings
> v3:
>  - Rebase on PRL
>  - Use ref counting to avoid racing with deregisters
> v4:
>  - Extra braces (Stuart)
>  - Use per GT list (CI)
>  - Reorder put
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 145
> +++++++++++++++++++++++++-
>  1 file changed, 141 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> index 070d2e2cb7c9..ced58f46f846 100644
> --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> @@ -6,15 +6,19 @@
>  #include "abi/guc_actions_abi.h"
>  
>  #include "xe_device.h"
> +#include "xe_exec_queue.h"
> +#include "xe_exec_queue_types.h"
>  #include "xe_gt_stats.h"
>  #include "xe_gt_types.h"
>  #include "xe_guc.h"
>  #include "xe_guc_ct.h"
> +#include "xe_guc_exec_queue_types.h"
>  #include "xe_guc_tlb_inval.h"
>  #include "xe_force_wake.h"
>  #include "xe_mmio.h"
>  #include "xe_sa.h"
>  #include "xe_tlb_inval.h"
> +#include "xe_vm.h"
>  
>  #include "regs/xe_guc_regs.h"
>  
> @@ -156,10 +160,16 @@ static int send_tlb_inval_ppgtt(struct xe_guc
> *guc, u32 seqno, u64 start,
>  {
>  #define MAX_TLB_INVALIDATION_LEN       7
>         struct xe_gt *gt = guc_to_gt(guc);
> +       struct xe_device *xe = guc_to_xe(guc);
>         u32 action[MAX_TLB_INVALIDATION_LEN];
>         u64 length = end - start;
>         int len = 0, err;
>  
> +       xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
> +                         !xe->info.has_ctx_tlb_inval) ||
> +                    (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX &&
> +                     xe->info.has_ctx_tlb_inval));
> +
>         action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
>         action[len++] = !prl_sa ? seqno :
> TLB_INVALIDATION_SEQNO_INVALID;
>         if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
> @@ -168,9 +178,11 @@ static int send_tlb_inval_ppgtt(struct xe_guc
> *guc, u32 seqno, u64 start,
>         } else {
>                 u64 normalize_len = normalize_invalidation_range(gt,
> &start,
>                                                                 
> &end);
> +               bool need_flush = !prl_sa &&
> +                       seqno != TLB_INVALIDATION_SEQNO_INVALID;
>  
>                 /* Flush on NULL case, Media is not required to
> modify flush due to no PPC so NOP */
> -               action[len++] = MAKE_INVAL_OP_FLUSH(type, !prl_sa);
> +               action[len++] = MAKE_INVAL_OP_FLUSH(type,
> need_flush);
>                 action[len++] = id;
>                 action[len++] = lower_32_bits(start);
>                 action[len++] = upper_32_bits(start);
> @@ -181,8 +193,10 @@ static int send_tlb_inval_ppgtt(struct xe_guc
> *guc, u32 seqno, u64 start,
>  #undef MAX_TLB_INVALIDATION_LEN
>  
>         err = send_tlb_inval(guc, action, len);
> -       if (!err && prl_sa)
> +       if (!err && prl_sa) {
> +               xe_gt_assert(gt, seqno !=
> TLB_INVALIDATION_SEQNO_INVALID);
>                 err = send_page_reclaim(guc, seqno,
> xe_sa_bo_gpu_addr(prl_sa));
> +       }
>         return err;
>  }
>  
> @@ -201,6 +215,114 @@ static int send_tlb_inval_asid_ppgtt(struct
> xe_tlb_inval *tlb_inval, u32 seqno,
>                                     XE_GUC_TLB_INVAL_PAGE_SELECTIVE,
> prl_sa);
>  }
>  
> +static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval,
> u32 seqno,
> +                                   u64 start, u64 end, u32 asid,
> +                                   struct drm_suballoc *prl_sa)
> +{
> +       struct xe_guc *guc = tlb_inval->private;
> +       struct xe_device *xe = guc_to_xe(guc);
> +       struct xe_exec_queue *q, *next, *last_q = NULL;
> +       struct xe_vm *vm;
> +       LIST_HEAD(tlb_inval_list);
> +       int err = 0, id = guc_to_gt(guc)->info.id;
> +
> +       lockdep_assert_held(&tlb_inval->seqno_lock);
> +
> +       if (xe->info.force_execlist)
> +               return -ECANCELED;
> +
> +       vm = xe_device_asid_to_vm(xe, asid);
> +       if (IS_ERR(vm))
> +               return PTR_ERR(vm);
> +
> +       down_read(&vm->exec_queues.lock);
> +
> +       /*
> +        * XXX: Randomly picking a threshold for now. This will need
> to be
> +        * tuned based on expected UMD queue counts and performance
> profiling.
> +        */
> +#define EXEC_QUEUE_COUNT_FULL_THRESHOLD        8
> +       if (vm->exec_queues.count[id] >=
> EXEC_QUEUE_COUNT_FULL_THRESHOLD) {
> +               u32 action[] = {
> +                       XE_GUC_ACTION_TLB_INVALIDATION,
> +                       seqno,
> +                       MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
> +               };
> +
> +               err = send_tlb_inval(guc, action,
> ARRAY_SIZE(action));
> +               goto err_unlock;
> +       }
> +#undef EXEC_QUEUE_COUNT_FULL_THRESHOLD
> +
> +       /*
> +        * Move exec queues to a temporary list to issue
> invalidations. The exec
> +        * queue must active and a reference must be taken to prevent
> concurrent
> +        * deregistrations.
> +        *
> +        * List modification is safe because we hold 'vm-
> >exec_queues.lock' for
> +        * reading, which prevents external modifications. Using a
> per-GT list
> +        * is also safe since 'tlb_inval->seqno_lock' ensures no
> other GT users
> +        * can enter this code path.
> +        */
> +       list_for_each_entry_safe(q, next, &vm->exec_queues.list[id],
> +                                vm_exec_queue_link) {
> +               if (q->ops->active(q) &&
> xe_exec_queue_get_unless_zero(q)) {
> +                       last_q = q;
> +                       list_move_tail(&q->vm_exec_queue_link,
> &tlb_inval_list);

It would be nice if we could consolidate this into a single list
instead of moving things around like this, but I realize this makes it
easier to track the last active queue so no issue...

I don't see anything blocking us from deregistering a queue though
after it has been moved to the tlb_inval_list here. Should we add that
seqno_lock in the register/deregister routines to be safe? Or maybe
block until tlb_inval_list is empty? Or if I'm missing it otherwise,
how are you preventing the deregistration after this is on the list?

Thanks,
Stuart

> +               }
> +       }
> +
> +       if (!last_q) {
> +               /*
> +                * We can't break fence ordering for TLB invalidation
> jobs, if
> +                * TLB invalidations are inflight issue a dummy
> invalidation to
> +                * maintain ordering. Nor can we move safely the
> seqno_recv when
> +                * returning -ECANCELED if TLB invalidations are in
> flight. Use
> +                * GGTT invalidation as dummy invalidation given ASID
> +                * invalidations are unsupported here.
> +                */
> +               if (xe_tlb_inval_idle(tlb_inval))
> +                       err = -ECANCELED;
> +               else
> +                       err = send_tlb_inval_ggtt(tlb_inval, seqno);
> +               goto err_unlock;
> +       }
> +
> +       list_for_each_entry_safe(q, next, &tlb_inval_list,
> vm_exec_queue_link) {
> +               struct drm_suballoc *__prl_sa = NULL;
> +               int __seqno = TLB_INVALIDATION_SEQNO_INVALID;
> +               u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX;
> +
> +               xe_assert(xe, q->vm == vm);
> +
> +               if (err)
> +                       goto unref;
> +
> +               if (last_q == q) {
> +                       __prl_sa = prl_sa;
> +                       __seqno = seqno;
> +               }
> +
> +               err = send_tlb_inval_ppgtt(guc, __seqno, start, end,
> +                                          q->guc->id, type,
> __prl_sa);
> +
> +unref:
> +               /*
> +                * Must always return exec queue to original list /
> drop
> +                * reference
> +                */
> +               list_move_tail(&q->vm_exec_queue_link,
> +                              &vm->exec_queues.list[id]);
> +               xe_exec_queue_put(q);
> +       }
> +
> +err_unlock:
> +       up_read(&vm->exec_queues.lock);
> +       xe_vm_put(vm);
> +
> +       return err;
> +}
> +
>  static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
>  {
>         struct xe_guc *guc = tlb_inval->private;
> @@ -228,7 +350,7 @@ static long tlb_inval_timeout_delay(struct
> xe_tlb_inval *tlb_inval)
>         return hw_tlb_timeout + 2 * delay;
>  }
>  
> -static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
> +static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = {
>         .all = send_tlb_inval_all,
>         .ggtt = send_tlb_inval_ggtt,
>         .ppgtt = send_tlb_inval_asid_ppgtt,
> @@ -237,6 +359,15 @@ static const struct xe_tlb_inval_ops
> guc_tlb_inval_ops = {
>         .timeout_delay = tlb_inval_timeout_delay,
>  };
>  
> +static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = {
> +       .ggtt = send_tlb_inval_ggtt,
> +       .all = send_tlb_inval_all,
> +       .ppgtt = send_tlb_inval_ctx_ppgtt,
> +       .initialized = tlb_inval_initialized,
> +       .flush = tlb_inval_flush,
> +       .timeout_delay = tlb_inval_timeout_delay,
> +};
> +
>  /**
>   * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
>   * @guc: GuC object
> @@ -248,8 +379,14 @@ static const struct xe_tlb_inval_ops
> guc_tlb_inval_ops = {
>  void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
>                                  struct xe_tlb_inval *tlb_inval)
>  {
> +       struct xe_device *xe = guc_to_xe(guc);
> +
>         tlb_inval->private = guc;
> -       tlb_inval->ops = &guc_tlb_inval_ops;
> +
> +       if (xe->info.has_ctx_tlb_inval)
> +               tlb_inval->ops = &guc_tlb_inval_ctx_ops;
> +       else
> +               tlb_inval->ops = &guc_tlb_inval_asid_ops;
>  }
>  
>  /**

next prev parent reply	other threads:[~2026-01-13 21:24 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-13  2:52 [PATCH v4 00/12] Context based TLB invalidations Matthew Brost
2026-01-13  2:52 ` [PATCH v4 01/12] drm/xe: Add normalize_invalidation_range Matthew Brost
2026-01-13  2:52 ` [PATCH v4 02/12] drm/xe: Make usm.asid_to_vm allocation use GFP_NOWAIT Matthew Brost
2026-01-13  2:52 ` [PATCH v4 03/12] drm/xe: Add has_ctx_tlb_inval to device info Matthew Brost
2026-01-13 21:24   ` Summers, Stuart
2026-01-13  2:52 ` [PATCH v4 04/12] drm/xe: Add xe_device_asid_to_vm helper Matthew Brost
2026-01-13  2:52 ` [PATCH v4 05/12] drm/xe: Add vm to exec queues association Matthew Brost
2026-01-13 21:26   ` Summers, Stuart
2026-01-13  2:52 ` [PATCH v4 06/12] drm/xe: Taint TLB invalidation seqno lock with GFP_KERNEL Matthew Brost
2026-01-13  2:52 ` [PATCH v4 07/12] drm/xe: Rename send_tlb_inval_ppgtt to send_tlb_inval_asid_ppgtt Matthew Brost
2026-01-13  2:52 ` [PATCH v4 08/12] drm/xe: Add send_tlb_inval_ppgtt helper Matthew Brost
2026-01-13  2:52 ` [PATCH v4 09/12] drm/xe: Add xe_tlb_inval_idle helper Matthew Brost
2026-01-13  2:52 ` [PATCH v4 10/12] drm/xe: Add exec queue active vfunc Matthew Brost
2026-01-13 21:32   ` Summers, Stuart
2026-01-13 22:16     ` Matthew Brost
2026-01-13 22:21       ` Summers, Stuart
2026-01-14 21:17         ` Summers, Stuart
2026-01-13  2:52 ` [PATCH v4 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend Matthew Brost
2026-01-13 21:23   ` Summers, Stuart [this message]
2026-01-13 22:08     ` Matthew Brost
2026-01-13 23:25       ` Summers, Stuart
2026-01-14  0:25         ` Matthew Brost
2026-01-13  2:52 ` [PATCH v4 12/12] drm/xe: Enable context TLB invalidations for CI Matthew Brost
2026-01-13  3:00 ` ✓ CI.KUnit: success for Context based TLB invalidations (rev4) Patchwork
2026-01-13  3:41 ` ✓ Xe.CI.BAT: " Patchwork
2026-01-13  9:45 ` ✓ Xe.CI.Full: " Patchwork
2026-01-16  0:25 ` [PATCH v4 00/12] Context based TLB invalidations Summers, Stuart
2026-01-16  1:52   ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ba1c6cb4a6f171d4faef8f00baaedd02ae2189b4.camel@intel.com \
    --to=stuart.summers@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.