Intel-XE Archive on lore.kernel.org
From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
	"Brost,  Matthew" <matthew.brost@intel.com>
Cc: "Roper, Matthew D" <matthew.d.roper@intel.com>,
	"De Marchi, Lucas" <lucas.demarchi@intel.com>
Subject: Re: [PATCH v2 11/12] drm/xe: Add context-based invalidation to GuC TLB invalidation backend
Date: Thu, 6 Nov 2025 21:50:45 +0000	[thread overview]
Message-ID: <746dd3acad81c8af5cb408ff4b50936ecacfeb5c.camel@intel.com> (raw)
In-Reply-To: <20251104195616.3339137-12-matthew.brost@intel.com>

On Tue, 2025-11-04 at 11:56 -0800, Matthew Brost wrote:
> Introduce context-based invalidation support to the GuC TLB
> invalidation backend. This is implemented by iterating over each exec
> queue per GT within a VM, skipping inactive queues, and issuing a
> context-based (GuC ID) H2G TLB invalidation. All H2G messages, except
> the final one, are sent with an invalid seqno, which the G2H handler
> drops to ensure the TLB invalidation fence is only signaled once all
> H2G messages are completed.
> 
> A watermark mechanism is also added to switch between context-based
> TLB invalidations and full device-wide invalidations, as the return
> on investment for context-based invalidation diminishes when many
> exec queues are mapped.
> 
> v2:
>  - Fix checkpatch warnings
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device_types.h  |   2 +
>  drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 122 +++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_pci.c           |   1 +
>  drivers/gpu/drm/xe/xe_pci_types.h     |   1 +
>  4 files changed, 124 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 145951dd95c9..ca285f4bce11 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -316,6 +316,8 @@ struct xe_device {
>                 u8 has_mem_copy_instr:1;
>                 /** @info.has_pxp: Device has PXP support */
>                 u8 has_pxp:1;
> +               /** @info.has_ctx_tlb_inval: Has context based TLB invalidations */
> +               u8 has_ctx_tlb_inval:1;
>                 /** @info.has_range_tlb_inval: Has range based TLB invalidations */
>                 u8 has_range_tlb_inval:1;
>                 /** @info.has_sriov: Supports SR-IOV */
> diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> index 6978ee8edf2e..1baaf577cded 100644
> --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
> @@ -6,14 +6,17 @@
>  #include "abi/guc_actions_abi.h"
>  
>  #include "xe_device.h"
> +#include "xe_exec_queue_types.h"
>  #include "xe_gt_stats.h"
>  #include "xe_gt_types.h"
>  #include "xe_guc.h"
>  #include "xe_guc_ct.h"
> +#include "xe_guc_exec_queue_types.h"
>  #include "xe_guc_tlb_inval.h"
>  #include "xe_force_wake.h"
>  #include "xe_mmio.h"
>  #include "xe_tlb_inval.h"
> +#include "xe_vm.h"
>  
>  #include "regs/xe_guc_regs.h"
>  
> @@ -136,10 +139,16 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start,
>  {
>  #define MAX_TLB_INVALIDATION_LEN       7
>         struct xe_gt *gt = guc_to_gt(guc);
> +       struct xe_device *xe = guc_to_xe(guc);
>         u32 action[MAX_TLB_INVALIDATION_LEN];
>         u64 length = end - start;
>         int len = 0;
>  
> +       xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE &&
> +                         !xe->info.has_ctx_tlb_inval) ||
> +                    (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX &&
> +                     xe->info.has_ctx_tlb_inval));
> +
>         action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
>         action[len++] = seqno;
>         if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
> @@ -176,6 +185,100 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
>                                     XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
>  }
>  
> +static bool queue_mapped_in_guc(struct xe_guc *guc, struct xe_exec_queue *q)
> +{
> +       return q->gt == guc_to_gt(guc);
> +}
> +
> +static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
> +                                   u64 start, u64 end, u32 asid)
> +{
> +       struct xe_guc *guc = tlb_inval->private;
> +       struct xe_device *xe = guc_to_xe(guc);
> +       struct xe_exec_queue *q, *last_q = NULL;
> +       struct xe_vm *vm;
> +       int err = 0;
> +
> +       lockdep_assert_held(&tlb_inval->seqno_lock);
> +
> +       if (xe->info.force_execlist)
> +               return -ECANCELED;
> +
> +       vm = xe_device_asid_to_vm(xe, asid);
> +       if (IS_ERR(vm))
> +               return PTR_ERR(vm);
> +
> +       down_read(&vm->exec_queues.lock);
> +
> +       /*
> +        * XXX: Randomly picking a threshold for now. This will need to be
> +        * tuned based on expected UMD queue counts and performance profiling.
> +        */
> +#define EXEC_QUEUE_COUNT_FULL_THRESHOLD        8

It does seem interesting for this to be configurable, since different
applications may have different requirements here, but we should also
have some kind of sane default. No issue with what you have here for
now.
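Something like the following is roughly what I mean, as a sketch (the
tunable and helper names here are invented for illustration, not from
the patch; in the driver this would presumably end up as a modparam or
debugfs knob):

```c
#include <assert.h>
#include <stdbool.h>

/*
 * Sketch only: the hard-coded EXEC_QUEUE_COUNT_FULL_THRESHOLD pulled
 * out into a tunable with a default. ctx_inval_full_threshold and
 * should_use_full_inval() are hypothetical names.
 */
static unsigned int ctx_inval_full_threshold = 8;	/* default */

static bool should_use_full_inval(unsigned int mapped_queue_count)
{
	/*
	 * Past the watermark, fall back to a full device-wide TLB
	 * invalidation, since per-context invalidations stop paying
	 * off once many exec queues are mapped.
	 */
	return mapped_queue_count >= ctx_inval_full_threshold;
}
```

A modparam would then just set ctx_inval_full_threshold at load time
instead of baking the 8 into the function.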

> +       if (vm->exec_queues.count[guc_to_gt(guc)->info.id] >=
> +           EXEC_QUEUE_COUNT_FULL_THRESHOLD) {
> +               u32 action[] = {
> +                       XE_GUC_ACTION_TLB_INVALIDATION,
> +                       seqno,
> +                       MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
> +               };
> +
> +               err = send_tlb_inval(guc, action, ARRAY_SIZE(action));
> +               goto err_unlock;
> +       }
> +#undef EXEC_QUEUE_COUNT_FULL_THRESHOLD
> +
> +       list_for_each_entry_reverse(q, &vm->exec_queues.list,
> +                                   vm_exec_queue_link)

Add braces around this loop body. Otherwise it's too easy to
accidentally add lines below the if() condition with unintended
behavior.
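i.e., something like this (same code, just with the loop body braced):

```c
	list_for_each_entry_reverse(q, &vm->exec_queues.list,
				    vm_exec_queue_link) {
		if (queue_mapped_in_guc(guc, q) && q->ops->active(q)) {
			last_q = q;
			break;
		}
	}
```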

> +               if (queue_mapped_in_guc(guc, q) && q->ops->active(q)) {
> +                       last_q = q;
> +                       break;
> +               }
> +
> +       if (!last_q) {
> +               /*
> +                * We can't break fence ordering for TLB invalidation jobs, if
> +                * TLB invalidations are inflight issue a dummy invalidation to
> +                * maintain ordering. Nor can we move safely the seqno_recv when
> +                * returning -ECANCELED if TLB invalidations are in flight. Use
> +                * GGTT invalidation as dummy invalidation given ASID
> +                * invalidations are unsupported here.
> +                */
> +               if (xe_tlb_inval_idle(tlb_inval))
> +                       err = -ECANCELED;
> +               else
> +                       err = send_tlb_inval_ggtt(tlb_inval, seqno);
> +               goto err_unlock;
> +       }
> +
> +       list_for_each_entry(q, &vm->exec_queues.list, vm_exec_queue_link) {
> +               int __seqno = last_q == q ? seqno :
> +                       TLB_INVALIDATION_SEQNO_INVALID;
> +               u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX;
> +
> +               /*
> +                * XXX: Techincally we can race here and queue can become
> +                * inactive, not ideal. The TLB invalidation will timeout in
> +                * this case unless we get GuC support to convert to NOP...

We actually can't support this. It won't just time out: GuC will
return an error (invalid parameter, since the context isn't registered
and we're trying to submit something via context invalidation, so
nothing actually gets sent to hardware) and we will issue a GT reset.
We should make sure the context is registered when we send this, and
that any deregistration explicitly happens afterwards.

This is why, generally, we want to do this add/remove not at the queue
level but at the GuC registration/deregistration level, to guarantee
this. But I see you split the queue add and the submission based on
this active flag. Right now I believe we're tracking "pending
deregistration" with exec_queue_destroyed && exec_queue_registered,
although it might be nice to have an explicit state there.

Alternatively, we could capture that error response from GuC, although
there are a few different places we can get an error there, and we
might not want to overload the CT error handler in the event there is
a legitimate state or hardware error that requires a GuC reload/GT
reset.
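As a rough sketch of the first option (exec_queue_pending_dereg() is a
hypothetical helper name, just to illustrate the state we'd want to
check):

```c
	/*
	 * Only send a context-based invalidation for queues that are
	 * still registered with GuC; anything pending deregistration
	 * (today: exec_queue_destroyed && exec_queue_registered) has
	 * to be skipped, or GuC rejects the H2G and we end up in a GT
	 * reset.
	 */
	if (!queue_mapped_in_guc(guc, q) ||
	    !exec_queue_registered(q) ||
	    exec_queue_pending_dereg(q))
		continue;
```

where exec_queue_pending_dereg() would be the explicit "pending
deregistration" state mentioned above.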

Thanks,
Stuart

> +                */
> +               if (!queue_mapped_in_guc(guc, q) || !q->ops->active(q))
> +                       continue;
> +
> +               xe_assert(xe, q->vm == vm);
> +
> +               err = send_tlb_inval_ppgtt(guc, __seqno, start, end,
> +                                          q->guc->id, type);
> +               if (err)
> +                       goto err_unlock;
> +       }
> +
> +err_unlock:
> +       up_read(&vm->exec_queues.lock);
> +       xe_vm_put(vm);
> +
> +       return err;
> +}
> +
>  static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
>  {
>         struct xe_guc *guc = tlb_inval->private;
> @@ -203,7 +306,7 @@ static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
>         return hw_tlb_timeout + 2 * delay;
>  }
>  
> -static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
> +static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = {
>         .all = send_tlb_inval_all,
>         .ggtt = send_tlb_inval_ggtt,
>         .ppgtt = send_tlb_inval_asid_ppgtt,
> @@ -212,6 +315,15 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
>         .timeout_delay = tlb_inval_timeout_delay,
>  };
>  
> +static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = {
> +       .ggtt = send_tlb_inval_ggtt,
> +       .all = send_tlb_inval_all,
> +       .ppgtt = send_tlb_inval_ctx_ppgtt,
> +       .initialized = tlb_inval_initialized,
> +       .flush = tlb_inval_flush,
> +       .timeout_delay = tlb_inval_timeout_delay,
> +};
> +
>  /**
>   * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
>   * @guc: GuC object
> @@ -223,8 +335,14 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
>  void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
>                                  struct xe_tlb_inval *tlb_inval)
>  {
> +       struct xe_device *xe = guc_to_xe(guc);
> +
>         tlb_inval->private = guc;
> -       tlb_inval->ops = &guc_tlb_inval_ops;
> +
> +       if (xe->info.has_ctx_tlb_inval)
> +               tlb_inval->ops = &guc_tlb_inval_ctx_ops;
> +       else
> +               tlb_inval->ops = &guc_tlb_inval_asid_ops;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 1959de3f7a27..9a11066c7d4a 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -863,6 +863,7 @@ static int xe_info_init(struct xe_device *xe,
>                 xe->info.has_device_atomics_on_smem = 1;
>  
>         xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval;
> +       xe->info.has_ctx_tlb_inval = graphics_desc->has_ctx_tlb_inval;
>         xe->info.has_usm = graphics_desc->has_usm;
>         xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
>  
> diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
> index 9892c063a9c5..c08857c06c7e 100644
> --- a/drivers/gpu/drm/xe/xe_pci_types.h
> +++ b/drivers/gpu/drm/xe/xe_pci_types.h
> @@ -63,6 +63,7 @@ struct xe_graphics_desc {
>         u8 has_atomic_enable_pte_bit:1;
>         u8 has_indirect_ring_state:1;
>         u8 has_range_tlb_inval:1;
> +       u8 has_ctx_tlb_inval:1;
>         u8 has_usm:1;
>         u8 has_64bit_timestamp:1;
>  };



Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=746dd3acad81c8af5cb408ff4b50936ecacfeb5c.camel@intel.com \
    --to=stuart.summers@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=lucas.demarchi@intel.com \
    --cc=matthew.brost@intel.com \
    --cc=matthew.d.roper@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox