From: Nirmoy Das <nirmoy.das@linux.intel.com>
To: Matthew Brost <matthew.brost@intel.com>, intel-xe@lists.freedesktop.org
Cc: nirmoy.das@intel.com, rodrigo.vivi@intel.com
Subject: Re: [PATCH v3 3/3] drm/xe: Hold a PM ref when GT TLB invalidations are inflight
Date: Thu, 18 Jul 2024 14:45:44 +0200 [thread overview]
Message-ID: <bce63625-01a4-47e3-9a4f-bca84e1e46ed@linux.intel.com> (raw)
In-Reply-To: <20240718065935.1451330-4-matthew.brost@intel.com>
On 7/18/2024 8:59 AM, Matthew Brost wrote:
> Avoid GT TLB invalidation timeouts by holding a PM ref when
> invalidations are inflight.
>
> v2:
> - Drop PM ref before signaling fence (CI)
>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Nirmoy Das <nirmoy.das@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
I didn't manage reproduce the issue today for some reason(may be because
of different RIL machine changed timing) but the issue exists.
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 62 ++++++++++++-------
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 1 +
> .../gpu/drm/xe/xe_gt_tlb_invalidation_types.h | 4 ++
> drivers/gpu/drm/xe/xe_vm.c | 4 +-
> 4 files changed, 47 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 668c1a3f06ac..481d83d07367 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -13,6 +13,7 @@
> #include "xe_guc.h"
> #include "xe_guc_ct.h"
> #include "xe_mmio.h"
> +#include "xe_pm.h"
> #include "xe_sriov.h"
> #include "xe_trace.h"
> #include "regs/xe_guc_regs.h"
> @@ -35,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
> return hw_tlb_timeout + 2 * delay;
> }
>
> +static void
> +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +{
> + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
> +
> + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
> + xe_gt_tlb_invalidation_fence_fini(fence);
> + dma_fence_signal(&fence->base);
> + if (!stack)
> + dma_fence_put(&fence->base);
> +}
> +
> +static void
> +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +{
> + list_del(&fence->link);
> + __invalidation_fence_signal(xe, fence);
> +}
>
> static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> {
> @@ -56,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
> fence->seqno, gt->tlb_invalidation.seqno_recv);
>
> - list_del(&fence->link);
> fence->base.error = -ETIME;
> - dma_fence_signal(&fence->base);
> - dma_fence_put(&fence->base);
> + invalidation_fence_signal(xe, fence);
> }
> if (!list_empty(>->tlb_invalidation.pending_fences))
> queue_delayed_work(system_wq,
> @@ -89,24 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
> return 0;
> }
>
> -static void
> -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> -{
> - bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
> -
> - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
> - dma_fence_signal(&fence->base);
> - if (!stack)
> - dma_fence_put(&fence->base);
> -}
> -
> -static void
> -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> -{
> - list_del(&fence->link);
> - __invalidation_fence_signal(xe, fence);
> -}
> -
> /**
> * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
> * @gt: graphics tile
> @@ -266,8 +265,10 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
>
> xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
> ret = xe_gt_tlb_invalidation_guc(gt, &fence);
> - if (ret < 0)
> + if (ret < 0) {
> + xe_gt_tlb_invalidation_fence_fini(&fence);
> return ret;
> + }
>
> xe_gt_tlb_invalidation_fence_wait(&fence);
> } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
> @@ -492,12 +493,15 @@ static const struct dma_fence_ops invalidation_fence_ops = {
> * @fence: TLB invalidation fence to initialize
> * @stack: fence is stack variable
> *
> - * Initialize TLB invalidation fence for use
> + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
> + * must be called if fence is not signaled.
> */
> void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> bool stack)
> {
> + xe_pm_runtime_get_noresume(gt_to_xe(gt));
> +
> spin_lock_irq(>->tlb_invalidation.lock);
> dma_fence_init(&fence->base, &invalidation_fence_ops,
> >->tlb_invalidation.lock,
> @@ -508,4 +512,16 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> set_bit(FENCE_STACK_BIT, &fence->base.flags);
> else
> dma_fence_get(&fence->base);
> + fence->gt = gt;
> +}
> +
> +/**
> + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
> + * @fence: TLB invalidation fence to finalize
> + *
> + * Drop PM ref which fence took durinig init.
> + */
> +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
> +{
> + xe_pm_runtime_put(gt_to_xe(fence->gt));
> }
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> index f430d5797af7..a84065fa324c 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
> @@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
> void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
> struct xe_gt_tlb_invalidation_fence *fence,
> bool stack);
> +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
>
> static inline void
> xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> index 934c828efe31..de6e825e0851 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
> @@ -8,6 +8,8 @@
>
> #include <linux/dma-fence.h>
>
> +struct xe_gt;
> +
> /**
> * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
> *
> @@ -17,6 +19,8 @@
> struct xe_gt_tlb_invalidation_fence {
> /** @base: dma fence base */
> struct dma_fence base;
> + /** @gt: GT which fence belong to */
> + struct xe_gt *gt;
> /** @link: link into list of pending tlb fences */
> struct list_head link;
> /** @seqno: seqno of TLB invalidation to signal fence one */
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 4a29d5be7310..9a9c1e11545b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3218,8 +3218,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
> */
> ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
> &fence[id], vma);
> - if (ret < 0)
> + if (ret < 0) {
> + xe_gt_tlb_invalidation_fence_fini(&fence[id]);
> goto wait;
> + }
>
> tile_needs_invalidate |= BIT(id);
> }
next prev parent reply other threads:[~2024-07-18 12:45 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-18 6:59 [PATCH v3 0/3] Keep device awake during GT TLB invalidations Matthew Brost
2024-07-18 6:59 ` [PATCH v3 1/3] drm/xe: Add xe_gt_tlb_invalidation_fence_init helper Matthew Brost
2024-07-18 6:59 ` [PATCH v3 2/3] drm/xe: Drop xe_gt_tlb_invalidation_wait Matthew Brost
2024-07-18 6:59 ` [PATCH v3 3/3] drm/xe: Hold a PM ref when GT TLB invalidations are inflight Matthew Brost
2024-07-18 12:45 ` Nirmoy Das [this message]
2024-07-18 7:09 ` ✓ CI.Patch_applied: success for Keep device awake during GT TLB invalidations (rev3) Patchwork
2024-07-18 7:09 ` ✓ CI.checkpatch: " Patchwork
2024-07-18 7:11 ` ✓ CI.KUnit: " Patchwork
2024-07-18 7:23 ` ✓ CI.Build: " Patchwork
2024-07-18 7:25 ` ✓ CI.Hooks: " Patchwork
2024-07-18 7:27 ` ✓ CI.checksparse: " Patchwork
2024-07-18 7:47 ` ✓ CI.BAT: " Patchwork
2024-07-18 10:26 ` ✗ CI.FULL: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bce63625-01a4-47e3-9a4f-bca84e1e46ed@linux.intel.com \
--to=nirmoy.das@linux.intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=nirmoy.das@intel.com \
--cc=rodrigo.vivi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox