From: Michal Wajdeczko <michal.wajdeczko@intel.com>
To: Nirmoy Das <nirmoy.das@intel.com>, intel-xe@lists.freedesktop.org
Cc: Matthew Brost <matthew.brost@intel.com>,
Rodrigo Vivi <rodrigo.vivi@intel.com>,
Sai Gowtham Ch <sai.gowtham.ch@intel.com>
Subject: Re: [PATCH 1/2] drm/xe: Add sent and recv counters for tlb invalidations
Date: Tue, 23 Jul 2024 14:22:43 +0200 [thread overview]
Message-ID: <9ecd36c8-b880-4097-a6ae-27e786b15497@intel.com> (raw)
In-Reply-To: <20240723111610.21564-2-nirmoy.das@intel.com>
On 23.07.2024 13:16, Nirmoy Das wrote:
> Add counters for sent and received TLB invalidation requests, which
> could then be queried as sysfs files from userspace.
s/sysfs/debugfs ?
>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Sai Gowtham Ch <sai.gowtham.ch@intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 37 +++++++++++++++------
> drivers/gpu/drm/xe/xe_gt_types.h | 4 +++
> 2 files changed, 30 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> index 481d83d07367..f84717c1aafa 100644
> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
> @@ -37,8 +37,11 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
> }
>
> static void
> -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +__invalidation_fence_signal(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence,
> + bool failed)
> {
> + struct xe_device *xe = gt_to_xe(gt);
> bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
>
> trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
> @@ -46,13 +49,19 @@ __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_
> dma_fence_signal(&fence->base);
> if (!stack)
> dma_fence_put(&fence->base);
> +
> + /* Only increment the counter when tlb inval is done successfully */
> + if (!failed)
> + atomic64_inc(&gt->tlb_invalidation.received_count);
> }
>
> static void
> -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
> +invalidation_fence_signal(struct xe_gt *gt,
> + struct xe_gt_tlb_invalidation_fence *fence,
> + bool failed)
> {
> list_del(&fence->link);
> - __invalidation_fence_signal(xe, fence);
> + __invalidation_fence_signal(gt, fence, failed);
> }
>
> static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> @@ -76,7 +85,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
> fence->seqno, gt->tlb_invalidation.seqno_recv);
>
> fence->base.error = -ETIME;
> - invalidation_fence_signal(xe, fence);
> + invalidation_fence_signal(gt, fence, true);
> }
> if (!list_empty(&gt->tlb_invalidation.pending_fences))
> queue_delayed_work(system_wq,
> @@ -102,6 +111,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
> spin_lock_init(&gt->tlb_invalidation.lock);
> INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
> xe_gt_tlb_fence_timeout);
> + atomic64_set(&gt->tlb_invalidation.sent_count, 0);
> + atomic64_set(&gt->tlb_invalidation.received_count, 0);
>
> return 0;
> }
> @@ -140,7 +151,9 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
>
> list_for_each_entry_safe(fence, next,
> &gt->tlb_invalidation.pending_fences, link)
> - invalidation_fence_signal(gt_to_xe(gt), fence);
> + invalidation_fence_signal(gt, fence, false);
> + atomic64_set(&gt->tlb_invalidation.sent_count, 0);
> + atomic64_set(&gt->tlb_invalidation.received_count, 0);
Hmm, any TLB invalidation timeouts/errors, which would make
received_count != sent_count, should trigger a GT reset, which in turn
will reset those counters. So under which conditions do you expect those
two stats to differ? Is it just while waiting for an ack?
Maybe a better/cleaner option would be to track/display the number of TLB
invalidation requests in flight?
> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
> mutex_unlock(&gt->uc.guc.ct.lock);
> }
> @@ -182,7 +195,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> action[1] = seqno;
> ret = xe_guc_ct_send_locked(&guc->ct, action, len,
> G2H_LEN_DW_TLB_INVALIDATE, 1);
> - if (!ret && fence) {
> + if (!ret) {
> spin_lock_irq(&gt->tlb_invalidation.pending_lock);
> /*
> * We haven't actually published the TLB fence as per
> @@ -191,7 +204,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> * we can just go ahead and signal the fence here.
> */
> if (tlb_invalidation_seqno_past(gt, seqno)) {
> - __invalidation_fence_signal(xe, fence);
> + __invalidation_fence_signal(gt, fence, false);
> } else {
> fence->invalidation_time = ktime_get();
> list_add_tail(&fence->link,
> @@ -203,14 +216,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
> tlb_timeout_jiffies(gt));
> }
> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
> - } else if (ret < 0 && fence) {
> - __invalidation_fence_signal(xe, fence);
> + } else if (ret < 0) {
> + __invalidation_fence_signal(gt, fence, true);
> }
> if (!ret) {
> gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
> TLB_INVALIDATION_SEQNO_MAX;
> if (!gt->tlb_invalidation.seqno)
> gt->tlb_invalidation.seqno = 1;
> +
> + atomic64_inc(&gt->tlb_invalidation.sent_count);
> }
> mutex_unlock(&guc->ct.lock);
>
> @@ -321,7 +336,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
>
> /* Execlists not supported */
> if (gt_to_xe(gt)->info.force_execlist) {
> - __invalidation_fence_signal(xe, fence);
> + __invalidation_fence_signal(gt, fence, true);
> return 0;
> }
>
> @@ -455,7 +470,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
> if (!tlb_invalidation_seqno_past(gt, fence->seqno))
> break;
>
> - invalidation_fence_signal(xe, fence);
> + invalidation_fence_signal(gt, fence, false);
> }
>
> if (!list_empty(&gt->tlb_invalidation.pending_fences))
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index ef68c4a92972..130d9f5cb5c2 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -199,6 +199,10 @@ struct xe_gt {
> struct delayed_work fence_tdr;
> /** @tlb_invalidation.lock: protects TLB invalidation fences */
> spinlock_t lock;
> + /** @tlb_invalidation.sent_count: counter for sent TLB inval requests */
> + atomic64_t sent_count;
> + /** @tlb_invalidation.received_count: counter for received TLB inval requests */
> + atomic64_t received_count;
> } tlb_invalidation;
>
> /**
next prev parent reply other threads:[~2024-07-23 12:22 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-23 11:16 [PATCH 0/2] Add debugfs file to dump tlb inval stats Nirmoy Das
2024-07-23 11:16 ` [PATCH 1/2] drm/xe: Add sent and recv counters for tlb invalidations Nirmoy Das
2024-07-23 12:22 ` Michal Wajdeczko [this message]
2024-07-23 13:07 ` Nirmoy Das
2024-07-23 16:23 ` Matthew Brost
2024-07-24 9:20 ` Nirmoy Das
2024-07-23 11:16 ` [PATCH 2/2] drm/xe: Create debugfs for tlb inval stats Nirmoy Das
2024-07-23 12:14 ` Michal Wajdeczko
2024-07-23 13:15 ` Nirmoy Das
2024-07-23 14:27 ` Michal Wajdeczko
2024-07-24 9:18 ` Nirmoy Das
2024-07-23 11:38 ` ✓ CI.Patch_applied: success for Add debugfs file to dump " Patchwork
2024-07-23 11:38 ` ✓ CI.checkpatch: " Patchwork
2024-07-23 11:39 ` ✓ CI.KUnit: " Patchwork
2024-07-23 11:51 ` ✓ CI.Build: " Patchwork
2024-07-23 11:54 ` ✓ CI.Hooks: " Patchwork
2024-07-23 11:55 ` ✓ CI.checksparse: " Patchwork
2024-07-23 12:15 ` ✓ CI.BAT: " Patchwork
2024-07-23 13:12 ` ✗ CI.FULL: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9ecd36c8-b880-4097-a6ae-27e786b15497@intel.com \
--to=michal.wajdeczko@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
--cc=nirmoy.das@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=sai.gowtham.ch@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox