Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Nirmoy Das <nirmoy.das@intel.com>
To: Michal Wajdeczko <michal.wajdeczko@intel.com>,
	<intel-xe@lists.freedesktop.org>
Cc: Matthew Brost <matthew.brost@intel.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	Sai Gowtham Ch <sai.gowtham.ch@intel.com>
Subject: Re: [PATCH 1/2] drm/xe: Add sent and recv counters for tlb invalidations
Date: Tue, 23 Jul 2024 15:07:05 +0200	[thread overview]
Message-ID: <51db88f1-79b1-44e9-9354-0f628069a64b@intel.com> (raw)
In-Reply-To: <9ecd36c8-b880-4097-a6ae-27e786b15497@intel.com>


On 7/23/2024 2:22 PM, Michal Wajdeczko wrote:
>
> On 23.07.2024 13:16, Nirmoy Das wrote:
>> Add counters for TLB invalidation sent, receive requests which
>> then could be query as sysfs files from userspace.
> s/sysfs/debugfs ?


I will fix it.

>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Sai Gowtham Ch <sai.gowtham.ch@intel.com>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 37 +++++++++++++++------
>>   drivers/gpu/drm/xe/xe_gt_types.h            |  4 +++
>>   2 files changed, 30 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> index 481d83d07367..f84717c1aafa 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> @@ -37,8 +37,11 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
>>   }
>>   
>>   static void
>> -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
>> +__invalidation_fence_signal(struct xe_gt *gt,
>> +			    struct xe_gt_tlb_invalidation_fence *fence,
>> +			    bool failed)
>>   {
>> +	struct xe_device *xe = gt_to_xe(gt);
>>   	bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
>>   
>>   	trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
>> @@ -46,13 +49,19 @@ __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_
>>   	dma_fence_signal(&fence->base);
>>   	if (!stack)
>>   		dma_fence_put(&fence->base);
>> +
>> +	/* Only increment the counter when tlb inval is done successfully */
>> +	if (!failed)
>> +		atomic64_inc(&gt->tlb_invalidation.received_count);
>>   }
>>   
>>   static void
>> -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
>> +invalidation_fence_signal(struct xe_gt *gt,
>> +			  struct xe_gt_tlb_invalidation_fence *fence,
>> +			  bool failed)
>>   {
>>   	list_del(&fence->link);
>> -	__invalidation_fence_signal(xe, fence);
>> +	__invalidation_fence_signal(gt, fence, failed);
>>   }
>>   
>>   static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>> @@ -76,7 +85,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>>   			  fence->seqno, gt->tlb_invalidation.seqno_recv);
>>   
>>   		fence->base.error = -ETIME;
>> -		invalidation_fence_signal(xe, fence);
>> +		invalidation_fence_signal(gt, fence, true);
>>   	}
>>   	if (!list_empty(&gt->tlb_invalidation.pending_fences))
>>   		queue_delayed_work(system_wq,
>> @@ -102,6 +111,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
>>   	spin_lock_init(&gt->tlb_invalidation.lock);
>>   	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
>>   			  xe_gt_tlb_fence_timeout);
>> +	atomic64_set(&gt->tlb_invalidation.sent_count, 0);
>> +	atomic64_set(&gt->tlb_invalidation.received_count, 0);
>>   
>>   	return 0;
>>   }
>> @@ -140,7 +151,9 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
>>   
>>   	list_for_each_entry_safe(fence, next,
>>   				 &gt->tlb_invalidation.pending_fences, link)
>> -		invalidation_fence_signal(gt_to_xe(gt), fence);
>> +		invalidation_fence_signal(gt, fence, false);
>> +	atomic64_set(&gt->tlb_invalidation.sent_count, 0);
>> +	atomic64_set(&gt->tlb_invalidation.received_count, 0);
> hmm, any TLB invalidation timeouts/errors, which would make
> received_count != sent_count, should trigger a GT reset, which in turn
> will reset those counters, so under which condition you expect those two
> stats being not equal?

We tolerate GGTT TLB invalidation timeouts without needing to do a GT
reset; probably we shouldn't? If we shouldn't, then I agree that we can
have a total sent counter and another counter for in-flight requests.


> is it just during the waiting for some ack?
>
> maybe better/cleaner option would be to track/display number of TLB
> invalidation requests in flight ?


The request from Sai was about having a total TLB invalidation sent
counter. I think an in-flight counter would be a bonus and should be
useful for debugging.


Regards,

Nirmoy

>
>>   	spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
>>   	mutex_unlock(&gt->uc.guc.ct.lock);
>>   }
>> @@ -182,7 +195,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>>   	action[1] = seqno;
>>   	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
>>   				    G2H_LEN_DW_TLB_INVALIDATE, 1);
>> -	if (!ret && fence) {
>> +	if (!ret) {
>>   		spin_lock_irq(&gt->tlb_invalidation.pending_lock);
>>   		/*
>>   		 * We haven't actually published the TLB fence as per
>> @@ -191,7 +204,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>>   		 * we can just go ahead and signal the fence here.
>>   		 */
>>   		if (tlb_invalidation_seqno_past(gt, seqno)) {
>> -			__invalidation_fence_signal(xe, fence);
>> +			__invalidation_fence_signal(gt, fence, false);
>>   		} else {
>>   			fence->invalidation_time = ktime_get();
>>   			list_add_tail(&fence->link,
>> @@ -203,14 +216,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>>   						   tlb_timeout_jiffies(gt));
>>   		}
>>   		spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
>> -	} else if (ret < 0 && fence) {
>> -		__invalidation_fence_signal(xe, fence);
>> +	} else if (ret < 0) {
>> +		__invalidation_fence_signal(gt, fence, true);
>>   	}
>>   	if (!ret) {
>>   		gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
>>   			TLB_INVALIDATION_SEQNO_MAX;
>>   		if (!gt->tlb_invalidation.seqno)
>>   			gt->tlb_invalidation.seqno = 1;
>> +
>> +		atomic64_inc(&gt->tlb_invalidation.sent_count);
>>   	}
>>   	mutex_unlock(&guc->ct.lock);
>>   
>> @@ -321,7 +336,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
>>   
>>   	/* Execlists not supported */
>>   	if (gt_to_xe(gt)->info.force_execlist) {
>> -		__invalidation_fence_signal(xe, fence);
>> +		__invalidation_fence_signal(gt, fence, true);
>>   		return 0;
>>   	}
>>   
>> @@ -455,7 +470,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
>>   		if (!tlb_invalidation_seqno_past(gt, fence->seqno))
>>   			break;
>>   
>> -		invalidation_fence_signal(xe, fence);
>> +		invalidation_fence_signal(gt, fence, false);
>>   	}
>>   
>>   	if (!list_empty(&gt->tlb_invalidation.pending_fences))
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index ef68c4a92972..130d9f5cb5c2 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -199,6 +199,10 @@ struct xe_gt {
>>   		struct delayed_work fence_tdr;
>>   		/** @tlb_invalidation.lock: protects TLB invalidation fences */
>>   		spinlock_t lock;
>> +		/** @tlb_invalidation.sent_count: counter for sent TLB inval requests */
>> +		atomic64_t sent_count;
>> +		/** @tlb_invalidation.received_count: counter for received TLB inval requestes */
>> +		atomic64_t received_count;
>>   	} tlb_invalidation;
>>   
>>   	/**

  reply	other threads:[~2024-07-23 13:07 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-23 11:16 [PATCH 0/2] Add debugfs file to dump tlb inval stats Nirmoy Das
2024-07-23 11:16 ` [PATCH 1/2] drm/xe: Add sent and recv counters for tlb invalidations Nirmoy Das
2024-07-23 12:22   ` Michal Wajdeczko
2024-07-23 13:07     ` Nirmoy Das [this message]
2024-07-23 16:23       ` Matthew Brost
2024-07-24  9:20         ` Nirmoy Das
2024-07-23 11:16 ` [PATCH 2/2] drm/xe: Create debugfs for tlb inval stats Nirmoy Das
2024-07-23 12:14   ` Michal Wajdeczko
2024-07-23 13:15     ` Nirmoy Das
2024-07-23 14:27       ` Michal Wajdeczko
2024-07-24  9:18         ` Nirmoy Das
2024-07-23 11:38 ` ✓ CI.Patch_applied: success for Add debugfs file to dump " Patchwork
2024-07-23 11:38 ` ✓ CI.checkpatch: " Patchwork
2024-07-23 11:39 ` ✓ CI.KUnit: " Patchwork
2024-07-23 11:51 ` ✓ CI.Build: " Patchwork
2024-07-23 11:54 ` ✓ CI.Hooks: " Patchwork
2024-07-23 11:55 ` ✓ CI.checksparse: " Patchwork
2024-07-23 12:15 ` ✓ CI.BAT: " Patchwork
2024-07-23 13:12 ` ✗ CI.FULL: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51db88f1-79b1-44e9-9354-0f628069a64b@intel.com \
    --to=nirmoy.das@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=michal.wajdeczko@intel.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=sai.gowtham.ch@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox