Re: [PATCH v13 7/7] drm/xe/pmu: Add GT C6 events

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: <intel-xe@lists.freedesktop.org>,
	Peter Zijlstra <peterz@infradead.org>,
	<linux-perf-users@vger.kernel.org>,
	Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Subject: Re: [PATCH v13 7/7] drm/xe/pmu: Add GT C6 events
Date: Fri, 17 Jan 2025 13:44:02 -0500	[thread overview]
Message-ID: <Z4qk8qv1_VyV8W80@intel.com> (raw)
In-Reply-To: <20250116230718.82460-8-lucas.demarchi@intel.com>

On Thu, Jan 16, 2025 at 03:07:18PM -0800, Lucas De Marchi wrote:
> From: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> 
> Provide a PMU interface for GT C6 residency counters. The implementation
> is ported over from the i915 PMU code. Residency is provided in units of
> ms(like sysfs entry in - /sys/class/drm/card0/device/tile0/gt0/gtidle).
> 
> Sample usage and output:
> 
> 	$ perf list | grep gt-c6
> 	  xe_0000_00_02.0/gt-c6-residency/                   [Kernel PMU event]
> 
> 	$ tail /sys/bus/event_source/devices/xe_0000_00_02.0/events/gt-c6-residency*
> 	==> /sys/bus/event_source/devices/xe_0000_00_02.0/events/gt-c6-residency <==
> 	event=0x01
> 
> 	==> /sys/bus/event_source/devices/xe_0000_00_02.0/events/gt-c6-residency.unit <==
> 	ms
> 
> 	$ perf stat -e xe_0000_00_02.0/gt-c6-residency,gt=0/ -I1000
> 	#           time             counts unit events
> 	     1.001196056              1,001 ms   xe_0000_00_02.0/gt-c6-residency,gt=0/
> 	     2.005216219              1,003 ms   xe_0000_00_02.0/gt-c6-residency,gt=0/
> 
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
> 
> Besides the rebase, that changed a lot how the event was added,
> here is a summary of other changes:
> 
> - Use xe_pm_runtime_get_if_active() when reading
>   xe_gt_idle_residency_msec() as there's not guarantee it will not be
>   suspended anymore by the time it reads the counter
> 
> - Drop sample[] from the pmu struct and only use the prev/counter from
>   the perf_event struct. This avoids mixing the counter reported to 2
>   separate clients.
> 
> - Drop time ktime helpers and just use what's provided by
>   include/linux/ktime.h
> 
>  drivers/gpu/drm/xe/xe_pmu.c | 56 +++++++++++++++++++++++++++++++------
>  1 file changed, 48 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
> index c2af82ec3f793..37df9d3cc110c 100644
> --- a/drivers/gpu/drm/xe/xe_pmu.c
> +++ b/drivers/gpu/drm/xe/xe_pmu.c
> @@ -11,6 +11,7 @@
>  #include "xe_device.h"
>  #include "xe_force_wake.h"
>  #include "xe_gt_clock.h"
> +#include "xe_gt_idle.h"
>  #include "xe_gt_printk.h"
>  #include "xe_mmio.h"
>  #include "xe_macros.h"
> @@ -117,16 +118,50 @@ static int xe_pmu_event_init(struct perf_event *event)
>  	return 0;
>  }
>  
> -static u64 __xe_pmu_event_read(struct perf_event *event)
> +static u64 read_gt_c6_residency(struct xe_pmu *pmu, struct xe_gt *gt, u64 prev)
>  {
> -	struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
> +	struct xe_device *xe = gt_to_xe(gt);
> +	unsigned long flags;
> +	ktime_t t0;
> +	s64 delta;
> +
> +	if (xe_pm_runtime_get_if_active(xe)) {
> +		u64 val = xe_gt_idle_residency_msec(&gt->gtidle);
> +
> +		xe_pm_runtime_put(xe);
> +
> +		return val;
> +	}
> +
> +	/*
> +	 * Estimate the idle residency by looking at the time the device was
> +	 * suspended: should be good enough as long as the sampling frequency is
> +	 * 2x or more than the suspend frequency.
> +	 */
> +	raw_spin_lock_irqsave(&pmu->lock, flags);
> +	t0 = pmu->suspend_timestamp[gt->info.id];
> +	raw_spin_unlock_irqrestore(&pmu->lock, flags);

I believe we should simply avoid the patch 4 and this block and use
xe_pm_runtime_get(xe) instead of the if_active variant.

I'm afraid this won't be a good estimative since this mix 2 different wakes
of 2 different IP blocks... the runtime pm is for the entire device, while
in runtime active the GTs can be sleeping in GT-C6.

or did we get some deadlock risk on that scenario?
perhaps we should get runtime pm in upper layers when using PMU?

> +
> +	delta = ktime_ms_delta(ktime_get(), t0);
> +
> +	return prev + delta;
> +}
> +
> +static u64 __xe_pmu_event_read(struct perf_event *event, u64 prev)
> +{
> +	struct xe_pmu *pmu = container_of(event->pmu, typeof(*pmu), base);
> +	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
>  	struct xe_gt *gt = event_to_gt(event);
> -	u64 val = 0;
>  
>  	if (!gt)
> -		return 0;
> +		return prev;
> +
> +	switch (config_to_event_id(event->attr.config)) {
> +	case XE_PMU_EVENT_GT_C6_RESIDENCY:
> +		return read_gt_c6_residency(pmu, gt, prev);
> +	}
>  
> -	return val;
> +	return prev;
>  }
>  
>  static void xe_pmu_event_update(struct perf_event *event)
> @@ -136,10 +171,11 @@ static void xe_pmu_event_update(struct perf_event *event)
>  
>  	prev = local64_read(&hwc->prev_count);
>  	do {
> -		new = __xe_pmu_event_read(event);
> +		new = __xe_pmu_event_read(event, prev);
>  	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));
>  
> -	local64_add(new - prev, &event->count);
> +	if (new > prev)
> +		local64_add(new - prev, &event->count);
>  }
>  
>  static void xe_pmu_event_read(struct perf_event *event)
> @@ -162,7 +198,7 @@ static void xe_pmu_enable(struct perf_event *event)
>  	 * for all listeners. Even when the event was already enabled and has
>  	 * an existing non-zero value.
>  	 */
> -	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
> +	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event, 0));
>  }
>  
>  static void xe_pmu_event_start(struct perf_event *event, int flags)
> @@ -267,6 +303,10 @@ static const struct attribute_group pmu_events_attr_group = {
>  
>  static void set_supported_events(struct xe_pmu *pmu)
>  {
> +	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
> +
> +	if (!xe->info.skip_guc_pc)
> +		pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
>  }
>  
>  /**
> -- 
> 2.48.0
>

next prev parent reply	other threads:[~2025-01-17 18:44 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-16 23:07 [PATCH v13 0/7] drm/xe/pmu: PMU interface for Xe Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 1/7] perf/core: Add PMU_EVENT_ATTR_ID_STRING Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 2/7] drm/xe/pmu: Enable PMU interface Lucas De Marchi
2025-01-17 18:30   ` Rodrigo Vivi
2025-01-17 22:19     ` Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 3/7] drm/xe/pmu: Assert max gt Lucas De Marchi
2025-01-17 18:31   ` Rodrigo Vivi
2025-01-16 23:07 ` [PATCH v13 4/7] drm/xe/pmu: Hook up gt suspend notification Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 5/7] drm/xe/pmu: Extract xe_pmu_event_update() Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 6/7] drm/xe/pmu: Add attribute skeleton Lucas De Marchi
2025-01-16 23:07 ` [PATCH v13 7/7] drm/xe/pmu: Add GT C6 events Lucas De Marchi
2025-01-17 18:44   ` Rodrigo Vivi [this message]
2025-01-17 22:40     ` Lucas De Marchi
2025-01-17 22:52       ` Rodrigo Vivi
2025-01-21  6:16   ` Riana Tauro
2025-01-21 20:38     ` Lucas De Marchi
2025-01-22  5:28       ` Riana Tauro
2025-01-17  1:45 ` ✓ CI.Patch_applied: success for drm/xe/pmu: PMU interface for Xe (rev13) Patchwork
2025-01-17  1:45 ` ✗ CI.checkpatch: warning " Patchwork
2025-01-17  1:46 ` ✓ CI.KUnit: success " Patchwork
2025-01-17  2:04 ` ✓ CI.Build: " Patchwork
2025-01-17  2:07 ` ✓ CI.Hooks: " Patchwork
2025-01-17  2:08 ` ✗ CI.checksparse: warning " Patchwork
2025-01-17  2:34 ` ✓ Xe.CI.BAT: success " Patchwork
2025-01-17  7:02 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z4qk8qv1_VyV8W80@intel.com \
    --to=rodrigo.vivi@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=lucas.demarchi@intel.com \
    --cc=peterz@infradead.org \
    --cc=vinay.belgaumkar@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.