Re: [PATCH v7 10/10] perf/x86/rapl: Add core energy counter support for AMD CPUs

public inbox for linux-perf-users@vger.kernel.org
 help / color / mirror / Atom feed

From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
To: Koichiro Den <koichiro.den@canonical.com>
Cc: peterz@infradead.org, mingo@redhat.com, rui.zhang@intel.com,
	irogers@google.com, kan.liang@linux.intel.com,
	tglx@linutronix.de, bp@alien8.dei, gautham.shenoy@amd.com,
	kprateek.nayak@amd.com, ravi.bangoria@amd.com, x86@kernel.org,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v7 10/10] perf/x86/rapl: Add core energy counter support for AMD CPUs
Date: Mon, 13 Jan 2025 12:04:43 +0530	[thread overview]
Message-ID: <98f6108a-3a4f-4ad1-8f0a-a03264f7a2d7@amd.com> (raw)
In-Reply-To: <uv7mz6vew2bzgre5jdpmwldxljp5djzmuiksqdcdwipfm4zm7w@ribobcretidk>

On 1/12/2025 7:12 PM, Koichiro Den wrote:
> On Fri, Nov 15, 2024 at 06:08:06AM GMT, Dhananjay Ugwekar wrote:
>> Add a new "power_core" PMU and "energy-core" event for monitoring
>> energy consumption by each individual core. The existing energy-cores
>> event aggregates the energy consumption of CPU cores at the package level.
>> This new event aligns with the AMD's per-core energy counters.
>>
>> Tested the package level and core level PMU counters with workloads
>> pinned to different CPUs.
>>
>> Results with workload pinned to CPU 4 in core 4 on an AMD Zen4 Genoa
>> machine:
>>
>> $ sudo perf stat --per-core -e power_core/energy-core/ -- taskset -c 4 stress-ng --matrix 1 --timeout 5s
>> stress-ng: info:  [21250] setting to a 5 second run per stressor
>> stress-ng: info:  [21250] dispatching hogs: 1 matrix
>> stress-ng: info:  [21250] successful run completed in 5.00s
>>
>>  Performance counter stats for 'system wide':
>>
>> S0-D0-C0              1               0.00 Joules power_core/energy-core/
>> S0-D0-C1              1               0.00 Joules power_core/energy-core/
>> S0-D0-C2              1               0.00 Joules power_core/energy-core/
>> S0-D0-C3              1               0.00 Joules power_core/energy-core/
>> S0-D0-C4              1               8.43 Joules power_core/energy-core/
>> S0-D0-C5              1               0.00 Joules power_core/energy-core/
>> S0-D0-C6              1               0.00 Joules power_core/energy-core/
>> S0-D0-C7              1               0.00 Joules power_core/energy-core/
>> S0-D1-C8              1               0.00 Joules power_core/energy-core/
>> S0-D1-C9              1               0.00 Joules power_core/energy-core/
>> S0-D1-C10             1               0.00 Joules power_core/energy-core/
>>
>> Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
>> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
>> ---
>>  arch/x86/events/rapl.c | 185 +++++++++++++++++++++++++++++++++--------
>>  1 file changed, 152 insertions(+), 33 deletions(-)
>>
>> diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
>> index 6e51386ff91f..e9be1f31163d 100644
>> --- a/arch/x86/events/rapl.c
>> +++ b/arch/x86/events/rapl.c
>> @@ -39,6 +39,10 @@
>>   *	  event: rapl_energy_psys
>>   *    perf code: 0x5
>>   *
>> + *  core counter: consumption of a single physical core
>> + *	  event: rapl_energy_core (power_core PMU)
>> + *    perf code: 0x1
>> + *
>>   * We manage those counters as free running (read-only). They may be
>>   * use simultaneously by other tools, such as turbostat.
>>   *
>> @@ -81,6 +85,10 @@ enum perf_rapl_pkg_events {
>>  	NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX,
>>  };
>>  
>> +#define PERF_RAPL_CORE			0		/* single core */
>> +#define PERF_RAPL_CORE_EVENTS_MAX	1
>> +#define NR_RAPL_CORE_DOMAINS		PERF_RAPL_CORE_EVENTS_MAX
>> +
>>  static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = {
>>  	"pp0-core",
>>  	"package",
>> @@ -89,6 +97,8 @@ static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst
>>  	"psys",
>>  };
>>  
>> +static const char *const rapl_core_domain_name __initconst = "core";
>> +
>>  /*
>>   * event code: LSB 8 bits, passed in attr->config
>>   * any other bit is reserved
>> @@ -141,14 +151,18 @@ enum rapl_unit_quirk {
>>  
>>  struct rapl_model {
>>  	struct perf_msr *rapl_pkg_msrs;
>> +	struct perf_msr *rapl_core_msrs;
>>  	unsigned long	pkg_events;
>> +	unsigned long	core_events;
>>  	unsigned int	msr_power_unit;
>>  	enum rapl_unit_quirk	unit_quirk;
>>  };
>>  
>>   /* 1/2^hw_unit Joule */
>>  static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly;
>> +static int rapl_core_hw_unit __read_mostly;
>>  static struct rapl_pmus *rapl_pmus_pkg;
>> +static struct rapl_pmus *rapl_pmus_core;
>>  static u64 rapl_timer_ms;
>>  static struct rapl_model *rapl_model;
>>  
>> @@ -156,14 +170,23 @@ static struct rapl_model *rapl_model;
>>   * Helper function to get the correct topology id according to the
>>   * RAPL PMU scope.
>>   */
>> -static inline unsigned int get_rapl_pmu_idx(int cpu)
>> -{	/*
>> +static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
>> +{
>> +	/*
>>  	 * Returns unsigned int, which converts the '-1' return value
>>  	 * (for non-existent mappings in topology map) to UINT_MAX, so
>>  	 * the error check in the caller is simplified.
>>  	 */
>> -	return rapl_pkg_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) :
>> -					     topology_logical_die_id(cpu);
>> +	switch (scope) {
>> +	case PERF_PMU_SCOPE_PKG:
>> +		return topology_logical_package_id(cpu);
>> +	case PERF_PMU_SCOPE_DIE:
>> +		return topology_logical_die_id(cpu);
>> +	case PERF_PMU_SCOPE_CORE:
>> +		return topology_logical_core_id(cpu);
>> +	default:
>> +		return -EINVAL;
>> +	}
>>  }
>>  
>>  static inline u64 rapl_read_counter(struct perf_event *event)
>> @@ -173,19 +196,20 @@ static inline u64 rapl_read_counter(struct perf_event *event)
>>  	return raw;
>>  }
>>  
>> -static inline u64 rapl_scale(u64 v, int cfg)
>> +static inline u64 rapl_scale(u64 v, struct perf_event *event)
>>  {
>> -	if (cfg > NR_RAPL_PKG_DOMAINS) {
>> -		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
>> -		return v;
>> -	}
>> +	int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1];
>> +
>> +	if (event->pmu->scope == PERF_PMU_SCOPE_CORE)
>> +		hw_unit = rapl_core_hw_unit;
>> +
>>  	/*
>>  	 * scale delta to smallest unit (1/2^32)
>>  	 * users must then scale back: count * 1/(1e9*2^32) to get Joules
>>  	 * or use ldexp(count, -32).
>>  	 * Watts = Joules/Time delta
>>  	 */
>> -	return v << (32 - rapl_pkg_hw_unit[cfg - 1]);
>> +	return v << (32 - hw_unit);
>>  }
>>  
>>  static u64 rapl_event_update(struct perf_event *event)
>> @@ -212,7 +236,7 @@ static u64 rapl_event_update(struct perf_event *event)
>>  	delta = (new_raw_count << shift) - (prev_raw_count << shift);
>>  	delta >>= shift;
>>  
>> -	sdelta = rapl_scale(delta, event->hw.config);
>> +	sdelta = rapl_scale(delta, event);
>>  
>>  	local64_add(sdelta, &event->count);
>>  
>> @@ -341,13 +365,14 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags)
>>  static int rapl_pmu_event_init(struct perf_event *event)
>>  {
>>  	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
>> -	int bit, ret = 0;
>> +	int bit, rapl_pmus_scope, ret = 0;
>>  	struct rapl_pmu *rapl_pmu;
>>  	unsigned int rapl_pmu_idx;
>> +	struct rapl_pmus *rapl_pmus;
>>  
>> -	/* only look at RAPL events */
>> -	if (event->attr.type != rapl_pmus_pkg->pmu.type)
>> -		return -ENOENT;
>> +	/* unsupported modes and filters */
>> +	if (event->attr.sample_period) /* no sampling */
>> +		return -EINVAL;
> 
> Hi Dhananjay,
> 
> On linux-next, since this commit, it seems that simple sampling with 'perf
> record -- <command>' (i.e. the default event), 'perf top' etc. can
> unexpectedly fail because rapl_pmu_event_init() now returns -EINVAL instead
> of -ENOENT even in such cases of a type mismatch. I observed that this
> prevents evsel__fallback() from falling back to cpu-clock or task-clock.
> 
> Should we reorder the checks in rapl_pmu_event_init() to allow an early
> return with -ENOENT in such cases, as shown below? I'm not very familiar
> with this area and I might be missing something. I'd appreciate it if you
> could share your thoughts.
> 
> --- a/arch/x86/events/rapl.c
> +++ b/arch/x86/events/rapl.c
> @@ -370,17 +370,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
>         unsigned int rapl_pmu_idx;
>         struct rapl_pmus *rapl_pmus;
> 
> -       /* unsupported modes and filters */
> -       if (event->attr.sample_period) /* no sampling */
> -               return -EINVAL;
> -
> -       /* check only supported bits are set */
> -       if (event->attr.config & ~RAPL_EVENT_MASK)
> -               return -EINVAL;
> -
> -       if (event->cpu < 0)
> -               return -EINVAL;
> -
>         rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu);
>         if (!rapl_pmus)
>                 return -EINVAL;
> @@ -411,6 +400,17 @@ static int rapl_pmu_event_init(struct perf_event *event)
>         } else
>                 return -EINVAL;
> 
> +       /* unsupported modes and filters */
> +       if (event->attr.sample_period) /* no sampling */
> +               return -EINVAL;
> +
> +       /* check only supported bits are set */
> +       if (event->attr.config & ~RAPL_EVENT_MASK)
> +               return -EINVAL;
> +
> +       if (event->cpu < 0)
> +               return -EINVAL;
> +
>         /* check event supported */
>         if (!(rapl_pmus->cntr_mask & (1 << bit)))
>                 return -EINVAL;
> 
> Thanks.
> 
> -Koichiro

Hello Koichiro,

I tried reproducing the issue you mentioned using "sudo perf record -- sleep 2" and 
"sudo perf top" commands on an AMD EPYC system, the commands worked successfully. 
Can you please mention which system and which exact commands you're 
running that reproduced the issue?

My analysis is, if we are running "perf record/top" with the default event, we would 
not enter the rapl_pmu_event_init() function, which renders the reordering of the type 
checks irrelevant. Regardless, please let me know how I can reproduce the issue.

Thanks,
Dhananjay

next prev parent reply	other threads:[~2025-01-13  6:34 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-15  6:07 [PATCH v7 00/10] Add RAPL core energy counter support for AMD CPUs Dhananjay Ugwekar
2024-11-15  6:07 ` [PATCH v7 01/10] perf/x86/rapl: Remove the unused get_rapl_pmu_cpumask() function Dhananjay Ugwekar
2024-11-15  6:07 ` [PATCH v7 02/10] x86/topology: Introduce topology_logical_core_id() Dhananjay Ugwekar
2024-11-15  6:07 ` [PATCH v7 03/10] perf/x86/rapl: Remove the cpu_to_rapl_pmu() function Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 04/10] perf/x86/rapl: Rename rapl_pmu variables Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 05/10] perf/x86/rapl: Make rapl_model struct global Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 06/10] perf/x86/rapl: Add arguments to the init and cleanup functions Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 07/10] perf/x86/rapl: Modify the generic variable names to *_pkg* Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 08/10] perf/x86/rapl: Remove the global variable rapl_msrs Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 09/10] perf/x86/rapl: Move the cntr_mask to rapl_pmus struct Dhananjay Ugwekar
2024-11-15  6:08 ` [PATCH v7 10/10] perf/x86/rapl: Add core energy counter support for AMD CPUs Dhananjay Ugwekar
2024-11-20  7:58   ` Peter Jung
2024-11-20  8:18     ` Peter Jung
2024-11-20 13:58     ` Dhananjay Ugwekar
2024-11-20 14:30       ` Peter Jung
2024-11-21 15:58         ` Dhananjay Ugwekar
2025-01-12 13:42   ` Koichiro Den
2025-01-13  6:34     ` Dhananjay Ugwekar [this message]
2025-01-15 14:23       ` Koichiro Den
2025-01-20 11:42         ` Dhananjay Ugwekar
2025-01-28  8:11           ` Dhananjay Ugwekar
2025-01-29  3:03             ` Koichiro Den
2025-01-29  5:34               ` Dhananjay Ugwekar
2024-11-19 12:20 ` [PATCH v7 00/10] Add RAPL " Peter Zijlstra
2024-11-22  6:34   ` Dhananjay Ugwekar
2024-11-22 11:19     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=98f6108a-3a4f-4ad1-8f0a-a03264f7a2d7@amd.com \
    --to=dhananjay.ugwekar@amd.com \
    --cc=bp@alien8.dei \
    --cc=gautham.shenoy@amd.com \
    --cc=irogers@google.com \
    --cc=kan.liang@linux.intel.com \
    --cc=koichiro.den@canonical.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=rui.zhang@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox