All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mario Limonciello <superm1@kernel.org>
To: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>,
	"Gautham R . Shenoy" <gautham.shenoy@amd.com>,
	Perry Yuan <perry.yuan@amd.com>
Cc: "open list:X86 ARCHITECTURE (32-BIT AND 64-BIT)"
	<linux-kernel@vger.kernel.org>,
	"open list:CPU FREQUENCY SCALING FRAMEWORK"
	<linux-pm@vger.kernel.org>,
	Mario Limonciello <mario.limonciello@amd.com>
Subject: Re: [PATCH v3 04/18] cpufreq/amd-pstate: Move perf values into a union
Date: Mon, 24 Feb 2025 18:29:05 -0600	[thread overview]
Message-ID: <9db9da8f-859d-4e23-94ca-e14905c8c6c7@kernel.org> (raw)
In-Reply-To: <ccac287d-5bde-4b0d-a1d6-b1e8b5f4e6cb@amd.com>

On 2/19/2025 04:57, Dhananjay Ugwekar wrote:
> On 2/18/2025 3:36 AM, Mario Limonciello wrote:
>> From: Mario Limonciello <mario.limonciello@amd.com>
>>
>> By storing perf values in a union all the writes and reads can
>> be done atomically, removing the need for some concurrency protections.
>>
>> While making this change, also drop the cached frequency values,
>> using inline helpers to calculate them on demand from perf value.
>>
>> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>> v3:
>>   * Pick up tag
>> v2:
>>   * cache perf variable in unit tests
>>   * Drop unnecessary check from amd_pstate_update_min_max_limit()
>>   * Consistency with READ_ONCE()
>>   * Drop unneeded policy checks
>>   * add kdoc
>> ---
>>   drivers/cpufreq/amd-pstate-ut.c |  18 +--
>>   drivers/cpufreq/amd-pstate.c    | 195 ++++++++++++++++++--------------
>>   drivers/cpufreq/amd-pstate.h    |  49 +++++---
>>   3 files changed, 151 insertions(+), 111 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
>> index 445278cf40b61..ba3e06f349c6d 100644
>> --- a/drivers/cpufreq/amd-pstate-ut.c
>> +++ b/drivers/cpufreq/amd-pstate-ut.c
>> @@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32 index)
>>   	struct cppc_perf_caps cppc_perf;
>>   	struct cpufreq_policy *policy = NULL;
>>   	struct amd_cpudata *cpudata = NULL;
>> +	union perf_cached cur_perf;
>>   
>>   	for_each_possible_cpu(cpu) {
>>   		policy = cpufreq_cpu_get(cpu);
>> @@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32 index)
>>   			lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
>>   		}
>>   
>> -		if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) {
>> +		cur_perf = READ_ONCE(cpudata->perf);
>> +		if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
>>   			pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
>> -				__func__, cpu, highest_perf, cpudata->highest_perf);
>> +				__func__, cpu, highest_perf, cpudata->perf.highest_perf);
> 						  Can we use cur_perf.highest_perf here ?

Ack.

> 
>>   			goto skip_test;
>>   		}
>> -		if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) ||
>> -			(lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) ||
>> -			(lowest_perf != READ_ONCE(cpudata->lowest_perf))) {
>> +		if (nominal_perf != cur_perf.nominal_perf ||
>> +		   (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
>> +		   (lowest_perf != cur_perf.lowest_perf)) {
>>   			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
>>   			pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
>> -				__func__, cpu, nominal_perf, cpudata->nominal_perf,
>> -				lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
>> -				lowest_perf, cpudata->lowest_perf);
>> +				__func__, cpu, nominal_perf, cpudata->perf.nominal_perf,
>> +				lowest_nonlinear_perf, cpudata->perf.lowest_nonlinear_perf,
>> +				lowest_perf, cpudata->perf.lowest_perf);
> 			          Can we use cur_perf.(nominal/lowest_nonlinear/lowest)_perf here as well ?			

Ack.
			
> 
>>   			goto skip_test;
>>   		}
>>
> [Snip]
>> @@ -888,25 +896,24 @@ static u32 amd_pstate_get_transition_latency(unsigned int cpu)
>>   }
>>   
>>   /*
>> - * amd_pstate_init_freq: Initialize the max_freq, min_freq,
>> - *                       nominal_freq and lowest_nonlinear_freq for
>> - *                       the @cpudata object.
>> + * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
>> + *			 for the @cpudata object.
>>    *
>> - *  Requires: highest_perf, lowest_perf, nominal_perf and
>> - *            lowest_nonlinear_perf members of @cpudata to be
>> - *            initialized.
>> + * Requires: all perf members of @cpudata to be initialized.
>>    *
>> - *  Returns 0 on success, non-zero value on failure.
>> + * Returns 0 on success, non-zero value on failure.
>>    */
>>   static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
>>   {
>> -	int ret;
>>   	u32 min_freq, nominal_freq, lowest_nonlinear_freq;
>>   	struct cppc_perf_caps cppc_perf;
>> +	union perf_cached perf;
>> +	int ret;
>>   
>>   	ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
>>   	if (ret)
>>   		return ret;
>> +	perf = READ_ONCE(cpudata->perf);
>>   
>>   	if (quirks && quirks->nominal_freq)
>>   		nominal_freq = quirks->nominal_freq;
>> @@ -918,6 +925,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
>>   
>>   	if (quirks && quirks->lowest_freq) {
>>   		min_freq = quirks->lowest_freq;
>> +		perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
> 
> I think we forgot to write back this value to the cpudata->perf variable

Ack, great catch.

> 
>>   	} else
>>   		min_freq = cppc_perf.lowest_freq;
>>   	min_freq *= 1000;
>> @@ -934,7 +942,7 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
>>   		return -EINVAL;
>>   	}
>>   
>> -	lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
>> +	lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
>>   	WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
>>   
>>   	if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
> [Snip]
>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index 0149933692458..8421c83c07919 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -13,6 +13,34 @@
>>   /*********************************************************************
>>    *                        AMD P-state INTERFACE                       *
>>    *********************************************************************/
>> +
>> +/**
>> + * union perf_cached - A union to cache performance-related data.
>> + * @highest_perf: the maximum performance an individual processor may reach,
>> + *		  assuming ideal conditions
>> + *		  For platforms that do not support the preferred core feature, the
>> + *		  highest_pef may be configured with 166 or 255, to avoid max frequency
> 
> s/highest_pef/highest_perf/
> 
> Also, I think this statement is a bit confusing. How about:
> 
> "For platforms that support the preferred core feature, the highest_perf value may be
> configured to any value in the range 166-255 by the firmware (because the preferred
> core ranking is encoded in the highest_perf value). To maintain consistency across
> all platforms, we split the highest_perf and preferred core ranking values into
> cpudata->perf.highest_perf and cpudata->prefcore_ranking."

I like it, thanks.

> 
>> + *		  calculated wrongly. we take the fixed value as the highest_perf.
>> + * @nominal_perf: the maximum sustained performance level of the processor,
>> + *		  assuming ideal operating conditions
>> + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
>> + *			   savings are achieved
>> + * @lowest_perf: the absolute lowest performance level of the processor
>> + * @min_limit_perf: Cached value of the performance corresponding to policy->min
>> + * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + */
>> +union perf_cached {
>> +	struct {
>> +		u8	highest_perf;
>> +		u8	nominal_perf;
>> +		u8	lowest_nonlinear_perf;
>> +		u8	lowest_perf;
>> +		u8	min_limit_perf;
>> +		u8	max_limit_perf;
> 
> Just a thought, how about adding the "u8 desired_perf" (last requested) and "u8 prefcore_ranking"
> in this. We can pursue it as a separate patch if you want.
> 
> I think there is value in adding desired_perf at least, so that when we are caching the
> min, max limits in the perf_cached variable, desired perf level is also atomically
> updated into the shared cpudata structure.

Can you see if there is any performance advantage to caching these?
If there is, can you please do a follow up to my v5 series?

It's going to mean another write in amd_pstate_update() potentially.

> 
> Thanks,
> Dhananjay
> 
>> +	};
>> +	u64	val;
>> +};
>> +
>>   /**
>>    * struct  amd_aperf_mperf
>>    * @aperf: actual performance frequency clock count
>> @@ -30,20 +58,9 @@ struct amd_aperf_mperf {
>>    * @cpu: CPU number
>>    * @req: constraint request to apply
>>    * @cppc_req_cached: cached performance request hints
>> - * @highest_perf: the maximum performance an individual processor may reach,
>> - *		  assuming ideal conditions
>> - *		  For platforms that do not support the preferred core feature, the
>> - *		  highest_pef may be configured with 166 or 255, to avoid max frequency
>> - *		  calculated wrongly. we take the fixed value as the highest_perf.
>> - * @nominal_perf: the maximum sustained performance level of the processor,
>> - *		  assuming ideal operating conditions
>> - * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
>> - *			   savings are achieved
>> - * @lowest_perf: the absolute lowest performance level of the processor
>> + * @perf: cached performance-related data
>>    * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
>>    * 		  priority.
>> - * @min_limit_perf: Cached value of the performance corresponding to policy->min
>> - * @max_limit_perf: Cached value of the performance corresponding to policy->max
>>    * @min_limit_freq: Cached value of policy->min (in khz)
>>    * @max_limit_freq: Cached value of policy->max (in khz)
>>    * @nominal_freq: the frequency (in khz) that mapped to nominal_perf
>> @@ -68,13 +85,9 @@ struct amd_cpudata {
>>   	struct	freq_qos_request req[2];
>>   	u64	cppc_req_cached;
>>   
>> -	u8	highest_perf;
>> -	u8	nominal_perf;
>> -	u8	lowest_nonlinear_perf;
>> -	u8	lowest_perf;
>> +	union perf_cached perf;
>> +
>>   	u8	prefcore_ranking;
>> -	u8	min_limit_perf;
>> -	u8	max_limit_perf;
>>   	u32	min_limit_freq;
>>   	u32	max_limit_freq;
>>   	u32	nominal_freq;
> 


  reply	other threads:[~2025-02-25  2:35 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-17 22:06 [PATCH v3 00/18] amd-pstate cleanups Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 01/18] cpufreq/amd-pstate: Invalidate cppc_req_cached during suspend Mario Limonciello
2025-02-19  5:24   ` Gautham R. Shenoy
2025-02-19 17:21     ` Mario Limonciello
2025-02-19  6:12   ` Dhananjay Ugwekar
2025-02-19  6:37     ` Dhananjay Ugwekar
2025-02-17 22:06 ` [PATCH v3 02/18] cpufreq/amd-pstate: Show a warning when a CPU fails to setup Mario Limonciello
2025-02-19  6:14   ` Dhananjay Ugwekar
2025-02-17 22:06 ` [PATCH v3 03/18] cpufreq/amd-pstate: Drop min and max cached frequencies Mario Limonciello
2025-02-19  5:25   ` Gautham R. Shenoy
2025-02-19  8:00   ` Dhananjay Ugwekar
2025-02-19 17:29     ` Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 04/18] cpufreq/amd-pstate: Move perf values into a union Mario Limonciello
2025-02-19 10:57   ` Dhananjay Ugwekar
2025-02-25  0:29     ` Mario Limonciello [this message]
2025-02-25  4:28       ` Dhananjay Ugwekar
2025-02-17 22:06 ` [PATCH v3 05/18] cpufreq/amd-pstate: Overhaul locking Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 06/18] cpufreq/amd-pstate: Drop `cppc_cap1_cached` Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 07/18] cpufreq/amd-pstate-ut: Use _free macro to free put policy Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 08/18] cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the same Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 09/18] cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums Mario Limonciello
2025-02-17 22:06 ` [PATCH v3 10/18] cpufreq/amd-pstate-ut: Run on all of the correct CPUs Mario Limonciello
2025-02-19  5:26   ` Gautham R. Shenoy
2025-02-17 22:07 ` [PATCH v3 11/18] cpufreq/amd-pstate-ut: Adjust variable scope for amd_pstate_ut_check_freq() Mario Limonciello
2025-02-24  6:12   ` Dhananjay Ugwekar
2025-02-17 22:07 ` [PATCH v3 12/18] cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 13/18] cpufreq/amd-pstate: Cache CPPC request in shared mem case too Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 14/18] cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and *_set_epp functions Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 15/18] cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 16/18] cpufreq/amd-pstate: Drop debug statements for policy setting Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 17/18] cpufreq/amd-pstate: Rework CPPC enabling Mario Limonciello
2025-02-19 15:25   ` Gautham R. Shenoy
2025-02-19 18:05     ` Mario Limonciello
2025-02-17 22:07 ` [PATCH v3 18/18] cpufreq/amd-pstate: Stop caching EPP Mario Limonciello
2025-02-19 15:41   ` Gautham R. Shenoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9db9da8f-859d-4e23-94ca-e14905c8c6c7@kernel.org \
    --to=superm1@kernel.org \
    --cc=Dhananjay.Ugwekar@amd.com \
    --cc=gautham.shenoy@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mario.limonciello@amd.com \
    --cc=perry.yuan@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.