Linux Power Management development
 help / color / mirror / Atom feed
* Re: [PATCH] thermal/of: Move OF code where it belongs to
From: Rafael J. Wysocki @ 2026-04-07 17:10 UTC (permalink / raw)
  To: Daniel Lezcano
  Cc: rafael, Daniel Lezcano, Zhang Rui, Lukasz Luba, open list:THERMAL,
	open list
In-Reply-To: <20260407155110.3920703-1-daniel.lezcano@kernel.org>

On Tue, Apr 7, 2026 at 5:51 PM Daniel Lezcano <daniel.lezcano@kernel.org> wrote:
>
> From: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
>
> The functions:
>  - thermal_of_cooling_device_register()
>  - devm_thermal_of_cooling_device_register()
>
>  are related to thermal-of but they are implemented in
>  thermal-core. Move these functions to the right file.
>
> Pure move patch.
>
> No functional change intended.
>
> Signed-off-by: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
> Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
> ---
>  drivers/thermal/thermal_core.c | 75 +---------------------------------
>  drivers/thermal/thermal_core.h |  5 +++
>  drivers/thermal/thermal_of.c   | 72 ++++++++++++++++++++++++++++++++
>  3 files changed, 78 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index b7d706ed7ed9..f0049cff1128 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1054,7 +1054,7 @@ static void thermal_cooling_device_init_complete(struct thermal_cooling_device *
>   * Return: a pointer to the created struct thermal_cooling_device or an
>   * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
>   */
> -static struct thermal_cooling_device *
> +struct thermal_cooling_device *
>  __thermal_cooling_device_register(struct device_node *np,
>                                   const char *type, void *devdata,
>                                   const struct thermal_cooling_device_ops *ops)
> @@ -1162,79 +1162,6 @@ thermal_cooling_device_register(const char *type, void *devdata,
>  }
>  EXPORT_SYMBOL_GPL(thermal_cooling_device_register);
>
> -/**
> - * thermal_of_cooling_device_register() - register an OF thermal cooling device
> - * @np:                a pointer to a device tree node.
> - * @type:      the thermal cooling device type.
> - * @devdata:   device private data.
> - * @ops:               standard thermal cooling devices callbacks.
> - *
> - * This function will register a cooling device with device tree node reference.
> - * This interface function adds a new thermal cooling device (fan/processor/...)
> - * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
> - * to all the thermal zone devices registered at the same time.
> - *
> - * Return: a pointer to the created struct thermal_cooling_device or an
> - * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
> - */
> -struct thermal_cooling_device *
> -thermal_of_cooling_device_register(struct device_node *np,
> -                                  const char *type, void *devdata,
> -                                  const struct thermal_cooling_device_ops *ops)
> -{
> -       return __thermal_cooling_device_register(np, type, devdata, ops);
> -}
> -EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
> -
> -static void thermal_cooling_device_release(struct device *dev, void *res)
> -{
> -       thermal_cooling_device_unregister(
> -                               *(struct thermal_cooling_device **)res);
> -}
> -
> -/**
> - * devm_thermal_of_cooling_device_register() - register an OF thermal cooling
> - *                                            device
> - * @dev:       a valid struct device pointer of a sensor device.
> - * @np:                a pointer to a device tree node.
> - * @type:      the thermal cooling device type.
> - * @devdata:   device private data.
> - * @ops:       standard thermal cooling devices callbacks.
> - *
> - * This function will register a cooling device with device tree node reference.
> - * This interface function adds a new thermal cooling device (fan/processor/...)
> - * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
> - * to all the thermal zone devices registered at the same time.
> - *
> - * Return: a pointer to the created struct thermal_cooling_device or an
> - * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
> - */
> -struct thermal_cooling_device *
> -devm_thermal_of_cooling_device_register(struct device *dev,
> -                               struct device_node *np,
> -                               const char *type, void *devdata,
> -                               const struct thermal_cooling_device_ops *ops)
> -{
> -       struct thermal_cooling_device **ptr, *tcd;
> -
> -       ptr = devres_alloc(thermal_cooling_device_release, sizeof(*ptr),
> -                          GFP_KERNEL);
> -       if (!ptr)
> -               return ERR_PTR(-ENOMEM);
> -
> -       tcd = __thermal_cooling_device_register(np, type, devdata, ops);
> -       if (IS_ERR(tcd)) {
> -               devres_free(ptr);
> -               return tcd;
> -       }
> -
> -       *ptr = tcd;
> -       devres_add(dev, ptr);
> -
> -       return tcd;
> -}
> -EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register);
> -
>  static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev)
>  {
>         struct thermal_cooling_device *pos = NULL;
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index d3acff602f9c..bdd59947b24f 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -269,6 +269,11 @@ void thermal_zone_device_critical_shutdown(struct thermal_zone_device *tz);
>  void thermal_governor_update_tz(struct thermal_zone_device *tz,
>                                 enum thermal_notify_event reason);
>
> +struct thermal_cooling_device *
> +__thermal_cooling_device_register(struct device_node *np,
> +                                 const char *type, void *devdata,
> +                                 const struct thermal_cooling_device_ops *ops);
> +
>  /* Helpers */
>  #define for_each_trip_desc(__tz, __td) \
>         for (__td = __tz->trips; __td - __tz->trips < __tz->num_trips; __td++)
> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> index 99085c806a1f..398157e740fc 100644
> --- a/drivers/thermal/thermal_of.c
> +++ b/drivers/thermal/thermal_of.c
> @@ -510,3 +510,75 @@ void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_dev
>                                devm_thermal_of_zone_match, tz));
>  }
>  EXPORT_SYMBOL_GPL(devm_thermal_of_zone_unregister);
> +
> +/**
> + * thermal_of_cooling_device_register() - register an OF thermal cooling device
> + * @np:                a pointer to a device tree node.
> + * @type:      the thermal cooling device type.
> + * @devdata:   device private data.
> + * @ops:               standard thermal cooling devices callbacks.
> + *
> + * This function will register a cooling device with device tree node reference.
> + * This interface function adds a new thermal cooling device (fan/processor/...)
> + * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
> + * to all the thermal zone devices registered at the same time.
> + *
> + * Return: a pointer to the created struct thermal_cooling_device or an
> + * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
> + */
> +struct thermal_cooling_device *
> +thermal_of_cooling_device_register(struct device_node *np,
> +                                  const char *type, void *devdata,
> +                                  const struct thermal_cooling_device_ops *ops)
> +{
> +       return __thermal_cooling_device_register(np, type, devdata, ops);
> +}
> +EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
> +
> +static void thermal_cooling_device_release(struct device *dev, void *res)
> +{
> +       thermal_cooling_device_unregister(*(struct thermal_cooling_device **)res);
> +}
> +
> +/**
> + * devm_thermal_of_cooling_device_register() - register an OF thermal cooling
> + *                                            device
> + * @dev:       a valid struct device pointer of a sensor device.
> + * @np:                a pointer to a device tree node.
> + * @type:      the thermal cooling device type.
> + * @devdata:   device private data.
> + * @ops:       standard thermal cooling devices callbacks.
> + *
> + * This function will register a cooling device with device tree node reference.
> + * This interface function adds a new thermal cooling device (fan/processor/...)
> + * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
> + * to all the thermal zone devices registered at the same time.
> + *
> + * Return: a pointer to the created struct thermal_cooling_device or an
> + * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
> + */
> +struct thermal_cooling_device *
> +devm_thermal_of_cooling_device_register(struct device *dev,
> +                                       struct device_node *np,
> +                                       const char *type, void *devdata,
> +                                       const struct thermal_cooling_device_ops *ops)
> +{
> +       struct thermal_cooling_device **ptr, *tcd;
> +
> +       ptr = devres_alloc(thermal_cooling_device_release, sizeof(*ptr),
> +                          GFP_KERNEL);
> +       if (!ptr)
> +               return ERR_PTR(-ENOMEM);
> +
> +       tcd = __thermal_cooling_device_register(np, type, devdata, ops);
> +       if (IS_ERR(tcd)) {
> +               devres_free(ptr);
> +               return tcd;
> +       }
> +
> +       *ptr = tcd;
> +       devres_add(dev, ptr);
> +
> +       return tcd;
> +}
> +EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register);
> --

Applied as 7.1 material, thanks!

^ permalink raw reply

* Re: [PATCH] cpufreq/amd-pstate: Add POWER_SUPPLY dependency for dynamic EPP
From: K Prateek Nayak @ 2026-04-07 17:08 UTC (permalink / raw)
  To: Mario Limonciello
  Cc: Perry Yuan, open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:CPU FREQUENCY SCALING FRAMEWORK, kernel test robot
In-Reply-To: <20260407145458.163727-1-mario.limonciello@amd.com>

Hello Mario,

On 4/7/2026 8:24 PM, Mario Limonciello wrote:
>  config X86_AMD_PSTATE_DYNAMIC_EPP
>  	bool "AMD Processor P-State dynamic EPP support"
> -	depends on X86_AMD_PSTATE
> +	depends on X86_AMD_PSTATE && POWER_SUPPLY

This config only controls the default selection right? The offending
power_supply_{reg,unreg}_notifier() isn't guarded behind
#ifdef CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP so we'll still run into this.

I think X86_AMD_PSTATE doing a "select POWER_SUPPLY" should cure this.
Thoughts?

>  	default n
>  	help
>  	  Allow the kernel to dynamically change the energy performance

-- 
Thanks and Regards,
Prateek


^ permalink raw reply

* Re: [PATCH] cpufreq/amd-pstate: Add POWER_SUPPLY dependency for dynamic EPP
From: Mario Limonciello @ 2026-04-07 17:09 UTC (permalink / raw)
  To: K Prateek Nayak
  Cc: Perry Yuan, open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:CPU FREQUENCY SCALING FRAMEWORK, kernel test robot
In-Reply-To: <50b3313d-aa85-4cb8-801a-e40e3168383a@amd.com>



On 4/7/26 12:08, K Prateek Nayak wrote:
> Hello Mario,
> 
> On 4/7/2026 8:24 PM, Mario Limonciello wrote:
>>   config X86_AMD_PSTATE_DYNAMIC_EPP
>>   	bool "AMD Processor P-State dynamic EPP support"
>> -	depends on X86_AMD_PSTATE
>> +	depends on X86_AMD_PSTATE && POWER_SUPPLY
> 
> This config only controls the default selection right? The offending
> power_supply_{reg,unreg}_notifier() isn't guarded behind
> #ifdef CONFIG_X86_AMD_PSTATE_DYNAMIC_EPP so we'll still run into this.
> 
> I think X86_AMD_PSTATE doing a "select POWER_SUPPLY" should cure this.
> Thoughts?
> 

Oh that's a really good point, thanks.  I'll send another patch as a 
follow up.


^ permalink raw reply

* Re: [PATCH] cpufreq/amd-pstate: Add POWER_SUPPLY dependency for dynamic EPP
From: Rafael J. Wysocki @ 2026-04-07 17:07 UTC (permalink / raw)
  To: Mario Limonciello
  Cc: K Prateek Nayak, Perry Yuan,
	open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:CPU FREQUENCY SCALING FRAMEWORK, kernel test robot
In-Reply-To: <20260407145458.163727-1-mario.limonciello@amd.com>

On Tue, Apr 7, 2026 at 4:55 PM Mario Limonciello
<mario.limonciello@amd.com> wrote:
>
> The dynamic EPP feature uses power_supply_reg_notifier() and
> power_supply_unreg_notifier() but doesn't declare a dependency on
> POWER_SUPPLY, causing linker errors when POWER_SUPPLY is not enabled.
>
> Add POWER_SUPPLY to the depends line to fix the build issue.
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202604040742.ySEdkuAa-lkp@intel.com/
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>  drivers/cpufreq/Kconfig.x86 | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
> index a0dbb9808ae9..53e2702e6f1c 100644
> --- a/drivers/cpufreq/Kconfig.x86
> +++ b/drivers/cpufreq/Kconfig.x86
> @@ -71,7 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE
>
>  config X86_AMD_PSTATE_DYNAMIC_EPP
>         bool "AMD Processor P-State dynamic EPP support"
> -       depends on X86_AMD_PSTATE
> +       depends on X86_AMD_PSTATE && POWER_SUPPLY
>         default n
>         help
>           Allow the kernel to dynamically change the energy performance
> --

Applied, thanks!

^ permalink raw reply

* Re: [PATCH v2] dt-bindings: thermal: idle: Complete the example code
From: Conor Dooley @ 2026-04-07 16:24 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Rafael J. Wysocki, Daniel Lezcano, Zhang Rui, Lukasz Luba,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, linux-pm,
	devicetree, linux-kernel
In-Reply-To: <20260407053957.10508-2-krzysztof.kozlowski@oss.qualcomm.com>

[-- Attachment #1: Type: text/plain, Size: 75 bytes --]

Acked-by: Conor Dooley <conor.dooley@microchip.com>
pw-bot: not-applicable

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* Re: [PATCH v2 2/2] interconnect: qcom: add Hawi interconnect provider driver
From: Mike Tipton @ 2026-04-07 16:18 UTC (permalink / raw)
  To: Vivek Aknurwar
  Cc: Georgi Djakov, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	linux-arm-msm, linux-pm, devicetree, linux-kernel,
	Krzysztof Kozlowski
In-Reply-To: <20260406-icc-hawi-v2-2-6cfee87a1d25@oss.qualcomm.com>

On Mon, Apr 06, 2026 at 04:04:42PM -0700, Vivek Aknurwar wrote:
> Add driver for the Qualcomm interconnect buses found in Hawi
> based platforms. The topology consists of several NoCs that are
> controlled by a remote processor that collects the aggregated
> bandwidth for each master-slave pair.
> 
> Signed-off-by: Vivek Aknurwar <vivek.aknurwar@oss.qualcomm.com>
> Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
> ---

Reviewed-by: Mike Tipton <mike.tipton@oss.qualcomm.com>

Thanks,
Mike

^ permalink raw reply

* Re: [patch 00/12] hrtimers: Prevent hrtimer interrupt starvation
From: Thomas Gleixner @ 2026-04-07 16:17 UTC (permalink / raw)
  To: LKML
  Cc: Calvin Owens, Peter Zijlstra, Anna-Maria Behnsen,
	Frederic Weisbecker, Ingo Molnar, John Stultz, Stephen Boyd,
	Alexander Viro, Christian Brauner, Jan Kara, linux-fsdevel,
	Sebastian Reichel, linux-pm, Pablo Neira Ayuso, Florian Westphal,
	Phil Sutter, netfilter-devel, coreteam
In-Reply-To: <87wlyi4yrj.ffs@tglx>

On Tue, Apr 07 2026 at 16:43, Thomas Gleixner wrote:
> On Tue, Apr 07 2026 at 10:54, Thomas Gleixner wrote:
>> There needs to be some discussion about the scope of backporting. The first
>> patch preventing the stall is obviously a backport candidate. The remaining
>> series can be obviously argued about, but in my opinion it should be
>> backported as well as it prevents stupid or malicious user space from
>> generating tons of pointless timer interrupts.
>
> Peter and I just discussed it over IRC. With the clockevents prevention
> in place, the effect of stupid/malicious code is pretty much affecting
> only the user space task itself. As the timer is forced to expire once
> the clockevent device has been force armed, it won't have other side
> effects as device interrupts or IPIs are not blocked out and in the
> worst case marginally delayed by the high frequency timer interrupt.
>
> Once the task is scheduled out that subsides as there is nothing which
> re-arms the timer anymore.
>
> So we should be fine with backporting the clockevents fix and leave the
> other parts of the series for upstream only. I still need to investigate
> how all of that affects the pending changes vs. TSC deadline timer (and
> similar devices) which are not going to reach that modified clockevents
> code anymore.

It's pretty much the same as the above with the difference that a timer
armed in the past will result in an instantaneous interrupt as the
coupled event devices must provide a less than or equal comparator. So
again the task can only delay itself with hrtimer interrupts.

Thanks,

        tglx



^ permalink raw reply

* Re: [PATCH v2 1/2] dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in Hawi SoC
From: Mike Tipton @ 2026-04-07 16:16 UTC (permalink / raw)
  To: Vivek Aknurwar
  Cc: Georgi Djakov, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	linux-arm-msm, linux-pm, devicetree, linux-kernel
In-Reply-To: <20260406-icc-hawi-v2-1-6cfee87a1d25@oss.qualcomm.com>

On Mon, Apr 06, 2026 at 04:04:41PM -0700, Vivek Aknurwar wrote:
> Document the RPMh Network-On-Chip Interconnect of the Hawi platform.
> 
> Signed-off-by: Vivek Aknurwar <vivek.aknurwar@oss.qualcomm.com>
> ---
>  .../bindings/interconnect/qcom,hawi-rpmh.yaml      | 131 ++++++++++++++++
>  include/dt-bindings/interconnect/qcom,hawi-rpmh.h  | 164 +++++++++++++++++++++
>  2 files changed, 295 insertions(+)
> 

[..]

> +
> +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_HAWI_H
> +#define __DT_BINDINGS_INTERCONNECT_QCOM_HAWI_H
> +
> +#define MASTER_QSPI_0				0
> +#define MASTER_QUP_2				1
> +#define MASTER_QUP_3				2
> +#define MASTER_QUP_4				3
> +#define MASTER_CRYPTO				4
> +#define MASTER_IPA				5
> +#define MASTER_QUP_1				6
> +#define MASTER_SOCCP_PROC				7
> +#define MASTER_QDSS_ETR				8
> +#define MASTER_QDSS_ETR_1				9
> +#define MASTER_SDCC_2				10
> +#define MASTER_SDCC_4				11
> +#define MASTER_UFS_MEM				12
> +#define MASTER_USB3				13
> +#define SLAVE_A1NOC_SNOC				14

Let's align these values.

Thanks,
Mike

^ permalink raw reply

* Re: [PATCH v2 1/2] dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in Hawi SoC
From: Mike Tipton @ 2026-04-07 16:11 UTC (permalink / raw)
  To: Krzysztof Kozlowski
  Cc: Vivek Aknurwar, Georgi Djakov, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, linux-arm-msm, linux-pm, devicetree, linux-kernel
In-Reply-To: <20260407-prehistoric-inescapable-loon-cf0dd0@quoll>

Hi Krzysztof,

On Tue, Apr 07, 2026 at 09:42:09AM +0200, Krzysztof Kozlowski wrote:
> On Mon, Apr 06, 2026 at 04:04:41PM -0700, Vivek Aknurwar wrote:
> > Document the RPMh Network-On-Chip Interconnect of the Hawi platform.
> > 
> > Signed-off-by: Vivek Aknurwar <vivek.aknurwar@oss.qualcomm.com>
> 
> Same fixes needed I wrote to Hawi upstreaming lead in private. That's
> why I gave that feedback (privately) very fast, to avoid repeating the
> mistake. So since private feedback was ignored, you have now review on
> the lists.
> 
> All Qualcomm previous patches are poor:
> 
> document the RPMh Network-On-Chip interconnect in Mahua SoC
> document the RPMh Network-On-Chip interconnect in Eliza SoC
> document the RPMh Network-On-Chip interconnect in Kaanapali SoC
> document the RPMh Network-On-Chip interconnect in Glymur SoC
> 
> Made by the same people.
> 
> Why can't you look how Neil did it for SM8650? Or Luca recently for
> Milos? Or if you cannot look at non-qcom commits then Rajendra for X1E?

I believe you're mainly referring to the lack of "Qualcomm" in the
commit summary and description? I agree that should have been added.
That's understood and it was overlooked for this patch. Most of our
other patches inherently have "qcom" in the area prefixes, but yes, when
that isn't present we should clarify that this is Qualcomm-specific.

Thanks,
Mike

^ permalink raw reply

* Re: [patch 01/12] clockevents: Prevent timer interrupt starvation
From: Thomas Gleixner @ 2026-04-07 16:08 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Calvin Owens, Peter Zijlstra, Anna-Maria Behnsen,
	Ingo Molnar, John Stultz, Stephen Boyd, Alexander Viro,
	Christian Brauner, Jan Kara, linux-fsdevel, Sebastian Reichel,
	linux-pm, Pablo Neira Ayuso, Florian Westphal, Phil Sutter,
	netfilter-devel, coreteam
In-Reply-To: <adUN5Y9-1kx5FVHd@localhost.localdomain>

On Tue, Apr 07 2026 at 16:00, Frederic Weisbecker wrote:
> Le Tue, Apr 07, 2026 at 10:54:17AM +0200, Thomas Gleixner a écrit :
>> From: Thomas Gleixner <tglx@kernel.org>
>> 
>> Calvin reported an odd NMI watchdog lockup which claims that the CPU locked
>> up in user space. He provided a reproducer, which sets up a timerfd based
>> timer and then rearms it in a loop with an absolute expiry time of 1ns.
>> 
>> As the expiry time is in the past, the timer ends up as the first expiring
>> timer in the per CPU hrtimer base and the clockevent device is programmed
>> with the minimum delta value. If the machine is fast enough, this ends up
>> in an endless loop of programming the delta value to the minimum value
>> defined by the clock event device, before the timer interrupt can fire,
>> which starves the interrupt and consequently triggers the lockup detector
>> because the hrtimer callback of the lockup mechanism is never invoked.
>> 
>> As a first step to prevent this, avoid reprogramming the clock event device
>> when:
>>      - a forced minimum delta event is pending
>>      - the new expiry delta is less than or equal to the minimum delta
>> 
>> Thanks to Calvin for providing the reproducer and to Borislav for testing
>> and providing data from his Zen5 machine.
>> 
>> The problem is not limited to Zen5, but depending on the underlying
>> clock event device (e.g. TSC deadline timer on Intel) and the CPU speed
>> not necessarily observable.
>> 
>> This change serves only as the last resort and further changes will be made
>> to prevent this scenario earlier in the call chain as far as possible.
>> 
>> Fixes: d316c57ff6bf ("[PATCH] clockevents: add core functionality")
>> Reported-by: Calvin Owens <calvin@wbinvd.org>
>> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
>> Cc: Frederic Weisbecker <frederic@kernel.org>
>> Cc: Ingo Molnar <mingo@kernel.org>
>> Link: https://lore.kernel.org/lkml/acMe-QZUel-bBYUh@mozart.vkv.me/
>> ---
>> V2: Simplified the clockevents code - Peter
>
> Isn't it possible to rely on dev->next_event instead? In the above scenario,
> subsequent 0 delta would not reprogram if dev->next_event is already below
> the new call to ktime_get() ?

It does if force is set and that is set when hrtimer calls into it:

	if (delta <= 0)
		return force ? clockevents_program_min_delta(dev) : -ETIME;

I can't change that for various reasons.

But we always need the flag which tells us that the programming was
forced in order to prevent the above scenario. And delta <= 0 is not the
only way to achieve that. You can have a delta > 0 and < min_delta
and achieve the same effect. That needs more effort on the callsite, but
it's trivially doable as the system call to reprogram time is pretty
constant.

As I had to introduce the flag and prevent the other scenario I just
consolidated everything into one code path.

Thanks,

        tglx

^ permalink raw reply

* [PATCH] thermal/of: Move OF code where it belongs to
From: Daniel Lezcano @ 2026-04-07 15:51 UTC (permalink / raw)
  To: rafael; +Cc: Daniel Lezcano, Zhang Rui, Lukasz Luba, open list:THERMAL,
	open list

From: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>

The functions:
 - thermal_of_cooling_device_register()
 - devm_thermal_of_cooling_device_register()

 are related to thermal-of but they are implemented in
 thermal-core. Move these functions to the right file.

Pure move patch.

No functional change intended.

Signed-off-by: Daniel Lezcano <daniel.lezcano@oss.qualcomm.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/thermal_core.c | 75 +---------------------------------
 drivers/thermal/thermal_core.h |  5 +++
 drivers/thermal/thermal_of.c   | 72 ++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 74 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index b7d706ed7ed9..f0049cff1128 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1054,7 +1054,7 @@ static void thermal_cooling_device_init_complete(struct thermal_cooling_device *
  * Return: a pointer to the created struct thermal_cooling_device or an
  * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
  */
-static struct thermal_cooling_device *
+struct thermal_cooling_device *
 __thermal_cooling_device_register(struct device_node *np,
 				  const char *type, void *devdata,
 				  const struct thermal_cooling_device_ops *ops)
@@ -1162,79 +1162,6 @@ thermal_cooling_device_register(const char *type, void *devdata,
 }
 EXPORT_SYMBOL_GPL(thermal_cooling_device_register);
 
-/**
- * thermal_of_cooling_device_register() - register an OF thermal cooling device
- * @np:		a pointer to a device tree node.
- * @type:	the thermal cooling device type.
- * @devdata:	device private data.
- * @ops:		standard thermal cooling devices callbacks.
- *
- * This function will register a cooling device with device tree node reference.
- * This interface function adds a new thermal cooling device (fan/processor/...)
- * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
- * to all the thermal zone devices registered at the same time.
- *
- * Return: a pointer to the created struct thermal_cooling_device or an
- * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
- */
-struct thermal_cooling_device *
-thermal_of_cooling_device_register(struct device_node *np,
-				   const char *type, void *devdata,
-				   const struct thermal_cooling_device_ops *ops)
-{
-	return __thermal_cooling_device_register(np, type, devdata, ops);
-}
-EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
-
-static void thermal_cooling_device_release(struct device *dev, void *res)
-{
-	thermal_cooling_device_unregister(
-				*(struct thermal_cooling_device **)res);
-}
-
-/**
- * devm_thermal_of_cooling_device_register() - register an OF thermal cooling
- *					       device
- * @dev:	a valid struct device pointer of a sensor device.
- * @np:		a pointer to a device tree node.
- * @type:	the thermal cooling device type.
- * @devdata:	device private data.
- * @ops:	standard thermal cooling devices callbacks.
- *
- * This function will register a cooling device with device tree node reference.
- * This interface function adds a new thermal cooling device (fan/processor/...)
- * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
- * to all the thermal zone devices registered at the same time.
- *
- * Return: a pointer to the created struct thermal_cooling_device or an
- * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
- */
-struct thermal_cooling_device *
-devm_thermal_of_cooling_device_register(struct device *dev,
-				struct device_node *np,
-				const char *type, void *devdata,
-				const struct thermal_cooling_device_ops *ops)
-{
-	struct thermal_cooling_device **ptr, *tcd;
-
-	ptr = devres_alloc(thermal_cooling_device_release, sizeof(*ptr),
-			   GFP_KERNEL);
-	if (!ptr)
-		return ERR_PTR(-ENOMEM);
-
-	tcd = __thermal_cooling_device_register(np, type, devdata, ops);
-	if (IS_ERR(tcd)) {
-		devres_free(ptr);
-		return tcd;
-	}
-
-	*ptr = tcd;
-	devres_add(dev, ptr);
-
-	return tcd;
-}
-EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register);
-
 static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev)
 {
 	struct thermal_cooling_device *pos = NULL;
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d3acff602f9c..bdd59947b24f 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -269,6 +269,11 @@ void thermal_zone_device_critical_shutdown(struct thermal_zone_device *tz);
 void thermal_governor_update_tz(struct thermal_zone_device *tz,
 				enum thermal_notify_event reason);
 
+struct thermal_cooling_device *
+__thermal_cooling_device_register(struct device_node *np,
+				  const char *type, void *devdata,
+				  const struct thermal_cooling_device_ops *ops);
+
 /* Helpers */
 #define for_each_trip_desc(__tz, __td)	\
 	for (__td = __tz->trips; __td - __tz->trips < __tz->num_trips; __td++)
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 99085c806a1f..398157e740fc 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -510,3 +510,75 @@ void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_dev
 			       devm_thermal_of_zone_match, tz));
 }
 EXPORT_SYMBOL_GPL(devm_thermal_of_zone_unregister);
+
+/**
+ * thermal_of_cooling_device_register() - register an OF thermal cooling device
+ * @np:		a pointer to a device tree node.
+ * @type:	the thermal cooling device type.
+ * @devdata:	device private data.
+ * @ops:		standard thermal cooling devices callbacks.
+ *
+ * This function will register a cooling device with device tree node reference.
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+thermal_of_cooling_device_register(struct device_node *np,
+				   const char *type, void *devdata,
+				   const struct thermal_cooling_device_ops *ops)
+{
+	return __thermal_cooling_device_register(np, type, devdata, ops);
+}
+EXPORT_SYMBOL_GPL(thermal_of_cooling_device_register);
+
+static void thermal_cooling_device_release(struct device *dev, void *res)
+{
+	thermal_cooling_device_unregister(*(struct thermal_cooling_device **)res);
+}
+
+/**
+ * devm_thermal_of_cooling_device_register() - register an OF thermal cooling
+ *					       device
+ * @dev:	a valid struct device pointer of a sensor device.
+ * @np:		a pointer to a device tree node.
+ * @type:	the thermal cooling device type.
+ * @devdata:	device private data.
+ * @ops:	standard thermal cooling devices callbacks.
+ *
+ * This function will register a cooling device with device tree node reference.
+ * This interface function adds a new thermal cooling device (fan/processor/...)
+ * to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ * to all the thermal zone devices registered at the same time.
+ *
+ * Return: a pointer to the created struct thermal_cooling_device or an
+ * ERR_PTR. Caller must check return value with IS_ERR*() helpers.
+ */
+struct thermal_cooling_device *
+devm_thermal_of_cooling_device_register(struct device *dev,
+					struct device_node *np,
+					const char *type, void *devdata,
+					const struct thermal_cooling_device_ops *ops)
+{
+	struct thermal_cooling_device **ptr, *tcd;
+
+	ptr = devres_alloc(thermal_cooling_device_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	tcd = __thermal_cooling_device_register(np, type, devdata, ops);
+	if (IS_ERR(tcd)) {
+		devres_free(ptr);
+		return tcd;
+	}
+
+	*ptr = tcd;
+	devres_add(dev, ptr);
+
+	return tcd;
+}
+EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register);
-- 
2.43.0


^ permalink raw reply related

* [PATCH] cpufreq/amd-pstate: Add POWER_SUPPLY dependency for dynamic EPP
From: Mario Limonciello @ 2026-04-07 14:54 UTC (permalink / raw)
  To: K Prateek Nayak
  Cc: Perry Yuan, open list:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	open list:CPU FREQUENCY SCALING FRAMEWORK, Mario Limonciello,
	kernel test robot

The dynamic EPP feature uses power_supply_reg_notifier() and
power_supply_unreg_notifier() but doesn't declare a dependency on
POWER_SUPPLY, causing linker errors when POWER_SUPPLY is not enabled.

Add POWER_SUPPLY to the depends line to fix the build issue.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202604040742.ySEdkuAa-lkp@intel.com/
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/cpufreq/Kconfig.x86 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index a0dbb9808ae9..53e2702e6f1c 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -71,7 +71,7 @@ config X86_AMD_PSTATE_DEFAULT_MODE
 
 config X86_AMD_PSTATE_DYNAMIC_EPP
 	bool "AMD Processor P-State dynamic EPP support"
-	depends on X86_AMD_PSTATE
+	depends on X86_AMD_PSTATE && POWER_SUPPLY
 	default n
 	help
 	  Allow the kernel to dynamically change the energy performance
-- 
2.43.0


^ permalink raw reply related

* Re: [patch 00/12] hrtimers: Prevent hrtimer interrupt starvation
From: Thomas Gleixner @ 2026-04-07 14:43 UTC (permalink / raw)
  To: LKML
  Cc: Calvin Owens, Peter Zijlstra, Anna-Maria Behnsen,
	Frederic Weisbecker, Ingo Molnar, John Stultz, Stephen Boyd,
	Alexander Viro, Christian Brauner, Jan Kara, linux-fsdevel,
	Sebastian Reichel, linux-pm, Pablo Neira Ayuso, Florian Westphal,
	Phil Sutter, netfilter-devel, coreteam
In-Reply-To: <20260407083219.478203185@kernel.org>

On Tue, Apr 07 2026 at 10:54, Thomas Gleixner wrote:
> There needs to be some discussion about the scope of backporting. The first
> patch preventing the stall is obviously a backport candidate. The remaining
> series can be obviously argued about, but in my opinion it should be
> backported as well as it prevents stupid or malicious user space from
> generating tons of pointless timer interrupts.

Peter and I just discussed it over IRC. With the clockevents prevention
in place, the effect of stupid/malicious code is pretty much affecting
only the user space task itself. As the timer is forced to expire once
the clockevent device has been force armed, it won't have other side
effects as device interrupts or IPIs are not blocked out and in the
worst case marginally delayed by the high frequency timer interrupt.

Once the task is scheduled out that subsides as there is nothing which
re-arms the timer anymore.

So we should be fine with backporting the clockevents fix and leave the
other parts of the series for upstream only. I still need to investigate
how all of that affects the pending changes vs. TSC deadline timer (and
similar devices) which are not going to reach that modified clockevents
code anymore.

Thanks,

        tglx



^ permalink raw reply

* Re: [patch 01/12] clockevents: Prevent timer interrupt starvation
From: Thomas Gleixner @ 2026-04-07 14:33 UTC (permalink / raw)
  To: LKML
  Cc: Calvin Owens, Peter Zijlstra, Anna-Maria Behnsen,
	Frederic Weisbecker, Ingo Molnar, John Stultz, Stephen Boyd,
	Alexander Viro, Christian Brauner, Jan Kara, linux-fsdevel,
	Sebastian Reichel, linux-pm, Pablo Neira Ayuso, Florian Westphal,
	Phil Sutter, netfilter-devel, coreteam
In-Reply-To: <20260407083247.562657657@kernel.org>

Calvin!

On Tue, Apr 07 2026 at 10:54, Thomas Gleixner wrote:
> From: Thomas Gleixner <tglx@kernel.org>
>
> Calvin reported an odd NMI watchdog lockup which claims that the CPU locked
> up in user space. He provided a reproducer, which sets up a timerfd based
> timer and then rearms it in a loop with an absolute expiry time of 1ns.
>
> As the expiry time is in the past, the timer ends up as the first expiring
> timer in the per CPU hrtimer base and the clockevent device is programmed
> with the minimum delta value. If the machine is fast enough, this ends up
> in an endless loop of programming the delta value to the minimum value
> defined by the clock event device, before the timer interrupt can fire,
> which starves the interrupt and consequently triggers the lockup detector
> because the hrtimer callback of the lockup mechanism is never invoked.
>
> As a first step to prevent this, avoid reprogramming the clock event device
> when:
>      - a forced minimum delta event is pending
>      - the new expiry delta is less than or equal to the minimum delta
>
> Thanks to Calvin for providing the reproducer and to Borislav for testing
> and providing data from his Zen5 machine.
>
> The problem is not limited to Zen5, but depending on the underlying
> clock event device (e.g. TSC deadline timer on Intel) and the CPU speed
> not necessarily observable.
>
> This change serves only as the last resort and further changes will be made
> to prevent this scenario earlier in the call chain as far as possible.

It'd be great if you could re-test this one independently of the other
changes, so we can get that on the way ASAP.

Thanks,

        tglx

^ permalink raw reply

* [PATCH v4 0/6] thermal: core: Fixes, simplifications and suspend/resume relocation
From: Rafael J. Wysocki @ 2026-04-07 13:51 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf

Hi All,

This is an update of

https://lore.kernel.org/linux-pm/5119690.31r3eYUQgx@rafael.j.wysocki/

changing the first two patches to address Sashiko feedback.

This series is intended for 7.1 (it applies on top of linux-next).

It fixes the thermal zone removal and registration rollback path by
addressing possible race conditions and a memory leak in that code (patches
[1-2/6]), removes a redundant check (patch [3/6]), changes the thermal
workqueue to an unbound and non-freezable one (patch [4/6]), changes the
allocation of thermal_class to static (patch [5/6]), and relocates the
suspend and resume of thermal zones closer to the suspend and resume of
devices, respectively (patch [6/6]).

Thanks!




^ permalink raw reply

* [PATCH v4 1/6] thermal: core: Fix thermal zone governor cleanup issues
From: Rafael J. Wysocki @ 2026-04-07 13:55 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

If thermal_zone_device_register_with_trips() fails after adding
a thermal governor to the thermal zone being registered, the
governor is not removed from it as appropriate which may lead to
a memory leak.

In turn, thermal_zone_device_unregister() calls thermal_set_governor()
without acquiring the thermal zone lock beforehand which may race with
a governor update via sysfs and may lead to a use-after-free in that
case.

Address these issues by adding two thermal_set_governor() calls, one to
thermal_release() to remove the governor from the given thermal zone,
and one to the thermal zone registration error path to cover failures
preceding the thermal zone device registration.

Fixes: e33df1d2f3a0 ("thermal: let governors have private data for each thermal zone")
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v3 -> v4:
   * Call thermal_set_governor() from thermal_release() to avoid use-after-free
     of the device name (Sashiko)
   * Call thermal_set_governor() in thermal zone device registration rollback
     path if it fails before device registration

v2 -> v3: New patch

---
 drivers/thermal/thermal_core.c |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -971,6 +971,7 @@ static void thermal_release(struct devic
 		     sizeof("thermal_zone") - 1)) {
 		tz = to_thermal_zone(dev);
 		thermal_zone_destroy_device_groups(tz);
+		thermal_set_governor(tz, NULL);
 		mutex_destroy(&tz->lock);
 		complete(&tz->removal);
 	} else if (!strncmp(dev_name(dev), "cooling_device",
@@ -1617,8 +1618,10 @@ thermal_zone_device_register_with_trips(
 	/* sys I/F */
 	/* Add nodes that are always present via .groups */
 	result = thermal_zone_create_device_groups(tz);
-	if (result)
+	if (result) {
+		thermal_set_governor(tz, NULL);
 		goto remove_id;
+	}
 
 	result = device_register(&tz->device);
 	if (result)
@@ -1731,8 +1734,6 @@ void thermal_zone_device_unregister(stru
 
 	cancel_delayed_work_sync(&tz->poll_queue);
 
-	thermal_set_governor(tz, NULL);
-
 	thermal_thresholds_exit(tz);
 	thermal_remove_hwmon_sysfs(tz);
 	ida_free(&thermal_tz_ida, tz->id);




^ permalink raw reply

* [PATCH v4 2/6] thermal: core: Free thermal zone ID later during removal
From: Rafael J. Wysocki @ 2026-04-07 13:58 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The thermal zone removal ordering is different from the thermal zone
registration rollback path ordering and the former is arguably
problematic because freeing a thermal zone ID prematurely may cause
it to be used during the registration of another thermal zone which
may fail as a result.

Prevent that from occurring by changing the thermal zone removal
ordering to reflect the thermal zone registration rollback path
ordering.

Also move the ida_destroy() call from thermal_zone_device_unregister()
to thermal_release() for consistency.

Fixes: b31ef8285b19 ("thermal core: convert ID allocation to IDA")
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v3 -> v4:
   * Call ida_destroy() in thermal_release() in analogy with the mutex
     cleanup

v2 -> v3: New patch

---
 drivers/thermal/thermal_core.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -972,6 +972,7 @@ static void thermal_release(struct devic
 		tz = to_thermal_zone(dev);
 		thermal_zone_destroy_device_groups(tz);
 		thermal_set_governor(tz, NULL);
+		ida_destroy(&tz->ida);
 		mutex_destroy(&tz->lock);
 		complete(&tz->removal);
 	} else if (!strncmp(dev_name(dev), "cooling_device",
@@ -1736,8 +1737,6 @@ void thermal_zone_device_unregister(stru
 
 	thermal_thresholds_exit(tz);
 	thermal_remove_hwmon_sysfs(tz);
-	ida_free(&thermal_tz_ida, tz->id);
-	ida_destroy(&tz->ida);
 
 	device_del(&tz->device);
 	put_device(&tz->device);
@@ -1745,6 +1744,9 @@ void thermal_zone_device_unregister(stru
 	thermal_notify_tz_delete(tz);
 
 	wait_for_completion(&tz->removal);
+
+	ida_free(&thermal_tz_ida, tz->id);
+
 	kfree(tz->tzp);
 	kfree(tz);
 }




^ permalink raw reply

* [PATCH v4 3/6] thermal: core: Drop redundant check from thermal_zone_device_update()
From: Rafael J. Wysocki @ 2026-04-07 14:06 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Since __thermal_zone_device_update() checks if tz->state is
TZ_STATE_READY and bails out immediately otherwise, it is not
necessary to check the thermal_zone_is_present() return value in
thermal_zone_device_update().  Namely, tz->state is equal to
TZ_STATE_FLAG_INIT initially and that flag is only cleared in
thermal_zone_init_complete() after adding tz to the list of thermal
zones, and thermal_zone_exit() sets TZ_STATE_FLAG_EXIT in tz->state
while removing tz from that list.  Thus tz->state is not TZ_STATE_READY
when tz is not in the list and the check mentioned above is redundant.

Accordingly, drop the redundant thermal_zone_is_present() check from
thermal_zone_device_update() and drop the former altogether because it
has no more users.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v1 -> v4: No changes

---
 drivers/thermal/thermal_core.c |    8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -702,18 +702,12 @@ int thermal_zone_device_disable(struct t
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_disable);
 
-static bool thermal_zone_is_present(struct thermal_zone_device *tz)
-{
-	return !list_empty(&tz->node);
-}
-
 void thermal_zone_device_update(struct thermal_zone_device *tz,
 				enum thermal_notify_event event)
 {
 	guard(thermal_zone)(tz);
 
-	if (thermal_zone_is_present(tz))
-		__thermal_zone_device_update(tz, event);
+	__thermal_zone_device_update(tz, event);
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
 




^ permalink raw reply

* [PATCH v4 4/6] thermal: core: Change thermal_wq to be unbound and not freezable
From: Rafael J. Wysocki @ 2026-04-07 14:06 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The thermal workqueue doesn't need to be freezable or per-CPU, so drop
WQ_FREEZABLE and WQ_PERCPU from the flags when allocating it.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v1 -> v4: No changes

---
 drivers/thermal/thermal_core.c |    3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1918,8 +1918,7 @@ static int __init thermal_init(void)
 	if (result)
 		goto error;
 
-	thermal_wq = alloc_workqueue("thermal_events",
-				      WQ_FREEZABLE | WQ_POWER_EFFICIENT | WQ_PERCPU, 0);
+	thermal_wq = alloc_workqueue("thermal_events", WQ_POWER_EFFICIENT, 0);
 	if (!thermal_wq) {
 		result = -ENOMEM;
 		goto unregister_netlink;




^ permalink raw reply

* [PATCH v4 5/6] thermal: core: Allocate thermal_class statically
From: Rafael J. Wysocki @ 2026-04-07 14:07 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Define thermal_class as a static structure to simplify thermal_init()
and to simplify thermal class availability checks that will need to
be carried out during the suspend and resume of thermal zones after
subsequent changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v3 -> v4: No changes

v2 -> v3:
   * Use static variable thermal_class_unavailable (instead of a function)
     for checking if thermal_class is available.

v1 -> v2:
   * Reorder with respect to the next patch to allow the latter to be simpler
   * Add thermal_class_unavailable() (the next patch uses it too)

---
 drivers/thermal/thermal_core.c |   30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -979,7 +979,11 @@ static void thermal_release(struct devic
 	}
 }
 
-static struct class *thermal_class;
+static const struct class thermal_class = {
+	.name = "thermal",
+	.dev_release = thermal_release,
+};
+static bool thermal_class_unavailable __ro_after_init = true;
 
 static inline
 void print_bind_err_msg(struct thermal_zone_device *tz,
@@ -1072,7 +1076,7 @@ __thermal_cooling_device_register(struct
 	    !ops->set_cur_state)
 		return ERR_PTR(-EINVAL);
 
-	if (!thermal_class)
+	if (thermal_class_unavailable)
 		return ERR_PTR(-ENODEV);
 
 	cdev = kzalloc_obj(*cdev);
@@ -1095,7 +1099,7 @@ __thermal_cooling_device_register(struct
 	cdev->np = np;
 	cdev->ops = ops;
 	cdev->updated = false;
-	cdev->device.class = thermal_class;
+	cdev->device.class = &thermal_class;
 	cdev->devdata = devdata;
 
 	ret = cdev->ops->get_max_state(cdev, &cdev->max_state);
@@ -1543,7 +1547,7 @@ thermal_zone_device_register_with_trips(
 	if (polling_delay && passive_delay > polling_delay)
 		return ERR_PTR(-EINVAL);
 
-	if (!thermal_class)
+	if (thermal_class_unavailable)
 		return ERR_PTR(-ENODEV);
 
 	tz = kzalloc_flex(*tz, trips, num_trips);
@@ -1579,7 +1583,7 @@ thermal_zone_device_register_with_trips(
 	if (!tz->ops.critical)
 		tz->ops.critical = thermal_zone_device_critical;
 
-	tz->device.class = thermal_class;
+	tz->device.class = &thermal_class;
 	tz->devdata = devdata;
 	tz->num_trips = num_trips;
 	for_each_trip_desc(tz, td) {
@@ -1928,21 +1932,11 @@ static int __init thermal_init(void)
 	if (result)
 		goto destroy_workqueue;
 
-	thermal_class = kzalloc_obj(*thermal_class);
-	if (!thermal_class) {
-		result = -ENOMEM;
+	result = class_register(&thermal_class);
+	if (result)
 		goto unregister_governors;
-	}
 
-	thermal_class->name = "thermal";
-	thermal_class->dev_release = thermal_release;
-
-	result = class_register(thermal_class);
-	if (result) {
-		kfree(thermal_class);
-		thermal_class = NULL;
-		goto unregister_governors;
-	}
+	thermal_class_unavailable = false;
 
 	result = register_pm_notifier(&thermal_pm_nb);
 	if (result)




^ permalink raw reply

* [PATCH v4 6/6] thermal: core: Suspend thermal zones later and resume them earlier
From: Rafael J. Wysocki @ 2026-04-07 14:09 UTC (permalink / raw)
  To: Linux PM; +Cc: Daniel Lezcano, LKML, Lukasz Luba, Armin Wolf
In-Reply-To: <12871778.O9o76ZdvQC@rafael.j.wysocki>

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

To avoid some undesirable interactions between thermal zone suspend
and resume with user space that is running when those operations are
carried out, move them closer to the suspend and resume of devices,
respectively, by updating dpm_prepare() to carry out thermal zone
suspend and dpm_complete() to start thermal zone resume (that will
continue asynchronously).

This also makes the code easier to follow by removing one, arguably
redundant, level of indirection represented by the thermal PM notifier.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Armin Wolf <W_Armin@gmx.de>
---

v3 -> v4:
   * Add R-by from Armin

v2 -> v3:
   * Rebase on top of the v3 of the previous patch

v1 -> v2:
   * Reorder with respect to the previous patch
   * Use thermal_class_unavailable() to avoid running code that should
     not run without the thermal class
   * Suspend thermal zones after disabling device probing and resume
     them before enabling device probing for better synchronization

---
 drivers/base/power/main.c      |    5 +++
 drivers/thermal/thermal_core.c |   60 ++++++++++++-----------------------------
 include/linux/thermal.h        |    6 ++++
 3 files changed, 29 insertions(+), 42 deletions(-)

--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -33,6 +33,7 @@
 #include <trace/events/power.h>
 #include <linux/cpufreq.h>
 #include <linux/devfreq.h>
+#include <linux/thermal.h>
 #include <linux/timer.h>
 #include <linux/nmi.h>
 
@@ -1282,6 +1283,8 @@ void dpm_complete(pm_message_t state)
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
 
+	/* Start resuming thermal control */
+	thermal_pm_complete();
 	/* Allow device probing and trigger re-probing of deferred devices */
 	device_unblock_probing();
 	trace_suspend_resume(TPS("dpm_complete"), state.event, false);
@@ -2225,6 +2228,8 @@ int dpm_prepare(pm_message_t state)
 	 * instead. The normal behavior will be restored in dpm_complete().
 	 */
 	device_block_probing();
+	/* Suspend thermal control. */
+	thermal_pm_prepare();
 
 	mutex_lock(&dpm_list_mtx);
 	while (!list_empty(&dpm_list) && !error) {
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1837,7 +1837,7 @@ static void thermal_zone_pm_prepare(stru
 	cancel_delayed_work(&tz->poll_queue);
 }
 
-static void thermal_pm_notify_prepare(void)
+static void __thermal_pm_prepare(void)
 {
 	struct thermal_zone_device *tz;
 
@@ -1849,6 +1849,19 @@ static void thermal_pm_notify_prepare(vo
 		thermal_zone_pm_prepare(tz);
 }
 
+void thermal_pm_prepare(void)
+{
+	if (thermal_class_unavailable)
+		return;
+
+	__thermal_pm_prepare();
+	/*
+	 * Allow any leftover thermal work items already on the worqueue to
+	 * complete so they don't get in the way later.
+	 */
+	flush_workqueue(thermal_wq);
+}
+
 static void thermal_zone_pm_complete(struct thermal_zone_device *tz)
 {
 	guard(thermal_zone)(tz);
@@ -1865,10 +1878,13 @@ static void thermal_zone_pm_complete(str
 	mod_delayed_work(thermal_wq, &tz->poll_queue, 0);
 }
 
-static void thermal_pm_notify_complete(void)
+void thermal_pm_complete(void)
 {
 	struct thermal_zone_device *tz;
 
+	if (thermal_class_unavailable)
+		return;
+
 	guard(mutex)(&thermal_list_lock);
 
 	thermal_pm_suspended = false;
@@ -1877,41 +1893,6 @@ static void thermal_pm_notify_complete(v
 		thermal_zone_pm_complete(tz);
 }
 
-static int thermal_pm_notify(struct notifier_block *nb,
-			     unsigned long mode, void *_unused)
-{
-	switch (mode) {
-	case PM_HIBERNATION_PREPARE:
-	case PM_RESTORE_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		thermal_pm_notify_prepare();
-		/*
-		 * Allow any leftover thermal work items already on the
-		 * worqueue to complete so they don't get in the way later.
-		 */
-		flush_workqueue(thermal_wq);
-		break;
-	case PM_POST_HIBERNATION:
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		thermal_pm_notify_complete();
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
-static struct notifier_block thermal_pm_nb = {
-	.notifier_call = thermal_pm_notify,
-	/*
-	 * Run at the lowest priority to avoid interference between the thermal
-	 * zone resume work items spawned by thermal_pm_notify() and the other
-	 * PM notifiers.
-	 */
-	.priority = INT_MIN,
-};
-
 static int __init thermal_init(void)
 {
 	int result;
@@ -1938,11 +1919,6 @@ static int __init thermal_init(void)
 
 	thermal_class_unavailable = false;
 
-	result = register_pm_notifier(&thermal_pm_nb);
-	if (result)
-		pr_warn("Thermal: Can not register suspend notifier, return %d\n",
-			result);
-
 	return 0;
 
 unregister_governors:
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -273,6 +273,9 @@ bool thermal_trip_is_bound_to_cdev(struc
 int thermal_zone_device_enable(struct thermal_zone_device *tz);
 int thermal_zone_device_disable(struct thermal_zone_device *tz);
 void thermal_zone_device_critical(struct thermal_zone_device *tz);
+
+void thermal_pm_prepare(void);
+void thermal_pm_complete(void);
 #else
 static inline struct thermal_zone_device *thermal_zone_device_register_with_trips(
 					const char *type,
@@ -350,6 +353,9 @@ static inline int thermal_zone_device_en
 
 static inline int thermal_zone_device_disable(struct thermal_zone_device *tz)
 { return -ENODEV; }
+
+static inline void thermal_pm_prepare(void) {}
+static inline void thermal_pm_complete(void) {}
 #endif /* CONFIG_THERMAL */
 
 #endif /* __THERMAL_H__ */




^ permalink raw reply

* Re: [patch 01/12] clockevents: Prevent timer interrupt starvation
From: Frederic Weisbecker @ 2026-04-07 14:00 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Calvin Owens, Peter Zijlstra, Anna-Maria Behnsen,
	Ingo Molnar, John Stultz, Stephen Boyd, Alexander Viro,
	Christian Brauner, Jan Kara, linux-fsdevel, Sebastian Reichel,
	linux-pm, Pablo Neira Ayuso, Florian Westphal, Phil Sutter,
	netfilter-devel, coreteam
In-Reply-To: <20260407083247.562657657@kernel.org>

Le Tue, Apr 07, 2026 at 10:54:17AM +0200, Thomas Gleixner a écrit :
> From: Thomas Gleixner <tglx@kernel.org>
> 
> Calvin reported an odd NMI watchdog lockup which claims that the CPU locked
> up in user space. He provided a reproducer, which sets up a timerfd based
> timer and then rearms it in a loop with an absolute expiry time of 1ns.
> 
> As the expiry time is in the past, the timer ends up as the first expiring
> timer in the per CPU hrtimer base and the clockevent device is programmed
> with the minimum delta value. If the machine is fast enough, this ends up
> in an endless loop of programming the delta value to the minimum value
> defined by the clock event device, before the timer interrupt can fire,
> which starves the interrupt and consequently triggers the lockup detector
> because the hrtimer callback of the lockup mechanism is never invoked.
> 
> As a first step to prevent this, avoid reprogramming the clock event device
> when:
>      - a forced minimum delta event is pending
>      - the new expiry delta is less than or equal to the minimum delta
> 
> Thanks to Calvin for providing the reproducer and to Borislav for testing
> and providing data from his Zen5 machine.
> 
> The problem is not limited to Zen5, but depending on the underlying
> clock event device (e.g. TSC deadline timer on Intel) and the CPU speed
> not necessarily observable.
> 
> This change serves only as the last resort and further changes will be made
> to prevent this scenario earlier in the call chain as far as possible.
> 
> Fixes: d316c57ff6bf ("[PATCH] clockevents: add core functionality")
> Reported-by: Calvin Owens <calvin@wbinvd.org>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> Cc: Ingo Molnar <mingo@kernel.org>
> Link: https://lore.kernel.org/lkml/acMe-QZUel-bBYUh@mozart.vkv.me/
> ---
> V2: Simplified the clockevents code - Peter

Isn't it possible to rely on dev->next_event instead? In the above scenario,
subsequent 0 delta would not reprogram if dev->next_event is already below
the new call to ktime_get() ?

Thanks.

-- 
Frederic Weisbecker
SUSE Labs

^ permalink raw reply

* Re: [patch 01/12] clockevents: Prevent timer interrupt starvation
From: Thomas Gleixner @ 2026-04-07 13:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: LKML, Calvin Owens, Anna-Maria Behnsen, Frederic Weisbecker,
	Ingo Molnar, John Stultz, Stephen Boyd, Alexander Viro,
	Christian Brauner, Jan Kara, linux-fsdevel, Sebastian Reichel,
	linux-pm, Pablo Neira Ayuso, Florian Westphal, Phil Sutter,
	netfilter-devel, coreteam
In-Reply-To: <20260407114905.GH3738786@noisy.programming.kicks-ass.net>

On Tue, Apr 07 2026 at 13:49, Peter Zijlstra wrote:
> On Tue, Apr 07, 2026 at 01:30:42PM +0200, Thomas Gleixner wrote:
>> > The only thing that seems to be different, is that the old code would
>> > return the ->set_next_event() error code, rather than 0 in the !force
>> > case.
>> 
>> You mean when dev->next_event_forced is set and the set_event() callback
>> above failed?
>
> next_event_forced = 0;
> force = 0;
>
> Then the old code would return rc (return value of ->set_next_event),
> while the new code will return -ETIME.
>
> (not 0 like I said).

Ah. Now it makes sense :)

> I suppose ->set_next_event() will only ever fail with -ETIME?

Yes.

^ permalink raw reply

* Re: [GIT PULL] cpupower next update for Linux 7.1-rc1
From: Rafael J. Wysocki @ 2026-04-07 13:42 UTC (permalink / raw)
  To: Shuah Khan
  Cc: Rafael J. Wysocki, shuah, Thomas Renninger, John B. Wyatt IV,
	John Kacur, Thomas Renninger, linux-pm, linux-kernel
In-Reply-To: <651e317c-dc31-4895-ac09-2f12398ed3be@linuxfoundation.org>

Hi Shuah,

On Mon, Apr 6, 2026 at 8:40 PM Shuah Khan <skhan@linuxfoundation.org> wrote:
>
> Hi Rafael,
>
> Please pull the cpupower next update for Linux 7.1-rc1.
>
> - Fixes errors in cpupower-frequency-info short option names
>    to its manpage.
> - Fixes cpupower-idle-info perf option name to its manpage.
> - Adds boost and epp options to cpupower-frequency-info to its
>    manpage.
> - Adds description for perf-bias option to cpupower-info to its
>    manpage.
> - Removes unnecessary extern declarations from getopt.h in arguments
>    parsing functions in cpufreq-set, cpuidle-info, cpuidle-set,
>    cpupower-info, and cpupower-set utilities. These functions are
>    defined in the getopt.h file.
>
> diff is attached.
>
> thanks,
> -- Shuah
>
> ----------------------------------------------------------------
> The following changes since commit c369299895a591d96745d6492d4888259b004a9e:
>
>    Linux 7.0-rc5 (2026-03-22 14:42:17 -0700)
>
> are available in the Git repository at:
>
>    git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux tags/linux-cpupower-7.1-rc1
>
> for you to fetch changes up to 2fd3b83cacfb9160b896fb26117328eeb0598c54:
>
>    cpupower: remove extern declarations in cmd functions (2026-04-06 11:25:32 -0600)
>
> ----------------------------------------------------------------
> linux-cpupower-7.1-rc1
>
> - Fixes errors in cpupower-frequency-info short option names
>    to its manpage.
> - Fixes cpupower-idle-info perf option name to its manpage.
> - Adds boost and epp options to cpupower-frequency-info to its
>    manpage.
> - Adds description for perf-bias option to cpupower-info to its
>    manpage.
> - Removes unnecessary extern declarations from getopt.h in arguments
>    parsing functions in cpufreq-set, cpuidle-info, cpuidle-set,
>    cpupower-info, and cpupower-set utilities. These functions are
>    defined in the getopt.h file.
>
> ----------------------------------------------------------------
> Kaushlendra Kumar (1):
>        cpupower: remove extern declarations in cmd functions
>
> Roberto Ricci (4):
>        cpupower-idle-info.1: fix short option names
>        cpupower-frequency-info.1: use the proper name of the --perf option
>        cpupower-frequency-info.1: document --boost and --epp options
>        cpupower-info.1: describe the --perf-bias option
>
>   tools/power/cpupower/man/cpupower-frequency-info.1 | 8 +++++++-
>   tools/power/cpupower/man/cpupower-idle-info.1      | 4 ++--
>   tools/power/cpupower/man/cpupower-info.1           | 9 ++++++++-
>   tools/power/cpupower/utils/cpufreq-info.c          | 2 --
>   tools/power/cpupower/utils/cpufreq-set.c           | 2 --
>   tools/power/cpupower/utils/cpuidle-info.c          | 2 --
>   tools/power/cpupower/utils/cpuidle-set.c           | 2 --
>   tools/power/cpupower/utils/cpupower-info.c         | 2 --
>   tools/power/cpupower/utils/cpupower-set.c          | 2 --
>   9 files changed, 17 insertions(+), 16 deletions(-)
> ----------------------------------------------------------------

Pulled and added to linux-pm.git/linux-next, thanks!

^ permalink raw reply

* [PATCH v2] power: supply: max17042: fix OF node reference imbalance
From: Johan Hovold @ 2026-04-07 12:33 UTC (permalink / raw)
  To: Sebastian Reichel
  Cc: Hans de Goede, Krzysztof Kozlowski, Marek Szyprowski,
	Sebastian Krzyszkowiak, Purism Kernel Team, linux-pm,
	linux-kernel, Johan Hovold, stable, Dzmitry Sankouski

The driver reuses the OF node of the parent multi-function device but
fails to take another reference to balance the one dropped by the
platform bus code when unbinding the MFD and deregistering the child
devices.

Fix this by using the intended helper for reusing OF nodes.

Fixes: 0cd4f1f77ad4 ("power: supply: max17042: add platform driver variant")
Cc: stable@vger.kernel.org	# 6.14
Cc: Dzmitry Sankouski <dsankouski@gmail.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
---

Changes in v2:
 - add missing driver name to patch summary prefix


 drivers/power/supply/max17042_battery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index acea176101fa..914f18ce79b3 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -1165,7 +1165,8 @@ static int max17042_platform_probe(struct platform_device *pdev)
 	if (!i2c)
 		return -EINVAL;
 
-	dev->of_node = dev->parent->of_node;
+	device_set_of_node_from_dev(dev, dev->parent);
+
 	id = platform_get_device_id(pdev);
 	irq = platform_get_irq(pdev, 0);
 
-- 
2.52.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox