LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus
  2026-05-18  5:08 [PATCH 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
@ 2026-05-18  5:08 ` Aboorva Devarajan
  2026-05-18  6:02   ` Shrikanth Hegde
  0 siblings, 1 reply; 8+ messages in thread
From: Aboorva Devarajan @ 2026-05-18  5:08 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Aboorva Devarajan, Christophe Leroy, linux-kernel,
	Sourabh Jain, Ritesh Harjani, Shrikanth Hegde

kexec_prepare_cpus_wait() calls get_cpu() internally to obtain the
current CPU id. kexec_prepare_cpus() calls kexec_prepare_cpus_wait()
twice -- once for KEXEC_STATE_IRQS_OFF and once for
KEXEC_STATE_REAL_MODE -- but only issues a single put_cpu() at the end,
leaving preempt_count elevated by one extra nesting level.

In practice the imbalance does not trigger a 'scheduling while atomic'
splat because the kexec path is a one-way trip: IRQs are already
disabled, no schedule() occurs after the leak, and
default_machine_kexec() overwrites preempt_count with HARDIRQ_OFFSET
before jumping into kexec_sequence() which never returns. However the
bookkeeping is still wrong.

Lift the get_cpu()/put_cpu() pair into kexec_prepare_cpus() so it is
called exactly once, and pass the CPU id to kexec_prepare_cpus_wait()
as a parameter. This keeps preempt_count correctly balanced.

Fixes: 1fc711f7ffb01 ("powerpc/kexec: Fix race in kexec shutdown")
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
---
 arch/powerpc/kexec/core_64.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 825ab8a88f18e..9d7e5a1e6e5b8 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -164,12 +164,11 @@ static void kexec_smp_down(void *arg)
 	/* NOTREACHED */
 }
 
-static void kexec_prepare_cpus_wait(int wait_state)
+static void kexec_prepare_cpus_wait(int wait_state, int my_cpu)
 {
-	int my_cpu, i, notified=-1;
+	int i, notified = -1;
 
 	hw_breakpoint_disable();
-	my_cpu = get_cpu();
 	/* Make sure each CPU has at least made it to the state we need.
 	 *
 	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
@@ -246,6 +245,8 @@ static void wake_offline_cpus(void)
 
 static void kexec_prepare_cpus(void)
 {
+	int my_cpu;
+
 	wake_offline_cpus();
 	smp_call_function(kexec_smp_down, NULL, /* wait */0);
 	local_irq_disable();
@@ -254,7 +255,8 @@ static void kexec_prepare_cpus(void)
 	mb(); /* make sure IRQs are disabled before we say they are */
 	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
 
-	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+	my_cpu = get_cpu();
+	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF, my_cpu);
 	/* we are sure every CPU has IRQs off at this point */
 	kexec_all_irq_disabled = 1;
 
@@ -262,13 +264,12 @@ static void kexec_prepare_cpus(void)
 	 * Before removing MMU mappings make sure all CPUs have entered real
 	 * mode:
 	 */
-	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE, my_cpu);
+	put_cpu();
 
 	/* after we tell the others to go down */
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(0, 0);
-
-	put_cpu();
 }
 
 #else /* ! SMP */
-- 
2.54.0



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus
  2026-05-18  5:08 ` [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus Aboorva Devarajan
@ 2026-05-18  6:02   ` Shrikanth Hegde
  2026-06-03  6:14     ` Aboorva Devarajan
  0 siblings, 1 reply; 8+ messages in thread
From: Shrikanth Hegde @ 2026-05-18  6:02 UTC (permalink / raw)
  To: Aboorva Devarajan, Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Christophe Leroy, linux-kernel, Sourabh Jain,
	Ritesh Harjani

Hi Aboorva.

On 5/18/26 10:38 AM, Aboorva Devarajan wrote:
> kexec_prepare_cpus_wait() calls get_cpu() internally to obtain the
> current CPU id. kexec_prepare_cpus() calls kexec_prepare_cpus_wait()
> twice -- once for KEXEC_STATE_IRQS_OFF and once for
> KEXEC_STATE_REAL_MODE -- but only issues a single put_cpu() at the end,
> leaving preempt_count elevated by one extra nesting level.
> 
> In practice the imbalance does not trigger a 'scheduling while atomic'
> splat because the kexec path is a one-way trip: IRQs are already
> disabled, no schedule() occurs after the leak, and
> default_machine_kexec() overwrites preempt_count with HARDIRQ_OFFSET
> before jumping into kexec_sequence() which never returns. However the
> bookkeeping is still wrong.
> 
> Lift the get_cpu()/put_cpu() pair into kexec_prepare_cpus() so it is
> called exactly once, and pass the CPU id to kexec_prepare_cpus_wait()
> as a parameter. This keeps preempt_count correctly balanced.
> 
> Fixes: 1fc711f7ffb01 ("powerpc/kexec: Fix race in kexec shutdown")
> Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> ---
>   arch/powerpc/kexec/core_64.c | 15 ++++++++-------
>   1 file changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> index 825ab8a88f18e..9d7e5a1e6e5b8 100644
> --- a/arch/powerpc/kexec/core_64.c
> +++ b/arch/powerpc/kexec/core_64.c
> @@ -164,12 +164,11 @@ static void kexec_smp_down(void *arg)
>   	/* NOTREACHED */
>   }
>   
> -static void kexec_prepare_cpus_wait(int wait_state)
> +static void kexec_prepare_cpus_wait(int wait_state, int my_cpu)
>   {
> -	int my_cpu, i, notified=-1;
> +	int i, notified = -1;
>   
>   	hw_breakpoint_disable();
> -	my_cpu = get_cpu();
>   	/* Make sure each CPU has at least made it to the state we need.
>   	 *
>   	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
> @@ -246,6 +245,8 @@ static void wake_offline_cpus(void)
>   
>   static void kexec_prepare_cpus(void)
>   {
> +	int my_cpu;
> +
>   	wake_offline_cpus();
>   	smp_call_function(kexec_smp_down, NULL, /* wait */0);
>   	local_irq_disable();
> @@ -254,7 +255,8 @@ static void kexec_prepare_cpus(void)
>   	mb(); /* make sure IRQs are disabled before we say they are */
>   	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
>   
> -	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
> +	my_cpu = get_cpu();

raw_smp_processor_id() is better here. All it needs is to get the current CPU, right?
The caller disables IRQs above, which makes the get_cpu() call unnecessary.


> +	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF, my_cpu);
>   	/* we are sure every CPU has IRQs off at this point */
>   	kexec_all_irq_disabled = 1;
>   
> @@ -262,13 +264,12 @@ static void kexec_prepare_cpus(void)
>   	 * Before removing MMU mappings make sure all CPUs have entered real
>   	 * mode:
>   	 */
> -	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
> +	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE, my_cpu);
> +	put_cpu();
>   
>   	/* after we tell the others to go down */
>   	if (ppc_md.kexec_cpu_down)
>   		ppc_md.kexec_cpu_down(0, 0);
> -
> -	put_cpu();
>   }
>   
>   #else /* ! SMP */



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus
  2026-05-18  6:02   ` Shrikanth Hegde
@ 2026-06-03  6:14     ` Aboorva Devarajan
  2026-06-03  6:16       ` Shrikanth Hegde
  0 siblings, 1 reply; 8+ messages in thread
From: Aboorva Devarajan @ 2026-06-03  6:14 UTC (permalink / raw)
  To: Shrikanth Hegde, Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Christophe Leroy, linux-kernel, Sourabh Jain,
	Ritesh Harjani

Hi Shrikanth,

On Mon, 2026-05-18 at 11:32 +0530, Shrikanth Hegde wrote:
> Hi Aboorva.
> 
> On 5/18/26 10:38 AM, Aboorva Devarajan wrote:
> > kexec_prepare_cpus_wait() calls get_cpu() internally to obtain the
> > current CPU id. kexec_prepare_cpus() calls kexec_prepare_cpus_wait()
> > twice -- once for KEXEC_STATE_IRQS_OFF and once for
> > KEXEC_STATE_REAL_MODE -- but only issues a single put_cpu() at the end,
> > leaving preempt_count elevated by one extra nesting level.
> > 
> > In practice the imbalance does not trigger a 'scheduling while atomic'
> > splat because the kexec path is a one-way trip: IRQs are already
> > disabled, no schedule() occurs after the leak, and
> > default_machine_kexec() overwrites preempt_count with HARDIRQ_OFFSET
> > before jumping into kexec_sequence() which never returns. However the
> > bookkeeping is still wrong.
> > 
> > Lift the get_cpu()/put_cpu() pair into kexec_prepare_cpus() so it is
> > called exactly once, and pass the CPU id to kexec_prepare_cpus_wait()
> > as a parameter. This keeps preempt_count correctly balanced.
> > 
> > Fixes: 1fc711f7ffb01 ("powerpc/kexec: Fix race in kexec shutdown")
> > Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
> > ---
> >   arch/powerpc/kexec/core_64.c | 15 ++++++++-------
> >   1 file changed, 8 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> > index 825ab8a88f18e..9d7e5a1e6e5b8 100644
> > --- a/arch/powerpc/kexec/core_64.c
> > +++ b/arch/powerpc/kexec/core_64.c
> > @@ -164,12 +164,11 @@ static void kexec_smp_down(void *arg)
> >   	/* NOTREACHED */
> >   }
> >   
> > -static void kexec_prepare_cpus_wait(int wait_state)
> > +static void kexec_prepare_cpus_wait(int wait_state, int my_cpu)
> >   {
> > -	int my_cpu, i, notified=-1;
> > +	int i, notified = -1;
> >   
> >   	hw_breakpoint_disable();
> > -	my_cpu = get_cpu();
> >   	/* Make sure each CPU has at least made it to the state we need.
> >   	 *
> >   	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
> > @@ -246,6 +245,8 @@ static void wake_offline_cpus(void)
> >   
> >   static void kexec_prepare_cpus(void)
> >   {
> > +	int my_cpu;
> > +
> >   	wake_offline_cpus();
> >   	smp_call_function(kexec_smp_down, NULL, /* wait */0);
> >   	local_irq_disable();
> > @@ -254,7 +255,8 @@ static void kexec_prepare_cpus(void)
> >   	mb(); /* make sure IRQs are disabled before we say they are */
> >   	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
> >   
> > -	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
> > +	my_cpu = get_cpu();

> raw_smp_processor_id() is better here. All it needs is get current cpu?
> caller does irq_disable above and that renders call for get_cpu un-necessary.

Agreed, get_cpu() is not needed here. kexec_prepare_cpus() already does
local_irq_disable()/hard_irq_disable() before calling
kexec_prepare_cpus_wait(), so we only need the current cpu id.

I will go ahead with smp_processor_id() rather than
raw_smp_processor_id() to stay consistent with Patch 2 and to keep the
CONFIG_DEBUG_PREEMPT check.

> >   
> > @@ -262,13 +264,12 @@ static void kexec_prepare_cpus(void)
> >   	 * Before removing MMU mappings make sure all CPUs have entered real
> >   	 * mode:
> >   	 */
> > -	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
> > +	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE, my_cpu);
> > +	put_cpu();
> >   
> >   	/* after we tell the others to go down */
> >   	if (ppc_md.kexec_cpu_down)
> >   		ppc_md.kexec_cpu_down(0, 0);
> > -
> > -	put_cpu();
> >   }
> >   
> >   #else /* ! SMP */

Regards,
Aboorva


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus
  2026-06-03  6:14     ` Aboorva Devarajan
@ 2026-06-03  6:16       ` Shrikanth Hegde
  0 siblings, 0 replies; 8+ messages in thread
From: Shrikanth Hegde @ 2026-06-03  6:16 UTC (permalink / raw)
  To: Aboorva Devarajan, Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Christophe Leroy, linux-kernel, Sourabh Jain,
	Ritesh Harjani



On 6/3/26 11:44 AM, Aboorva Devarajan wrote:
> Hi Shrikanth,
> 
> On Mon, 2026-05-18 at 11:32 +0530, Shrikanth Hegde wrote:
>> Hi Aboorva.
>>
>> On 5/18/26 10:38 AM, Aboorva Devarajan wrote:
>>> kexec_prepare_cpus_wait() calls get_cpu() internally to obtain the
>>> current CPU id. kexec_prepare_cpus() calls kexec_prepare_cpus_wait()
>>> twice -- once for KEXEC_STATE_IRQS_OFF and once for
>>> KEXEC_STATE_REAL_MODE -- but only issues a single put_cpu() at the end,
>>> leaving preempt_count elevated by one extra nesting level.
>>>
>>> In practice the imbalance does not trigger a 'scheduling while atomic'
>>> splat because the kexec path is a one-way trip: IRQs are already
>>> disabled, no schedule() occurs after the leak, and
>>> default_machine_kexec() overwrites preempt_count with HARDIRQ_OFFSET
>>> before jumping into kexec_sequence() which never returns. However the
>>> bookkeeping is still wrong.
>>>
>>> Lift the get_cpu()/put_cpu() pair into kexec_prepare_cpus() so it is
>>> called exactly once, and pass the CPU id to kexec_prepare_cpus_wait()
>>> as a parameter. This keeps preempt_count correctly balanced.
>>>
>>> Fixes: 1fc711f7ffb01 ("powerpc/kexec: Fix race in kexec shutdown")
>>> Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
>>> ---
>>>    arch/powerpc/kexec/core_64.c | 15 ++++++++-------
>>>    1 file changed, 8 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
>>> index 825ab8a88f18e..9d7e5a1e6e5b8 100644
>>> --- a/arch/powerpc/kexec/core_64.c
>>> +++ b/arch/powerpc/kexec/core_64.c
>>> @@ -164,12 +164,11 @@ static void kexec_smp_down(void *arg)
>>>    	/* NOTREACHED */
>>>    }
>>>    
>>> -static void kexec_prepare_cpus_wait(int wait_state)
>>> +static void kexec_prepare_cpus_wait(int wait_state, int my_cpu)
>>>    {
>>> -	int my_cpu, i, notified=-1;
>>> +	int i, notified = -1;
>>>    
>>>    	hw_breakpoint_disable();
>>> -	my_cpu = get_cpu();
>>>    	/* Make sure each CPU has at least made it to the state we need.
>>>    	 *
>>>    	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
>>> @@ -246,6 +245,8 @@ static void wake_offline_cpus(void)
>>>    
>>>    static void kexec_prepare_cpus(void)
>>>    {
>>> +	int my_cpu;
>>> +
>>>    	wake_offline_cpus();
>>>    	smp_call_function(kexec_smp_down, NULL, /* wait */0);
>>>    	local_irq_disable();
>>> @@ -254,7 +255,8 @@ static void kexec_prepare_cpus(void)
>>>    	mb(); /* make sure IRQs are disabled before we say they are */
>>>    	get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
>>>    
>>> -	kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
>>> +	my_cpu = get_cpu();
> 
>> raw_smp_processor_id() is better here. All it needs is get current cpu?
>> caller does irq_disable above and that renders call for get_cpu un-necessary.
> 
> Agreed, get_cpu() is not needed here. kexec_prepare_cpus() already does
> local_irq_disable()/hard_irq_disable() before calling
> kexec_prepare_cpus_wait(), so we only need the current cpu id.
> 
> I will go ahead with smp_processor_id() rather than
> raw_smp_processor_id() to stay consistent with Patch 2 and to keep the
> CONFIG_DEBUG_PREEMPT check.


If IRQs are disabled, then use raw_smp_processor_id() in both places.
For patch 2, just add a comment saying that IRQs are disabled by the time it gets there.

> 
>>>    
>>> @@ -262,13 +264,12 @@ static void kexec_prepare_cpus(void)
>>>    	 * Before removing MMU mappings make sure all CPUs have entered real
>>>    	 * mode:
>>>    	 */
>>> -	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
>>> +	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE, my_cpu);
>>> +	put_cpu();
>>>    
>>>    	/* after we tell the others to go down */
>>>    	if (ppc_md.kexec_cpu_down)
>>>    		ppc_md.kexec_cpu_down(0, 0);
>>> -
>>> -	put_cpu();
>>>    }
>>>    
>>>    #else /* ! SMP */
> 
> Regards,
> Aboorva



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths
@ 2026-06-03  6:27 Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 1/3] powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del Aboorva Devarajan
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Aboorva Devarajan @ 2026-06-03  6:27 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Aboorva Devarajan, Christophe Leroy, linux-kernel,
	Sourabh Jain, Ritesh Harjani, Shrikanth Hegde

Hi all,

This patch series fixes some minor preempt_count bookkeeping issues in
arch/powerpc/ found during a preemption leak audit prompted by the
lazy/full preemption model changes. These are get_cpu/put_cpu and
get_cpu_var/put_cpu_var pairing errors that leave preempt_count
incorrectly elevated or underflowed.

v1: https://lore.kernel.org/all/20260518050855.1147242-1-aboorvad@linux.ibm.com/

v1 -> v2:
 - Patch 1 (fsl_emb_pmu_del): no functional change; picked up
   Shrikanth's Reviewed-by.
 - Patch 2 (pnv_kexec_wait_secondaries_down): per Shrikanth's review,
   use smp_processor_id() instead of raw_smp_processor_id(). The kexec/
   crash teardown path runs with IRQs disabled so it is safe.
 - Patch 3 (kexec_prepare_cpus): per Shrikanth's review, dropped the
   approach of passing the cpu id through kexec_prepare_cpus_wait() as
   a parameter. kexec_prepare_cpus() already runs local_irq_disable()/
   hard_irq_disable() before the wait, so get_cpu()/put_cpu() is
   unnecessary: just read the cpu id in kexec_prepare_cpus_wait() and
   drop the trailing put_cpu(). This keeps the function signature
   unchanged. smp_processor_id() is used here too, consistent with patch 2.

Testing:
 - Patch 1: compile-tested only (ppc64e_defconfig + CONFIG_FSL_EMB_PERFMON);
   no e500/fsl_emb hardware to runtime-test so far.
 - Patches 2 and 3: kexec boot-tested on PowerNV (powernv_defconfig)
   and pseries (pseries_defconfig).

Please let me know your comments.

Thanks,
Aboorva

Aboorva Devarajan (3):
  powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del
  powerpc/powernv: fix preempt count leak in
    pnv_kexec_wait_secondaries_down
  powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus

 arch/powerpc/kexec/core_64.c           | 4 +---
 arch/powerpc/perf/core-fsl-emb.c       | 3 ++-
 arch/powerpc/platforms/powernv/setup.c | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

-- 
2.54.0



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del
  2026-06-03  6:27 [PATCH v2 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
@ 2026-06-03  6:27 ` Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 2/3] powerpc/powernv: fix preempt count leak in pnv_kexec_wait_secondaries_down Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus Aboorva Devarajan
  2 siblings, 0 replies; 8+ messages in thread
From: Aboorva Devarajan @ 2026-06-03  6:27 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Aboorva Devarajan, Christophe Leroy, linux-kernel,
	Sourabh Jain, Ritesh Harjani, Shrikanth Hegde

fsl_emb_pmu_del() unconditionally calls put_cpu_var(cpu_hw_events) at
the 'out:' label, but only calls the matching get_cpu_var() after the
'i < 0' early-return check. When event->hw.idx is negative the
function jumps to 'out:' without having taken get_cpu_var(), and the
trailing put_cpu_var() then issues an unmatched preempt_enable(),
underflowing preempt_count.

On a CONFIG_PREEMPT=y kernel preempt_count would underflow and
eventually present as a 'scheduling while atomic' BUG.

Move put_cpu_var() to pair with get_cpu_var() so the percpu access is
correctly bracketed and the 'out:' label only handles perf_pmu_enable.

Fixes: a11106544f33 ("powerpc/perf: e500 support")
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
---
 arch/powerpc/perf/core-fsl-emb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 7120ab20cbfe..02b5dd74c187 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -366,9 +366,10 @@ static void fsl_emb_pmu_del(struct perf_event *event, int flags)
 
 	cpuhw->n_events--;
 
+	put_cpu_var(cpu_hw_events);
+
  out:
 	perf_pmu_enable(event->pmu);
-	put_cpu_var(cpu_hw_events);
 }
 
 static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
-- 
2.54.0



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] powerpc/powernv: fix preempt count leak in pnv_kexec_wait_secondaries_down
  2026-06-03  6:27 [PATCH v2 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 1/3] powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del Aboorva Devarajan
@ 2026-06-03  6:27 ` Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus Aboorva Devarajan
  2 siblings, 0 replies; 8+ messages in thread
From: Aboorva Devarajan @ 2026-06-03  6:27 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Aboorva Devarajan, Christophe Leroy, linux-kernel,
	Sourabh Jain, Ritesh Harjani, Shrikanth Hegde

pnv_kexec_wait_secondaries_down() calls get_cpu() to obtain the current
CPU id but never calls the matching put_cpu(), leaking one
preempt_disable() nesting level on every invocation.

In practice the imbalance does not trigger a visible splat because the
kexec teardown path is a one-way trip: IRQs are already disabled, no
schedule() occurs after the leak, and default_machine_kexec() overwrites
preempt_count with HARDIRQ_OFFSET before jumping into kexec_sequence()
which never returns. However the bookkeeping is still wrong.

The function only needs the current CPU id, and this path runs with the
CPU pinned and IRQs disabled, so the preempt_disable() side-effect of
get_cpu() is unnecessary. Replace it with smp_processor_id(), which
returns the CPU id without touching preempt_count.

Fixes: 298b34d7d578 ("powerpc/powernv: Fix kexec races going back to OPAL")
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
---
 arch/powerpc/platforms/powernv/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 4dbb47ddbdcc..73193264cbe7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -396,7 +396,7 @@ static void pnv_kexec_wait_secondaries_down(void)
 {
 	int my_cpu, i, notified = -1;
 
-	my_cpu = get_cpu();
+	my_cpu = smp_processor_id();
 
 	for_each_online_cpu(i) {
 		uint8_t status;
-- 
2.54.0



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus
  2026-06-03  6:27 [PATCH v2 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 1/3] powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del Aboorva Devarajan
  2026-06-03  6:27 ` [PATCH 2/3] powerpc/powernv: fix preempt count leak in pnv_kexec_wait_secondaries_down Aboorva Devarajan
@ 2026-06-03  6:27 ` Aboorva Devarajan
  2 siblings, 0 replies; 8+ messages in thread
From: Aboorva Devarajan @ 2026-06-03  6:27 UTC (permalink / raw)
  To: Madhavan Srinivasan, linuxppc-dev
  Cc: Athira Rajeev, Aboorva Devarajan, Christophe Leroy, linux-kernel,
	Sourabh Jain, Ritesh Harjani, Shrikanth Hegde

kexec_prepare_cpus_wait() calls get_cpu() internally to obtain the
current CPU id. kexec_prepare_cpus() calls kexec_prepare_cpus_wait()
twice -- once for KEXEC_STATE_IRQS_OFF and once for
KEXEC_STATE_REAL_MODE -- but only issues a single put_cpu() at the end,
leaving preempt_count elevated by one extra nesting level.

In practice the imbalance does not trigger a 'scheduling while atomic'
splat because the kexec path is a one-way trip: IRQs are already
disabled, no schedule() occurs after the leak, and
default_machine_kexec() overwrites preempt_count with HARDIRQ_OFFSET
before jumping into kexec_sequence() which never returns. However the
bookkeeping is still wrong.

kexec_prepare_cpus() calls local_irq_disable()/hard_irq_disable()
before invoking kexec_prepare_cpus_wait(), so the CPU is already pinned
and the get_cpu()/put_cpu() preempt_disable() bracketing is unnecessary.
Only the current CPU id is needed, so replace get_cpu() with
smp_processor_id() and drop the now-unneeded put_cpu().

Fixes: 1fc711f7ffb0 ("powerpc/kexec: Fix race in kexec shutdown")
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
---
 arch/powerpc/kexec/core_64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 825ab8a88f18..d7bdc362d497 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -169,7 +169,7 @@ static void kexec_prepare_cpus_wait(int wait_state)
 	int my_cpu, i, notified=-1;
 
 	hw_breakpoint_disable();
-	my_cpu = get_cpu();
+	my_cpu = smp_processor_id();
 	/* Make sure each CPU has at least made it to the state we need.
 	 *
 	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs
@@ -267,8 +267,6 @@ static void kexec_prepare_cpus(void)
 	/* after we tell the others to go down */
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(0, 0);
-
-	put_cpu();
 }
 
 #else /* ! SMP */
-- 
2.54.0



^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-06-03  6:28 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-03  6:27 [PATCH v2 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
2026-06-03  6:27 ` [PATCH 1/3] powerpc/perf: fix preempt count underflow in fsl_emb_pmu_del Aboorva Devarajan
2026-06-03  6:27 ` [PATCH 2/3] powerpc/powernv: fix preempt count leak in pnv_kexec_wait_secondaries_down Aboorva Devarajan
2026-06-03  6:27 ` [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus Aboorva Devarajan
  -- strict thread matches above, loose matches on Subject: below --
2026-05-18  5:08 [PATCH 0/3] powerpc: fix preempt_count imbalances in perf and kexec paths Aboorva Devarajan
2026-05-18  5:08 ` [PATCH 3/3] powerpc/kexec: fix double get_cpu() imbalance in kexec_prepare_cpus Aboorva Devarajan
2026-05-18  6:02   ` Shrikanth Hegde
2026-06-03  6:14     ` Aboorva Devarajan
2026-06-03  6:16       ` Shrikanth Hegde

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.