LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 1/8] powerpc: rename arch_irq_disabled_regs
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Samir M, David Gow, Venkat Rao Bagalkote
In-Reply-To: <20260427122742.210074-1-mkchauras@gmail.com>

From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>

Rename arch_irq_disabled_regs() to regs_irqs_disabled() to align with the
naming used in the generic irqentry framework. This makes the function
available for use both in the PowerPC architecture code and in the
common entry/exit paths shared with other architectures.

This is a preparatory change for enabling the generic irqentry framework
on PowerPC.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Tested-by: Samir M <samir@linux.ibm.com>
Tested-by: David Gow <davidgow@google.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
---
 arch/powerpc/include/asm/hw_irq.h    |  4 ++--
 arch/powerpc/include/asm/interrupt.h | 16 ++++++++--------
 arch/powerpc/kernel/interrupt.c      |  4 ++--
 arch/powerpc/kernel/syscall.c        |  2 +-
 arch/powerpc/kernel/traps.c          |  2 +-
 arch/powerpc/kernel/watchdog.c       |  2 +-
 arch/powerpc/perf/core-book3s.c      |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9cd945f2acaf..b7eee6385ae5 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -393,7 +393,7 @@ static inline void do_hard_irq_enable(void)
 	__hard_irq_enable();
 }
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return (regs->softe & IRQS_DISABLED);
 }
@@ -466,7 +466,7 @@ static inline bool arch_irqs_disabled(void)
 
 #define hard_irq_disable()		arch_local_irq_disable()
 
-static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
+static inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return !(regs->msr & MSR_EE);
 }
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index eb0e4a20b818..0e2cddf8bd21 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -172,7 +172,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
 #endif
 
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		trace_hardirqs_off();
 
 	if (user_mode(regs)) {
@@ -192,11 +192,11 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
 				   ct_state() != CT_STATE_IDLE);
 		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
-		INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
-					   search_kernel_restart_table(regs->nip));
+		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
+				     search_kernel_restart_table(regs->nip));
 	}
-	INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
-				   !(regs->msr & MSR_EE));
+	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
+			     !(regs->msr & MSR_EE));
 
 	booke_restore_dbcr0();
 }
@@ -298,7 +298,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 		 * Adjust regs->softe to be soft-masked if it had not been
 		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
 		 * not yet set disabled), or if it was in an implicit soft
-		 * masked state. This makes arch_irq_disabled_regs(regs)
+		 * masked state. This makes regs_irqs_disabled(regs)
 		 * behave as expected.
 		 */
 		regs->softe = IRQS_ALL_DISABLED;
@@ -372,7 +372,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PPC_BOOK3S
-	if (arch_irq_disabled_regs(regs)) {
+	if (regs_irqs_disabled(regs)) {
 		unsigned long rst = search_kernel_restart_table(regs->nip);
 		if (rst)
 			regs_set_return_ip(regs, rst);
@@ -661,7 +661,7 @@ void replay_soft_interrupts(void);
 
 static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 {
-	if (!arch_irq_disabled_regs(regs))
+	if (!regs_irqs_disabled(regs))
 		local_irq_enable();
 }
 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e63bfde13e03..666eadb589a5 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -347,7 +347,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
 	unsigned long ret;
 
 	BUG_ON(regs_is_unrecoverable(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 	CT_WARN_ON(ct_state() == CT_STATE_USER);
 
 	/*
@@ -396,7 +396,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 
 	local_irq_disable();
 
-	if (!arch_irq_disabled_regs(regs)) {
+	if (!regs_irqs_disabled(regs)) {
 		/* Returning to a kernel context with local irqs enabled. */
 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
 again:
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index b762677f8737..52d6e10eab22 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -32,7 +32,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!user_mode(regs));
-	BUG_ON(arch_irq_disabled_regs(regs));
+	BUG_ON(regs_irqs_disabled(regs));
 
 #ifdef CONFIG_PPC_PKEY
 	if (mmu_has_feature(MMU_FTR_PKEY)) {
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index cb8e9357383e..629f2a2d4780 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1956,7 +1956,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
 	 * prevent hash faults on user addresses when reading callchains (and
 	 * looks better from an irq tracing perspective).
 	 */
-	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs_irqs_disabled(regs)))
 		performance_monitor_exception_nmi(regs);
 	else
 		performance_monitor_exception_async(regs);
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 764001deb060..c40c69368476 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -376,7 +376,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
 	u64 tb;
 
 	/* should only arrive from kernel, with irqs disabled */
-	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+	WARN_ON_ONCE(!regs_irqs_disabled(regs));
 
 	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
 		return 0;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 8b0081441f85..f7518b7e3055 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2482,7 +2482,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	 * will trigger a PMI after waking up from idle. Since counter values are _not_
 	 * saved/restored in idle path, can lead to below "Can't find PMC" message.
 	 */
-	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+	if (unlikely(!found) && !regs_irqs_disabled(regs))
 		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
 
 	/*
-- 
2.53.0



^ permalink raw reply related

* [PATCH v5 0/8] Generic IRQ entry/exit support for powerpc
From: Mukesh Kumar Chaurasiya (IBM) @ 2026-04-27 12:27 UTC (permalink / raw)
  To: maddy, mpe, npiggin, chleroy, ryabinin.a.a, glider, andreyknvl,
	dvyukov, vincenzo.frascino, oleg, kees, luto, wad, mchauras,
	sshegde, thuth, ruanjinjie, akpm, macro, ldv, charlie, deller,
	kevin.brodsky, ritesh.list, yeoreum.yun, agordeev, segher,
	mark.rutland, ryan.roberts, pmladek, feng.tang, peterz, kan.liang,
	linuxppc-dev, linux-kernel, kasan-dev
  Cc: Mukesh Kumar Chaurasiya (IBM)

Adding support for the generic irq entry/exit handling for PowerPC. The
goal is to bring PowerPC in line with other architectures that already
use the common irq entry infrastructure, reducing duplicated code and
making it easier to share future changes in entry/exit paths.

This has been lightly tested on ppc64le and ppc32.

The performance benchmarks are below:

perf bench syscall usec/op (-ve is improvement)

| Syscall | Base        | New         | change % |
| ------- | ----------- | ----------- | -------- |
| basic   | 0.093543    | 0.093023    | -0.56    |
| execve  | 446.557781  | 450.107172  | +0.79    |
| fork    | 1142.204391 | 1156.377214 | +1.24    |
| getpgid | 0.097666    | 0.092677    | -5.11    |

perf bench syscall ops/sec (+ve is improvement)

| Syscall | Base     | New      | change % |
| ------- | -------- | -------- | -------- |
| basic   | 10690548 | 10750140 | +0.56    |
| execve  | 2239     | 2221     | -0.80    |
| fork    | 875      | 864      | -1.26    |
| getpgid | 10239026 | 10790324 | +5.38    |


IPI latency benchmark (-ve is improvement)

| Metric         | Base (ns)     | New (ns)      | % Change |
| -------------- | ------------- | ------------- | -------- |
| Dry run        | 583136.56     | 584136.35     | 0.17%    |
| Self IPI       | 4167393.42    | 4149093.90    | -0.44%   |
| Normal IPI     | 61769347.82   | 61753728.39   | -0.03%   |
| Broadcast IPI  | 2235584825.02 | 2227521401.45 | -0.36%   |
| Broadcast lock | 2164964433.31 | 2125658641.76 | -1.82%   |


That's very close to the earlier performance with arch-specific handling.

Tests done:
 - Build and boot on ppc64le pseries.
 - Build and boot on ppc64le powernv8 powernv9 powernv10.
 - Build and boot on ppc32.
 - Performance benchmark done with perf syscall basic on pseries.

Changelog:
V4 -> V5:
 - Rebased on latest mainline
V4: https://lore.kernel.org/all/20260123073916.956498-1-mkchauras@linux.ibm.com/

V3 -> V4:
 - Fixed the issue in older gcc version where linker couldn't find
   mem functions
 - Merged IRQ enable and syscall enable into a single patch
 - Cleanup for unused functions done in separate patch.
 - Some other cosmetic changes
V3: https://lore.kernel.org/all/20251229045416.3193779-1-mkchauras@linux.ibm.com/

V2 -> V3:
 - #ifdef CONFIG_GENERIC_IRQ_ENTRY removed from unnecessary places
 - Some functions made __always_inline
 - pt_regs padding changed to match 16byte interrupt stack alignment
 - And some cosmetic changes from reviews from earlier patch
V2: https://lore.kernel.org/all/20251214130245.43664-1-mkchauras@linux.ibm.com/

V1 -> V2:
 - Fix an issue where context tracking was showing warnings for
   incorrect context
V1: https://lore.kernel.org/all/20251102115358.1744304-1-mkchauras@linux.ibm.com/

RFC -> PATCH V1:
 - Fix for ppc32 spitting out kuap lock warnings.
 - ppc64le powernv8 crash fix.
 - Review comments incorporated from previous RFC.
RFC https://lore.kernel.org/all/20250908210235.137300-2-mchauras@linux.ibm.com/

Mukesh Kumar Chaurasiya (8):
  powerpc: rename arch_irq_disabled_regs
  powerpc: Prepare to build with generic entry/exit framework
  powerpc: introduce arch_enter_from_user_mode
  powerpc: Introduce syscall exit arch functions
  powerpc: add exit_flags field in pt_regs
  powerpc: Prepare for IRQ entry exit
  powerpc: Enable GENERIC_ENTRY feature
  powerpc: Remove unused functions

 arch/powerpc/Kconfig                    |   1 +
 arch/powerpc/include/asm/entry-common.h | 533 ++++++++++++++++++++++++
 arch/powerpc/include/asm/hw_irq.h       |   4 +-
 arch/powerpc/include/asm/interrupt.h    | 386 +++--------------
 arch/powerpc/include/asm/kasan.h        |  15 +-
 arch/powerpc/include/asm/ptrace.h       |   6 +-
 arch/powerpc/include/asm/signal.h       |   1 -
 arch/powerpc/include/asm/stacktrace.h   |   6 +
 arch/powerpc/include/asm/syscall.h      |   5 +
 arch/powerpc/include/asm/thread_info.h  |   1 +
 arch/powerpc/include/uapi/asm/ptrace.h  |  14 +-
 arch/powerpc/kernel/interrupt.c         | 254 ++---------
 arch/powerpc/kernel/ptrace/ptrace.c     | 142 +------
 arch/powerpc/kernel/signal.c            |  25 +-
 arch/powerpc/kernel/syscall.c           | 119 +-----
 arch/powerpc/kernel/traps.c             |   2 +-
 arch/powerpc/kernel/watchdog.c          |   2 +-
 arch/powerpc/perf/core-book3s.c         |   2 +-
 18 files changed, 690 insertions(+), 828 deletions(-)
 create mode 100644 arch/powerpc/include/asm/entry-common.h

-- 
2.53.0



^ permalink raw reply

* Re: [PATCH] powerpc/perf: Update check for PERF_SAMPLE_DATA_SRC marked events
From: Mukesh Kumar Chaurasiya @ 2026-04-27 12:24 UTC (permalink / raw)
  To: Shivani Nittor; +Cc: maddy, linuxppc-dev, atrajeev, hbathini, Tejas.Manhas1
In-Reply-To: <20260421150628.96500-1-shivani@linux.ibm.com>

On Tue, Apr 21, 2026 at 08:36:28PM +0530, Shivani Nittor wrote:
> The core-book3s PMU sampling code validates the SIER TYPE field
> when PERF_SAMPLE_DATA_SRC is requested. The SIER TYPE field
> indicates the instruction type and is only valid for
> random sampling (marked events). To handle cases observed where
> SIER TYPE could be zero even for marked events, validation was
> added to drop such samples and increment event->lost_samples.
> 
> However, this validation was applied to all samples,
> including continuous sampling. In continuous sampling mode,
> the PMU does not set the SIER TYPE field, so it remains zero.
> As a result, valid continuous samples were incorrectly
> treated as invalid and dropped. Fixed this by gating the
> SIER TYPE validation with mark_event, so the check runs only
> for marked (random) events. Continuous samples now skip this
> check and are recorded normally in the final data recording path.
> 
> Fixes: 2ffb26afa642 ("arch/powerpc/perf: Check the instruction type before creating sample with perf_mem_data_src")
> Signed-off-by: Shivani Nittor <shivani@linux.ibm.com>
> ---
> 
>  arch/powerpc/perf/core-book3s.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 8b0081441f85..2e6adf5b95c4 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2242,6 +2242,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>  	const u64 last_period = event->hw.last_period;
>  	s64 prev, delta, left;
>  	int record = 0;
> +	int mark_event = regs->dsisr & MMCRA_SAMPLE_ENABLE;
>  
>  	if (event->hw.state & PERF_HES_STOPPED) {
>  		write_pmc(event->hw.idx, 0);
> @@ -2304,9 +2305,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>  	 * In ISA v3.0 and before values "0" and "7" are considered reserved.
>  	 * In ISA v3.1, value "7" has been used to indicate "larx/stcx".
>  	 * Drop the sample if "type" has reserved values for this field with a
> -	 * ISA version check.
> +	 * ISA version check for marked events.
>  	 */
> -	if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
> +	if (mark_event && event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
>  			ppmu->get_mem_data_src) {
>  		val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT;
>  		if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
> -- 
> 2.47.3
> 
> 
LGTM

Reviewed-by: Mukesh Kumar Chaurasiya (IBM) <mkchauras@gmail.com>


^ permalink raw reply

* Re: [mainline][BUG] Observed Workqueue lockups on offline CPUs.
From: Samir M @ 2026-04-27 11:30 UTC (permalink / raw)
  To: Paul E . McKenney
  Cc: Boqun Feng, LKML, Tejun Heo, RCU, linuxppc-dev, Shrikanth Hegde
In-Reply-To: <97a7d011-d573-4754-9e5d-68b562c64089@linux.ibm.com>


On 27/04/26 3:32 pm, Samir M wrote:
> Hi Paul,
>
> I've been testing the latest upstream kernel on a PowerPC system and 
> encountered workqueue lockup issues that I've bisected to commit 
> 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when 
> non-preemptible").
> After booting, I'm seeing workqueue lockup warnings for CPUs 81-96, 
> which are offline on my system. The workqueues remain stuck for over 
> 237 seconds:
>
> [  243.309302][    C0] BUG: workqueue lockup - pool cpus=81 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309311][    C0] BUG: workqueue lockup - pool cpus=82 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309318][    C0] BUG: workqueue lockup - pool cpus=83 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309326][    C0] BUG: workqueue lockup - pool cpus=84 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309333][    C0] BUG: workqueue lockup - pool cpus=85 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309341][    C0] BUG: workqueue lockup - pool cpus=86 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309348][    C0] BUG: workqueue lockup - pool cpus=87 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309355][    C0] BUG: workqueue lockup - pool cpus=88 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309363][    C0] BUG: workqueue lockup - pool cpus=89 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309370][    C0] BUG: workqueue lockup - pool cpus=90 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309377][    C0] BUG: workqueue lockup - pool cpus=91 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309384][    C0] BUG: workqueue lockup - pool cpus=92 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309392][    C0] BUG: workqueue lockup - pool cpus=93 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309399][    C0] BUG: workqueue lockup - pool cpus=94 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309406][    C0] BUG: workqueue lockup - pool cpus=95 node=0 
> flags=0x4 nice=0 stuck for 237s!
> [  243.309413][    C0] BUG: workqueue lockup - pool cpus=96 node=0 
> flags=0x4 nice=0 stuck for 237s!
>
> Git bisect identified this as the first bad commit:
>
> commit 61bbcfb50514a8a94e035a7349697a3790ab4783
> Author: Paul E. McKenney <paulmck@kernel.org>
> Date:   Fri Mar 20 20:29:20 2026 -0700
>
>     srcu: Push srcu_node allocation to GP when non-preemptible
>
>     When the srcutree.convert_to_big and srcutree.big_cpu_lim kernel boot
>     parameters specify initialization-time allocation of the srcu_node
>     tree for statically allocated srcu_struct structures (for example, in
>     DEFINE_SRCU() at build time instead of init_srcu_struct() at 
> runtime),
>     init_srcu_struct_nodes() will attempt to dynamically allocate this 
> tree
>     at the first run-time update-side use of this srcu_struct structure,
>     but while holding a raw spinlock. Because the memory allocator can
>     acquire non-raw spinlocks, this can result in lockdep splats.
>
>     This commit therefore uses the same SRCU_SIZE_ALLOC trick that is 
> used
>     when the first run-time update-side use of this srcu_struct structure
>     happens before srcu_init() is called. The actual allocation then 
> takes
>     place from workqueue context at the ends of upcoming SRCU grace 
> periods.
>
>     [boqun: Adjust the sha1 of the Fixes tag]
>
>     Fixes: 175b45ed343a ("srcu: Use raw spinlocks so call_srcu() can 
> be used under preempt_disable()")
>     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>     Signed-off-by: Boqun Feng <boqun@kernel.org>
>
>  kernel/rcu/srcutree.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> Reverting this commit resolves the issue.
>
> The problem appears to be that the workqueue is attempting to execute 
> on offline CPUs. The commit moves SRCU node allocation to workqueue 
> context to avoid lockdep issues with memory allocation under raw 
> spinlocks, which makes sense. However, it seems the workqueue 
> scheduling doesn't properly account for CPU online/offline state in 
> this code path.
>
> My test environment:
> - Architecture: PowerPC
> - Kernel version: Latest upstream (7.1-rc1)
> - CPUs 81-96 are offline at boot time
>
> I suspect the issue might be related to:
> 1. Workqueue not checking CPU online status before scheduling SRCU 
> allocation work
> 2. Missing CPU hotplug awareness in the new workqueue-based allocation 
> path
> 3. Possible race condition with CPU hotplug events
>
> Would it make sense to use queue_work_on() with explicit online CPU 
> selection, or add CPU hotplug handlers for this workqueue? I'm not 
> deeply familiar with the workqueue internals, so I might be missing 
> something.
> Please let me know if you need any additional details or if you'd like 
> me to test any patches.
>
> If you happen to fix the above issue, then please add below tag.
> Reported-by: Samir M <samir@linux.ibm.com>
>
>
> Thanks,
> Samir

Hi Paul,


I worked on fixing the issue and introduced the changes below. With 
these updates, I no longer observe any workqueue lockup messages for 
offline CPUs.
Could you please review the changes and share your feedback?

The commit 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when
non-preemptible") introduced workqueue lockups on systems with offline
CPUs. The issue occurs because srcu_queue_delayed_work_on() calls
queue_work_on() with sdp->cpu, which may be offline, causing the
workqueue to spin indefinitely on that CPU.

This patch fixes the issue by checking if the target CPU is online
before queuing work on it. If the CPU is offline, we fall back to
using queue_work() which will schedule the work on any available
online CPU.

Fixes: 61bbcfb50514 ("srcu: Push srcu_node allocation to GP when 
non-preemptible")

Signed-off-by: Samir <samir@linux.ibm.com>
---
  kernel/rcu/srcutree.c | 7 ++++++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 0d01cd8c4b4a..55a90dd4a030 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -869,10 +869,15 @@ static void srcu_delay_timer(struct timer_list *t)
  static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
  unsigned long delay)
  {
-       if (!delay) {
+       if (!delay && cpu_online(sdp->cpu)) {
                 queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
                 return;
+       } else if (!delay) {
+               /* CPU is offline, queue on any available CPU */
+               queue_work(rcu_gp_wq, &sdp->work);
+               return;
+       }

         timer_reduce(&sdp->delay_work, jiffies + delay);
  }
--


Thanks,
Samir


^ permalink raw reply related

* Re: [PATCH 3/5] ASoC: fsl-asoc-card: add channel and rate constraints for CS42888
From: Shengjiu Wang @ 2026-04-27 10:47 UTC (permalink / raw)
  To: Mark Brown
  Cc: Shengjiu Wang, Xiubo.Lee, festevam, nicoleotsuka, lgirdwood,
	perex, tiwai, linux-sound, linuxppc-dev, linux-kernel
In-Reply-To: <c925e9e6-934d-4b07-aaef-a7f737c687a2@sirena.org.uk>

On Sat, Apr 25, 2026 at 12:09 AM Mark Brown <broonie@kernel.org> wrote:
>
> On Fri, Apr 24, 2026 at 06:38:04PM +0800, Shengjiu Wang wrote:
>
> > The CS42888 codec has 4 I2S lanes with 2 channels per lane. Using odd
> > channel counts (3, 5, 7) causes data misalignment in the I2S frame,
> > resulting in incorrect channel mapping. Only mono and even channel
> > counts (1, 2, 4, 6, 8) work correctly.
>
> > +static int fsl_asoc_card_startup(struct snd_pcm_substream *substream)
> > +{
> > +     struct snd_soc_pcm_runtime *rtd = substream->private_data;
> > +     struct fsl_asoc_card_priv *priv = snd_soc_card_get_drvdata(rtd->card);
> > +     struct snd_pcm_runtime *runtime = substream->runtime;
> > +     static struct snd_pcm_hw_constraint_list constraint_rates;
> > +     static struct snd_pcm_hw_constraint_list constraint_channels;
>
> This makes the constraints global for all substreams, given that the
> Freescale SoCs tend to have multiple DAIs and have things like direct
> PDM inputs I'd expect it'd be relatively common to have disjoint
> constraints.

Thanks for the comments.  I will refine it.

Best regards
Shengjiu Wang


^ permalink raw reply

* Re: [PATCH v2 1/2] ASoC: dt-bindings: fsl-sai: Document RX/TX BCLK swap support
From: Mark Brown @ 2026-04-26 23:31 UTC (permalink / raw)
  To: linux-sound, Marek Vasut
  Cc: Conor Dooley, Fabio Estevam, Jaroslav Kysela, Krzysztof Kozlowski,
	Liam Girdwood, Nicolin Chen, Rob Herring, Shengjiu Wang,
	Takashi Iwai, Xiubo Li, devicetree, linux-kernel, linuxppc-dev
In-Reply-To: <20260404183547.46509-1-marex@nabladev.com>

On Sat, 04 Apr 2026 20:35:00 +0200, Marek Vasut wrote:
> ASoC: dt-bindings: fsl-sai: Document RX/TX BCLK swap support

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-7.2

Thanks!

[1/2] ASoC: dt-bindings: fsl-sai: Document RX/TX BCLK swap support
      https://git.kernel.org/broonie/sound/c/9897a9776681
[2/2] ASoC: fsl_sai: Add RX/TX BCLK swap support
      https://git.kernel.org/broonie/sound/c/dc06cf4268a4

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark



^ permalink raw reply

* Re: [PATCH v7 4/6] mm/sparse-vmemmap: Fix DAX vmemmap accounting with optimization
From: David Hildenbrand (Arm) @ 2026-04-27 10:17 UTC (permalink / raw)
  To: Muchun Song, Andrew Morton, Muchun Song, Oscar Salvador,
	Michael Ellerman, Madhavan Srinivasan
  Cc: Lorenzo Stoakes, Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Nicholas Piggin,
	Christophe Leroy, aneesh.kumar, joao.m.martins, linux-mm,
	linuxppc-dev, linux-kernel, stable
In-Reply-To: <20260426092640.375967-5-songmuchun@bytedance.com>

On 4/26/26 11:26, Muchun Song wrote:
> When vmemmap optimization is enabled for DAX, the nr_memmap_pages
> counter in /proc/vmstat is incorrect. The current code always accounts
> for the full, non-optimized vmemmap size, but vmemmap optimization
> reduces the actual number of vmemmap pages by reusing tail pages. This
> causes the system to overcount vmemmap usage, leading to inaccurate
> page statistics in /proc/vmstat.
> 
> Fix this by introducing section_nr_vmemmap_pages(), which returns the exact
> vmemmap page count for a given pfn range based on whether optimization
> is in effect.
> 
> Fixes: 15995a352474 ("mm: report per-page metadata information")
> Cc: stable@vger.kernel.org
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> Acked-by: Oscar Salvador <osalvador@suse.de>
> ---
> v6 -> v7:
> - Refine the alignment assertions in section_nr_vmemmap_pages().
> ---
>  mm/sparse-vmemmap.c | 34 ++++++++++++++++++++++++++++++----
>  1 file changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index 3340f6d30b01..01f448607bad 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -652,6 +652,31 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
>  	}
>  }
>  
> +static int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages,
> +		struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
> +{
> +	const unsigned int order = pgmap ? pgmap->vmemmap_shift : 0;
> +	const unsigned long pages_per_compound = 1UL << order;
> +
> +	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SUBSECTION));
> +
> +	if (!vmemmap_can_optimize(altmap, pgmap))
> +		return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE);
> +
> +	if (order < PFN_SECTION_SHIFT) {
> +		VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound));
> +		return VMEMMAP_RESERVE_NR * nr_pages / pages_per_compound;
> +	}
> +
> +	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION));
> +	VM_WARN_ON_ONCE(nr_pages > PAGES_PER_SECTION);

I would just have done that at the very top, as this check applies to all cases.

Acked-by: David Hildenbrand (Arm) <david@kernel.org>

-- 
Cheers,

David


^ permalink raw reply

* [mainline][BUG] Observed Workqueue lockups on offline CPUs.
From: Samir M @ 2026-04-27 10:02 UTC (permalink / raw)
  To: Paul E . McKenney
  Cc: Boqun Feng, LKML, Tejun Heo, RCU, linuxppc-dev, Shrikanth Hegde

Hi Paul,

I've been testing the latest upstream kernel on a PowerPC system and 
encountered workqueue lockup issues that I've bisected to commit 
61bbcfb50514 ("srcu: Push srcu_node allocation to GP when non-preemptible").
After booting, I'm seeing workqueue lockup warnings for CPUs 81-96, 
which are offline on my system. The workqueues remain stuck for over 237 
seconds:

[  243.309302][    C0] BUG: workqueue lockup - pool cpus=81 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309311][    C0] BUG: workqueue lockup - pool cpus=82 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309318][    C0] BUG: workqueue lockup - pool cpus=83 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309326][    C0] BUG: workqueue lockup - pool cpus=84 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309333][    C0] BUG: workqueue lockup - pool cpus=85 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309341][    C0] BUG: workqueue lockup - pool cpus=86 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309348][    C0] BUG: workqueue lockup - pool cpus=87 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309355][    C0] BUG: workqueue lockup - pool cpus=88 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309363][    C0] BUG: workqueue lockup - pool cpus=89 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309370][    C0] BUG: workqueue lockup - pool cpus=90 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309377][    C0] BUG: workqueue lockup - pool cpus=91 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309384][    C0] BUG: workqueue lockup - pool cpus=92 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309392][    C0] BUG: workqueue lockup - pool cpus=93 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309399][    C0] BUG: workqueue lockup - pool cpus=94 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309406][    C0] BUG: workqueue lockup - pool cpus=95 node=0 
flags=0x4 nice=0 stuck for 237s!
[  243.309413][    C0] BUG: workqueue lockup - pool cpus=96 node=0 
flags=0x4 nice=0 stuck for 237s!

Git bisect identified this as the first bad commit:

commit 61bbcfb50514a8a94e035a7349697a3790ab4783
Author: Paul E. McKenney <paulmck@kernel.org>
Date:   Fri Mar 20 20:29:20 2026 -0700

     srcu: Push srcu_node allocation to GP when non-preemptible

     When the srcutree.convert_to_big and srcutree.big_cpu_lim kernel boot
     parameters specify initialization-time allocation of the srcu_node
     tree for statically allocated srcu_struct structures (for example, in
     DEFINE_SRCU() at build time instead of init_srcu_struct() at runtime),
     init_srcu_struct_nodes() will attempt to dynamically allocate this tree
     at the first run-time update-side use of this srcu_struct structure,
     but while holding a raw spinlock. Because the memory allocator can
     acquire non-raw spinlocks, this can result in lockdep splats.

     This commit therefore uses the same SRCU_SIZE_ALLOC trick that is used
     when the first run-time update-side use of this srcu_struct structure
     happens before srcu_init() is called. The actual allocation then takes
     place from workqueue context at the ends of upcoming SRCU grace periods.

     [boqun: Adjust the sha1 of the Fixes tag]

     Fixes: 175b45ed343a ("srcu: Use raw spinlocks so call_srcu() can be used under preempt_disable()")
     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
     Signed-off-by: Boqun Feng <boqun@kernel.org>

  kernel/rcu/srcutree.c | 7 +++++--
  1 file changed, 5 insertions(+), 2 deletions(-)

Reverting this commit resolves the issue.

The problem appears to be that the workqueue is attempting to execute on 
offline CPUs. The commit moves SRCU node allocation to workqueue context 
to avoid lockdep issues with memory allocation under raw spinlocks, 
which makes sense. However, it seems the workqueue scheduling doesn't 
properly account for CPU online/offline state in this code path.

My test environment:
- Architecture: PowerPC
- Kernel version: Latest upstream (7.1-rc1)
- CPUs 81-96 are offline at boot time

I suspect the issue might be related to:
1. Workqueue not checking CPU online status before scheduling SRCU 
allocation work
2. Missing CPU hotplug awareness in the new workqueue-based allocation path
3. Possible race condition with CPU hotplug events

Would it make sense to use queue_work_on() with explicit online CPU 
selection, or add CPU hotplug handlers for this workqueue? I'm not 
deeply familiar with the workqueue internals, so I might be missing 
something.
Please let me know if you need any additional details or if you'd like 
me to test any patches.

If you fix the above issue, please add the tag below.
Reported-by: Samir M <samir@linux.ibm.com>


Thanks,
Samir


^ permalink raw reply

* Re: [PATCH v2 6/9] soc: imx8m: don't access of_root directly
From: Francesco Dolcini @ 2026-04-27  9:31 UTC (permalink / raw)
  To: Francesco Dolcini, peng.fan, Alexander Stein, Bartosz Golaszewski
  Cc: Rob Herring, Saravana Kannan, Greg Kroah-Hartman,
	Rafael J. Wysocki, Danilo Krummrich, Christophe Leroy (CS GROUP),
	Shawn Guo, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	Geert Uytterhoeven, Magnus Damm, Chen-Yu Tsai, Jernej Skrabec,
	Samuel Holland, Frank Li, linux-arm-kernel, devicetree,
	linux-kernel, linuxppc-dev, imx, linux-renesas-soc, linux-sunxi,
	driver-core, regressions
In-Reply-To: <20260427064704.GA17710@francesco-nb>

+Peng

Hello all,

On Mon, Apr 27, 2026 at 08:47:04AM +0200, Francesco Dolcini wrote:
> On Tue, Mar 24, 2026 at 11:24:09AM +0100, Alexander Stein wrote:
> > Hi,
> > 
> > Am Montag, 23. Februar 2026, 14:37:21 CET schrieb Bartosz Golaszewski:
> > > Don't access of_root directly as it reduces the build test coverage for
> > > this driver with COMPILE_TEST=y and OF=n. Use existing helper functions
> > > to retrieve the relevant information.
> > > 
> > > Suggested-by: Rob Herring <robh@kernel.org>
> > > Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
> > 
> > today I noticed the following warning running next-20260323:
> > > caam 30900000.crypto: No clock data provided for i.MX SoC
> > 
> > This happens when there is no matching against the soc_id.
> > 
> > Checking the source it turns out this patch is the cause that the SoC info
> > does not provide soc_id anymore.
> > next-20260323:
> > > $ grep . /sys/devices/soc0/*
> > > /sys/devices/soc0/family:Freescale i.MX
> > > /sys/devices/soc0/machine:TQ-Systems i.MX8MPlus TQMa8MPxL on MBa8MP-RAS314
> > > grep: /sys/devices/soc0/power: Is a directory
> > > /sys/devices/soc0/revision:unknown
> > > /sys/devices/soc0/serial_number:0000000000000000
> > > grep: /sys/devices/soc0/subsystem: Is a directory
> > 
> > reverting this patch (2524b293a59e586afd06358d0b191ab57208a920):
> > > $ grep . /sys/devices/soc0/*
> > > /sys/devices/soc0/family:Freescale i.MX
> > > /sys/devices/soc0/machine:TQ-Systems i.MX8MPlus TQMa8MPxL on MBa8MP-RAS314
> > > grep: /sys/devices/soc0/power: Is a directory
> > > /sys/devices/soc0/revision:1.1
> > > /sys/devices/soc0/serial_number:469677A693A4B8CE131D180033E44903
> > > /sys/devices/soc0/soc_id:i.MX8MP
> > > grep: /sys/devices/soc0/subsystem: Is a directory
> > 
> > soc_id is restored. Now that I write these lines I noticed that
> > serial_number also contained empty value which is restored with the revert.
> 
> Any update on this? I would say this is a regression in 7.1-rc1.
> 
> I noticed the same issue, and CAAM is not working.
> 
> [    0.000000] Linux version 7.1.0-rc1-0.0.0-devel (oe-user@oe-host) (aarch64-tdx-linux-gcc (GCC) 15.2.0, GNU ld (GNU Binutils) 2.46) #1 SMP PREEMPT Sun Apr 26 21:19:00 UTC 2026
> ...
> [   10.611139] caam 30900000.crypto: No clock data provided for i.MX SoC
> [   10.611211] caam 30900000.crypto: probe with driver caam failed with error -22

I guess this is the fix

https://lore.kernel.org/all/20260427-soc-imx8m-fix-v1-1-1fe5b43d8090@nxp.com/



^ permalink raw reply

* Re: [PATCH v2 6/9] soc: imx8m: don't access of_root directly
From: Francesco Dolcini @ 2026-04-27  6:47 UTC (permalink / raw)
  To: Alexander Stein, Bartosz Golaszewski
  Cc: Rob Herring, Saravana Kannan, Greg Kroah-Hartman,
	Rafael J. Wysocki, Danilo Krummrich, Christophe Leroy (CS GROUP),
	Shawn Guo, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	Geert Uytterhoeven, Magnus Damm, Chen-Yu Tsai, Jernej Skrabec,
	Samuel Holland, Frank Li, linux-arm-kernel, devicetree,
	linux-kernel, linuxppc-dev, imx, linux-renesas-soc, linux-sunxi,
	driver-core, regressions
In-Reply-To: <6593091.DvuYhMxLoT@steina-w>

Hello Alexander, Bartosz

On Tue, Mar 24, 2026 at 11:24:09AM +0100, Alexander Stein wrote:
> Hi,
> 
> Am Montag, 23. Februar 2026, 14:37:21 CET schrieb Bartosz Golaszewski:
> > Don't access of_root directly as it reduces the build test coverage for
> > this driver with COMPILE_TEST=y and OF=n. Use existing helper functions
> > to retrieve the relevant information.
> > 
> > Suggested-by: Rob Herring <robh@kernel.org>
> > Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>
> 
> today I noticed the following warning running next-20260323:
> > caam 30900000.crypto: No clock data provided for i.MX SoC
> 
> This happens when there is no matching against the soc_id.
> 
> Checking the source it turns out this patch is the cause that the SoC info
> does not provide soc_id anymore.
> next-20260323:
> > $ grep . /sys/devices/soc0/*
> > /sys/devices/soc0/family:Freescale i.MX
> > /sys/devices/soc0/machine:TQ-Systems i.MX8MPlus TQMa8MPxL on MBa8MP-RAS314
> > grep: /sys/devices/soc0/power: Is a directory
> > /sys/devices/soc0/revision:unknown
> > /sys/devices/soc0/serial_number:0000000000000000
> > grep: /sys/devices/soc0/subsystem: Is a directory
> 
> reverting this patch (2524b293a59e586afd06358d0b191ab57208a920):
> > $ grep . /sys/devices/soc0/*
> > /sys/devices/soc0/family:Freescale i.MX
> > /sys/devices/soc0/machine:TQ-Systems i.MX8MPlus TQMa8MPxL on MBa8MP-RAS314
> > grep: /sys/devices/soc0/power: Is a directory
> > /sys/devices/soc0/revision:1.1
> > /sys/devices/soc0/serial_number:469677A693A4B8CE131D180033E44903
> > /sys/devices/soc0/soc_id:i.MX8MP
> > grep: /sys/devices/soc0/subsystem: Is a directory
> 
> soc_id is restored. Now that I write these lines I noticed that
> serial_number also contained empty value which is restored with the revert.

Any update on this? I would say this is a regression in 7.1-rc1.

I noticed the same issue, and CAAM is not working.

[    0.000000] Linux version 7.1.0-rc1-0.0.0-devel (oe-user@oe-host) (aarch64-tdx-linux-gcc (GCC) 15.2.0, GNU ld (GNU Binutils) 2.46) #1 SMP PREEMPT Sun Apr 26 21:19:00 UTC 2026
...
[   10.611139] caam 30900000.crypto: No clock data provided for i.MX SoC
[   10.611211] caam 30900000.crypto: probe with driver caam failed with error -22

Francesco



^ permalink raw reply

* Re: [PATCH v2 0/3] KVM: Fix and clean up kvm_vcpu_map[_readonly]() usages
From: Peter Fang @ 2026-04-27  8:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Paolo Bonzini, Sean Christopherson, Madhavan Srinivasan,
	Nicholas Piggin, Griffoul, Fred, Yosry Ahmed, Ritesh Harjani,
	Michael Ellerman, Christophe Leroy (CS GROUP), Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	kvm, linuxppc-dev, linux-kernel
In-Reply-To: <c2fad21764d2561469151f5dd5d35833ae8f76e6.camel@infradead.org>

On Fri, Apr 24, 2026 at 11:27:03AM +0100, David Woodhouse wrote:
> 
> Fred is already removing all the usage of kvm_vcpu_map() in nested VMX¹
> and nested SVM probably wants the same treatment. And the PowerPC one
> looks like it could just as easily operate on the userspace address? 
> 
> Could we just kill kvm_vcpu_map() completely?

Thanks David!

I think I'd need at least input from the maintainers on this but just by
code inspection, the kvm_vcpu_map() usage in sev.c seems a bit tricky.
Unmapping doesn't happen until right before switching to the guest, so
this might fall into the "keep the mapping around for a longer time"
category [1].

[1] https://lore.kernel.org/kvm/20211115165030.7422-8-dwmw2@infradead.org/

> 
> 
> 
> ¹ https://lore.kernel.org/kvm/20260102142429.896101-1-griffoul@gmail.com/




^ permalink raw reply

* Re: [PATCH] tools/perf/sched: Update process names of processes in zombie state for both -s and -S options
From: Venkat @ 2026-04-27  6:43 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, adrian.hunter, mpetlan, tmricht, maddy, irogers,
	namhyung, linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, Shivani.Nittor
In-Reply-To: <20260426093930.47809-1-atrajeev@linux.ibm.com>



> On 26 Apr 2026, at 3:09 PM, Athira Rajeev <atrajeev@linux.ibm.com> wrote:
> 
> In redhat perftool testsuite, observed fail for this test:
>    -- [ FAIL ] -- perf_sched :: test_timehist :: --with-summary (output regexp parsing)
> 
> This led to analysis of "perf sched timehist" summary options.
> 
>  # perf sched record -a -o ./perf.data -- sleep 0.1
>   This will record using perf sched record
> 
> perf sched timehist has two options "-s" and "-S"
>  # perf sched -i ./perf.data timehist -S
> -S : Captures summary also at the end
> 
>  # perf sched -i ./perf.data timehist -s
> -s : Captures only summary
> 
> The test saves -s result which has only summary and compares with
> summary which comes at the end from -S . Since there is a difference
> in these two, test fails.
> 
> Checking the behaviour change in -S and -s results, difference is:
> 
>                  rcu_sched[16]       2          4        0.013      0.001       0.003       0.006   33.23       0
>               migration/11[73]       2          1        0.006      0.006       0.006       0.006    0.00       0
>                migration/3[33]       2          1        0.006      0.006       0.006       0.006    0.00       0
> -               :216753[216753]      -1          1        0.041      0.041       0.041       0.041    0.00       0
> +                 sleep[216753]      -1          1        0.041      0.041       0.041       0.041    0.00       0
>                migration/8[58]       2          1        0.005      0.005       0.005       0.005    0.00       0
>            NetworkManager[811]       1          2        0.089      0.028       0.044       0.060   36.06       0
>               migration/13[83]       2          1        0.005      0.005       0.005       0.005    0.00       0
> 
> Here 216753 is pid for sleep which is a zombie process. This is
> happening in latest kernel due to an update in "-S" result.
> In -S, the process name appears in the results "sleep[216753]",
> whereas in -s, only the pid is present in the summary result
> ":216753[216753]".
> 
> After commit 39f473f6d0b2 ("perf sched timehist: decode process names
> of processes in zombie state")
> for -S option, if process name is using pid, it uses different way to
> set it. So that we get the process name and not just Pid.
> 
> This change went in only for timehist_print_sample() function.
> Add this improvement in generic place so that even -s option (which
> captures summary) also will have meaningful information.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
> ---

Tested this patch, by applying on top of mainline kernel.

Without fix:

# ./perf sched -i perf.data timehist -s
Samples of sched_switch event do not have callchains.

Runtime summary
                          comm  parent   sched-in     run-time    min-run     avg-run     max-run  stddev  migrations
                                          (count)       (msec)     (msec)      (msec)      (msec)       %
---------------------------------------------------------------------------------------------------------------------
              migration/14[88]       2          1        0.006      0.006       0.006       0.006    0.00       0
               migration/6[48]       2          1        0.008      0.008       0.008       0.008    0.00       0
               rcu_preempt[16]       2          7        0.042      0.003       0.006       0.014   23.80       0
              migration/11[73]       2          1        0.010      0.010       0.010       0.010    0.00       0
               migration/3[33]       2          1        0.009      0.009       0.009       0.009    0.00       0
          sshd-session[384033]   384025          4        0.104      0.023       0.026       0.032    8.14       0
       kworker/u68:3-e[316378]       2          1        0.003      0.003       0.003       0.003    0.00       0
               kcompactd3[114]       2          1        0.003      0.003       0.003       0.003    0.00       0
               migration/8[58]       2          1        0.012      0.012       0.012       0.012    0.00       0
              migration/13[83]       2          1        0.008      0.008       0.008       0.008    0.00       0
               migration/5[43]       2          1        0.007      0.007       0.007       0.007    0.00       0
               migration/0[19]       2          1        0.010      0.010       0.010       0.010    0.00       0
              migration/10[68]       2          1        0.007      0.007       0.007       0.007    0.00       0
               ksoftirqd/5[44]       2          1        0.184      0.184       0.184       0.184    0.00       0
               migration/2[28]       2          1        0.008      0.008       0.008       0.008    0.00       0
       kworker/4:0-mm_[383676]       2          1        0.013      0.013       0.013       0.013    0.00       0
                  perf[390116]   384034         17        0.065      0.000       0.003       0.065   100.00       0
             xfsaild/dm-2[861]       2          1        0.008      0.008       0.008       0.008    0.00       0
              migration/15[93]       2          1        0.009      0.009       0.009       0.009    0.00       0
               :390117[390117]      -1          1        0.124      0.124       0.124       0.124    0.00       0
       kworker/9:4-mm_[390101]       2          1        0.069      0.069       0.069       0.069    0.00       0
               migration/7[53]       2          1        0.008      0.008       0.008       0.008    0.00       0
       kworker/5:0-xfs[390077]       2          1        0.015      0.015       0.015       0.015    0.00       0
              migration/12[78]       2          1        0.007      0.007       0.007       0.007    0.00       0
               migration/4[38]       2          1        0.005      0.005       0.005       0.005    0.00       0
           NetworkManager[903]       1          4        0.275      0.032       0.068       0.105   29.66       0
               migration/1[23]       2          1        0.010      0.010       0.010       0.010    0.00       0
               migration/9[63]       2          1        0.011      0.011       0.011       0.011    0.00       0



With fix:

# ./perf sched -i perf.data timehist -s
Samples of sched_switch event do not have callchains.

Runtime summary
                          comm  parent   sched-in     run-time    min-run     avg-run     max-run  stddev  migrations
                                          (count)       (msec)     (msec)      (msec)      (msec)       %
---------------------------------------------------------------------------------------------------------------------
              migration/14[88]       2          1        0.006      0.006       0.006       0.006    0.00       0
               migration/6[48]       2          1        0.008      0.008       0.008       0.008    0.00       0
               rcu_preempt[16]       2          7        0.042      0.003       0.006       0.014   23.80       0
              migration/11[73]       2          1        0.010      0.010       0.010       0.010    0.00       0
               migration/3[33]       2          1        0.009      0.009       0.009       0.009    0.00       0
          sshd-session[384033]   384025          4        0.104      0.023       0.026       0.032    8.14       0
       kworker/u68:3-e[316378]       2          1        0.003      0.003       0.003       0.003    0.00       0
               kcompactd3[114]       2          1        0.003      0.003       0.003       0.003    0.00       0
               migration/8[58]       2          1        0.012      0.012       0.012       0.012    0.00       0
              migration/13[83]       2          1        0.008      0.008       0.008       0.008    0.00       0
               migration/5[43]       2          1        0.007      0.007       0.007       0.007    0.00       0
               migration/0[19]       2          1        0.010      0.010       0.010       0.010    0.00       0
              migration/10[68]       2          1        0.007      0.007       0.007       0.007    0.00       0
               ksoftirqd/5[44]       2          1        0.184      0.184       0.184       0.184    0.00       0
               migration/2[28]       2          1        0.008      0.008       0.008       0.008    0.00       0
       kworker/4:0-mm_[383676]       2          1        0.013      0.013       0.013       0.013    0.00       0
                  perf[390116]   384034         17        0.065      0.000       0.003       0.065   100.00       0
             xfsaild/dm-2[861]       2          1        0.008      0.008       0.008       0.008    0.00       0
              migration/15[93]       2          1        0.009      0.009       0.009       0.009    0.00       0
                 sleep[390117]      -1          1        0.124      0.124       0.124       0.124    0.00       0
       kworker/9:4-mm_[390101]       2          1        0.069      0.069       0.069       0.069    0.00       0
               migration/7[53]       2          1        0.008      0.008       0.008       0.008    0.00       0
       kworker/5:0-xfs[390077]       2          1        0.015      0.015       0.015       0.015    0.00       0
              migration/12[78]       2          1        0.007      0.007       0.007       0.007    0.00       0
               migration/4[38]       2          1        0.005      0.005       0.005       0.005    0.00       0
           NetworkManager[903]       1          4        0.275      0.032       0.068       0.105   29.66       0
               migration/1[23]       2          1        0.010      0.010       0.010       0.010    0.00       0
               migration/9[63]       2          1        0.011      0.011       0.011       0.011    0.00       0

Please add the tag below.

Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>

Regards,
Venkat.
> tools/perf/builtin-sched.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 555247568e7a..ee1b89a6af50 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -2197,11 +2197,6 @@ static void timehist_print_sample(struct perf_sched *sched,
> printf(" ");
> }
> 
> - if (!thread__comm_set(thread)) {
> -                const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
> -                thread__set_comm(thread, prev_comm, sample->time);
> -        }
> -
> printf(" %-*s ", comm_width, timehist_get_commstr(thread));
> 
> if (sched->show_prio)
> @@ -2890,6 +2885,15 @@ static int timehist_sched_change_event(const struct perf_tool *tool,
> itr->last_thread = NULL;
> }
> 
> + /*
> + * If the process name is not set for the thread, use "prev_comm"
> + * to set it. Otherwise the sched summary will have just pid information
> + */
> + if (!thread__comm_set(thread)) {
> + const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
> + thread__set_comm(thread, prev_comm, sample->time);
> + }
> +
> if (!sched->summary_only)
> timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
> }
> -- 
> 2.47.3
> 



^ permalink raw reply

* Re: [PATCH V3] tools/perf: Fix the check for parameterized field in event term
From: Namhyung Kim @ 2026-04-27  6:01 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, adrian.hunter, mpetlan, tmricht, maddy, irogers,
	linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, Shivani.Nittor
In-Reply-To: <20260426080056.16558-1-atrajeev@linux.ibm.com>

On Sun, Apr 26, 2026 at 01:30:56PM +0530, Athira Rajeev wrote:
> The format_alias() function in util/pmu.c has a check to
> detect whether the event has parameterized field ( =? ).
> The string alias->terms contains the event and if the event
> has user configurable parameter, there will be presence of
> sub string "=?" in the alias->terms.
> 
> Snippet of code:
> 
>  /* Paramemterized events have the parameters shown. */
>        if (strstr(alias->terms, "=?")) {
>                /* No parameters. */
>                snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name);
> 
> if "strstr" contains the substring, it returns a pointer
> and hence enters the above check which is not the expected
> check. And hence "perf list" doesn't have the parameterized
> fields in the result.
> 
> Fix this check to use:
> 
> if (!strstr(alias->terms, "=?")) {
> 
> With this change, perf list shows the events correctly with
> the strings showing parameters.
> 
> Other changes in this patch:
> - Replace snprintf with scnprintf in buffer offset calculations to
> ensure the 'used' count will not exceed the "len"

Please split this change into a separate commit.  Ideally each commit
would do one thing at a time.

Thanks,
Namhyung


^ permalink raw reply

* Re: [PATCH] tools/perf/sched: Update process names of processes in zombie state for both -s and -S options
From: Namhyung Kim @ 2026-04-27  5:56 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, adrian.hunter, mpetlan, tmricht, maddy, irogers,
	linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, Shivani.Nittor
In-Reply-To: <20260426093930.47809-1-atrajeev@linux.ibm.com>

On Sun, Apr 26, 2026 at 03:09:30PM +0530, Athira Rajeev wrote:
> In redhat perftool testsuite, observed fail for this test:
>     -- [ FAIL ] -- perf_sched :: test_timehist :: --with-summary (output regexp parsing)
> 
> This led to analysis of "perf sched timehist" summary options.
> 
>   # perf sched record -a -o ./perf.data -- sleep 0.1
>    This will record using perf sched record
> 
> perf sched timehist has two options "-s" and "-S"
>   # perf sched -i ./perf.data timehist -S
>  -S : Captures summary also at the end
> 
>   # perf sched -i ./perf.data timehist -s
>  -s : Captures only summary
> 
> The test saves -s result which has only summary and compares with
> summary which comes at the end from -S . Since there is a difference
> in these two, test fails.
> 
> Checking the behaviour change in -S and -s results, difference is:
> 
>                   rcu_sched[16]       2          4        0.013      0.001       0.003       0.006   33.23       0
>                migration/11[73]       2          1        0.006      0.006       0.006       0.006    0.00       0
>                 migration/3[33]       2          1        0.006      0.006       0.006       0.006    0.00       0
>  -               :216753[216753]      -1          1        0.041      0.041       0.041       0.041    0.00       0
>  +                 sleep[216753]      -1          1        0.041      0.041       0.041       0.041    0.00       0
>                 migration/8[58]       2          1        0.005      0.005       0.005       0.005    0.00       0
>             NetworkManager[811]       1          2        0.089      0.028       0.044       0.060   36.06       0
>                migration/13[83]       2          1        0.005      0.005       0.005       0.005    0.00       0
> 
> Here 216753 is pid for sleep which is a zombie process. This is
> happening in latest kernel due to an update in "-S" result.
> In -S, the process name appears in the results "sleep[216753]",
> whereas in -s, only the pid is present in the summary result
> ":216753[216753]".
> 
> After commit 39f473f6d0b2 ("perf sched timehist: decode process names
> of processes in zombie state")
> for -S option, if process name is using pid, it uses different way to
> set it. So that we get the process name and not just Pid.
> 
> This change went in only for timehist_print_sample() function.
> Add this improvement in generic place so that even -s option (which
> captures summary) also will have meaningful information.
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

> ---
>  tools/perf/builtin-sched.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 555247568e7a..ee1b89a6af50 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -2197,11 +2197,6 @@ static void timehist_print_sample(struct perf_sched *sched,
>  		printf(" ");
>  	}
>  
> -	if (!thread__comm_set(thread)) {
> -                const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
> -                thread__set_comm(thread, prev_comm, sample->time);
> -        }
> -
>  	printf(" %-*s ", comm_width, timehist_get_commstr(thread));
>  
>  	if (sched->show_prio)
> @@ -2890,6 +2885,15 @@ static int timehist_sched_change_event(const struct perf_tool *tool,
>  			itr->last_thread = NULL;
>  		}
>  
> +		/*
> +		 * If the process name is not set for the thread, use "prev_comm"
> +		 * to set it. Otherwise the sched summary will have just pid information
> +		 */
> +		if (!thread__comm_set(thread)) {
> +			const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
> +			thread__set_comm(thread, prev_comm, sample->time);
> +		}
> +
>  		if (!sched->summary_only)
>  			timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
>  	}
> -- 
> 2.47.3
> 


^ permalink raw reply

* Re: [PATCH] powerpc tools perf: Initialize error code in auxtrace_record_init function
From: Namhyung Kim @ 2026-04-27  5:43 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, adrian.hunter, mpetlan, tmricht, maddy, irogers,
	linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, Shivani.Nittor
In-Reply-To: <20260426064301.90614-1-atrajeev@linux.ibm.com>

Hello,

On Sun, Apr 26, 2026 at 12:13:01PM +0530, Athira Rajeev wrote:
> perf trace record fails in some cases on powerpc
> 
>  # ./perf test "perf trace record and replay"
>  128: perf trace record and replay                                    : FAILED!
> 
>  # ./perf trace record sleep 1
>  # echo $?
>    32
> 
> This is happening because of non-zero err value from
> auxtrace_record__init() function.
> 
>  static int record__auxtrace_init(struct record *rec)
>  {
>         int err;
> 
>         if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
>             && record__threads_enabled(rec)) {
>                 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
>                 return -EINVAL;
>         }
> 
>         if (!rec->itr) {
>                 rec->itr = auxtrace_record__init(rec->evlist, &err);
>                 if (err)
>                         return err;
>         }
> 
> Here "int err" is not initialised. The code expects "err" to be set
> from auxtrace_record__init() function.
> 
> Update auxtrace_record__init() in arch/powerpc/util/auxtrace.c to clear
> err value in the beginning.
> 
> - Clear err value in beginning of function. Any fail later will
> set appropriate return code to err.
> - Even if we haven't found any event for auxtrace, perf record
> should continue for other events. NULL return
> will indicate that there is no auxtrace record initialized.
> - Not having "err" set here will affect monitoring of other events
> also because perf record will fail seeing random value in err.
> 
> With the fix,
> 
>  # ./perf trace record sleep 1
>  [ perf record: Woken up 2 times to write data ]
>  [ perf record: Captured and wrote 0.033 MB perf.data (228 samples) ]

Sounds like you need a Fixes tag.

Thanks,
Namhyung

> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
> ---
>  tools/perf/arch/powerpc/util/auxtrace.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/tools/perf/arch/powerpc/util/auxtrace.c b/tools/perf/arch/powerpc/util/auxtrace.c
> index e39deff6c857..fe1ea4e222f3 100644
> --- a/tools/perf/arch/powerpc/util/auxtrace.c
> +++ b/tools/perf/arch/powerpc/util/auxtrace.c
> @@ -71,6 +71,19 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
>  	struct evsel *pos;
>  	int found = 0;
>  
> +	/*
> +	 * Assign err value to zero here. Any fail later
> +	 * will set appropriate return code to err.
> +	 * Even if we haven't found any event for auxtrace, perf
> +	 * record should continue for other events. NULL return
> +	 * will indicate that there is no auxtrace record initialized.
> +	 *
> +	 * Not having "err" set here will affect monitoring
> +	 * of other events also because perf record will fail seeing
> +	 * random value in err.
> +	 */
> +	*err = 0;
> +
>  	evlist__for_each_entry(evlist, pos) {
>  		if (strstarts(pos->name, "vpa_dtl")) {
>  			found = 1;
> -- 
> 2.47.3
> 


^ permalink raw reply

* Re: [PATCH] powerpc tools perf: Initialize error code in auxtrace_record_init function
From: Adrian Hunter @ 2026-04-27  5:35 UTC (permalink / raw)
  To: Athira Rajeev, acme, jolsa, mpetlan, tmricht, maddy, irogers,
	namhyung
  Cc: linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, Shivani.Nittor
In-Reply-To: <20260426064301.90614-1-atrajeev@linux.ibm.com>

On 26/04/2026 09:43, Athira Rajeev wrote:
> perf trace record fails in some cases on powerpc
> 
>  # ./perf test "perf trace record and replay"
>  128: perf trace record and replay                                    : FAILED!
> 
>  # ./perf trace record sleep 1
>  # echo $?
>    32
> 
> This is happening because of non-zero err value from
> auxtrace_record__init() function.
> 
>  static int record__auxtrace_init(struct record *rec)
>  {
>         int err;
> 
>         if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
>             && record__threads_enabled(rec)) {
>                 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
>                 return -EINVAL;
>         }
> 
>         if (!rec->itr) {

Perhaps also add here:

                  err = -EINVAL;
		
>                 rec->itr = auxtrace_record__init(rec->evlist, &err);
>                 if (err)
>                         return err;
>         }
> 
> Here "int err" is not initialised. The code expects "err" to be set
> from auxtrace_record__init() function.
> 
> Update auxtrace_record__init() in arch/powerpc/util/auxtrace.c to clear
> err value in the beginning.
> 
> - Clear err value in beginning of function. Any fail later will
> set appropriate return code to err.
> - Even if we haven't found any event for auxtrace, perf record
> should continue for other events. NULL return
> will indicate that there is no auxtrace record initialized.
> - Not having "err" set here will affect monitoring of other events
> also because perf record will fail seeing random value in err.
> 
> With the fix,
> 
>  # ./perf trace record sleep 1
>  [ perf record: Woken up 2 times to write data ]
>  [ perf record: Captured and wrote 0.033 MB perf.data (228 samples) ]
> 
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
> ---
>  tools/perf/arch/powerpc/util/auxtrace.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/tools/perf/arch/powerpc/util/auxtrace.c b/tools/perf/arch/powerpc/util/auxtrace.c
> index e39deff6c857..fe1ea4e222f3 100644
> --- a/tools/perf/arch/powerpc/util/auxtrace.c
> +++ b/tools/perf/arch/powerpc/util/auxtrace.c
> @@ -71,6 +71,19 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
>  	struct evsel *pos;
>  	int found = 0;
>  
> +	/*
> +	 * Assign err value to zero here. Any fail later
> +	 * will set appropriate return code to err.
> +	 * Even if we haven't found any event for auxtrace, perf
> +	 * record should continue for other events. NULL return
> +	 * will indicate that there is no auxtrace record initialized.
> +	 *
> +	 * Not having "err" set here will affect monitoring
> +	 * of other events also because perf record will fail seeing
> +	 * random value in err.
> +	 */

Comment seems like overkill.  Prefer to add kernel-doc to
auxtrace_record__init() in tools/perf/util/auxtrace.c

> +	*err = 0;
> +
>  	evlist__for_each_entry(evlist, pos) {
>  		if (strstarts(pos->name, "vpa_dtl")) {
>  			found = 1;



^ permalink raw reply

* [PATCH v2 4/4] powerpc/xive: Add warning if target CPU not found
From: Shrikanth Hegde @ 2026-04-27  4:47 UTC (permalink / raw)
  To: maddy, linuxppc-dev, yury.norov, linux, linux-kernel; +Cc: sshegde, chleroy
In-Reply-To: <20260427044715.559137-1-sshegde@linux.ibm.com>

Add a WARN_ONCE to warn if the target CPU is not found. This could help
to find out about any such use case.

This is a very rare case, which means either the mask was empty or the
atomic update failed for all online CPUs. So it is worth warning on that
path to enable a potential fix.

Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/sysdev/xive/common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c120be73d149..dadd1f46ec93 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -564,6 +564,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
 			return cpu;
 	}
 
+	WARN_ONCE(1, "target CPU not found in mask: %*pbl\n", cpumask_pr_args(mask));
 	return -1;
 }
 
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 3/4] powerpc/perf: Use cpumask_intersects api for checking disable path
From: Shrikanth Hegde @ 2026-04-27  4:47 UTC (permalink / raw)
  To: maddy, linuxppc-dev, yury.norov, linux, linux-kernel; +Cc: sshegde, chleroy
In-Reply-To: <20260427044715.559137-1-sshegde@linux.ibm.com>

First online CPU in the node disables the nest counters by
making an OPAL call. Any other CPU in that node will bail out.

Instead of using a temporary mask to find out if any cpu in the
node is visited or not, it is better to use the cpumask_intersects
api to achieve the same.

Similarly a temporary cpumask is used to check if a core is already part
of core_imc_cpumask. Use the same cpumask_intersects api there.

Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index c1563b4eaa94..e3822f36c419 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -421,7 +421,6 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
 static int ppc_nest_imc_cpu_online(unsigned int cpu)
 {
 	const struct cpumask *l_cpumask;
-	static struct cpumask tmp_mask;
 	int res;
 
 	/* Get the cpumask of this node */
@@ -431,7 +430,7 @@ static int ppc_nest_imc_cpu_online(unsigned int cpu)
 	 * If this is not the first online CPU on this node, then
 	 * just return.
 	 */
-	if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+	if (cpumask_intersects(l_cpumask, &nest_imc_cpumask))
 		return 0;
 
 	/*
@@ -647,14 +646,13 @@ static bool is_core_imc_mem_inited(int cpu)
 static int ppc_core_imc_cpu_online(unsigned int cpu)
 {
 	const struct cpumask *l_cpumask;
-	static struct cpumask tmp_mask;
 	int ret = 0;
 
 	/* Get the cpumask for this core */
 	l_cpumask = cpu_sibling_mask(cpu);
 
 	/* If a cpu for this core is already set, then, don't do anything */
-	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+	if (cpumask_intersects(l_cpumask, &core_imc_cpumask))
 		return 0;
 
 	if (!is_core_imc_mem_inited(cpu)) {
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 2/4] powerpc: Simplify cpumask api usage for cpuinfo display
From: Shrikanth Hegde @ 2026-04-27  4:47 UTC (permalink / raw)
  To: maddy, linuxppc-dev, yury.norov, linux, linux-kernel
  Cc: sshegde, chleroy, Yury Norov
In-Reply-To: <20260427044715.559137-1-sshegde@linux.ibm.com>

- cpumask_next can take -1 as a valid argument. So simplify cpuinfo
  iterator.

- Use cpumask_last to find if this_cpu is last online CPU.

/proc/cpuinfo shows same info with patch.

Reviewed-by: Yury Norov <ynorov@nvidia.com>
Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/kernel/setup-common.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 8a86b0efcb1c..aecabe9cf139 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -323,7 +323,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_putc(m, '\n');
 
 	/* If this is the last cpu, print the summary */
-	if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
+	if (cpu_id == cpumask_last(cpu_online_mask))
 		show_cpuinfo_summary(m);
 
 	return 0;
@@ -331,10 +331,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	if (*pos == 0)	/* just in case, cpu 0 is not the first */
-		*pos = cpumask_first(cpu_online_mask);
-	else
-		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	*pos = cpumask_next(*pos - 1, cpu_online_mask);
 	if ((*pos) < nr_cpu_ids)
 		return (void *)(unsigned long)(*pos + 1);
 	return NULL;
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 1/4] powerpc: Use cpumask_next_wrap instead
From: Shrikanth Hegde @ 2026-04-27  4:47 UTC (permalink / raw)
  To: maddy, linuxppc-dev, yury.norov, linux, linux-kernel
  Cc: sshegde, chleroy, Yury Norov
In-Reply-To: <20260427044715.559137-1-sshegde@linux.ibm.com>

cpu = cpumask_next(cpu, mask)
if (cpu >= nr_cpu_ids)
    cpu = cpumask_first(mask)

Above block is identical to:
cpu = cpumask_next_wrap(cpu, mask)

Replace it. No change in functionality or performance.
Slightly simpler code.

Reviewed-by: Yury Norov <ynorov@nvidia.com>
Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com>
---
 arch/powerpc/kernel/irq.c             | 5 +----
 arch/powerpc/mm/book3s64/hash_utils.c | 4 +---
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a0e8b998c9b5..f69de08ad347 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -370,10 +370,7 @@ int irq_choose_cpu(const struct cpumask *mask)
 do_round_robin:
 		raw_spin_lock_irqsave(&irq_rover_lock, flags);
 
-		irq_rover = cpumask_next(irq_rover, cpu_online_mask);
-		if (irq_rover >= nr_cpu_ids)
-			irq_rover = cpumask_first(cpu_online_mask);
-
+		irq_rover = cpumask_next_wrap(irq_rover, cpu_online_mask);
 		cpuid = irq_rover;
 
 		raw_spin_unlock_irqrestore(&irq_rover_lock, flags);
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9dc5889d6ecb..e4fcf929cb33 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1299,9 +1299,7 @@ static void stress_hpt_timer_fn(struct timer_list *timer)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		tlbiel_all();
 
-	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
-	if (next_cpu >= nr_cpu_ids)
-		next_cpu = cpumask_first(cpu_online_mask);
+	next_cpu = cpumask_next_wrap(raw_smp_processor_id(), cpu_online_mask);
 	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
 	add_timer_on(&stress_hpt_timer, next_cpu);
 }
-- 
2.47.3



^ permalink raw reply related

* [PATCH v2 0/4] powerpc: A few misc cpumask changes
From: Shrikanth Hegde @ 2026-04-27  4:47 UTC (permalink / raw)
  To: maddy, linuxppc-dev, yury.norov, linux, linux-kernel; +Cc: sshegde, chleroy

These are some miscellaneous code refactors around using cpumask
APIs. They are mainly aimed at code simplification.

Changes since v1:
- Dropped the changes done in xive_find_target_in_mask. Those changes are
  done by Yury Norov already.
- Collected the tags. Thanks to Yury Norov. 
- Added new patch to use cpumask_intersects instead of using
  temp_mask to achieve the same goal.
- Added new patch to print the warning when target CPU not found
  as discussed in [2]. This patch is kept separate since this adds
  a new warning in case of failure. So one may perceive it as functional
  change.

v1[1]: https://lore.kernel.org/all/20260327063129.127302-1-sshegde@linux.ibm.com/
[2]:   https://lore.kernel.org/all/20260319033647.881246-1-ynorov@nvidia.com/

Based on tip/master at: (dffc5753ba4c "Merge branch into tip/master: 'timers/clocksource'")

Shrikanth Hegde (4):
  powerpc: Use cpumask_next_wrap instead
  powerpc: Simplify cpumask api usage for cpuinfo display
  powerpc/perf: Use cpumask_intersects api for checking disable path
  powerpc/xive: Add warning if target CPU not found

 arch/powerpc/kernel/irq.c             | 5 +----
 arch/powerpc/kernel/setup-common.c    | 7 ++-----
 arch/powerpc/mm/book3s64/hash_utils.c | 4 +---
 arch/powerpc/perf/imc-pmu.c           | 6 ++----
 arch/powerpc/sysdev/xive/common.c     | 1 +
 5 files changed, 7 insertions(+), 16 deletions(-)

-- 
2.47.3



^ permalink raw reply

* [PATCH] powerpc/eeh: Fix recursive locking on devices without EEH sensitive driver
From: Shivaprasad G Bhat @ 2026-04-27  3:00 UTC (permalink / raw)
  To: maddy, linuxppc-dev; +Cc: mpe, npiggin, chleroy, sbhat, linux-kernel

The commit 1010b4c012b0 ("powerpc/eeh: Make EEH driver device hotplug
safe") refactored the EEH code such that the pci_rescan_remove_lock is
held at the beginning of eeh_handle_normal_event() and the
eeh_reset_device() is called with that lock being held. It looks like the
commit failed to remove the existing lock/unlock inside eeh_rmv_device(),
which is no longer necessary. This causes eehd to hang on a lock that
it already holds when that code path is taken.

[<0>] 0xc00000011c78f870
[<0>] __switch_to+0xfc/0x1a0
[<0>] pci_lock_rescan_remove+0x30/0x44
[<0>] eeh_rmv_device+0x290/0x2e0
[<0>] eeh_pe_dev_traverse+0x80/0x130
[<0>] eeh_reset_device+0xcc/0x23c
[<0>] eeh_handle_normal_event+0x830/0xa80
[<0>] eeh_event_handler+0xf8/0x190
[<0>] kthread+0x194/0x1b0
[<0>] start_kernel_thread+0x14/0x18

The issue is seen in cases where the errors are detected on the PHB
directly and/or for devices where the driver's error_detected() returns
PCI_ERS_RESULT_NEED_RESET, and the driver is not EEH sensitive (i.e. no
error handlers like slot_reset(), resume(), etc. are defined).

Fixes: 1010b4c012b0 ("powerpc/eeh: Make EEH driver device hotplug safe")
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 arch/powerpc/kernel/eeh_driver.c |    2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 028f69158532..d64cce17a4e0 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -533,9 +533,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 		if (rmv_data)
 			list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
 	} else {
-		pci_lock_rescan_remove();
 		pci_stop_and_remove_bus_device(dev);
-		pci_unlock_rescan_remove();
 	}
 }
 




^ permalink raw reply related

* [PATCH] powerpc/rtas_pci: No hotplug on permanently removed device on pSeries
From: Shivaprasad G Bhat @ 2026-04-27  2:55 UTC (permalink / raw)
  To: maddy, linuxppc-dev; +Cc: mpe, npiggin, chleroy, sbhat, linux-kernel

The eeh_driver disables and offlines the PE permanently when its
freeze count exceeds eeh_max_freeze within the last hour.
The PE is only offline, so the device tree entries, eeh device
references are all intact till the real unplug of the device from
the guest/host takes place.

On pSeries, with a new hotplug of any PCI device, the drmgr initiates
a system-wide PCI rescan, which finds devices offlined by the eeh_driver
and there will be attempts to bring them online. This leads to
recurring EEHs either at the config read time itself or a bit
later depending on the type of the problem.

For PowerNV, the commit d2b0f6f77ee5 ("powerpc/eeh: No hotplug on
permanently removed dev") introduced the EEH_DEV_REMOVED flag to
prevent such inadvertent rescans on hierarchical topologies relevant in
bare-metal setups. For pSeries, such topologies don't really make sense
as the devices are either part of the same PE OR exposed as independent
devices on multiple virtual PHBs. However, the inadvertent rescans are
still a possibility with either hotplug of a new device or otherwise
with manual system-wide pci bus rescan attempts.

So the patch checks for EEH_DEV_REMOVED before allowing config space
access just like PowerNV, making the PCI core omit the PE, and thus
preventing subsequent EEH recurrences. The patch is tested on PowerVM
and KVM machines with single and multi-function devices, and on the
devices behind a switch. The unplug of the affected devices post EEH
removal is also working fine as expected.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
References: d2b0f6f77ee5 ("powerpc/eeh: No hotplug on permanently removed dev")
---
 arch/powerpc/kernel/rtas_pci.c |    6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index fccf96e897f6..ce24b18712ca 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -57,6 +57,9 @@ int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 	if (pdn->edev && pdn->edev->pe &&
 	    (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
 		return PCIBIOS_SET_FAILED;
+
+	if (pdn->edev && pdn->edev->mode & EEH_DEV_REMOVED)
+		return PCIBIOS_SET_FAILED;
 #endif
 
 	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
@@ -108,6 +111,9 @@ int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 	if (pdn->edev && pdn->edev->pe &&
 	    (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
 		return PCIBIOS_SET_FAILED;
+
+	if (pdn->edev && pdn->edev->mode & EEH_DEV_REMOVED)
+		return PCIBIOS_SET_FAILED;
 #endif
 
 	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);




^ permalink raw reply related

* Re: [PATCH v7 0/6] mm: Fix vmemmap optimization accounting and initialization
From: Muchun Song @ 2026-04-27  1:50 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Muchun Song, David Hildenbrand, Oscar Salvador, Michael Ellerman,
	Madhavan Srinivasan, Lorenzo Stoakes, Liam R Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Nicholas Piggin, Christophe Leroy, aneesh.kumar, joao.m.martins,
	linux-mm, linuxppc-dev, linux-kernel
In-Reply-To: <20260426125422.f9fa79ec47db2e552d75e29b@linux-foundation.org>

[-- Attachment #1: Type: text/plain, Size: 1082 bytes --]



> On Apr 27, 2026, at 03:54, Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> On Sun, 26 Apr 2026 17:26:34 +0800 Muchun Song <songmuchun@bytedance.com> wrote:
> 
>> The series fixes several bugs in vmemmap optimization, mainly around
>> incorrect page accounting and memmap initialization in DAX and memory
>> hotplug paths. It also fixes pageblock migratetype initialization and
>> struct page initialization for ZONE_DEVICE compound pages.
> 
> Thanks.  I'm assuming that none of this is urgent, so there's no need
> to fast-track any of these fixes into 7.1-rcX?

Not urgent.

> 
> So the cc:stable patches will be offered to -stable maintainers after
> 7.2-rc1 is released?

No problem.

> 
> Sashiko might have found another bug in there btw
>    https://sashiko.dev/#/patchset/20260426092640.375967-1-songmuchun@bytedance.com

I see. It’s a real issue, but not introduced by this series.
I’ve sent a separate series to fix this issue.

See https://lore.kernel.org/linux-mm/20260426144447.817722-1-songmuchun@bytedance.com/

Thanks.

[-- Attachment #2: Type: text/html, Size: 2107 bytes --]

^ permalink raw reply

* Re: [PATCH net-deletions v2] net: remove unused ATM protocols and legacy ATM device drivers
From: patchwork-bot+netdevbpf @ 2026-04-27  0:36 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, horms, corbet,
	skhan, linux, tsbogend, maddy, mpe, npiggin, chleroy, 3chas3,
	razor, idosch, jani.nikula, mchehab+huawei, tytso, herbert, geert,
	ebiggers, johannes.berg, jonathan.cameron, kees, kuniyu,
	fourier.thomas, andriy.shevchenko, rdunlap, akpm, linux-doc,
	linux-mips, linuxppc-dev, bridge, dwmw2
In-Reply-To: <20260422041846.2035118-1-kuba@kernel.org>

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 21 Apr 2026 21:18:44 -0700 you wrote:
> Remove the ATM protocol modules and PCI/SBUS ATM device drivers
> that are no longer in active use.
> 
> The ATM core protocol stack, PPPoATM, BR2684, and USB DSL modem
> drivers (drivers/usb/atm/) are retained in-tree to maintain PPP
> over ATM (PPPoA) and PPPoE-over-BR2684 support for DSL connections.
> 
> [...]

Here is the summary with links:
  - [net-deletions,v2] net: remove unused ATM protocols and legacy ATM device drivers
    https://git.kernel.org/netdev/net/c/6deb53595092

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox