From: Paul Mackerras <paulus@samba.org>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Ingo Molnar <mingo@elte.hu>,
Peter Zijlstra <peterz@infradead.org>
Cc: linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org,
Anton Blanchard <anton@samba.org>
Subject: [PATCH] powerpc/perf_event: Fix oops due to perf_event_do_pending call
Date: Wed, 14 Apr 2010 16:46:04 +1000 [thread overview]
Message-ID: <20100414064603.GA4714@drongo> (raw)
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts
should be hard-disabled at this point but they were enabled. Because
the interrupt was not recoverable the system panicked.
The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1. This means we can remove the tests in the exception
exit path and raw_local_irq_restore.
This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit. (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
---
Ben, please put this in your tree of fixes to go to Linus for 2.6.34,
since it fixes a potential panic.
arch/powerpc/include/asm/hw_irq.h | 38 ------------------------
arch/powerpc/kernel/asm-offsets.c | 1
arch/powerpc/kernel/entry_64.S | 9 -----
arch/powerpc/kernel/irq.c | 6 ---
arch/powerpc/kernel/time.c | 60 ++++++++++++++++++++++++++++++--------
5 files changed, 48 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9f4c9d4..bd100fc 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags)
*/
struct irq_chip;
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, perf_event_pending)));
- return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (1),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (0),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
- return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7..c09138d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
- DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 07109d8..42e9d90 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
-#ifdef CONFIG_PERF_EVENTS
- /* check paca->perf_event_pending if we're enabling ints */
- lbz r3,PACAPERFPEND(r13)
- and. r3,r3,r5
- beq 27f
- bl .perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f20..066bd31 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
-#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
}
#endif /* CONFIG_PPC_STD_MMU_64 */
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
-
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1b16b9a..0441bbd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
{
- get_cpu_var(perf_event_pending) = 1;
- set_dec(1);
- put_cpu_var(perf_event_pending);
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_event_pending)));
+ return x;
}
+static inline void set_perf_event_pending_flag(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
-#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+ preempt_disable();
+ set_perf_event_pending_flag();
+ set_dec(1);
+ preempt_enable();
+}
+
+#else /* CONFIG_PERF_EVENTS */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time();
+ if (test_perf_event_pending()) {
+ clear_perf_event_pending();
+ perf_event_do_pending();
+ }
+
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES))
get_lppaca()->int_dword.fields.decr_int = 0;
WARNING: multiple messages have this Message-ID (diff)
From: Paul Mackerras <paulus@samba.org>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Ingo Molnar <mingo@elte.hu>,
Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org, linuxppc-dev@ozlabs.org,
Anton Blanchard <anton@samba.org>
Subject: [PATCH] powerpc/perf_event: Fix oops due to perf_event_do_pending call
Date: Wed, 14 Apr 2010 16:46:04 +1000 [thread overview]
Message-ID: <20100414064603.GA4714@drongo> (raw)
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts
should be hard-disabled at this point but they were enabled. Because
the interrupt was not recoverable the system panicked.
The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1. This means we can remove the tests in the exception
exit path and raw_local_irq_restore.
This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit. (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
---
Ben, please put this in your tree of fixes to go to Linus for 2.6.34,
since it fixes a potential panic.
arch/powerpc/include/asm/hw_irq.h | 38 ------------------------
arch/powerpc/kernel/asm-offsets.c | 1
arch/powerpc/kernel/entry_64.S | 9 -----
arch/powerpc/kernel/irq.c | 6 ---
arch/powerpc/kernel/time.c | 60 ++++++++++++++++++++++++++++++--------
5 files changed, 48 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 9f4c9d4..bd100fc 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags)
*/
struct irq_chip;
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, perf_event_pending)));
- return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (1),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (0),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
- return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 957ceb7..c09138d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
- DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 07109d8..42e9d90 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
TRACE_AND_RESTORE_IRQ(r5);
-#ifdef CONFIG_PERF_EVENTS
- /* check paca->perf_event_pending if we're enabling ints */
- lbz r3,PACAPERFPEND(r13)
- and. r3,r3,r5
- beq 27f
- bl .perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 64f6f20..066bd31 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
-#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
}
#endif /* CONFIG_PPC_STD_MMU_64 */
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
-
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1b16b9a..0441bbd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
{
- get_cpu_var(perf_event_pending) = 1;
- set_dec(1);
- put_cpu_var(perf_event_pending);
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_event_pending)));
+ return x;
}
+static inline void set_perf_event_pending_flag(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
-#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+ preempt_disable();
+ set_perf_event_pending_flag();
+ set_dec(1);
+ preempt_enable();
+}
+
+#else /* CONFIG_PERF_EVENTS */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)
calculate_steal_time();
+ if (test_perf_event_pending()) {
+ clear_perf_event_pending();
+ perf_event_do_pending();
+ }
+
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES))
get_lppaca()->int_dword.fields.decr_int = 0;
next reply other threads:[~2010-04-14 6:46 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-14 6:46 Paul Mackerras [this message]
2010-04-14 6:46 ` [PATCH] powerpc/perf_event: Fix oops due to perf_event_do_pending call Paul Mackerras
2010-04-14 6:53 ` Benjamin Herrenschmidt
2010-04-14 6:53 ` Benjamin Herrenschmidt
2010-04-14 12:11 ` Michael Ellerman
2010-04-14 12:11 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100414064603.GA4714@drongo \
--to=paulus@samba.org \
--cc=anton@samba.org \
--cc=benh@kernel.crashing.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@ozlabs.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.