From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: stable-review@kernel.org, torvalds@linux-foundation.org,
akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk,
Paul Mackerras <paulus@samba.org>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>
Subject: [11/34] powerpc/perf_event: Fix oops due to perf_event_do_pending call
Date: Mon, 24 May 2010 15:59:43 -0700 [thread overview]
Message-ID: <20100524230350.326578430@clark.site> (raw)
In-Reply-To: <20100524230418.GA12770@kroah.com>
2.6.34-stable review patch. If anyone has any objections, please let us know.
------------------
From: Paul Mackerras <paulus@samba.org>
commit 0fe1ac48bef018bed896307cd12f6ca9b5e704ab upstream.
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts
should be hard-disabled at this point but they were enabled. Because
the interrupt was not recoverable the system panicked.
The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1. This means we can remove the tests in the exception
exit path and raw_local_irq_restore.
This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit. (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/powerpc/include/asm/hw_irq.h | 38 ------------------------
arch/powerpc/kernel/asm-offsets.c | 1
arch/powerpc/kernel/entry_64.S | 9 -----
arch/powerpc/kernel/irq.c | 6 ---
arch/powerpc/kernel/time.c | 60 ++++++++++++++++++++++++++++++--------
5 files changed, 48 insertions(+), 66 deletions(-)
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -135,43 +135,5 @@ static inline int irqs_disabled_flags(un
*/
struct irq_chip;
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, perf_event_pending)));
- return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (1),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (0),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
- return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
- DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
2:
TRACE_AND_RESTORE_IRQ(r5);
-#ifdef CONFIG_PERF_EVENTS
- /* check paca->perf_event_pending if we're enabling ints */
- lbz r3,PACAPERFPEND(r13)
- and. r3,r3,r5
- beq 27f
- bl .perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
-#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -138,11 +137,6 @@ notrace void raw_local_irq_restore(unsig
}
#endif /* CONFIG_PPC_STD_MMU_64 */
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
-
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -530,25 +530,60 @@ void __init iSeries_time_init_early(void
}
#endif /* CONFIG_PPC_ISERIES */
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
{
- get_cpu_var(perf_event_pending) = 1;
- set_dec(1);
- put_cpu_var(perf_event_pending);
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_event_pending)));
+ return x;
}
+static inline void set_perf_event_pending_flag(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
-#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+ preempt_disable();
+ set_perf_event_pending_flag();
+ set_dec(1);
+ preempt_enable();
+}
+
+#else /* CONFIG_PERF_EVENTS */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -576,10 +611,6 @@ void timer_interrupt(struct pt_regs * re
set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -597,6 +628,11 @@ void timer_interrupt(struct pt_regs * re
calculate_steal_time();
+ if (test_perf_event_pending()) {
+ clear_perf_event_pending();
+ perf_event_do_pending();
+ }
+
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES))
get_lppaca()->int_dword.fields.decr_int = 0;
next prev parent reply other threads:[~2010-05-24 23:14 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-24 23:04 [00/34] 2.6.32.14-stable review Greg KH
2010-05-24 22:59 ` [01/34] ipv4: udp: fix short packet and bad checksum logging Greg KH
2010-05-25 7:08 ` Bjørn Mork
2010-05-25 14:06 ` Greg KH
2010-05-24 22:59 ` [02/34] hp_accel: fix race in device removal Greg KH
2010-05-24 22:59 ` [03/34] fbdev: bfin-t350mcqb-fb: fix fbmem allocation with blanking lines Greg KH
2010-05-24 22:59 ` [04/34] hugetlbfs: kill applications that use MAP_NORESERVE with SIGBUS instead of OOM-killer Greg KH
2010-05-24 22:59 ` [05/34] dma-mapping: fix dma_sync_single_range_* Greg KH
2010-05-24 22:59 ` [06/34] ACPI: sleep: eliminate duplicate entries in acpisleep_dmi_table[] Greg KH
2010-05-24 22:59 ` [07/34] mmc: atmel-mci: fix two parameters swapped Greg KH
2010-05-24 22:59 ` [08/34] mmc: atmel-mci: prevent kernel oops while removing card Greg KH
2010-05-24 22:59 ` [09/34] mmc: atmel-mci: remove data error interrupt after xfer Greg KH
2010-05-24 22:59 ` [10/34] [S390] ptrace: fix return value of do_syscall_trace_enter() Greg KH
2010-05-24 22:59 ` Greg KH [this message]
2010-05-24 22:59 ` [12/34] cifs: guard against hardlinking directories Greg KH
2010-05-24 22:59 ` [13/34] serial: imx.c: fix CTS trigger level lower to avoid lost chars Greg KH
2010-05-24 22:59 ` [14/34] ALSA: ice1724 - Fix ESI Maya44 capture source control Greg KH
2010-05-24 22:59 ` [15/34] ALSA: hda: Fix 0 dB for Lenovo models using Conexant CX20549 (Venice) Greg KH
2010-05-24 22:59 ` [16/34] inotify: race use after free/double free in inotify inode marks Greg KH
2010-05-24 22:59 ` [17/34] inotify: dont leak user struct on inotify release Greg KH
2010-05-24 22:59 ` [18/34] profile: fix stats and data leakage Greg KH
2010-05-24 22:59 ` [19/34] x86, k8: Fix build error when K8_NB is disabled Greg KH
2010-05-24 23:13 ` H. Peter Anvin
2010-05-24 23:26 ` Greg KH
2010-05-24 22:59 ` [20/34] x86, cacheinfo: Turn off L3 cache index disable feature in virtualized environments Greg KH
2010-05-24 23:13 ` H. Peter Anvin
2010-05-24 23:25 ` Greg KH
2010-05-24 22:59 ` [21/34] x86, amd: Check X86_FEATURE_OSVW bit before accessing OSVW MSRs Greg KH
2010-05-24 23:20 ` H. Peter Anvin
2010-05-24 22:59 ` [22/34] Btrfs: check for read permission on src file in the clone ioctl Greg KH
2010-05-24 22:59 ` [23/34] ALSA: hda - New Intel HDA controller Greg KH
2010-05-24 22:59 ` [24/34] proc: partially revert "procfs: provide stack information for threads" Greg KH
2010-05-24 22:59 ` [25/34] revert "procfs: provide stack information for threads" and its fixup commits Greg KH
2010-05-24 22:59 ` [26/34] iwlwifi: clear all the stop_queue flag after load firmware Greg KH
2010-05-24 22:59 ` [27/34] p54: disable channels with incomplete calibration data sets Greg KH
2010-05-24 23:00 ` [28/34] CacheFiles: Fix error handling in cachefiles_determine_cache_security() Greg KH
2010-05-24 23:00 ` [29/34] [SCSI] megaraid_sas: fix for 32bit apps Greg KH
2010-05-24 23:00 ` [30/34] mmap_min_addr check CAP_SYS_RAWIO only for write Greg KH
2010-05-24 23:00 ` [31/34] nilfs2: fix sync silent failure Greg KH
2010-05-24 23:00 ` [32/34] Revert "ath9k: fix lockdep warning when unloading module" on stable kernels Greg KH
2010-05-24 23:00 ` [33/34] crypto: authenc - Add EINPROGRESS check Greg KH
2010-05-24 23:00 ` [34/34] Revert "parisc: Set PCI CLS early in boot." Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100524230350.326578430@clark.site \
--to=gregkh@suse.de \
--cc=akpm@linux-foundation.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=benh@kernel.crashing.org \
--cc=linux-kernel@vger.kernel.org \
--cc=paulus@samba.org \
--cc=stable-review@kernel.org \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox