From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: stable-review@kernel.org, torvalds@linux-foundation.org,
akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk,
Paul Mackerras <paulus@samba.org>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>
Subject: [11/34] powerpc/perf_event: Fix oops due to perf_event_do_pending call
Date: Mon, 24 May 2010 15:59:43 -0700 [thread overview]
Message-ID: <20100524230350.326578430@clark.site> (raw)
In-Reply-To: <20100524230418.GA12770@kroah.com>
2.6.32-stable review patch. If anyone has any objections, please let us know.
------------------
From: Paul Mackerras <paulus@samba.org>
commit 0fe1ac48bef018bed896307cd12f6ca9b5e704ab upstream.
Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear. Interrupts
should be hard-disabled at this point but they were enabled. Because
the interrupt was not recoverable the system panicked.
The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.
The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1. This means we can remove the tests in both the exception
exit path and raw_local_irq_restore().
This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit. (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or tens of nanoseconds.)
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/powerpc/include/asm/hw_irq.h | 38 ------------------------
arch/powerpc/kernel/asm-offsets.c | 1
arch/powerpc/kernel/entry_64.S | 9 -----
arch/powerpc/kernel/irq.c | 6 ---
arch/powerpc/kernel/time.c | 60 ++++++++++++++++++++++++++++++--------
5 files changed, 48 insertions(+), 66 deletions(-)
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -135,43 +135,5 @@ static inline int irqs_disabled_flags(un
*/
struct irq_chip;
-#ifdef CONFIG_PERF_EVENTS
-
-#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, perf_event_pending)));
- return x;
-}
-
-static inline void set_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (1),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-
-static inline void clear_perf_event_pending(void)
-{
- asm volatile("stb %0,%1(13)" : :
- "r" (0),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
-}
-#endif /* CONFIG_PPC64 */
-
-#else /* CONFIG_PERF_EVENTS */
-
-static inline unsigned long test_perf_event_pending(void)
-{
- return 0;
-}
-
-static inline void clear_perf_event_pending(void) {}
-#endif /* CONFIG_PERF_EVENTS */
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -133,7 +133,6 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
- DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
2:
TRACE_AND_RESTORE_IRQ(r5);
-#ifdef CONFIG_PERF_EVENTS
- /* check paca->perf_event_pending if we're enabling ints */
- lbz r3,PACAPERFPEND(r13)
- and. r3,r3,r5
- beq 27f
- bl .perf_event_do_pending
-27:
-#endif /* CONFIG_PERF_EVENTS */
-
/* extract EE bit and use it to restore paca->hard_enabled */
ld r3,_MSR(r1)
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -53,7 +53,6 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
-#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -138,11 +137,6 @@ notrace void raw_local_irq_restore(unsig
}
#endif /* CONFIG_PPC_STD_MMU_64 */
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
-
/*
* if (get_paca()->hard_enabled) return;
* But again we need to take care that gcc gets hard_enabled directly
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -530,25 +530,60 @@ void __init iSeries_time_init_early(void
}
#endif /* CONFIG_PPC_ISERIES */
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
-DEFINE_PER_CPU(u8, perf_event_pending);
+#ifdef CONFIG_PERF_EVENTS
-void set_perf_event_pending(void)
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
+ */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_perf_event_pending(void)
{
- get_cpu_var(perf_event_pending) = 1;
- set_dec(1);
- put_cpu_var(perf_event_pending);
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, perf_event_pending)));
+ return x;
}
+static inline void set_perf_event_pending_flag(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (1),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+static inline void clear_perf_event_pending(void)
+{
+ asm volatile("stb %0,%1(13)" : :
+ "r" (0),
+ "i" (offsetof(struct paca_struct, perf_event_pending)));
+}
+
+#else /* 32-bit */
+
+DEFINE_PER_CPU(u8, perf_event_pending);
+
+#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
-#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* 32 vs 64 bit */
+
+void set_perf_event_pending(void)
+{
+ preempt_disable();
+ set_perf_event_pending_flag();
+ set_dec(1);
+ preempt_enable();
+}
+
+#else /* CONFIG_PERF_EVENTS */
#define test_perf_event_pending() 0
#define clear_perf_event_pending()
-#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
+#endif /* CONFIG_PERF_EVENTS */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -576,10 +611,6 @@ void timer_interrupt(struct pt_regs * re
set_dec(DECREMENTER_MAX);
#ifdef CONFIG_PPC32
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
- }
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
#endif
@@ -597,6 +628,11 @@ void timer_interrupt(struct pt_regs * re
calculate_steal_time();
+ if (test_perf_event_pending()) {
+ clear_perf_event_pending();
+ perf_event_do_pending();
+ }
+
#ifdef CONFIG_PPC_ISERIES
if (firmware_has_feature(FW_FEATURE_ISERIES))
get_lppaca()->int_dword.fields.decr_int = 0;
next prev parent reply other threads:[~2010-05-24 23:14 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-24 23:04 [00/34] 2.6.32.14-stable review Greg KH
2010-05-24 22:59 ` [01/34] ipv4: udp: fix short packet and bad checksum logging Greg KH
2010-05-25 7:08 ` Bjørn Mork
2010-05-25 14:06 ` Greg KH
2010-05-24 22:59 ` [02/34] hp_accel: fix race in device removal Greg KH
2010-05-24 22:59 ` [03/34] fbdev: bfin-t350mcqb-fb: fix fbmem allocation with blanking lines Greg KH
2010-05-24 22:59 ` [04/34] hugetlbfs: kill applications that use MAP_NORESERVE with SIGBUS instead of OOM-killer Greg KH
2010-05-24 22:59 ` [05/34] dma-mapping: fix dma_sync_single_range_* Greg KH
2010-05-24 22:59 ` [06/34] ACPI: sleep: eliminate duplicate entries in acpisleep_dmi_table[] Greg KH
2010-05-24 22:59 ` [07/34] mmc: atmel-mci: fix two parameters swapped Greg KH
2010-05-24 22:59 ` Greg KH
2010-05-24 22:59 ` [08/34] mmc: atmel-mci: prevent kernel oops while removing card Greg KH
2010-05-24 22:59 ` Greg KH
2010-05-24 22:59 ` [09/34] mmc: atmel-mci: remove data error interrupt after xfer Greg KH
2010-05-24 22:59 ` Greg KH
2010-05-24 22:59 ` [10/34] [S390] ptrace: fix return value of do_syscall_trace_enter() Greg KH
2010-05-24 22:59 ` Greg KH [this message]
2010-05-24 22:59 ` [12/34] cifs: guard against hardlinking directories Greg KH
2010-05-24 22:59 ` [13/34] serial: imx.c: fix CTS trigger level lower to avoid lost chars Greg KH
2010-05-24 22:59 ` [14/34] ALSA: ice1724 - Fix ESI Maya44 capture source control Greg KH
2010-05-24 22:59 ` [15/34] ALSA: hda: Fix 0 dB for Lenovo models using Conexant CX20549 (Venice) Greg KH
2010-05-24 22:59 ` [16/34] inotify: race use after free/double free in inotify inode marks Greg KH
2010-05-24 22:59 ` [17/34] inotify: dont leak user struct on inotify release Greg KH
2010-05-24 22:59 ` [18/34] profile: fix stats and data leakage Greg KH
2010-05-24 22:59 ` [19/34] x86, k8: Fix build error when K8_NB is disabled Greg KH
2010-05-24 23:13 ` H. Peter Anvin
2010-05-24 23:26 ` Greg KH
2010-05-24 22:59 ` [20/34] x86, cacheinfo: Turn off L3 cache index disable feature in virtualized environments Greg KH
2010-05-24 23:13 ` H. Peter Anvin
2010-05-24 23:25 ` Greg KH
2010-05-24 22:59 ` [21/34] x86, amd: Check X86_FEATURE_OSVW bit before accessing OSVW MSRs Greg KH
2010-05-24 23:20 ` H. Peter Anvin
2010-05-24 22:59 ` [22/34] Btrfs: check for read permission on src file in the clone ioctl Greg KH
2010-05-24 22:59 ` [23/34] ALSA: hda - New Intel HDA controller Greg KH
2010-05-24 22:59 ` [24/34] proc: partially revert "procfs: provide stack information for threads" Greg KH
2010-05-24 22:59 ` [25/34] revert "procfs: provide stack information for threads" and its fixup commits Greg KH
2010-05-24 22:59 ` [26/34] iwlwifi: clear all the stop_queue flag after load firmware Greg KH
2010-05-24 22:59 ` [27/34] p54: disable channels with incomplete calibration data sets Greg KH
2010-05-24 23:00 ` [28/34] CacheFiles: Fix error handling in cachefiles_determine_cache_security() Greg KH
2010-05-24 23:00 ` [29/34] [SCSI] megaraid_sas: fix for 32bit apps Greg KH
2010-05-24 23:00 ` [30/34] mmap_min_addr check CAP_SYS_RAWIO only for write Greg KH
2010-05-24 23:00 ` [31/34] nilfs2: fix sync silent failure Greg KH
2010-05-24 23:00 ` [32/34] Revert "ath9k: fix lockdep warning when unloading module" on stable kernels Greg KH
2010-05-24 23:00 ` [33/34] crypto: authenc - Add EINPROGRESS check Greg KH
2010-05-24 23:00 ` [34/34] Revert "parisc: Set PCI CLS early in boot." Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100524230350.326578430@clark.site \
--to=gregkh@suse.de \
--cc=akpm@linux-foundation.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=benh@kernel.crashing.org \
--cc=linux-kernel@vger.kernel.org \
--cc=paulus@samba.org \
--cc=stable-review@kernel.org \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.