* [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks
@ 2010-07-13 4:59 Huang Ying
2010-07-13 4:59 ` [PATCH -v3 2/2] irq_work, MCE: use irq_work in MCE Huang Ying
2010-08-30 9:41 ` [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks Peter Zijlstra
0 siblings, 2 replies; 3+ messages in thread
From: Huang Ying @ 2010-07-13 4:59 UTC
To: Ingo Molnar, H. Peter Anvin
Cc: linux-kernel, Andi Kleen, Peter Zijlstra,
Huang Ying
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To let other NMI-context users run things from hard-IRQ context,
extract the perf_event callback mechanism into a generic facility.
Huang Ying: some fixes
This patch has only been tested on the x86 platform.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/alpha/Kconfig | 1
arch/alpha/include/asm/perf_event.h | 9 -
arch/arm/Kconfig | 1
arch/arm/include/asm/perf_event.h | 12 --
arch/arm/kernel/perf_event.c | 4
arch/frv/Kconfig | 1
arch/frv/lib/perf_event.c | 19 ----
arch/parisc/Kconfig | 1
arch/parisc/include/asm/perf_event.h | 7 -
arch/powerpc/Kconfig | 1
arch/powerpc/kernel/time.c | 42 ++++----
arch/s390/Kconfig | 1
arch/s390/include/asm/perf_event.h | 10 --
arch/sh/Kconfig | 1
arch/sh/include/asm/perf_event.h | 7 -
arch/sparc/Kconfig | 2
arch/sparc/include/asm/perf_event.h | 4
arch/sparc/kernel/pcr.c | 8 -
arch/x86/Kconfig | 1
arch/x86/include/asm/entry_arch.h | 4
arch/x86/include/asm/hardirq.h | 2
arch/x86/include/asm/hw_irq.h | 2
arch/x86/include/asm/irq_vectors.h | 4
arch/x86/kernel/Makefile | 1
arch/x86/kernel/cpu/perf_event.c | 19 ----
arch/x86/kernel/entry_64.S | 6 -
arch/x86/kernel/irq.c | 8 -
arch/x86/kernel/irq_work.c | 30 ++++++
arch/x86/kernel/irqinit.c | 6 -
include/linux/irq_work.h | 20 ++++
include/linux/perf_event.h | 11 --
init/Kconfig | 8 +
kernel/Makefile | 2
kernel/irq_work.c | 164 +++++++++++++++++++++++++++++++++++
kernel/perf_event.c | 104 +---------------------
kernel/timer.c | 7 +
36 files changed, 290 insertions(+), 240 deletions(-)
--- /dev/null
+++ b/include/linux/irq_work.h
@@ -0,0 +1,20 @@
+#ifndef _LINUX_IRQ_WORK_H
+#define _LINUX_IRQ_WORK_H
+
+struct irq_work {
+ struct irq_work *next;
+ void (*func)(struct irq_work *);
+};
+
+static inline
+void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+{
+ entry->next = NULL;
+ entry->func = func;
+}
+
+bool irq_work_queue(struct irq_work *entry);
+void irq_work_run(void);
+void irq_work_sync(struct irq_work *entry);
+
+#endif /* _LINUX_IRQ_WORK_H */
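
As an aside, a minimal consumer of this interface could look like the
sketch below. The names (report_work, report_func, some_nmi_path) are
made up for illustration and are not part of the patch; the point is
that queueing is safe from NMI context while the callback runs later
from hard-IRQ context:

	#include <linux/kernel.h>
	#include <linux/irq_work.h>

	static void report_func(struct irq_work *work)
	{
		/* Runs in hard-IRQ context; printk and locks are usable here. */
		pr_info("irq_work callback ran\n");
	}

	static struct irq_work report_work;

	static void some_init_path(void)
	{
		init_irq_work(&report_work, report_func);
	}

	static void some_nmi_path(void)
	{
		/* Lockless; returns false if @report_work is already queued. */
		irq_work_queue(&report_work);
	}
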
--- /dev/null
+++ b/kernel/irq_work.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Provides a framework for enqueueing and running callbacks from hardirq
+ * context. The enqueueing is NMI-safe.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+
+/*
+ * An entry can be in one of four states:
+ *
+ * free NULL, 0 -> {claimed} : free to be used
+ * claimed NULL, 3 -> {pending} : claimed to be enqueued
+ * pending next, 3 -> {busy} : queued, pending callback
+ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ *
+ * We use the lower two bits of the next pointer to keep PENDING and BUSY
+ * flags.
+ */
+
+#define IRQ_WORK_PENDING 1UL
+#define IRQ_WORK_BUSY 2UL
+#define IRQ_WORK_FLAGS 3UL
+
+static inline bool irq_work_is_set(struct irq_work *entry, int flags)
+{
+ return (unsigned long)entry->next & flags;
+}
+
+static inline struct irq_work *irq_work_next(struct irq_work *entry)
+{
+ unsigned long next = (unsigned long)entry->next;
+ next &= ~IRQ_WORK_FLAGS;
+ return (struct irq_work *)next;
+}
+
+static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
+{
+ unsigned long next = (unsigned long)entry;
+ next |= flags;
+ return (struct irq_work *)next;
+}
+
+static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
+
+/*
+ * Claim the entry so that no one else will poke at it.
+ */
+static bool irq_work_claim(struct irq_work *entry)
+{
+ struct irq_work *next, *nflags;
+
+ do {
+ next = entry->next;
+ if ((unsigned long)next & IRQ_WORK_PENDING)
+ return false;
+ nflags = next_flags(next, IRQ_WORK_FLAGS);
+ } while (cmpxchg(&entry->next, next, nflags) != next);
+
+ return true;
+}
+
+
+void __weak arch_irq_work_raise(void)
+{
+ /*
+ * Lame architectures will get the timer tick callback
+ */
+}
+
+/*
+ * Queue the entry and raise the IPI if needed.
+ */
+static void __irq_work_queue(struct irq_work *entry)
+{
+ struct irq_work **head, *next;
+
+ head = &get_cpu_var(irq_work_list);
+
+ do {
+ next = *head;
+ /* Can assign non-atomically because we keep the flags set. */
+ entry->next = next_flags(next, IRQ_WORK_FLAGS);
+ } while (cmpxchg(head, next, entry) != next);
+
+ /* The list was empty, raise self-interrupt to start processing. */
+ if (!irq_work_next(entry))
+ arch_irq_work_raise();
+
+ put_cpu_var(irq_work_list);
+}
+
+/*
+ * Enqueue the irq_work @entry; returns true on success, false when the
+ * @entry was already enqueued by someone else.
+ *
+ * Can be re-enqueued while the callback is still in progress.
+ */
+bool irq_work_queue(struct irq_work *entry)
+{
+ if (!irq_work_claim(entry)) {
+ /*
+ * Already enqueued, can't do!
+ */
+ return false;
+ }
+
+ __irq_work_queue(entry);
+ return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue);
+
+/*
+ * Run the irq_work entries on this cpu. Must be run from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
+{
+ struct irq_work *list, **head;
+
+ head = &__get_cpu_var(irq_work_list);
+ if (*head == NULL)
+ return;
+
+ BUG_ON(!in_irq());
+ BUG_ON(!irqs_disabled());
+
+ list = xchg(head, NULL);
+ while (list != NULL) {
+ struct irq_work *entry = list;
+
+ list = irq_work_next(list);
+
+ /*
+ * Clear the PENDING bit, after this point the @entry
+ * can be re-used.
+ */
+ entry->next = next_flags(NULL, IRQ_WORK_BUSY);
+ entry->func(entry);
+ /*
+ * Clear the BUSY bit and return to the free state if
+ * no-one else claimed it meanwhile.
+ */
+ cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+ }
+}
+EXPORT_SYMBOL_GPL(irq_work_run);
+
+/*
+ * Synchronize against the irq_work @entry; this ensures the entry is
+ * not currently in use.
+ */
+void irq_work_sync(struct irq_work *entry)
+{
+ WARN_ON_ONCE(irqs_disabled());
+
+ while (irq_work_is_set(entry, IRQ_WORK_BUSY))
+ cpu_relax();
+}
+EXPORT_SYMBOL_GPL(irq_work_sync);
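
The state encoding above works because struct irq_work is at least
word-aligned, so the two low bits of the ->next pointer are always zero
and are free to carry the PENDING and BUSY flags. A standalone,
userspace sketch of that tagged-pointer trick (plain C, not kernel
code):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PENDING 1UL
	#define BUSY    2UL
	#define FLAGS   (PENDING | BUSY)

	struct node { struct node *next; };

	/* Pack flags into the low bits of a suitably aligned pointer. */
	static struct node *pack(struct node *p, unsigned long flags)
	{
		return (struct node *)((uintptr_t)p | flags);
	}

	/* Strip the flag bits to recover the real pointer. */
	static struct node *strip(struct node *tagged)
	{
		return (struct node *)((uintptr_t)tagged & ~(uintptr_t)FLAGS);
	}

	int main(void)
	{
		struct node a, b;

		/* Pointer alignment guarantees two spare low bits. */
		assert(((uintptr_t)&a & FLAGS) == 0);

		b.next = pack(&a, PENDING | BUSY);
		printf("ptr=%p flags=%lu\n", (void *)strip(b.next),
		       (unsigned long)((uintptr_t)b.next & FLAGS));
		return 0;
	}
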
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -9,6 +9,7 @@ config ALPHA
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_DMA_ATTRS
help
--- a/arch/alpha/include/asm/perf_event.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_ALPHA_PERF_EVENT_H
-#define __ASM_ALPHA_PERF_EVENT_H
-
-/* Alpha only supports software events through this interface. */
-static inline void set_perf_event_pending(void) { }
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
-#endif /* __ASM_ALPHA_PERF_EVENT_H */
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -22,6 +22,7 @@ config ARM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
help
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,18 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/*
- * NOP: on *most* (read: all supported) ARM platforms, the performance
- * counter interrupts are regular interrupts and not an NMI. This
- * means that when we receive the interrupt we can call
- * perf_event_do_pending() that handles all of the work with
- * interrupts enabled.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
/* ARM performance counters start from 1 (in the cp15 accesses) so use the
* same indexes here for consistency. */
#define PERF_EVENT_INDEX_OFFSET 1
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
default y
select HAVE_IDE
select HAVE_ARCH_TRACEHOOK
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
config ZONE_DMA
--- a/arch/frv/lib/perf_event.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Performance event handling
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/perf_event.h>
-
-/*
- * mark the performance event as pending
- */
-void set_perf_event_pending(void)
-{
-}
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,6 +16,7 @@ config PARISC
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
select BUG
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select GENERIC_ATOMIC64 if !64BIT
help
--- a/arch/parisc/include/asm/perf_event.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_PARISC_PERF_EVENT_H
-#define __ASM_PARISC_PERF_EVENT_H
-
-/* parisc only supports software events through this interface. */
-static inline void set_perf_event_pending(void) { }
-
-#endif /* __ASM_PARISC_PERF_EVENT_H */
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,6 +139,7 @@ config PPC
select HAVE_OPROFILE
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -53,7 +53,7 @@
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/delay.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
#include <asm/trace.h>
#include <asm/io.h>
@@ -532,60 +532,60 @@ void __init iSeries_time_init_early(void
}
#endif /* CONFIG_PPC_ISERIES */
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
/*
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
+static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
- : "i" (offsetof(struct paca_struct, perf_event_pending)));
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
-static inline void set_perf_event_pending_flag(void)
+static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (1),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
+ "i" (offsetof(struct paca_struct, irq_work_pending)));
}
-static inline void clear_perf_event_pending(void)
+static inline void clear_irq_work_pending(void)
{
asm volatile("stb %0,%1(13)" : :
"r" (0),
- "i" (offsetof(struct paca_struct, perf_event_pending)));
+ "i" (offsetof(struct paca_struct, irq_work_pending)));
}
#else /* 32-bit */
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
-#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending() __get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
#endif /* 32 vs 64 bit */
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
{
preempt_disable();
- set_perf_event_pending_flag();
+ set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
-#else /* CONFIG_PERF_EVENTS */
+#else /* CONFIG_IRQ_WORK */
-#define test_perf_event_pending() 0
-#define clear_perf_event_pending()
+#define test_irq_work_pending() 0
+#define clear_irq_work_pending()
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
/*
* For iSeries shared processors, we have to let the hypervisor
@@ -635,9 +635,9 @@ void timer_interrupt(struct pt_regs * re
calculate_steal_time();
- if (test_perf_event_pending()) {
- clear_perf_event_pending();
- perf_event_do_pending();
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
}
#ifdef CONFIG_PPC_ISERIES
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -98,6 +98,7 @@ config S390
select HAVE_KVM if 64BIT
select HAVE_ARCH_TRACEHOOK
select INIT_ALL_POSSIBLE
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
--- a/arch/s390/include/asm/perf_event.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Performance event support - s390 specific definitions.
- *
- * Copyright 2009 Martin Schwidefsky, IBM Corporation.
- */
-
-static inline void set_perf_event_pending(void) {}
-static inline void clear_perf_event_pending(void) {}
-
-#define PERF_EVENT_INDEX_OFFSET 0
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,6 +16,7 @@ config SUPERH
select HAVE_ARCH_TRACEHOOK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_KERNEL_GZIP
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu
extern int reserve_pmc_hardware(void);
extern void release_pmc_hardware(void);
-static inline void set_perf_event_pending(void)
-{
- /* Nothing to see here, move along. */
-}
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* __ASM_SH_PERF_EVENT_H */
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,6 +25,7 @@ config SPARC
select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS
select RTC_DRV_M48T59
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_DMA_ATTRS
@@ -52,6 +53,7 @@ config SPARC64
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
select RTC_DRV_STARFIRE
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -1,10 +1,6 @@
#ifndef __ASM_SPARC_PERF_EVENT_H
#define __ASM_SPARC_PERF_EVENT_H
-extern void set_perf_event_pending(void);
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
#ifdef CONFIG_PERF_EVENTS
extern void init_hw_perf_events(void);
#else
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -7,7 +7,7 @@
#include <linux/init.h>
#include <linux/irq.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
#include <linux/ftrace.h>
#include <asm/pil.h>
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(i
old_regs = set_irq_regs(regs);
irq_enter();
-#ifdef CONFIG_PERF_EVENTS
- perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+ irq_work_run();
#endif
irq_exit();
set_irq_regs(old_regs);
}
-void set_perf_event_pending(void)
+void arch_irq_work_raise(void)
{
set_softint(1 << PIL_DEFERRED_PCR_WORK);
}
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_EVENTS if (!M386 && !M486)
+ select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOC
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-#ifdef CONFIG_PERF_EVENTS
-BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
+#ifdef CONFIG_IRQ_WORK
+BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,7 @@
extern void apic_timer_interrupt(void);
extern void x86_platform_ipi(void);
extern void error_interrupt(void);
-extern void perf_pending_interrupt(void);
+extern void irq_work_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -33,6 +33,7 @@ obj-y := process_$(BITS).o signal.o en
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
obj-y += setup.o x86_init.o i8259.o irqinit.o
+obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1160,25 +1160,6 @@ static int x86_pmu_handle_irq(struct pt_
return handled;
}
-void smp_perf_pending_interrupt(struct pt_regs *regs)
-{
- irq_enter();
- ack_APIC_irq();
- inc_irq_stat(apic_pending_irqs);
- perf_event_do_pending();
- irq_exit();
-}
-
-void set_perf_event_pending(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- if (!x86_pmu.apic || !x86_pmu_initialized())
- return;
-
- apic->send_IPI_self(LOCAL_PENDING_VECTOR);
-#endif
-}
-
void perf_events_lapic_init(void)
{
if (!x86_pmu.apic || !x86_pmu_initialized())
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
-#ifdef CONFIG_PERF_EVENTS
-apicinterrupt LOCAL_PENDING_VECTOR \
- perf_pending_interrupt smp_perf_pending_interrupt
+#ifdef CONFIG_IRQ_WORK
+apicinterrupt IRQ_WORK_VECTOR \
+ irq_work_interrupt smp_irq_work_interrupt
#endif
/*
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
+/*
+ * x86 specific code for irq_work
+ *
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/apic.h>
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_enter();
+ ack_APIC_irq();
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_work_run();
+ irq_exit();
+}
+
+void arch_irq_work_raise(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (!cpu_has_apic)
+ return;
+
+ apic->send_IPI_self(IRQ_WORK_VECTOR);
+ apic_wait_icr_idle();
+#endif
+}
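
Note how the arch hook is wired up: kernel/irq_work.c above provides a
__weak arch_irq_work_raise() that does nothing, and an architecture
that can raise a self-interrupt (sparc and x86 in this patch) defines a
strong version that the linker picks instead. A toy two-file
illustration of the weak-symbol pattern (GCC/Clang, not kernel code);
building generic.c alone runs the empty default, while adding arch.c to
the link makes the strong version win:

	/* generic.c: the do-nothing default, overridable at link time. */
	#include <stdio.h>

	void __attribute__((weak)) arch_raise(void)
	{
		/* Fallback: nothing to do; a periodic tick picks up the work. */
	}

	int main(void)
	{
		arch_raise();
		puts("done");
		return 0;
	}

	/* arch.c: linking this file in silently replaces the weak default. */
	#include <stdio.h>

	void arch_raise(void)
	{
		puts("raising arch-specific self-interrupt");
	}
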
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
- /* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_EVENTS
- alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+ /* IRQ work interrupts: */
+# ifdef CONFIG_IRQ_WORK
+ alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
# endif
#endif
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -484,6 +484,7 @@ struct perf_guest_info_callbacks {
#include <linux/workqueue.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
+#include <linux/irq_work.h>
#include <asm/atomic.h>
#include <asm/local.h>
@@ -608,11 +609,6 @@ struct perf_mmap_data {
void *data_pages[0];
};
-struct perf_pending_entry {
- struct perf_pending_entry *next;
- void (*func)(struct perf_pending_entry *);
-};
-
struct perf_sample_data;
typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
@@ -719,7 +715,7 @@ struct perf_event {
int pending_wakeup;
int pending_kill;
int pending_disable;
- struct perf_pending_entry pending;
+ struct irq_work pending;
atomic_t event_limit;
@@ -831,8 +827,6 @@ extern void perf_event_task_tick(struct
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
-extern void set_perf_event_pending(void);
-extern void perf_event_do_pending(void);
extern void perf_event_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
@@ -1031,7 +1025,6 @@ perf_event_task_tick(struct task_struct
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
-static inline void perf_event_do_pending(void) { }
static inline void perf_event_print_debug(void) { }
static inline void perf_disable(void) { }
static inline void perf_enable(void) { }
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -21,6 +21,13 @@ config CONSTRUCTORS
depends on !UML
default y
+config HAVE_IRQ_WORK
+ bool
+
+config IRQ_WORK
+ bool
+ depends on HAVE_IRQ_WORK
+
menu "General setup"
config EXPERIMENTAL
@@ -983,6 +990,7 @@ config PERF_EVENTS
default y if (PROFILING || PERF_COUNTERS)
depends on HAVE_PERF_EVENTS
select ANON_INODES
+ select IRQ_WORK
help
Enable kernel support for various performance events provided
by software and hardware.
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
+CFLAGS_REMOVE_irq_work.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
@@ -101,6 +102,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
+obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1882,12 +1882,11 @@ static void free_event_rcu(struct rcu_he
kfree(event);
}
-static void perf_pending_sync(struct perf_event *event);
static void perf_mmap_data_put(struct perf_mmap_data *data);
static void free_event(struct perf_event *event)
{
- perf_pending_sync(event);
+ irq_work_sync(&event->pending);
if (!event->parent) {
atomic_dec(&nr_events);
@@ -2824,16 +2823,7 @@ void perf_event_wakeup(struct perf_event
}
}
-/*
- * Pending wakeups
- *
- * Handle the case where we need to wakeup up from NMI (or rq->lock) context.
- *
- * The NMI bit means we cannot possibly take locks. Therefore, maintain a
- * single linked list and use cmpxchg() to add entries lockless.
- */
-
-static void perf_pending_event(struct perf_pending_entry *entry)
+static void perf_pending_event(struct irq_work *entry)
{
struct perf_event *event = container_of(entry,
struct perf_event, pending);
@@ -2849,89 +2839,6 @@ static void perf_pending_event(struct pe
}
}
-#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
-
-static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
- PENDING_TAIL,
-};
-
-static void perf_pending_queue(struct perf_pending_entry *entry,
- void (*func)(struct perf_pending_entry *))
-{
- struct perf_pending_entry **head;
-
- if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
- return;
-
- entry->func = func;
-
- head = &get_cpu_var(perf_pending_head);
-
- do {
- entry->next = *head;
- } while (cmpxchg(head, entry->next, entry) != entry->next);
-
- set_perf_event_pending();
-
- put_cpu_var(perf_pending_head);
-}
-
-static int __perf_pending_run(void)
-{
- struct perf_pending_entry *list;
- int nr = 0;
-
- list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
- while (list != PENDING_TAIL) {
- void (*func)(struct perf_pending_entry *);
- struct perf_pending_entry *entry = list;
-
- list = list->next;
-
- func = entry->func;
- entry->next = NULL;
- /*
- * Ensure we observe the unqueue before we issue the wakeup,
- * so that we won't be waiting forever.
- * -- see perf_not_pending().
- */
- smp_wmb();
-
- func(entry);
- nr++;
- }
-
- return nr;
-}
-
-static inline int perf_not_pending(struct perf_event *event)
-{
- /*
- * If we flush on whatever cpu we run, there is a chance we don't
- * need to wait.
- */
- get_cpu();
- __perf_pending_run();
- put_cpu();
-
- /*
- * Ensure we see the proper queue state before going to sleep
- * so that we do not miss the wakeup. -- see perf_pending_handle()
- */
- smp_rmb();
- return event->pending.next == NULL;
-}
-
-static void perf_pending_sync(struct perf_event *event)
-{
- wait_event(event->waitq, perf_not_pending(event));
-}
-
-void perf_event_do_pending(void)
-{
- __perf_pending_run();
-}
-
/*
* Callchain support -- arch specific
*/
@@ -2996,8 +2903,7 @@ static void perf_output_wakeup(struct pe
if (handle->nmi) {
handle->event->pending_wakeup = 1;
- perf_pending_queue(&handle->event->pending,
- perf_pending_event);
+ irq_work_queue(&handle->event->pending);
} else
perf_event_wakeup(handle->event);
}
@@ -3976,8 +3882,7 @@ static int __perf_event_overflow(struct
event->pending_kill = POLL_HUP;
if (nmi) {
event->pending_disable = 1;
- perf_pending_queue(&event->pending,
- perf_pending_event);
+ irq_work_queue(&event->pending);
} else
perf_event_disable(event);
}
@@ -4831,6 +4736,7 @@ perf_event_alloc(struct perf_event_attr
INIT_LIST_HEAD(&event->event_entry);
INIT_LIST_HEAD(&event->sibling_list);
init_waitqueue_head(&event->waitq);
+ init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,7 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -1264,7 +1264,10 @@ void update_process_times(int user_tick)
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
printk_tick();
- perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+ if (in_irq())
+ irq_work_run();
+#endif
scheduler_tick();
run_posix_cpu_timers(p);
}
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -1045,7 +1045,7 @@ armv6pmu_handle_irq(int irq_num,
* platforms that can have the PMU interrupts raised as a PMI, this
* will not work.
*/
- perf_event_do_pending();
+ irq_work_run();
return IRQ_HANDLED;
}
@@ -2021,7 +2021,7 @@ static irqreturn_t armv7pmu_handle_irq(i
* platforms that can have the PMU interrupts raised as a PMI, this
* will not work.
*/
- perf_event_do_pending();
+ irq_work_run();
return IRQ_HANDLED;
}
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,9 +114,9 @@
#define X86_PLATFORM_IPI_VECTOR 0xed
/*
- * Performance monitoring pending work vector:
+ * IRQ work vector:
*/
-#define LOCAL_PENDING_VECTOR 0xec
+#define IRQ_WORK_VECTOR 0xec
#define UV_BAU_MESSAGE 0xea
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,7 +14,7 @@ typedef struct {
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
- unsigned int apic_pending_irqs;
+ unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "PND");
+ seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
- seq_printf(p, " Performance pending work\n");
+ seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+ seq_printf(p, " IRQ work interrupts\n");
#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
- sum += irq_stats(cpu)->apic_pending_irqs;
+ sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
* [PATCH -v3 2/2] irq_work, MCE: use irq_work in MCE
2010-07-13 4:59 [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks Huang Ying
@ 2010-07-13 4:59 ` Huang Ying
2010-08-30 9:41 ` [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks Peter Zijlstra
1 sibling, 0 replies; 3+ messages in thread
From: Huang Ying @ 2010-07-13 4:59 UTC
To: Ingo Molnar, H. Peter Anvin
Cc: linux-kernel, Andi Kleen, Peter Zijlstra, Huang Ying
Use the generic irq_work mechanism to replace the self-interrupt used
in the MCE handler.
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/x86/include/asm/entry_arch.h | 4 --
arch/x86/include/asm/irq_vectors.h | 5 ---
arch/x86/kernel/cpu/mcheck/mce.c | 51 +++++--------------------------------
arch/x86/kernel/entry_64.S | 5 ---
arch/x86/kernel/irqinit.c | 3 --
5 files changed, 7 insertions(+), 61 deletions(-)
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -61,8 +61,4 @@ BUILD_INTERRUPT(thermal_interrupt,THERMA
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif
-#ifdef CONFIG_X86_MCE
-BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
-#endif
-
#endif
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -120,11 +120,6 @@
#define UV_BAU_MESSAGE 0xea
-/*
- * Self IPI vector for machine checks
- */
-#define MCE_SELF_VECTOR 0xeb
-
#define NR_VECTORS 256
#define FPU_IRQ 13
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -37,6 +37,7 @@
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/edac_mce.h>
+#include <linux/irq_work.h>
#include <asm/processor.h>
#include <asm/hw_irq.h>
@@ -125,6 +126,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_ban
static DEFINE_PER_CPU(struct work_struct, mce_work);
+static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
@@ -480,60 +483,20 @@ static inline void mce_get_rip(struct mc
m->ip = mce_rdmsrl(rip_msr);
}
-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Called after interrupts have been reenabled again
- * when a MCE happened during an interrupts off region
- * in the kernel.
- */
-asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+static void __mce_report_event(struct irq_work *w)
{
- ack_APIC_irq();
- exit_idle();
- irq_enter();
mce_notify_irq();
mce_schedule_work();
- irq_exit();
}
-#endif
static void mce_report_event(struct pt_regs *regs)
{
if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
- mce_notify_irq();
- /*
- * Triggering the work queue here is just an insurance
- * policy in case the syscall exit notify handler
- * doesn't run soon enough or ends up running on the
- * wrong CPU (can happen when audit sleeps)
- */
- mce_schedule_work();
+ __mce_report_event(NULL);
return;
}
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Without APIC do not notify. The event will be picked
- * up eventually.
- */
- if (!cpu_has_apic)
- return;
-
- /*
- * When interrupts are disabled we cannot use
- * kernel services safely. Trigger an self interrupt
- * through the APIC to instead do the notification
- * after interrupts are reenabled again.
- */
- apic->send_IPI_self(MCE_SELF_VECTOR);
-
- /*
- * Wait for idle afterwards again so that we don't leave the
- * APIC in a non idle state because the normal APIC writes
- * cannot exclude us.
- */
- apic_wait_icr_idle();
-#endif
+ irq_work_queue(&__get_cpu_var(mce_irq_work));
}
DEFINE_PER_CPU(unsigned, mce_poll_count);
@@ -1463,7 +1426,7 @@ void __cpuinit mcheck_cpu_init(struct cp
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
-
+ init_irq_work(&__get_cpu_var(mce_irq_work), __mce_report_event);
}
/*
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1004,11 +1004,6 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
-#ifdef CONFIG_X86_MCE
-apicinterrupt MCE_SELF_VECTOR \
- mce_self_interrupt smp_mce_self_interrupt
-#endif
-
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -209,9 +209,6 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
- alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
-#endif
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
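
The conversion above boils down to a simple pattern: one irq_work per
CPU, initialized at CPU bring-up and queued from the exception path. In
isolation it reduces to something like the following sketch (names are
illustrative, not the actual MCE code):

	#include <linux/irq_work.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(struct irq_work, report_irq_work);

	static void report_event(struct irq_work *w)
	{
		/* Hard-IRQ context: notification/scheduling work is safe here. */
	}

	static void cpu_init_hook(void)
	{
		init_irq_work(&__get_cpu_var(report_irq_work), report_event);
	}

	/* Called from the machine-check (NMI-like) path: */
	static void exception_path(void)
	{
		irq_work_queue(&__get_cpu_var(report_irq_work));
	}
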
* Re: [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks
2010-07-13 4:59 [PATCH -v3 1/2] irq_work: generic hard-irq context callbacks Huang Ying
2010-07-13 4:59 ` [PATCH -v3 2/2] irq_work, MCE: use irq_work in MCE Huang Ying
@ 2010-08-30 9:41 ` Peter Zijlstra
1 sibling, 0 replies; 3+ messages in thread
From: Peter Zijlstra @ 2010-08-30 9:41 UTC
To: Huang Ying
Cc: Ingo Molnar, H. Peter Anvin, paulus, linux-kernel, Andi Kleen,
dhowells, Russell King, Kyle McMartin, Martin Schwidefsky, davem,
Linux-Arch
On Tue, 2010-07-13 at 12:59 +0800, Huang Ying wrote:
> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
>
> To let other NMI-context users run things from hard-IRQ context,
> extract the perf_event callback mechanism into a generic facility.
>
> Huang Ying: some fixes
>
> This patch has only been tested on the x86 platform.
Right, looks ok, although it would require some acks from relevant
architecture maintainers, all of whom you forgot to CC.