* [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <1209453990-7735-1-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2008-04-29 7:26 ` Jens Axboe
2008-04-29 7:26 ` Jens Axboe
[not found] ` <1209453990-7735-3-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
0 siblings, 2 replies; 54+ messages in thread
From: Jens Axboe @ 2008-04-29 7:26 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe
This converts x86, x86-64, and xen to use the new helpers for
smp_call_function() and friends, and adds support for
smp_call_function_single().
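For illustration only (not part of this patch), here is a rough sketch of what a caller of the
single-CPU helper looks like once the generic kernel/smp.c implementation from patch 1/10 is in
place. The ipi_count/count_on_cpu names are made up for the example; only the
smp_call_function_single() signature is taken from the code below:

#include <linux/smp.h>
#include <asm/atomic.h>

/*
 * Runs in hard interrupt context on the target CPU, so it must be
 * fast and non-blocking.
 */
static void ipi_count(void *info)
{
        atomic_inc((atomic_t *)info);
}

static int count_on_cpu(int cpu)
{
        static atomic_t hits = ATOMIC_INIT(0);

        /*
         * nonatomic == 0, wait == 1: return only after the target CPU has
         * executed ipi_count().  The arch code merely supplies
         * arch_send_call_function_single_ipi(); queueing and completion
         * tracking now live in the generic helpers.
         */
        return smp_call_function_single(cpu, ipi_count, &hits, 0, 1);
}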
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/apic_32.c | 4 +
arch/x86/kernel/entry_64.S | 3 +
arch/x86/kernel/i8259_64.c | 4 +
arch/x86/kernel/smp.c | 152 ++++------------------------
arch/x86/kernel/smpcommon.c | 56 ----------
arch/x86/mach-voyager/voyager_smp.c | 94 +++--------------
arch/x86/xen/enlighten.c | 4 +-
arch/x86/xen/mmu.c | 2 +-
arch/x86/xen/smp.c | 120 +++++++----------------
arch/x86/xen/xen-ops.h | 9 +--
include/asm-x86/hw_irq_32.h | 1 +
include/asm-x86/hw_irq_64.h | 2 +
include/asm-x86/mach-default/entry_arch.h | 1 +
include/asm-x86/mach-default/irq_vectors.h | 1 +
include/asm-x86/mach-voyager/entry_arch.h | 2 +-
include/asm-x86/mach-voyager/irq_vectors.h | 4 +-
include/asm-x86/smp.h | 19 ++--
18 files changed, 113 insertions(+), 366 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a12dbb2..5e0dcf1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -162,6 +162,7 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+ select USE_GENERIC_SMP_HELPERS
default y
config X86_32_SMP
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1b..71017f7 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1358,6 +1358,10 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
}
#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df..6d1fe27 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -711,6 +711,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a15..00d2ccd 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -494,6 +494,10 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ /* IPI for generic single function call */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
+
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 8f75893..a0fd846 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,131 +121,32 @@ static void native_smp_send_reschedule(int cpu)
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
void lock_ipi_call_lock(void)
{
- spin_lock_irq(&call_lock);
+ spin_lock_irq(&call_function_lock);
}
void unlock_ipi_call_lock(void)
{
- spin_unlock_irq(&call_lock);
+ spin_unlock_irq(&call_function_lock);
}
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
- int nonatomic, int wait)
+void native_send_call_func_single_ipi(int cpu)
{
- struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+void native_send_call_func_ipi(cpumask_t mask)
{
- struct call_data_struct data;
cpumask_t allbutself;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
-
- /* Send a message to other CPUs */
if (cpus_equal(mask, allbutself))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
}
static void stop_this_cpu(void *dummy)
@@ -267,18 +168,13 @@ static void stop_this_cpu(void *dummy)
static void native_smp_send_stop(void)
{
- int nolock;
unsigned long flags;
if (reboot_force)
return;
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
+ smp_call_function(stop_this_cpu, NULL, 0, 0);
disable_local_APIC();
local_irq_restore(flags);
}
@@ -300,33 +196,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
irq_exit();
+}
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
+void smp_call_function_single_interrupt(void)
+{
+ ack_APIC_irq();
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
}
struct smp_ops smp_ops = {
@@ -337,7 +228,8 @@ struct smp_ops smp_ops = {
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
- .smp_call_function_mask = native_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064..99941b3 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
per_cpu(cpu_number, cpu) = cpu;
}
#endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0c..cb34407 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -955,94 +955,24 @@ static void smp_stop_cpu_function(void *dummy)
halt();
}
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- volatile unsigned long started;
- volatile unsigned long finished;
- int wait;
-};
-
-static struct call_data_struct *call_data;
-
/* execute a thread on a new CPU. The function to be called must be
* previously set up. This is used to schedule a function for
* execution on all CPUs - set up the function then broadcast a
* function_interrupt CPI to come here on each CPU */
static void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- /* must take copy of wait because call_data may be replaced
- * unless the function is waiting for us to finish */
- int wait = call_data->wait;
- __u8 cpu = smp_processor_id();
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- if (!test_and_clear_bit(cpu, &call_data->started)) {
- /* If the bit wasn't set, this could be a replay */
- printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
- " with no call pending\n", cpu);
- return;
- }
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func) (info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb();
- clear_bit(cpu, &call_data->finished);
- }
}
-static int
-voyager_smp_call_function_mask(cpumask_t cpumask,
- void (*func) (void *info), void *info, int wait)
+static void smp_call_function_single_interrupt(void)
{
- struct call_data_struct data;
- u32 mask = cpus_addr(cpumask)[0];
-
- mask &= ~(1 << smp_processor_id());
-
- if (!mask)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- data.started = mask;
- data.wait = wait;
- if (wait)
- data.finished = mask;
-
- spin_lock(&call_lock);
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-
- /* Wait for response */
- while (data.started)
- barrier();
-
- if (wait)
- while (data.finished)
- barrier();
-
- spin_unlock(&call_lock);
-
- return 0;
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
}
/* Sorry about the name. In an APIC based system, the APICs
@@ -1099,6 +1029,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
smp_call_function_interrupt();
}
+void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+{
+ ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+ smp_call_function_single_interrupt();
+}
+
void smp_vic_cpi_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1119,6 +1055,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
smp_enable_irq_interrupt();
if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
smp_call_function_interrupt();
+ if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+ smp_call_function_single_interrupt();
set_irq_regs(old_regs);
}
@@ -1862,5 +1800,7 @@ struct smp_ops smp_ops = {
.smp_send_stop = voyager_smp_send_stop,
.smp_send_reschedule = voyager_smp_send_reschedule,
- .smp_call_function_mask = voyager_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e4..e045578 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1123,7 +1123,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
.smp_send_stop = xen_smp_send_stop,
.smp_send_reschedule = xen_smp_send_reschedule,
- .smp_call_function_mask = xen_smp_call_function_mask,
+
+ .send_call_func_ipi = xen_smp_send_call_function_ipi,
+ .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
#endif /* CONFIG_SMP */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 126766d..2221332 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -569,7 +569,7 @@ static void drop_mm_ref(struct mm_struct *mm)
}
if (!cpus_empty(mask))
- xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+ smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e6900..2dfe093 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,23 +36,11 @@
#include "mmu.h"
static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
@@ -122,6 +110,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(debug_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+ cpu,
+ xen_call_function_single_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(callfuncsingle_irq, cpu) = rc;
+
return 0;
fail:
@@ -131,6 +130,9 @@ static int xen_smp_intr_init(unsigned int cpu)
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(debug_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+ if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
return rc;
}
@@ -338,7 +340,6 @@ void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
unsigned cpu;
@@ -349,83 +350,32 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
xen_send_IPI_one(cpu, vector);
}
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+ xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb(); /* commit everything before setting finished */
- atomic_inc(&call_data->finished);
- }
-
return IRQ_HANDLED;
}
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
- struct call_data_struct data;
- int cpus, cpu;
- bool yield;
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-
- cpu_clear(smp_processor_id(), mask);
-
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* write everything before IPI */
-
- /* Send a message to other CPUs and wait for them to respond */
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
- /* Make sure other vcpus get a chance to run if they need to. */
- yield = false;
- for_each_cpu_mask(cpu, mask)
- if (xen_vcpu_stolen(cpu))
- yield = true;
-
- if (yield)
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus ||
- (wait && atomic_read(&data.finished) != cpus))
- cpu_relax();
-
- spin_unlock(&call_lock);
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
- return 0;
+ return IRQ_HANDLED;
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae..a636ab5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -46,13 +46,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
void xen_smp_send_stop(void);
void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
/* Declare an asm function, along with symbols needed to make it
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index ea88054..a87b132 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -32,6 +32,7 @@ extern void (*const interrupt[NR_IRQS])(void);
void reschedule_interrupt(void);
void invalidate_interrupt(void);
void call_function_interrupt(void);
+void call_function_single_interrupt(void);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 0062ef3..fe65781 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -68,6 +68,7 @@
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
/* fb free - please don't readd KDB here because it's useless
(hint - think what a NMI bit does to a vector) */
#define THERMAL_APIC_VECTOR 0xfa
@@ -102,6 +103,7 @@ void spurious_interrupt(void);
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
+void call_function_single_interrupt(void);
void irq_move_cleanup_interrupt(void);
void invalidate_interrupt0(void);
void invalidate_interrupt1(void);
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index bc86146..9283b60 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -13,6 +13,7 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
#endif
/*
diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h
index 881c63c..ed7d495 100644
--- a/include/asm-x86/mach-default/irq_vectors.h
+++ b/include/asm-x86/mach-default/irq_vectors.h
@@ -48,6 +48,7 @@
#define INVALIDATE_TLB_VECTOR 0xfd
#define RESCHEDULE_VECTOR 0xfc
#define CALL_FUNCTION_VECTOR 0xfb
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfa
#define THERMAL_APIC_VECTOR 0xf0
/*
diff --git a/include/asm-x86/mach-voyager/entry_arch.h b/include/asm-x86/mach-voyager/entry_arch.h
index 4a1e1e8..ae52624 100644
--- a/include/asm-x86/mach-voyager/entry_arch.h
+++ b/include/asm-x86/mach-voyager/entry_arch.h
@@ -23,4 +23,4 @@ BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI);
BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI);
BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI);
BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI);
-
+BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI);
diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h
index 165421f..64e47f6 100644
--- a/include/asm-x86/mach-voyager/irq_vectors.h
+++ b/include/asm-x86/mach-voyager/irq_vectors.h
@@ -33,6 +33,7 @@
#define VIC_RESCHEDULE_CPI 4
#define VIC_ENABLE_IRQ_CPI 5
#define VIC_CALL_FUNCTION_CPI 6
+#define VIC_CALL_FUNCTION_SINGLE_CPI 7
/* Now the QIC CPIs: Since we don't need the two initial levels,
* these are 2 less than the VIC CPIs */
@@ -42,9 +43,10 @@
#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
+#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
#define VIC_START_FAKE_CPI VIC_TIMER_CPI
-#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI
+#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI
/* this is the SYS_INT CPI. */
#define VIC_SYS_INT 8
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5c..bba35a1 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -59,9 +59,9 @@ struct smp_ops {
void (*smp_send_stop)(void);
void (*smp_send_reschedule)(int cpu);
- int (*smp_call_function_mask)(cpumask_t mask,
- void (*func)(void *info), void *info,
- int wait);
+
+ void (*send_call_func_ipi)(cpumask_t mask);
+ void (*send_call_func_single_ipi)(int cpu);
};
/* Globals due to paravirt */
@@ -103,17 +103,22 @@ static inline void smp_send_reschedule(int cpu)
smp_ops.smp_send_reschedule(cpu);
}
-static inline int smp_call_function_mask(cpumask_t mask,
- void (*func) (void *info), void *info,
- int wait)
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+ smp_ops.send_call_func_single_ipi(cpu);
+}
+
+static inline void arch_send_call_function_ipi(cpumask_t mask)
{
- return smp_ops.smp_call_function_mask(mask, func, info, wait);
+ smp_ops.send_call_func_ipi(mask);
}
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
int native_cpu_up(unsigned int cpunum);
+void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_single_ipi(int cpu);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
--
1.5.5.1.57.g5909c
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <1209453990-7735-3-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
@ 2008-04-29 20:35 ` Jeremy Fitzhardinge
2008-04-29 20:35 ` Jeremy Fitzhardinge
[not found] ` <481786A5.7010604-TSDbQ3PG+2Y@public.gmane.org>
2008-04-30 21:39 ` Jeremy Fitzhardinge
1 sibling, 2 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-04-29 20:35 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
Jens Axboe wrote:
> -int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> - void *info, int wait)
>
[...]
> - /* Send a message to other CPUs and wait for them to respond */
> - xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> -
> - /* Make sure other vcpus get a chance to run if they need to. */
> - yield = false;
> - for_each_cpu_mask(cpu, mask)
> - if (xen_vcpu_stolen(cpu))
> - yield = true;
> -
> - if (yield)
> - HYPERVISOR_sched_op(SCHEDOP_yield, 0);
>
I added this to deal with the case where you're sending an IPI to
another VCPU which isn't currently running on a real cpu. In this case
you could end up spinning while the other VCPU is waiting for a real CPU
to run on. (Basically the same problem that spinlocks have in a virtual
environment.)
However, this is at best a partial solution to the problem, and I never
benchmarked if it really makes a difference. Since any other virtual
environment would have the same problem, it's best if we can solve it
generically. (Of course a synchronous single-target cross-cpu call is a
simple cross-cpu rpc, which could be implemented very efficiently in the
host/hypervisor by simply doing a vcpu context switch...)
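To make that last point concrete, here is a purely hypothetical sketch (nothing like this exists
in this series; only xen_vcpu_stolen() and the yield hypercall are taken from the code quoted
above) of what a generic solution might look like: the generic wait loop would call an
overridable relax hook instead of cpu_relax() directly, and a Xen backend could yield when the
target vcpu has been stolen:

/*
 * Hypothetical arch/paravirt hook, not part of this patch set: the
 * generic helpers would call this while spinning for a remote CPU to
 * pick up a queued function call.  Default: just relax the pipeline.
 */
void arch_call_function_relax(int cpu)
{
        cpu_relax();
}

/*
 * A Xen-flavoured override could donate its timeslice when the target
 * vcpu is not currently running on a physical CPU, instead of burning
 * cycles.  How it would be wired up (smp_ops, pv_ops, ...) is left open.
 */
void xen_call_function_relax(int cpu)
{
        if (xen_vcpu_stolen(cpu))
                HYPERVISOR_sched_op(SCHEDOP_yield, 0);
        else
                cpu_relax();
}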
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <481786A5.7010604-TSDbQ3PG+2Y@public.gmane.org>
@ 2008-04-30 11:35 ` Jens Axboe
2008-04-30 11:35 ` Jens Axboe
[not found] ` <20080430113542.GZ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
0 siblings, 2 replies; 54+ messages in thread
From: Jens Axboe @ 2008-04-30 11:35 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> Jens Axboe wrote:
> >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> >- void *info, int wait)
> >
> [...]
> >- /* Send a message to other CPUs and wait for them to respond */
> >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> >-
> >- /* Make sure other vcpus get a chance to run if they need to. */
> >- yield = false;
> >- for_each_cpu_mask(cpu, mask)
> >- if (xen_vcpu_stolen(cpu))
> >- yield = true;
> >-
> >- if (yield)
> >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> >
>
> I added this to deal with the case where you're sending an IPI to
> another VCPU which isn't currently running on a real cpu. In this case
> you could end up spinning while the other VCPU is waiting for a real CPU
> to run on. (Basically the same problem that spinlocks have in a virtual
> environment.)
>
> However, this is at best a partial solution to the problem, and I never
> benchmarked if it really makes a difference. Since any other virtual
> environment would have the same problem, it's best if we can solve it
> generically. (Of course a synchronous single-target cross-cpu call is a
> simple cross-cpu rpc, which could be implemented very efficiently in the
> host/hypervisor by simply doing a vcpu context switch...)
So, what would your advice be? Seems safe enough to ignore for now and
attack it if it becomes a real problem.
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-04-30 11:35 ` Jens Axboe
@ 2008-04-30 11:35 ` Jens Axboe
[not found] ` <20080430113542.GZ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
1 sibling, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-04-30 11:35 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> Jens Axboe wrote:
> >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> >- void *info, int wait)
> >
> [...]
> >- /* Send a message to other CPUs and wait for them to respond */
> >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> >-
> >- /* Make sure other vcpus get a chance to run if they need to. */
> >- yield = false;
> >- for_each_cpu_mask(cpu, mask)
> >- if (xen_vcpu_stolen(cpu))
> >- yield = true;
> >-
> >- if (yield)
> >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> >
>
> I added this to deal with the case where you're sending an IPI to
> another VCPU which isn't currently running on a real cpu. In this case
> you could end up spinning while the other VCPU is waiting for a real CPU
> to run on. (Basically the same problem that spinlocks have in a virtual
> environment.)
>
> However, this is at best a partial solution to the problem, and I never
> benchmarked if it really makes a difference. Since any other virtual
> environment would have the same problem, its best if we can solve it
> generically. (Of course a synchronous single-target cross-cpu call is a
> simple cross-cpu rpc, which could be implemented very efficiently in the
> host/hypervisor by simply doing a vcpu context switch...)
So, what would your advice be? Seems safe enough to ignore for now and
attack it if it becomes a real problem.
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <20080430113542.GZ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
@ 2008-04-30 12:20 ` Paul E. McKenney
2008-04-30 12:20 ` Paul E. McKenney
[not found] ` <20080430122001.GS11126-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
0 siblings, 2 replies; 54+ messages in thread
From: Paul E. McKenney @ 2008-04-30 12:20 UTC (permalink / raw)
To: Jens Axboe
Cc: Jeremy Fitzhardinge, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, npiggin-l3A5Bk7waGM,
linux-arch-u79uwXL29TY76Z2rM5mHXA, mingo-X9Un+BFzKDI
On Wed, Apr 30, 2008 at 01:35:42PM +0200, Jens Axboe wrote:
> On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> > Jens Axboe wrote:
> > >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> > >- void *info, int wait)
> > >
> > [...]
> > >- /* Send a message to other CPUs and wait for them to respond */
> > >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> > >-
> > >- /* Make sure other vcpus get a chance to run if they need to. */
> > >- yield = false;
> > >- for_each_cpu_mask(cpu, mask)
> > >- if (xen_vcpu_stolen(cpu))
> > >- yield = true;
> > >-
> > >- if (yield)
> > >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> > >
> >
> > I added this to deal with the case where you're sending an IPI to
> > another VCPU which isn't currently running on a real cpu. In this case
> > you could end up spinning while the other VCPU is waiting for a real CPU
> > to run on. (Basically the same problem that spinlocks have in a virtual
> > environment.)
> >
> > However, this is at best a partial solution to the problem, and I never
> > benchmarked if it really makes a difference. Since any other virtual
> > environment would have the same problem, its best if we can solve it
> > generically. (Of course a synchronous single-target cross-cpu call is a
> > simple cross-cpu rpc, which could be implemented very efficiently in the
> > host/hypervisor by simply doing a vcpu context switch...)
>
> So, what would your advice be? Seems safe enough to ignore for now and
> attack it if it becomes a real problem.
How about an arch-specific function/macro invoked in the spin loop?
The generic implementation would do nothing, but things like Xen
could implement it as above.
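Purely as a sketch of that idea (untested and not in the posted patches;
arch_ipi_wait_relax() is a made-up name, and the wait loop would also
need to be told which cpu it is waiting on), it could look something like:

/* Generic default: a no-op that an arch may override. */
#ifndef arch_ipi_wait_relax
#define arch_ipi_wait_relax(cpu)	do { } while (0)
#endif

static void csd_flag_wait(struct call_single_data *data, int cpu)
{
	do {
		/* We need to see the flags store in the IPI handler */
		smp_mb();
		if (!(data->flags & CSD_FLAG_WAIT))
			break;
		/* e.g. Xen: give the preempted target vcpu a chance to run */
		arch_ipi_wait_relax(cpu);
		cpu_relax();
	} while (1);
}

/* and a Xen-style override could then be roughly: */
#define arch_ipi_wait_relax(cpu)				\
	do {							\
		if (xen_vcpu_stolen(cpu))			\
			HYPERVISOR_sched_op(SCHEDOP_yield, 0);	\
	} while (0)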
Thanx, Paul
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-04-30 12:20 ` Paul E. McKenney
@ 2008-04-30 12:20 ` Paul E. McKenney
[not found] ` <20080430122001.GS11126-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
1 sibling, 0 replies; 54+ messages in thread
From: Paul E. McKenney @ 2008-04-30 12:20 UTC (permalink / raw)
To: Jens Axboe
Cc: Jeremy Fitzhardinge, linux-kernel, peterz, npiggin, linux-arch,
mingo
On Wed, Apr 30, 2008 at 01:35:42PM +0200, Jens Axboe wrote:
> On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> > Jens Axboe wrote:
> > >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> > >- void *info, int wait)
> > >
> > [...]
> > >- /* Send a message to other CPUs and wait for them to respond */
> > >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> > >-
> > >- /* Make sure other vcpus get a chance to run if they need to. */
> > >- yield = false;
> > >- for_each_cpu_mask(cpu, mask)
> > >- if (xen_vcpu_stolen(cpu))
> > >- yield = true;
> > >-
> > >- if (yield)
> > >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> > >
> >
> > I added this to deal with the case where you're sending an IPI to
> > another VCPU which isn't currently running on a real cpu. In this case
> > you could end up spinning while the other VCPU is waiting for a real CPU
> > to run on. (Basically the same problem that spinlocks have in a virtual
> > environment.)
> >
> > However, this is at best a partial solution to the problem, and I never
> > benchmarked if it really makes a difference. Since any other virtual
> > environment would have the same problem, its best if we can solve it
> > generically. (Of course a synchronous single-target cross-cpu call is a
> > simple cross-cpu rpc, which could be implemented very efficiently in the
> > host/hypervisor by simply doing a vcpu context switch...)
>
> So, what would your advice be? Seems safe enough to ignore for now and
> attack it if it becomes a real problem.
How about an arch-specific function/macro invoked in the spin loop?
The generic implementation would do nothing, but things like Xen
could implement it as above.
Thanx, Paul
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <20080430122001.GS11126-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2008-04-30 12:31 ` Jens Axboe
2008-04-30 12:31 ` Jens Axboe
[not found] ` <20080430123136.GB12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
0 siblings, 2 replies; 54+ messages in thread
From: Jens Axboe @ 2008-04-30 12:31 UTC (permalink / raw)
To: Paul E. McKenney
Cc: Jeremy Fitzhardinge, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, npiggin-l3A5Bk7waGM,
linux-arch-u79uwXL29TY76Z2rM5mHXA, mingo-X9Un+BFzKDI
On Wed, Apr 30 2008, Paul E. McKenney wrote:
> On Wed, Apr 30, 2008 at 01:35:42PM +0200, Jens Axboe wrote:
> > On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> > > Jens Axboe wrote:
> > > >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> > > >- void *info, int wait)
> > > >
> > > [...]
> > > >- /* Send a message to other CPUs and wait for them to respond */
> > > >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> > > >-
> > > >- /* Make sure other vcpus get a chance to run if they need to. */
> > > >- yield = false;
> > > >- for_each_cpu_mask(cpu, mask)
> > > >- if (xen_vcpu_stolen(cpu))
> > > >- yield = true;
> > > >-
> > > >- if (yield)
> > > >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> > > >
> > >
> > > I added this to deal with the case where you're sending an IPI to
> > > another VCPU which isn't currently running on a real cpu. In this case
> > > you could end up spinning while the other VCPU is waiting for a real CPU
> > > to run on. (Basically the same problem that spinlocks have in a virtual
> > > environment.)
> > >
> > > However, this is at best a partial solution to the problem, and I never
> > > benchmarked if it really makes a difference. Since any other virtual
> > > environment would have the same problem, its best if we can solve it
> > > generically. (Of course a synchronous single-target cross-cpu call is a
> > > simple cross-cpu rpc, which could be implemented very efficiently in the
> > > host/hypervisor by simply doing a vcpu context switch...)
> >
> > So, what would your advice be? Seems safe enough to ignore for now and
> > attack it if it becomes a real problem.
>
> How about an arch-specific function/macro invoked in the spin loop?
> The generic implementation would do nothing, but things like Xen
> could implement as above.
Xen could just stuff that bit into its arch_send_call_function_ipi();
something like the below should be fine. My question to Jeremy was more
about whether it should be kept at all, but I guess it's safer to just
keep it and retain the existing behaviour (and let Jeremy/others
evaluate it at will later on). Note that I got rid of the yield bool and
now break out of the loop once the hypervisor has been called.
Jeremy, shall I add this?
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2dfe093..064e6dc 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -352,7 +352,17 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
void xen_smp_send_call_function_ipi(cpumask_t mask)
{
+ int cpu;
+
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask(cpu, mask) {
+ if (xen_vcpu_stolen(cpu)) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ break;
+ }
+ }
}
void xen_smp_send_call_function_single_ipi(int cpu)
--
Jens Axboe
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-04-30 12:31 ` Jens Axboe
@ 2008-04-30 12:31 ` Jens Axboe
[not found] ` <20080430123136.GB12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
1 sibling, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-04-30 12:31 UTC (permalink / raw)
To: Paul E. McKenney
Cc: Jeremy Fitzhardinge, linux-kernel, peterz, npiggin, linux-arch,
mingo
On Wed, Apr 30 2008, Paul E. McKenney wrote:
> On Wed, Apr 30, 2008 at 01:35:42PM +0200, Jens Axboe wrote:
> > On Tue, Apr 29 2008, Jeremy Fitzhardinge wrote:
> > > Jens Axboe wrote:
> > > >-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
> > > >- void *info, int wait)
> > > >
> > > [...]
> > > >- /* Send a message to other CPUs and wait for them to respond */
> > > >- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> > > >-
> > > >- /* Make sure other vcpus get a chance to run if they need to. */
> > > >- yield = false;
> > > >- for_each_cpu_mask(cpu, mask)
> > > >- if (xen_vcpu_stolen(cpu))
> > > >- yield = true;
> > > >-
> > > >- if (yield)
> > > >- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> > > >
> > >
> > > I added this to deal with the case where you're sending an IPI to
> > > another VCPU which isn't currently running on a real cpu. In this case
> > > you could end up spinning while the other VCPU is waiting for a real CPU
> > > to run on. (Basically the same problem that spinlocks have in a virtual
> > > environment.)
> > >
> > > However, this is at best a partial solution to the problem, and I never
> > > benchmarked if it really makes a difference. Since any other virtual
> > > environment would have the same problem, its best if we can solve it
> > > generically. (Of course a synchronous single-target cross-cpu call is a
> > > simple cross-cpu rpc, which could be implemented very efficiently in the
> > > host/hypervisor by simply doing a vcpu context switch...)
> >
> > So, what would your advice be? Seems safe enough to ignore for now and
> > attack it if it becomes a real problem.
>
> How about an arch-specific function/macro invoked in the spin loop?
> The generic implementation would do nothing, but things like Xen
> could implement as above.
Xen could just stuff that bit into its arch_send_call_function_ipi();
something like the below should be fine. My question to Jeremy was more
about whether it should be kept at all, but I guess it's safer to just
keep it and retain the existing behaviour (and let Jeremy/others
evaluate it at will later on). Note that I got rid of the yield bool and
now break out of the loop once the hypervisor has been called.
Jeremy, shall I add this?
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2dfe093..064e6dc 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -352,7 +352,17 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
void xen_smp_send_call_function_ipi(cpumask_t mask)
{
+ int cpu;
+
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask(cpu, mask) {
+ if (xen_vcpu_stolen(cpu)) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ break;
+ }
+ }
}
void xen_smp_send_call_function_single_ipi(int cpu)
--
Jens Axboe
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <20080430123136.GB12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
@ 2008-04-30 14:51 ` Jeremy Fitzhardinge
2008-04-30 14:51 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-04-30 14:51 UTC (permalink / raw)
To: Jens Axboe
Cc: Paul E. McKenney, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, npiggin-l3A5Bk7waGM,
linux-arch-u79uwXL29TY76Z2rM5mHXA, mingo-X9Un+BFzKDI
Jens Axboe wrote:
> Xen could just stuff that bit into its arch_send_call_function_ipi(),
> something like the below should be fine. My question to Jeremy was more
> of the order of whether it should be kept or not, I guess it's safer to
> just keep it and retain the existing behaviour (and let Jeremy/others
> evaluate it at will later on). Note that I got rid of the yield bool and
> break when we called the hypervisor.
>
Yes, it's a nice cleanup.
> Jeremy, shall I add this?
>
Hold off for now. Given that its effects are unmeasured, I'm not even
sure it's the right thing to do. For example, it will yield if you're
sending an IPI to a vcpu which wants to run but can't, but does nothing
for an idle vcpu. And always yielding may be a performance problem if
the IPI doesn't involve any cpu contention.
It's easy to add back if it turns out to be useful.
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-04-30 14:51 ` Jeremy Fitzhardinge
@ 2008-04-30 14:51 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-04-30 14:51 UTC (permalink / raw)
To: Jens Axboe
Cc: Paul E. McKenney, linux-kernel, peterz, npiggin, linux-arch,
mingo
Jens Axboe wrote:
> Xen could just stuff that bit into its arch_send_call_function_ipi(),
> something like the below should be fine. My question to Jeremy was more
> of the order of whether it should be kept or not, I guess it's safer to
> just keep it and retain the existing behaviour (and let Jeremy/others
> evaluate it at will later on). Note that I got rid of the yield bool and
> break when we called the hypervisor.
>
Yes, it's a nice cleanup.
> Jeremy, shall I add this?
>
Hold off for now. Given that its effects are unmeasured, I'm not even
sure it's the right thing to do. For example, it will yield if you're
sending an IPI to a vcpu which wants to run but can't, but does nothing
for an idle vcpu. And always yielding may be a performance problem if
the IPI doesn't involve any cpu contention.
It's easy to add back if it turns out to be useful.
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
[not found] ` <1209453990-7735-3-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-29 20:35 ` Jeremy Fitzhardinge
@ 2008-04-30 21:39 ` Jeremy Fitzhardinge
2008-04-30 21:39 ` Jeremy Fitzhardinge
1 sibling, 1 reply; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-04-30 21:39 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, npiggin-l3A5Bk7waGM,
linux-arch-u79uwXL29TY76Z2rM5mHXA, mingo-X9Un+BFzKDI,
paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8
Jens Axboe wrote:
> This converts x86, x86-64, and xen to use the new helpers for
> smp_call_function() and friends, and adds support for
> smp_call_function_single().
>
Seems to work fine with this:
Subject: xen: build fix for generic IPI stuff
Not even one CONFIG_XEN test build?
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
---
arch/x86/xen/smp.c | 3 +--
include/asm-x86/xen/events.h | 1 +
2 files changed, 2 insertions(+), 2 deletions(-)
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -43,8 +43,7 @@
static DEFINE_PER_CPU(int, debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
===================================================================
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -4,6 +4,7 @@
enum ipi_vector {
XEN_RESCHEDULE_VECTOR,
XEN_CALL_FUNCTION_VECTOR,
+ XEN_CALL_FUNCTION_SINGLE_VECTOR,
XEN_NR_IPIS,
};
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-04-30 21:39 ` Jeremy Fitzhardinge
@ 2008-04-30 21:39 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-04-30 21:39 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
Jens Axboe wrote:
> This converts x86, x86-64, and xen to use the new helpers for
> smp_call_function() and friends, and adds support for
> smp_call_function_single().
>
Seems to work fine with this:
Subject: xen: build fix for generic IPI stuff
Not even one CONFIG_XEN test build?
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
arch/x86/xen/smp.c | 3 +--
include/asm-x86/xen/events.h | 1 +
2 files changed, 2 insertions(+), 2 deletions(-)
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -43,8 +43,7 @@
static DEFINE_PER_CPU(int, debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
===================================================================
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -4,6 +4,7 @@
enum ipi_vector {
XEN_RESCHEDULE_VECTOR,
XEN_CALL_FUNCTION_VECTOR,
+ XEN_CALL_FUNCTION_SINGLE_VECTOR,
XEN_NR_IPIS,
};
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 0/10] Add generic helpers for arch IPI function calls #4
@ 2008-05-29 8:58 Jens Axboe
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
` (10 more replies)
0 siblings, 11 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel; +Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck
Hi,
Jeremy sent me a ping about this patchset, so here's an updated
version against the current kernel (2.6.26-rc4'ish). Changes since
last post:
- Incorporate Paul's changes and suggestions to make
smp_call_function_single() both cleaner and (hopefully) deadlock-free
within the restrictions set.
- Fix various compile problems with some archs and .config configurations.
Not really a lot of churn; it's mostly solid and works well on the
archs that I test (x86, x86-64, ppc), and I know that it works well
on ia64 as well.
The nice diffstat is below, enjoy.
arch/Kconfig | 3
arch/alpha/Kconfig | 1
arch/alpha/kernel/core_marvel.c | 6
arch/alpha/kernel/smp.c | 170 ---------
arch/arm/Kconfig | 1
arch/arm/kernel/smp.c | 157 ---------
arch/ia64/Kconfig | 1
arch/ia64/kernel/smp.c | 242 --------------
arch/m32r/Kconfig | 1
arch/m32r/kernel/m32r_ksyms.c | 3
arch/m32r/kernel/smp.c | 128 -------
arch/m32r/kernel/traps.c | 3
arch/mips/Kconfig | 1
arch/mips/kernel/smp.c | 139 --------
arch/mips/kernel/smtc.c | 1
arch/parisc/Kconfig | 1
arch/parisc/kernel/smp.c | 134 +------
arch/powerpc/Kconfig | 1
arch/powerpc/kernel/smp.c | 220 ------------
arch/powerpc/platforms/cell/interrupt.c | 1
arch/powerpc/platforms/ps3/smp.c | 7
arch/powerpc/platforms/pseries/xics.c | 6
arch/powerpc/sysdev/mpic.c | 2
arch/sh/Kconfig | 1
arch/sh/kernel/smp.c | 48 --
arch/sparc64/kernel/smp.c | 11
arch/x86/Kconfig | 1
arch/x86/kernel/apic_32.c | 4
arch/x86/kernel/entry_64.S | 3
arch/x86/kernel/i8259_64.c | 4
arch/x86/kernel/smp.c | 152 +-------
arch/x86/kernel/smpcommon.c | 56 ---
arch/x86/mach-voyager/voyager_smp.c | 94 -----
arch/x86/xen/enlighten.c | 4
arch/x86/xen/smp.c | 133 ++-----
arch/x86/xen/xen-ops.h | 9
include/asm-alpha/smp.h | 2
include/asm-ia64/smp.h | 3
include/asm-m32r/smp.h | 1
include/asm-mips/smp.h | 10
include/asm-powerpc/smp.h | 5
include/asm-sh/smp.h | 12
include/asm-x86/hw_irq_32.h | 1
include/asm-x86/hw_irq_64.h | 2
include/asm-x86/mach-default/entry_arch.h | 1
include/asm-x86/mach-default/irq_vectors.h | 1
include/asm-x86/mach-voyager/entry_arch.h | 2
include/asm-x86/mach-voyager/irq_vectors.h | 4
include/asm-x86/smp.h | 19 -
include/asm-x86/xen/events.h | 1
include/linux/smp.h | 34 +
init/main.c | 2
kernel/Makefile | 1
kernel/smp.c | 362 +++++++++++++++++++++
54 files changed, 661 insertions(+), 1551 deletions(-)
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-30 11:24 ` Paul E. McKenney
` (2 more replies)
2008-05-29 8:58 ` [PATCH 2/10] x86: convert to generic helpers for " Jens Axboe
` (9 subsequent siblings)
10 siblings, 3 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe
This adds kernel/smp.c which contains helpers for IPI function calls. In
addition to supporting the existing smp_call_function() in a more efficient
manner, it also adds a more scalable variant called smp_call_function_single()
for calling a given function on a single CPU only.
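As an illustration only (this example is not part of the patch, and the
callback here is made up), a caller uses the single-CPU variant roughly
like this:

#include <linux/kernel.h>
#include <linux/smp.h>

/* Runs on the target CPU, typically from the IPI handler. */
static void remote_hello(void *info)
{
	printk(KERN_INFO "hello from CPU %d\n", smp_processor_id());
}

static void poke_cpu(int cpu)
{
	/* The retry argument is unused; wait=1 blocks until the function has run. */
	smp_call_function_single(cpu, remote_hello, NULL, 0, 1);
}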
The core of this is based on the x86-64 patch from Nick Piggin, with lots of
changes since then. "Alan D. Brunelle" <Alan.Brunelle@hp.com> has
contributed lots of fixes and suggestions as well. Also thanks to
Paul E. McKenney <paulmck@linux.vnet.ibm.com> for reviewing RCU usage
and getting rid of the data allocation fallback deadlock.
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/Kconfig | 3 +
arch/sparc64/kernel/smp.c | 11 +-
include/linux/smp.h | 34 ++++-
init/main.c | 2 +
kernel/Makefile | 1 +
kernel/smp.c | 362 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 406 insertions(+), 7 deletions(-)
create mode 100644 kernel/smp.c
diff --git a/arch/Kconfig b/arch/Kconfig
index 3ea332b..ad89a33 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -39,3 +39,6 @@ config HAVE_KRETPROBES
config HAVE_DMA_ATTRS
def_bool n
+
+config USE_GENERIC_SMP_HELPERS
+ def_bool n
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index fa63c68..b82d017 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -816,8 +816,9 @@ extern unsigned long xcall_call_function;
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-static int smp_call_function_mask(void (*func)(void *info), void *info,
- int nonatomic, int wait, cpumask_t mask)
+static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info,
+ int nonatomic, int wait,
+ cpumask_t mask)
{
struct call_data_struct data;
int cpus;
@@ -855,8 +856,8 @@ out_unlock:
int smp_call_function(void (*func)(void *info), void *info,
int nonatomic, int wait)
{
- return smp_call_function_mask(func, info, nonatomic, wait,
- cpu_online_map);
+ return sparc64_smp_call_function_mask(func, info, nonatomic, wait,
+ cpu_online_map);
}
void smp_call_function_client(int irq, struct pt_regs *regs)
@@ -893,7 +894,7 @@ static void tsb_sync(void *info)
void smp_tsb_sync(struct mm_struct *mm)
{
- smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+ sparc64_smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
}
extern unsigned long xcall_flush_tlb_mm;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 55232cc..2691bad 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,9 +7,19 @@
*/
#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
extern void cpu_idle(void);
+struct call_single_data {
+ struct list_head list;
+ void (*func) (void *info);
+ void *info;
+ unsigned int flags;
+};
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
@@ -53,9 +63,27 @@ extern void smp_cpus_done(unsigned int max_cpus);
* Call a function on all other processors
*/
int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
-
+int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
+ int wait);
int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
int retry, int wait);
+void __smp_call_function_single(int cpuid, struct call_single_data *data);
+
+/*
+ * Generic and arch helpers
+ */
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+void generic_smp_call_function_single_interrupt(void);
+void generic_smp_call_function_interrupt(void);
+void init_call_single_data(void);
+void arch_send_call_function_single_ipi(int cpu);
+void arch_send_call_function_ipi(cpumask_t mask);
+extern spinlock_t call_function_lock;
+#else
+static inline void init_call_single_data(void)
+{
+}
+#endif
/*
* Call a function on all processors
@@ -112,7 +140,9 @@ static inline void smp_send_reschedule(int cpu) { }
})
#define smp_call_function_mask(mask, func, info, wait) \
(up_smp_call_function(func, info))
-
+static inline void init_call_single_data(void)
+{
+}
#endif /* !SMP */
/*
diff --git a/init/main.c b/init/main.c
index f7fb200..1efcccf 100644
--- a/init/main.c
+++ b/init/main.c
@@ -31,6 +31,7 @@
#include <linux/kernel_stat.h>
#include <linux/start_kernel.h>
#include <linux/security.h>
+#include <linux/smp.h>
#include <linux/workqueue.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
@@ -779,6 +780,7 @@ static void __init do_pre_smp_initcalls(void)
{
extern int spawn_ksoftirqd(void);
+ init_call_single_data();
migration_init();
spawn_ksoftirqd();
if (!nosoftlockup)
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938a..9fa5797 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
+obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/smp.c b/kernel/smp.c
new file mode 100644
index 0000000..ef6de3d
--- /dev/null
+++ b/kernel/smp.c
@@ -0,0 +1,362 @@
+/*
+ * Generic helpers for smp ipi calls
+ *
+ * (C) Jens Axboe <jens.axboe@oracle.com> 2008
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/smp.h>
+
+static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
+static LIST_HEAD(call_function_queue);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+enum {
+ CSD_FLAG_WAIT = 0x01,
+ CSD_FLAG_ALLOC = 0x02,
+};
+
+struct call_function_data {
+ struct call_single_data csd;
+ spinlock_t lock;
+ unsigned int refs;
+ cpumask_t cpumask;
+ struct rcu_head rcu_head;
+};
+
+struct call_single_queue {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+void __cpuinit init_call_single_data(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct call_single_queue *q = &per_cpu(call_single_queue, i);
+
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->list);
+ }
+}
+
+static void csd_flag_wait(struct call_single_data *data)
+{
+ /* Wait for response */
+ do {
+ /*
+ * We need to see the flags store in the IPI handler
+ */
+ smp_mb();
+ if (!(data->flags & CSD_FLAG_WAIT))
+ break;
+ cpu_relax();
+ } while (1);
+}
+
+/*
+ * Insert a previously allocated call_single_data element for execution
+ * on the given CPU. data must already have ->func, ->info, and ->flags set.
+ */
+static void generic_exec_single(int cpu, struct call_single_data *data)
+{
+ struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
+ int wait = data->flags & CSD_FLAG_WAIT, ipi;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dst->lock, flags);
+ ipi = list_empty(&dst->list);
+ list_add_tail(&data->list, &dst->list);
+ spin_unlock_irqrestore(&dst->lock, flags);
+
+ if (ipi)
+ arch_send_call_function_single_ipi(cpu);
+
+ if (wait)
+ csd_flag_wait(data);
+}
+
+static void rcu_free_call_data(struct rcu_head *head)
+{
+ struct call_function_data *data;
+
+ data = container_of(head, struct call_function_data, rcu_head);
+
+ kfree(data);
+}
+
+/*
+ * Invoked by arch to handle an IPI for call function. Must be called with
+ * interrupts disabled.
+ */
+void generic_smp_call_function_interrupt(void)
+{
+ struct call_function_data *data;
+ int cpu = get_cpu();
+
+ /*
+ * It's ok to use list_for_each_rcu() here even though we may delete
+ * 'pos', since list_del_rcu() doesn't clear ->next
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+ int refs;
+
+ if (!cpu_isset(cpu, data->cpumask))
+ continue;
+
+ data->csd.func(data->csd.info);
+
+ spin_lock(&data->lock);
+ cpu_clear(cpu, data->cpumask);
+ WARN_ON(data->refs == 0);
+ data->refs--;
+ refs = data->refs;
+ spin_unlock(&data->lock);
+
+ if (refs)
+ continue;
+
+ spin_lock(&call_function_lock);
+ list_del_rcu(&data->csd.list);
+ spin_unlock(&call_function_lock);
+
+ if (data->csd.flags & CSD_FLAG_WAIT) {
+ /*
+ * serialize stores to data with the flag clear
+ * and wakeup
+ */
+ smp_wmb();
+ data->csd.flags &= ~CSD_FLAG_WAIT;
+ } else
+ call_rcu(&data->rcu_head, rcu_free_call_data);
+ }
+ rcu_read_unlock();
+
+ put_cpu();
+}
+
+/*
+ * Invoked by arch to handle an IPI for call function single. Must be called
+ * from the arch with interrupts disabled.
+ */
+void generic_smp_call_function_single_interrupt(void)
+{
+ struct call_single_queue *q = &__get_cpu_var(call_single_queue);
+ LIST_HEAD(list);
+
+ /*
+ * Need to see other stores to list head for checking whether
+ * list is empty without holding q->lock
+ */
+ smp_mb();
+ while (!list_empty(&q->list)) {
+ unsigned int data_flags;
+
+ spin_lock(&q->lock);
+ list_replace_init(&q->list, &list);
+ spin_unlock(&q->lock);
+
+ while (!list_empty(&list)) {
+ struct call_single_data *data;
+
+ data = list_entry(list.next, struct call_single_data,
+ list);
+ list_del(&data->list);
+
+ /*
+ * 'data' can be invalid after this call if
+ * flags == 0 (when called through
+ * generic_exec_single(), so save them away before
+ * making the call.
+ */
+ data_flags = data->flags;
+
+ data->func(data->info);
+
+ if (data_flags & CSD_FLAG_WAIT) {
+ smp_wmb();
+ data->flags &= ~CSD_FLAG_WAIT;
+ } else if (data_flags & CSD_FLAG_ALLOC)
+ kfree(data);
+ }
+ /*
+ * See comment on outer loop
+ */
+ smp_mb();
+ }
+}
+
+/*
+ * smp_call_function_single - Run a function on a specific CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @retry: Unused
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ */
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int retry, int wait)
+{
+ struct call_single_data d;
+ unsigned long flags;
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ if (cpu == me) {
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ } else {
+ struct call_single_data *data = NULL;
+
+ if (!wait) {
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data)
+ data->flags = CSD_FLAG_ALLOC;
+ }
+ if (!data) {
+ data = &d;
+ data->flags = CSD_FLAG_WAIT;
+ }
+
+ data->func = func;
+ data->info = info;
+ generic_exec_single(cpu, data);
+ }
+
+ put_cpu();
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);
+
+/**
+ * __smp_call_function_single(): Run a function on another CPU
+ * @cpu: The CPU to run on.
+ * @data: Pre-allocated and setup data structure
+ *
+ * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
+ * data structure. Useful for embedding @data inside other structures, for
+ * instance.
+ *
+ */
+void __smp_call_function_single(int cpu, struct call_single_data *data)
+{
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
+
+ generic_exec_single(cpu, data);
+}
+
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
+ int wait)
+{
+ struct call_function_data d;
+ struct call_function_data *data = NULL;
+ cpumask_t allbutself;
+ unsigned long flags;
+ int cpu, num_cpus;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ cpu = smp_processor_id();
+ allbutself = cpu_online_map;
+ cpu_clear(cpu, allbutself);
+ cpus_and(mask, mask, allbutself);
+ num_cpus = cpus_weight(mask);
+
+ /*
+ * If zero CPUs, return. If just a single CPU, turn this request
+ * into a targetted single call instead since it's faster.
+ */
+ if (!num_cpus)
+ return 0;
+ else if (num_cpus == 1) {
+ cpu = first_cpu(mask);
+ return smp_call_function_single(cpu, func, info, 0, wait);
+ }
+
+ if (!wait) {
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data)
+ data->csd.flags = CSD_FLAG_ALLOC;
+ }
+ if (!data) {
+ data = &d;
+ data->csd.flags = CSD_FLAG_WAIT;
+ }
+
+ spin_lock_init(&data->lock);
+ data->csd.func = func;
+ data->csd.info = info;
+ data->refs = num_cpus;
+
+ /*
+ * need to see above stores before the cpumask is valid for the CPU
+ */
+ smp_wmb();
+ data->cpumask = mask;
+
+ spin_lock_irqsave(&call_function_lock, flags);
+ list_add_tail_rcu(&data->csd.list, &call_function_queue);
+ spin_unlock_irqrestore(&call_function_lock, flags);
+
+ /* Send a message to all CPUs in the map */
+ arch_send_call_function_ipi(mask);
+
+ /* optionally wait for the CPUs to complete */
+ if (wait)
+ csd_flag_wait(&data->csd);
+
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @natomic: Unused
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function(void (*func)(void *), void *info, int natomic, int wait)
+{
+ int ret;
+
+ preempt_disable();
+ ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+ preempt_enable();
+ return ret;
+}
+EXPORT_SYMBOL(smp_call_function);
--
1.5.6.rc0.40.gd683
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 12:12 ` Jeremy Fitzhardinge
2008-05-29 8:58 ` [PATCH 3/10] powerpc: " Jens Axboe
` (8 subsequent siblings)
10 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe
This converts x86, x86-64, and xen to use the new helpers for
smp_call_function() and friends, and adds support for
smp_call_function_single().
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/apic_32.c | 4 +
arch/x86/kernel/entry_64.S | 3 +
arch/x86/kernel/i8259_64.c | 4 +
arch/x86/kernel/smp.c | 152 ++++------------------------
arch/x86/kernel/smpcommon.c | 56 ----------
arch/x86/mach-voyager/voyager_smp.c | 94 +++--------------
arch/x86/xen/enlighten.c | 4 +-
arch/x86/xen/smp.c | 133 +++++++++----------------
arch/x86/xen/xen-ops.h | 9 +--
include/asm-x86/hw_irq_32.h | 1 +
include/asm-x86/hw_irq_64.h | 2 +
include/asm-x86/mach-default/entry_arch.h | 1 +
include/asm-x86/mach-default/irq_vectors.h | 1 +
include/asm-x86/mach-voyager/entry_arch.h | 2 +-
include/asm-x86/mach-voyager/irq_vectors.h | 4 +-
include/asm-x86/smp.h | 19 ++--
include/asm-x86/xen/events.h | 1 +
18 files changed, 124 insertions(+), 367 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcbec34..a617291 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -168,6 +168,7 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+ select USE_GENERIC_SMP_HELPERS
default y
config X86_32_SMP
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1b..71017f7 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1358,6 +1358,10 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
}
#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df..6d1fe27 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -711,6 +711,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a15..00d2ccd 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -494,6 +494,10 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ /* IPI for generic single function call */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
+
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aad..3e051ae 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,33 @@ static void native_smp_send_reschedule(int cpu)
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
void lock_ipi_call_lock(void)
{
- spin_lock_irq(&call_lock);
+ spin_lock_irq(&call_function_lock);
}
void unlock_ipi_call_lock(void)
{
- spin_unlock_irq(&call_lock);
+ spin_unlock_irq(&call_function_lock);
}
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
- int nonatomic, int wait)
+void native_send_call_func_single_ipi(int cpu)
{
- struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+void native_send_call_func_ipi(cpumask_t mask)
{
- struct call_data_struct data;
cpumask_t allbutself;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
-
- /* Send a message to other CPUs */
if (cpus_equal(mask, allbutself) &&
cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
}
static void stop_this_cpu(void *dummy)
@@ -268,18 +169,13 @@ static void stop_this_cpu(void *dummy)
static void native_smp_send_stop(void)
{
- int nolock;
unsigned long flags;
if (reboot_force)
return;
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
+ smp_call_function(stop_this_cpu, NULL, 0, 0);
local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
disable_local_APIC();
local_irq_restore(flags);
}
@@ -301,33 +197,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
irq_exit();
+}
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
+void smp_call_function_single_interrupt(void)
+{
+ ack_APIC_irq();
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
}
struct smp_ops smp_ops = {
@@ -338,7 +229,8 @@ struct smp_ops smp_ops = {
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
- .smp_call_function_mask = native_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064..99941b3 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
per_cpu(cpu_number, cpu) = cpu;
}
#endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0c..cb34407 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -955,94 +955,24 @@ static void smp_stop_cpu_function(void *dummy)
halt();
}
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- volatile unsigned long started;
- volatile unsigned long finished;
- int wait;
-};
-
-static struct call_data_struct *call_data;
-
/* execute a thread on a new CPU. The function to be called must be
* previously set up. This is used to schedule a function for
* execution on all CPUs - set up the function then broadcast a
* function_interrupt CPI to come here on each CPU */
static void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- /* must take copy of wait because call_data may be replaced
- * unless the function is waiting for us to finish */
- int wait = call_data->wait;
- __u8 cpu = smp_processor_id();
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- if (!test_and_clear_bit(cpu, &call_data->started)) {
- /* If the bit wasn't set, this could be a replay */
- printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
- " with no call pending\n", cpu);
- return;
- }
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func) (info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb();
- clear_bit(cpu, &call_data->finished);
- }
}
-static int
-voyager_smp_call_function_mask(cpumask_t cpumask,
- void (*func) (void *info), void *info, int wait)
+static void smp_call_function_single_interrupt(void)
{
- struct call_data_struct data;
- u32 mask = cpus_addr(cpumask)[0];
-
- mask &= ~(1 << smp_processor_id());
-
- if (!mask)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- data.started = mask;
- data.wait = wait;
- if (wait)
- data.finished = mask;
-
- spin_lock(&call_lock);
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-
- /* Wait for response */
- while (data.started)
- barrier();
-
- if (wait)
- while (data.finished)
- barrier();
-
- spin_unlock(&call_lock);
-
- return 0;
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
}
/* Sorry about the name. In an APIC based system, the APICs
@@ -1099,6 +1029,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
smp_call_function_interrupt();
}
+void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+{
+ ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+ smp_call_function_single_interrupt();
+}
+
void smp_vic_cpi_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1119,6 +1055,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
smp_enable_irq_interrupt();
if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
smp_call_function_interrupt();
+ if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+ smp_call_function_single_interrupt();
set_irq_regs(old_regs);
}
@@ -1862,5 +1800,7 @@ struct smp_ops smp_ops = {
.smp_send_stop = voyager_smp_send_stop,
.smp_send_reschedule = voyager_smp_send_reschedule,
- .smp_call_function_mask = voyager_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e4..e045578 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1123,7 +1123,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
.smp_send_stop = xen_smp_send_stop,
.smp_send_reschedule = xen_smp_send_reschedule,
- .smp_call_function_mask = xen_smp_call_function_mask,
+
+ .send_call_func_ipi = xen_smp_send_call_function_ipi,
+ .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
#endif /* CONFIG_SMP */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e6900..b3786e7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,27 +36,14 @@
#include "mmu.h"
static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
@@ -122,6 +109,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(debug_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+ cpu,
+ xen_call_function_single_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(callfuncsingle_irq, cpu) = rc;
+
return 0;
fail:
@@ -131,6 +129,9 @@ static int xen_smp_intr_init(unsigned int cpu)
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(debug_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+ if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
return rc;
}
@@ -338,7 +339,6 @@ void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
unsigned cpu;
@@ -349,83 +349,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
xen_send_IPI_one(cpu, vector);
}
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+ int cpu;
+
+ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask(cpu, mask) {
+ if (xen_vcpu_stolen(cpu)) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ break;
+ }
+ }
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+ xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb(); /* commit everything before setting finished */
- atomic_inc(&call_data->finished);
- }
-
return IRQ_HANDLED;
}
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
- struct call_data_struct data;
- int cpus, cpu;
- bool yield;
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-
- cpu_clear(smp_processor_id(), mask);
-
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* write everything before IPI */
-
- /* Send a message to other CPUs and wait for them to respond */
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
- /* Make sure other vcpus get a chance to run if they need to. */
- yield = false;
- for_each_cpu_mask(cpu, mask)
- if (xen_vcpu_stolen(cpu))
- yield = true;
-
- if (yield)
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus ||
- (wait && atomic_read(&data.finished) != cpus))
- cpu_relax();
-
- spin_unlock(&call_lock);
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
- return 0;
+ return IRQ_HANDLED;
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae..a636ab5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -46,13 +46,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
void xen_smp_send_stop(void);
void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
/* Declare an asm function, along with symbols needed to make it
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
index ea88054..a87b132 100644
--- a/include/asm-x86/hw_irq_32.h
+++ b/include/asm-x86/hw_irq_32.h
@@ -32,6 +32,7 @@ extern void (*const interrupt[NR_IRQS])(void);
void reschedule_interrupt(void);
void invalidate_interrupt(void);
void call_function_interrupt(void);
+void call_function_single_interrupt(void);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 0062ef3..fe65781 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -68,6 +68,7 @@
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
/* fb free - please don't readd KDB here because it's useless
(hint - think what a NMI bit does to a vector) */
#define THERMAL_APIC_VECTOR 0xfa
@@ -102,6 +103,7 @@ void spurious_interrupt(void);
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
+void call_function_single_interrupt(void);
void irq_move_cleanup_interrupt(void);
void invalidate_interrupt0(void);
void invalidate_interrupt1(void);
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index bc86146..9283b60 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -13,6 +13,7 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
#endif
/*
diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h
index 881c63c..ed7d495 100644
--- a/include/asm-x86/mach-default/irq_vectors.h
+++ b/include/asm-x86/mach-default/irq_vectors.h
@@ -48,6 +48,7 @@
#define INVALIDATE_TLB_VECTOR 0xfd
#define RESCHEDULE_VECTOR 0xfc
#define CALL_FUNCTION_VECTOR 0xfb
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfa
#define THERMAL_APIC_VECTOR 0xf0
/*
diff --git a/include/asm-x86/mach-voyager/entry_arch.h b/include/asm-x86/mach-voyager/entry_arch.h
index 4a1e1e8..ae52624 100644
--- a/include/asm-x86/mach-voyager/entry_arch.h
+++ b/include/asm-x86/mach-voyager/entry_arch.h
@@ -23,4 +23,4 @@ BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI);
BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI);
BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI);
BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI);
-
+BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI);
diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h
index 165421f..64e47f6 100644
--- a/include/asm-x86/mach-voyager/irq_vectors.h
+++ b/include/asm-x86/mach-voyager/irq_vectors.h
@@ -33,6 +33,7 @@
#define VIC_RESCHEDULE_CPI 4
#define VIC_ENABLE_IRQ_CPI 5
#define VIC_CALL_FUNCTION_CPI 6
+#define VIC_CALL_FUNCTION_SINGLE_CPI 7
/* Now the QIC CPIs: Since we don't need the two initial levels,
* these are 2 less than the VIC CPIs */
@@ -42,9 +43,10 @@
#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
+#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
#define VIC_START_FAKE_CPI VIC_TIMER_CPI
-#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI
+#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI
/* this is the SYS_INT CPI. */
#define VIC_SYS_INT 8
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5c..bba35a1 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -59,9 +59,9 @@ struct smp_ops {
void (*smp_send_stop)(void);
void (*smp_send_reschedule)(int cpu);
- int (*smp_call_function_mask)(cpumask_t mask,
- void (*func)(void *info), void *info,
- int wait);
+
+ void (*send_call_func_ipi)(cpumask_t mask);
+ void (*send_call_func_single_ipi)(int cpu);
};
/* Globals due to paravirt */
@@ -103,17 +103,22 @@ static inline void smp_send_reschedule(int cpu)
smp_ops.smp_send_reschedule(cpu);
}
-static inline int smp_call_function_mask(cpumask_t mask,
- void (*func) (void *info), void *info,
- int wait)
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+ smp_ops.send_call_func_single_ipi(cpu);
+}
+
+static inline void arch_send_call_function_ipi(cpumask_t mask)
{
- return smp_ops.smp_call_function_mask(mask, func, info, wait);
+ smp_ops.send_call_func_ipi(mask);
}
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
int native_cpu_up(unsigned int cpunum);
+void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_single_ipi(int cpu);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index 596312a..f8d57ea 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -4,6 +4,7 @@
enum ipi_vector {
XEN_RESCHEDULE_VECTOR,
XEN_CALL_FUNCTION_VECTOR,
+ XEN_CALL_FUNCTION_SINGLE_VECTOR,
XEN_NR_IPIS,
};
--
1.5.6.rc0.40.gd683
* [PATCH 3/10] powerpc: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
2008-05-29 8:58 ` [PATCH 2/10] x86: convert to generic helpers for " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 4/10] ia64: " Jens Axboe
` (7 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe
This converts ppc to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single().
ppc loses the timeout functionality of smp_call_function_mask() with
this change, as the generic code does not provide that.
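For reference, the cross-CPU bookkeeping that each arch used to carry (and, on ppc, the timeout/debugger fallback) now lives in the generic code; the architecture only supplies the two IPI hooks. A rough sketch of the generic single-CPU path follows. It is a simplification, not the literal kernel/smp.c code: queue_on_cpu() is a placeholder for the real per-cpu queueing, and CSD_FLAG_WAIT stands in for the real flag handling.
struct call_single_data {
	struct list_head list;
	void (*func)(void *info);
	void *info;
	unsigned int flags;		/* includes a "caller is waiting" bit */
};
static void sketch_smp_call_function_single(int cpu,
					    struct call_single_data *data,
					    int wait)
{
	/* put the request on the target CPU's per-cpu queue (under a lock) */
	queue_on_cpu(cpu, data);
	/* the only arch-specific part: kick the target CPU */
	arch_send_call_function_single_ipi(cpu);
	/*
	 * No timeout here: the caller spins until the target's IPI handler,
	 * via generic_smp_call_function_single_interrupt(), has run
	 * data->func and cleared the wait bit.
	 */
	if (wait)
		while (data->flags & CSD_FLAG_WAIT)
			cpu_relax();
}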
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/kernel/smp.c | 220 ++-----------------------------
arch/powerpc/platforms/cell/interrupt.c | 1 +
arch/powerpc/platforms/ps3/smp.c | 7 +-
arch/powerpc/platforms/pseries/xics.c | 6 +-
arch/powerpc/sysdev/mpic.c | 2 +-
include/asm-powerpc/smp.h | 5 +-
7 files changed, 23 insertions(+), 219 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e26..852d40c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,6 +110,7 @@ config PPC
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_LMB
+ select USE_GENERIC_SMP_HELPERS if SMP
config EARLY_PRINTK
bool
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1457aa0..cfdb21e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -72,12 +72,8 @@ struct smp_ops_t *smp_ops;
static volatile unsigned int cpu_callin_map[NR_CPUS];
-void smp_call_function_interrupt(void);
-
int smt_enabled_at_boot = 1;
-static int ipi_fail_ok;
-
static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
#ifdef CONFIG_PPC64
@@ -99,12 +95,15 @@ void smp_message_recv(int msg)
{
switch(msg) {
case PPC_MSG_CALL_FUNCTION:
- smp_call_function_interrupt();
+ generic_smp_call_function_interrupt();
break;
case PPC_MSG_RESCHEDULE:
/* XXX Do we have to do this? */
set_need_resched();
break;
+ case PPC_MSG_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
case PPC_MSG_DEBUGGER_BREAK:
if (crash_ipi_function_ptr) {
crash_ipi_function_ptr(get_irq_regs());
@@ -154,215 +153,22 @@ static void stop_this_cpu(void *dummy)
;
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- * Stolen from the i386 version.
- */
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
-
-static struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-} *call_data;
-
-/* delay of at least 8 seconds */
-#define SMP_CALL_TIMEOUT 8
-
-/*
- * These functions send a 'generic call function' IPI to other online
- * CPUS in the system.
- *
- * [SUMMARY] Run a function on other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- * <map> is a cpu map of the cpus to send IPI to.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int __smp_call_function_map(void (*func) (void *info), void *info,
- int nonatomic, int wait, cpumask_t map)
-{
- struct call_data_struct data;
- int ret = -1, num_cpus;
- int cpu;
- u64 timeout;
-
- if (unlikely(smp_ops == NULL))
- return ret;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- /* remove 'self' from the map */
- if (cpu_isset(smp_processor_id(), map))
- cpu_clear(smp_processor_id(), map);
-
- /* sanity check the map, remove any non-online processors. */
- cpus_and(map, map, cpu_online_map);
-
- num_cpus = cpus_weight(map);
- if (!num_cpus)
- goto done;
-
- call_data = &data;
- smp_wmb();
- /* Send a message to all CPUs in the map */
- for_each_cpu_mask(cpu, map)
- smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
-
- timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec;
-
- /* Wait for indication that they have received the message */
- while (atomic_read(&data.started) != num_cpus) {
- HMT_low();
- if (get_tb() >= timeout) {
- printk("smp_call_function on cpu %d: other cpus not "
- "responding (%d)\n", smp_processor_id(),
- atomic_read(&data.started));
- if (!ipi_fail_ok)
- debugger(NULL);
- goto out;
- }
- }
-
- /* optionally wait for the CPUs to complete */
- if (wait) {
- while (atomic_read(&data.finished) != num_cpus) {
- HMT_low();
- if (get_tb() >= timeout) {
- printk("smp_call_function on cpu %d: other "
- "cpus not finishing (%d/%d)\n",
- smp_processor_id(),
- atomic_read(&data.finished),
- atomic_read(&data.started));
- debugger(NULL);
- goto out;
- }
- }
- }
-
- done:
- ret = 0;
-
- out:
- call_data = NULL;
- HMT_medium();
- return ret;
-}
-
-static int __smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- int ret;
- spin_lock(&call_lock);
- ret =__smp_call_function_map(func, info, nonatomic, wait,
- cpu_online_map);
- spin_unlock(&call_lock);
- return ret;
-}
-
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- return __smp_call_function(func, info, nonatomic, wait);
+ smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL(smp_call_function);
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- cpumask_t map = CPU_MASK_NONE;
- int ret = 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- if (!cpu_online(cpu))
- return -EINVAL;
+ unsigned int cpu;
- cpu_set(cpu, map);
- if (cpu != get_cpu()) {
- spin_lock(&call_lock);
- ret = __smp_call_function_map(func, info, nonatomic, wait, map);
- spin_unlock(&call_lock);
- } else {
- local_irq_disable();
- func(info);
- local_irq_enable();
- }
- put_cpu();
- return ret;
+ for_each_cpu_mask(cpu, mask)
+ smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
-EXPORT_SYMBOL(smp_call_function_single);
void smp_send_stop(void)
{
- int nolock;
-
- /* It's OK to fail sending the IPI, since the alternative is to
- * be stuck forever waiting on the other CPU to take the interrupt.
- *
- * It's better to at least continue and go through reboot, since this
- * function is usually called at panic or reboot time in the first
- * place.
- */
- ipi_fail_ok = 1;
-
- /* Don't deadlock in case we got called through panic */
- nolock = !spin_trylock(&call_lock);
- __smp_call_function_map(stop_this_cpu, NULL, 1, 0, cpu_online_map);
- if (!nolock)
- spin_unlock(&call_lock);
-}
-
-void smp_call_function_interrupt(void)
-{
- void (*func) (void *info);
- void *info;
- int wait;
-
- /* call_data will be NULL if the sender timed out while
- * waiting on us to receive the call.
- */
- if (!call_data)
- return;
-
- func = call_data->func;
- info = call_data->info;
- wait = call_data->wait;
-
- if (!wait)
- smp_mb__before_atomic_inc();
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(info);
- if (wait) {
- smp_mb__before_atomic_inc();
- atomic_inc(&call_data->finished);
- }
+ smp_call_function(stop_this_cpu, NULL, 0, 0);
}
extern struct gettimeofday_struct do_gtod;
@@ -596,9 +402,9 @@ int __devinit start_secondary(void *unused)
secondary_cpu_time_init();
- spin_lock(&call_lock);
+ spin_lock(&call_function_lock);
cpu_set(cpu, cpu_online_map);
- spin_unlock(&call_lock);
+ spin_unlock(&call_function_lock);
local_irq_enable();
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 5bf7df1..2d5bb22 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -218,6 +218,7 @@ void iic_request_IPIs(void)
{
iic_request_ipi(PPC_MSG_CALL_FUNCTION, "IPI-call");
iic_request_ipi(PPC_MSG_RESCHEDULE, "IPI-resched");
+ iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE, "IPI-call-single");
#ifdef CONFIG_DEBUGGER
iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug");
#endif /* CONFIG_DEBUGGER */
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index f0b12f2..a0927a3 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -105,9 +105,10 @@ static void __init ps3_smp_setup_cpu(int cpu)
* to index needs to be setup.
*/
- BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0);
- BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1);
- BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3);
+ BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0);
+ BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1);
+ BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
+ BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3);
for (i = 0; i < MSG_COUNT; i++) {
result = ps3_event_receive_port_setup(cpu, &virqs[i]);
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index ebebc28..0fc830f 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -383,13 +383,11 @@ static irqreturn_t xics_ipi_dispatch(int cpu)
mb();
smp_message_recv(PPC_MSG_RESCHEDULE);
}
-#if 0
- if (test_and_clear_bit(PPC_MSG_MIGRATE_TASK,
+ if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE,
&xics_ipi_message[cpu].value)) {
mb();
- smp_message_recv(PPC_MSG_MIGRATE_TASK);
+ smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
}
-#endif
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK,
&xics_ipi_message[cpu].value)) {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7680001..6c90c95 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1494,7 +1494,7 @@ void mpic_request_ipis(void)
static char *ipi_names[] = {
"IPI0 (call function)",
"IPI1 (reschedule)",
- "IPI2 (unused)",
+ "IPI2 (call function single)",
"IPI3 (debugger break)",
};
BUG_ON(mpic == NULL);
diff --git a/include/asm-powerpc/smp.h b/include/asm-powerpc/smp.h
index 505f35b..78382f6 100644
--- a/include/asm-powerpc/smp.h
+++ b/include/asm-powerpc/smp.h
@@ -67,10 +67,7 @@ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
* in /proc/interrupts will be wrong!!! --Troy */
#define PPC_MSG_CALL_FUNCTION 0
#define PPC_MSG_RESCHEDULE 1
-/* This is unused now */
-#if 0
-#define PPC_MSG_MIGRATE_TASK 2
-#endif
+#define PPC_MSG_CALL_FUNC_SINGLE 2
#define PPC_MSG_DEBUGGER_BREAK 3
void smp_init_iSeries(void);
--
1.5.6.rc0.40.gd683
* [PATCH 4/10] ia64: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (2 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 3/10] powerpc: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 5/10] alpha: " Jens Axboe
` (6 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Tony Luck
This converts ia64 to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single().
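One detail worth noting: ia64 used its private call_lock to keep a CPU from being marked online in the middle of an smp_call_function(), via lock_ipi_calllock(). The conversion keeps that behaviour by taking the lock exported by the generic helpers instead, as the hunk below relies on. A sketch of the idea (assuming call_function_lock is the exported generic lock):
/* secondary CPU bring-up, sketch only */
static void sketch_mark_cpu_online(int cpu)
{
	/*
	 * Carries the old call_lock rule over to the generic lock:
	 * marking this CPU online is serialized against call-function
	 * IPI traffic, so the CPU never starts being targeted for
	 * call-function work while it is only half-way online.
	 */
	spin_lock_irq(&call_function_lock);
	cpu_set(cpu, cpu_online_map);
	spin_unlock_irq(&call_function_lock);
}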
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/ia64/Kconfig | 1 +
arch/ia64/kernel/smp.c | 242 +++---------------------------------------------
include/asm-ia64/smp.h | 3 -
3 files changed, 16 insertions(+), 230 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 16be414..18bcc10 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -303,6 +303,7 @@ config VIRT_CPU_ACCOUNTING
config SMP
bool "Symmetric multi-processing support"
+ select USE_GENERIC_SMP_HELPERS
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 983296f..70b7b35 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -60,25 +60,9 @@ static struct local_tlb_flush_counts {
static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise static memory
- * requirements. It also looks cleaner.
- */
-static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- long wait;
- atomic_t started;
- atomic_t finished;
-};
-
-static volatile struct call_data_struct *call_data;
-
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+#define IPI_CALL_FUNC_SINGLE 2
#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
@@ -89,38 +73,13 @@ extern void cpu_halt (void);
void
lock_ipi_calllock(void)
{
- spin_lock_irq(&call_lock);
+ spin_lock_irq(&call_function_lock);
}
void
unlock_ipi_calllock(void)
{
- spin_unlock_irq(&call_lock);
-}
-
-static inline void
-handle_call_data(void)
-{
- struct call_data_struct *data;
- void (*func)(void *info);
- void *info;
- int wait;
-
- /* release the 'pointer lock' */
- data = (struct call_data_struct *)call_data;
- func = data->func;
- info = data->info;
- wait = data->wait;
-
- mb();
- atomic_inc(&data->started);
- /* At this point the structure may be gone unless wait is true. */
- (*func)(info);
-
- /* Notify the sending CPU that the task is done. */
- mb();
- if (wait)
- atomic_inc(&data->finished);
+ spin_unlock_irq(&call_function_lock);
}
static void
@@ -163,13 +122,15 @@ handle_IPI (int irq, void *dev_id)
ops &= ~(1 << which);
switch (which) {
- case IPI_CALL_FUNC:
- handle_call_data();
- break;
-
case IPI_CPU_STOP:
stop_this_cpu();
break;
+ case IPI_CALL_FUNC:
+ generic_smp_call_function_interrupt();
+ break;
+ case IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
#ifdef CONFIG_KEXEC
case IPI_KDUMP_CPU_STOP:
unw_init_running(kdump_cpu_freeze, NULL);
@@ -187,6 +148,8 @@ handle_IPI (int irq, void *dev_id)
return IRQ_HANDLED;
}
+
+
/*
* Called with preemption disabled.
*/
@@ -360,190 +323,15 @@ smp_flush_tlb_mm (struct mm_struct *mm)
on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}
-/*
- * Run a function on a specific CPU
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> Currently unused.
- * <wait> If true, wait until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int
-smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- struct call_data_struct data;
- int cpus = 1;
- int me = get_cpu(); /* prevent preemption and reschedule on another processor */
-
- if (cpuid == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- spin_lock_bh(&call_lock);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
- send_IPI_single(cpuid, IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock_bh(&call_lock);
- put_cpu();
- return 0;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * <mask> The set of cpus to run on. Must not include the current cpu.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <wait> If true, wait (atomically) until function
- * has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
-
- spin_lock(&call_lock);
- allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
-
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(IPI_CALL_FUNC);
- else
- send_IPI_mask(mask, IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock(&call_lock);
- return 0;
-
+ send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL(smp_call_function_mask);
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func> or are or have
- * executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int
-smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct call_data_struct data;
- int cpus;
-
- spin_lock(&call_lock);
- cpus = num_online_cpus() - 1;
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
- send_IPI_allbutself(IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock(&call_lock);
- return 0;
+ send_IPI_mask(mask, IPI_CALL_FUNC);
}
-EXPORT_SYMBOL(smp_call_function);
/*
* this function calls the 'stop' function on all other CPUs in the system.
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index ec5f355..4fa733d 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -38,9 +38,6 @@ ia64_get_lid (void)
return lid.f.id << 8 | lid.f.eid;
}
-extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);
-
#define hard_smp_processor_id() ia64_get_lid()
#ifdef CONFIG_SMP
--
1.5.6.rc0.40.gd683
* [PATCH 5/10] alpha: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (3 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 4/10] ia64: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 6/10] arm: " Jens Axboe
` (5 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe
This converts alpha to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single().
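As a usage sketch, the one caller alpha had for the old interface (the Marvel RTC access below) maps directly onto smp_call_function_single(). With the argument convention this series still uses (func, info, retry/nonatomic, wait), a hypothetical caller would look like this; example_on_target() and example_query_cpu() are invented names for illustration only.
/* Sketch: run a function on one specific CPU and wait for completion. */
static void example_on_target(void *info)
{
	/* runs in IPI context on the target CPU: fast, non-blocking */
	*(int *)info = smp_processor_id();
}
static int example_query_cpu(int target)
{
	int who = -1;
	/* final two arguments: retry/nonatomic (unused here) and wait */
	smp_call_function_single(target, example_on_target, &who, 1, 1);
	return who;
}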
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/alpha/Kconfig | 1 +
arch/alpha/kernel/core_marvel.c | 6 +-
arch/alpha/kernel/smp.c | 170 +++------------------------------------
include/asm-alpha/smp.h | 2 -
4 files changed, 14 insertions(+), 165 deletions(-)
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 729cdbd..dbe8c28 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -528,6 +528,7 @@ config ARCH_MAY_HAVE_PC_FDC
config SMP
bool "Symmetric multi-processing support"
depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL
+ select USE_GENERIC_SMP_HELPERS
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index b04f1fe..ced4aae 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -660,9 +660,9 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
#ifdef CONFIG_SMP
if (smp_processor_id() != boot_cpuid)
- smp_call_function_on_cpu(__marvel_access_rtc,
- &rtc_access, 1, 1,
- cpumask_of_cpu(boot_cpuid));
+ smp_call_function_single(boot_cpuid,
+ __marvel_access_rtc,
+ &rtc_access, 1, 1);
else
__marvel_access_rtc(&rtc_access);
#else
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2525692..95c905b 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -62,6 +62,7 @@ static struct {
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
+ IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
};
@@ -558,51 +559,6 @@ send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation)
wripir(i);
}
-/* Structure and data for smp_call_function. This is designed to
- minimize static memory requirements. Plus it looks cleaner. */
-
-struct smp_call_struct {
- void (*func) (void *info);
- void *info;
- long wait;
- atomic_t unstarted_count;
- atomic_t unfinished_count;
-};
-
-static struct smp_call_struct *smp_call_function_data;
-
-/* Atomicly drop data into a shared pointer. The pointer is free if
- it is initially locked. If retry, spin until free. */
-
-static int
-pointer_lock (void *lock, void *data, int retry)
-{
- void *old, *tmp;
-
- mb();
- again:
- /* Compare and swap with zero. */
- asm volatile (
- "1: ldq_l %0,%1\n"
- " mov %3,%2\n"
- " bne %0,2f\n"
- " stq_c %2,%1\n"
- " beq %2,1b\n"
- "2:"
- : "=&r"(old), "=m"(*(void **)lock), "=&r"(tmp)
- : "r"(data)
- : "memory");
-
- if (old == 0)
- return 0;
- if (! retry)
- return -EBUSY;
-
- while (*(void **)lock)
- barrier();
- goto again;
-}
-
void
handle_ipi(struct pt_regs *regs)
{
@@ -632,31 +588,12 @@ handle_ipi(struct pt_regs *regs)
break;
case IPI_CALL_FUNC:
- {
- struct smp_call_struct *data;
- void (*func)(void *info);
- void *info;
- int wait;
-
- data = smp_call_function_data;
- func = data->func;
- info = data->info;
- wait = data->wait;
-
- /* Notify the sending CPU that the data has been
- received, and execution is about to begin. */
- mb();
- atomic_dec (&data->unstarted_count);
-
- /* At this point the structure may be gone unless
- wait is true. */
- (*func)(info);
-
- /* Notify the sending CPU that the task is done. */
- mb();
- if (wait) atomic_dec (&data->unfinished_count);
+ generic_smp_call_function_interrupt();
+ break;
+
+ case IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
break;
- }
case IPI_CPU_STOP:
halt();
@@ -700,102 +637,15 @@ smp_send_stop(void)
send_ipi_message(to_whom, IPI_CPU_STOP);
}
-/*
- * Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <retry> If true, keep retrying until ready.
- * <wait> If true, wait until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func>
- * or are or have executed.
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-
-int
-smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
- int wait, cpumask_t to_whom)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct smp_call_struct data;
- unsigned long timeout;
- int num_cpus_to_call;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- data.wait = wait;
-
- cpu_clear(smp_processor_id(), to_whom);
- num_cpus_to_call = cpus_weight(to_whom);
-
- atomic_set(&data.unstarted_count, num_cpus_to_call);
- atomic_set(&data.unfinished_count, num_cpus_to_call);
-
- /* Acquire the smp_call_function_data mutex. */
- if (pointer_lock(&smp_call_function_data, &data, retry))
- return -EBUSY;
-
- /* Send a message to the requested CPUs. */
- send_ipi_message(to_whom, IPI_CALL_FUNC);
-
- /* Wait for a minimal response. */
- timeout = jiffies + HZ;
- while (atomic_read (&data.unstarted_count) > 0
- && time_before (jiffies, timeout))
- barrier();
-
- /* If there's no response yet, log a message but allow a longer
- * timeout period -- if we get a response this time, log
- * a message saying when we got it..
- */
- if (atomic_read(&data.unstarted_count) > 0) {
- long start_time = jiffies;
- printk(KERN_ERR "%s: initial timeout -- trying long wait\n",
- __func__);
- timeout = jiffies + 30 * HZ;
- while (atomic_read(&data.unstarted_count) > 0
- && time_before(jiffies, timeout))
- barrier();
- if (atomic_read(&data.unstarted_count) <= 0) {
- long delta = jiffies - start_time;
- printk(KERN_ERR
- "%s: response %ld.%ld seconds into long wait\n",
- __func__, delta / HZ,
- (100 * (delta - ((delta / HZ) * HZ))) / HZ);
- }
- }
-
- /* We either got one or timed out -- clear the lock. */
- mb();
- smp_call_function_data = NULL;
-
- /*
- * If after both the initial and long timeout periods we still don't
- * have a response, something is very wrong...
- */
- BUG_ON(atomic_read (&data.unstarted_count) > 0);
-
- /* Wait for a complete response, if needed. */
- if (wait) {
- while (atomic_read (&data.unfinished_count) > 0)
- barrier();
- }
-
- return 0;
+ send_ipi_message(mask, IPI_CALL_FUNC);
}
-EXPORT_SYMBOL(smp_call_function_on_cpu);
-int
-smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- return smp_call_function_on_cpu (func, info, retry, wait,
- cpu_online_map);
+ send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL(smp_call_function);
static void
ipi_imb(void *ignored)
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h
index 286e1d8..a9090b6 100644
--- a/include/asm-alpha/smp.h
+++ b/include/asm-alpha/smp.h
@@ -47,8 +47,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
extern int smp_num_cpus;
#define cpu_possible_map cpu_present_map
-int smp_call_function_on_cpu(void (*func) (void *info), void *info,int retry, int wait, cpumask_t cpu);
-
#else /* CONFIG_SMP */
#define hard_smp_processor_id() 0
--
1.5.6.rc0.40.gd683
* [PATCH 6/10] arm: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (4 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 5/10] alpha: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-06-02 12:29 ` Russell King
2008-05-29 8:58 ` [PATCH 7/10] m32r: " Jens Axboe
` (4 subsequent siblings)
10 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Russell King
This converts arm to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single().
Fixups and testing done by Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/arm/Kconfig | 1 +
arch/arm/kernel/smp.c | 157 +++++--------------------------------------------
2 files changed, 16 insertions(+), 142 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b786e68..c72dae6 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -650,6 +650,7 @@ source "kernel/time/Kconfig"
config SMP
bool "Symmetric Multi-Processing (EXPERIMENTAL)"
depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP)
+ select USE_GENERIC_SMP_HELPERS
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index eefae1d..6344466 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -68,20 +68,10 @@ enum ipi_msg_type {
IPI_TIMER,
IPI_RESCHEDULE,
IPI_CALL_FUNC,
+ IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
};
-struct smp_call_struct {
- void (*func)(void *info);
- void *info;
- int wait;
- cpumask_t pending;
- cpumask_t unfinished;
-};
-
-static struct smp_call_struct * volatile smp_call_function_data;
-static DEFINE_SPINLOCK(smp_call_function_lock);
-
int __cpuinit __cpu_up(unsigned int cpu)
{
struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu);
@@ -366,114 +356,15 @@ static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg)
local_irq_restore(flags);
}
-/*
- * You must not call this function with disabled interrupts, from a
- * hardware interrupt handler, nor from a bottom half handler.
- */
-static int smp_call_function_on_cpu(void (*func)(void *info), void *info,
- int retry, int wait, cpumask_t callmap)
-{
- struct smp_call_struct data;
- unsigned long timeout;
- int ret = 0;
-
- data.func = func;
- data.info = info;
- data.wait = wait;
-
- cpu_clear(smp_processor_id(), callmap);
- if (cpus_empty(callmap))
- goto out;
-
- data.pending = callmap;
- if (wait)
- data.unfinished = callmap;
-
- /*
- * try to get the mutex on smp_call_function_data
- */
- spin_lock(&smp_call_function_lock);
- smp_call_function_data = &data;
-
- send_ipi_message(callmap, IPI_CALL_FUNC);
-
- timeout = jiffies + HZ;
- while (!cpus_empty(data.pending) && time_before(jiffies, timeout))
- barrier();
-
- /*
- * did we time out?
- */
- if (!cpus_empty(data.pending)) {
- /*
- * this may be causing our panic - report it
- */
- printk(KERN_CRIT
- "CPU%u: smp_call_function timeout for %p(%p)\n"
- " callmap %lx pending %lx, %swait\n",
- smp_processor_id(), func, info, *cpus_addr(callmap),
- *cpus_addr(data.pending), wait ? "" : "no ");
-
- /*
- * TRACE
- */
- timeout = jiffies + (5 * HZ);
- while (!cpus_empty(data.pending) && time_before(jiffies, timeout))
- barrier();
-
- if (cpus_empty(data.pending))
- printk(KERN_CRIT " RESOLVED\n");
- else
- printk(KERN_CRIT " STILL STUCK\n");
- }
-
- /*
- * whatever happened, we're done with the data, so release it
- */
- smp_call_function_data = NULL;
- spin_unlock(&smp_call_function_lock);
-
- if (!cpus_empty(data.pending)) {
- ret = -ETIMEDOUT;
- goto out;
- }
-
- if (wait)
- while (!cpus_empty(data.unfinished))
- barrier();
- out:
-
- return 0;
-}
-
-int smp_call_function(void (*func)(void *info), void *info, int retry,
- int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- return smp_call_function_on_cpu(func, info, retry, wait,
- cpu_online_map);
+ send_ipi_message(mask, IPI_CALL_FUNC);
}
-EXPORT_SYMBOL_GPL(smp_call_function);
-int smp_call_function_single(int cpu, void (*func)(void *info), void *info,
- int retry, int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- /* prevent preemption and reschedule on another processor */
- int current_cpu = get_cpu();
- int ret = 0;
-
- if (cpu == current_cpu) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- } else
- ret = smp_call_function_on_cpu(func, info, retry, wait,
- cpumask_of_cpu(cpu));
-
- put_cpu();
-
- return ret;
+ send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL_GPL(smp_call_function_single);
void show_ipi_list(struct seq_file *p)
{
@@ -521,27 +412,6 @@ asmlinkage void __exception do_local_timer(struct pt_regs *regs)
}
#endif
-/*
- * ipi_call_function - handle IPI from smp_call_function()
- *
- * Note that we copy data out of the cross-call structure and then
- * let the caller know that we're here and have done with their data
- */
-static void ipi_call_function(unsigned int cpu)
-{
- struct smp_call_struct *data = smp_call_function_data;
- void (*func)(void *info) = data->func;
- void *info = data->info;
- int wait = data->wait;
-
- cpu_clear(cpu, data->pending);
-
- func(info);
-
- if (wait)
- cpu_clear(cpu, data->unfinished);
-}
-
static DEFINE_SPINLOCK(stop_lock);
/*
@@ -611,7 +481,11 @@ asmlinkage void __exception do_IPI(struct pt_regs *regs)
break;
case IPI_CALL_FUNC:
- ipi_call_function(cpu);
+ generic_smp_call_function_interrupt();
+ break;
+
+ case IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
break;
case IPI_CPU_STOP:
@@ -662,14 +536,13 @@ int setup_profiling_timer(unsigned int multiplier)
}
static int
-on_each_cpu_mask(void (*func)(void *), void *info, int retry, int wait,
- cpumask_t mask)
+on_each_cpu_mask(void (*func)(void *), void *info, int wait, cpumask_t mask)
{
int ret = 0;
preempt_disable();
- ret = smp_call_function_on_cpu(func, info, retry, wait, mask);
+ ret = smp_call_function_mask(mask, func, info, wait);
if (cpu_isset(smp_processor_id(), mask))
func(info);
@@ -738,7 +611,7 @@ void flush_tlb_mm(struct mm_struct *mm)
{
cpumask_t mask = mm->cpu_vm_mask;
- on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, 1, mask);
+ on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mask);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
@@ -749,7 +622,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
ta.ta_vma = vma;
ta.ta_start = uaddr;
- on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, 1, mask);
+ on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mask);
}
void flush_tlb_kernel_page(unsigned long kaddr)
@@ -771,7 +644,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
ta.ta_start = start;
ta.ta_end = end;
- on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, 1, mask);
+ on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mask);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
--
1.5.6.rc0.40.gd683
* [PATCH 7/10] m32r: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (5 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 6/10] arm: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 8/10] mips: " Jens Axboe
` (3 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Hirokazu Takata
This converts m32r to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single(). Not tested,
not even compiled.
Cc: Hirokazu Takata <takata@linux-m32r.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/m32r/Kconfig | 1 +
arch/m32r/kernel/m32r_ksyms.c | 3 -
arch/m32r/kernel/smp.c | 128 ++++------------------------------------
arch/m32r/kernel/traps.c | 3 +-
include/asm-m32r/smp.h | 1 +
5 files changed, 17 insertions(+), 119 deletions(-)
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index de153de..a5f864c 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -296,6 +296,7 @@ config PREEMPT
config SMP
bool "Symmetric multi-processing support"
+ select USE_GENERIC_SMP_HELPERS
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index e6709fe..16bcb18 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -43,9 +43,6 @@ EXPORT_SYMBOL(dcache_dummy);
#endif
EXPORT_SYMBOL(cpu_data);
-/* Global SMP stuff */
-EXPORT_SYMBOL(smp_call_function);
-
/* TLB flushing */
EXPORT_SYMBOL(smp_flush_tlb_page);
#endif
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index c837bc1..74eb7bc 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -35,22 +35,6 @@
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
-
-static struct call_data_struct *call_data;
-
-/*
* For flush_cache_all()
*/
static DEFINE_SPINLOCK(flushcache_lock);
@@ -96,9 +80,6 @@ void smp_invalidate_interrupt(void);
void smp_send_stop(void);
static void stop_this_cpu(void *);
-int smp_call_function(void (*) (void *), void *, int, int);
-void smp_call_function_interrupt(void);
-
void smp_send_timer(void);
void smp_ipi_timer_interrupt(struct pt_regs *);
void smp_local_timer_interrupt(void);
@@ -565,86 +546,14 @@ static void stop_this_cpu(void *dummy)
for ( ; ; );
}
-/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
-/* Call function Routines */
-/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
-
-/*==========================================================================*
- * Name: smp_call_function
- *
- * Description: This routine sends a 'CALL_FUNCTION_IPI' to all other CPUs
- * in the system.
- *
- * Born on Date: 2002.02.05
- *
- * Arguments: *func - The function to run. This must be fast and
- * non-blocking.
- * *info - An arbitrary pointer to pass to the function.
- * nonatomic - currently unused.
- * wait - If true, wait (atomically) until function has
- * completed on other CPUs.
- *
- * Returns: 0 on success, else a negative status code. Does not return
- * until remote CPUs are nearly ready to execute <<func>> or
- * are or have executed.
- *
- * Cautions: You must not call this function with disabled interrupts or
- * from a hardware interrupt handler, you may call it from a
- * bottom half handler.
- *
- * Modification log:
- * Date Who Description
- * ---------- --- --------------------------------------------------------
- *
- *==========================================================================*/
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct call_data_struct data;
- int cpus;
-
-#ifdef DEBUG_SMP
- unsigned long flags;
- __save_flags(flags);
- if (!(flags & 0x0040)) /* Interrupt Disable NONONO */
- BUG();
-#endif /* DEBUG_SMP */
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
- cpus = num_online_cpus() - 1;
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_IPI, 0);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- barrier();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
- spin_unlock(&call_lock);
+ send_IPI_mask(mask, CALL_FUNCTION_IPI, 0);
+}
- return 0;
+void arch_send_call_function_single_ipi(int cpu)
+{
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_IPI, 0);
}
/*==========================================================================*
@@ -666,27 +575,16 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
*==========================================================================*/
void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
irq_exit();
+}
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
+void smp_call_function_single_interrupt(void)
+{
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ irq_exit();
}
/*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 89ba4a0..46159a4 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -40,6 +40,7 @@ extern void smp_invalidate_interrupt(void);
extern void smp_call_function_interrupt(void);
extern void smp_ipi_timer_interrupt(void);
extern void smp_flush_cache_all_interrupt(void);
+extern void smp_call_function_single_interrupt(void);
/*
* for Boot AP function
@@ -103,7 +104,7 @@ void set_eit_vector_entries(void)
eit_vector[186] = (unsigned long)smp_call_function_interrupt;
eit_vector[187] = (unsigned long)smp_ipi_timer_interrupt;
eit_vector[188] = (unsigned long)smp_flush_cache_all_interrupt;
- eit_vector[189] = 0;
+ eit_vector[189] = (unsigned long)smp_call_function_single_interrupt;
eit_vector[190] = 0;
eit_vector[191] = 0;
#endif
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h
index 078e1a5..6a7f3af 100644
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -104,6 +104,7 @@ extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
#define LOCAL_TIMER_IPI (M32R_IRQ_IPI3-M32R_IRQ_IPI0)
#define INVALIDATE_CACHE_IPI (M32R_IRQ_IPI4-M32R_IRQ_IPI0)
#define CPU_BOOT_IPI (M32R_IRQ_IPI5-M32R_IRQ_IPI0)
+#define CALL_FUNC_SINGLE_IPI (M32R_IRQ_IPI6-M32R_IRQ_IPI0)
#define IPI_SHIFT (0)
#define NR_IPIS (8)
--
1.5.6.rc0.40.gd683
* [PATCH 8/10] mips: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (6 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 7/10] m32r: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-29 14:20 ` Ralf Baechle
2008-05-29 8:58 ` [PATCH 9/10] parisc: " Jens Axboe
` (2 subsequent siblings)
10 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Ralf Baechle
This converts mips to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single(). Not tested,
but it compiles.
mips shares the same IPI for smp_call_function() and
smp_call_function_single(), since not all hardware has enough IPIs
available to support separate setups.
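Sharing one vector is safe because the generic handlers simply drain their per-cpu queues and fall through when nothing is queued for them. Roughly, the receive side amounts to the sketch below; it is simplified, not the literal generic code, and take_this_cpus_queue() and csd_complete() are placeholders for the real locking and wake-the-waiter/free steps.
void sketch_call_function_single_interrupt(void)
{
	LIST_HEAD(list);
	/*
	 * Atomically take over everything queued for this CPU. With the
	 * shared mips vector this is frequently empty, which is harmless.
	 */
	take_this_cpus_queue(&list);
	while (!list_empty(&list)) {
		struct call_single_data *data =
			list_first_entry(&list, struct call_single_data, list);
		list_del(&data->list);
		data->func(data->info);
		csd_complete(data);	/* placeholder: wake waiter or free entry */
	}
}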
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/mips/Kconfig | 1 +
arch/mips/kernel/smp.c | 139 ++++-------------------------------------------
arch/mips/kernel/smtc.c | 1 -
include/asm-mips/smp.h | 10 ----
4 files changed, 12 insertions(+), 139 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index e5a7c5d..ea70d5a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1763,6 +1763,7 @@ config SMP
bool "Multi-Processing support"
depends on SYS_SUPPORTS_SMP
select IRQ_PER_CPU
+ select USE_GENERIC_SMP_HELPERS
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 63370cd..c75b26c 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -131,145 +131,28 @@ asmlinkage __cpuinit void start_secondary(void)
cpu_idle();
}
-DEFINE_SPINLOCK(smp_call_lock);
-
-struct call_data_struct *call_data;
-
-/*
- * Run a function on all other CPUs.
- *
- * <mask> cpuset_t of all processors to run the function on.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <retry> If true, keep retrying until ready.
- * <wait> If true, wait until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func>
- * or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler:
- *
- * CPU A CPU B
- * Disable interrupts
- * smp_call_function()
- * Take call_lock
- * Send IPIs
- * Wait for all cpus to acknowledge IPI
- * CPU A has not responded, spin waiting
- * for cpu A to respond, holding call_lock
- * smp_call_function()
- * Spin waiting for call_lock
- * Deadlock Deadlock
- */
-int smp_call_function_mask(cpumask_t mask, void (*func) (void *info),
- void *info, int retry, int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct call_data_struct data;
- int cpu = smp_processor_id();
- int cpus;
-
- /*
- * Can die spectacularly if this CPU isn't yet marked online
- */
- BUG_ON(!cpu_online(cpu));
-
- cpu_clear(cpu, mask);
- cpus = cpus_weight(mask);
- if (!cpus)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- spin_lock(&smp_call_lock);
- call_data = &data;
- smp_mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
-
- /* Wait for response */
- /* FIXME: lock-up detection, backtrace on lock-up */
- while (atomic_read(&data.started) != cpus)
- barrier();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
- call_data = NULL;
- spin_unlock(&smp_call_lock);
-
- return 0;
}
-int smp_call_function(void (*func) (void *info), void *info, int retry,
- int wait)
+/*
+ * We reuse the same vector for the single IPI
+ */
+void arch_send_call_function_single_ipi(int cpu)
{
- return smp_call_function_mask(cpu_online_map, func, info, retry, wait);
+ mp_ops->send_ipi_mask(cpumask_of_cpu(cpu), SMP_CALL_FUNCTION);
}
+/*
+ * Call into both interrupt handlers, as we share the IPI for them
+ */
void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function.
- */
- smp_mb();
- atomic_inc(&call_data->started);
-
- /*
- * At this point the info structure may be out of scope unless wait==1.
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_single_interrupt();
+ generic_smp_call_function_interrupt();
irq_exit();
-
- if (wait) {
- smp_mb();
- atomic_inc(&call_data->finished);
- }
-}
-
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int retry, int wait)
-{
- int ret, me;
-
- /*
- * Can die spectacularly if this CPU isn't yet marked online
- */
- if (!cpu_online(cpu))
- return 0;
-
- me = get_cpu();
- BUG_ON(!cpu_online(me));
-
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, retry,
- wait);
-
- put_cpu();
- return 0;
}
static void stop_this_cpu(void *dummy)
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 3e86318..a516286 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -877,7 +877,6 @@ static void ipi_resched_interrupt(void)
/* Return from interrupt should be enough to cause scheduler check */
}
-
static void ipi_call_interrupt(void)
{
/* Invoke generic function invocation code in smp.c */
diff --git a/include/asm-mips/smp.h b/include/asm-mips/smp.h
index 84fef1a..5222587 100644
--- a/include/asm-mips/smp.h
+++ b/include/asm-mips/smp.h
@@ -35,16 +35,6 @@ extern int __cpu_logical_map[NR_CPUS];
#define NO_PROC_ID (-1)
-struct call_data_struct {
- void (*func)(void *);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-extern struct call_data_struct *call_data;
-
#define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */
#define SMP_CALL_FUNCTION 0x2
--
1.5.6.rc0.40.gd683
* [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (7 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 8/10] mips: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-05-31 7:00 ` Kyle McMartin
2008-05-29 8:58 ` [PATCH 10/10] sh: " Jens Axboe
2008-06-01 8:57 ` [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Andrew Morton
10 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Kyle McMartin, Matthew Wilcox, Grant Grundler
This converts parisc to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single(). Not tested,
not even compiled.
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/parisc/Kconfig | 1 +
arch/parisc/kernel/smp.c | 134 +++++++--------------------------------------
2 files changed, 22 insertions(+), 113 deletions(-)
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index bc7a19d..a7d4fd3 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -199,6 +199,7 @@ endchoice
config SMP
bool "Symmetric multi-processing support"
+ select USE_GENERIC_SMP_HELPERS
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 85fc775..126105c 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -84,19 +84,11 @@ EXPORT_SYMBOL(cpu_possible_map);
DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED;
-struct smp_call_struct {
- void (*func) (void *info);
- void *info;
- long wait;
- atomic_t unstarted_count;
- atomic_t unfinished_count;
-};
-static volatile struct smp_call_struct *smp_call_function_data;
-
enum ipi_message_type {
IPI_NOP=0,
IPI_RESCHEDULE=1,
IPI_CALL_FUNC,
+ IPI_CALL_FUNC_SINGLE,
IPI_CPU_START,
IPI_CPU_STOP,
IPI_CPU_TEST
@@ -187,33 +179,12 @@ ipi_interrupt(int irq, void *dev_id)
case IPI_CALL_FUNC:
smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC\n", this_cpu);
- {
- volatile struct smp_call_struct *data;
- void (*func)(void *info);
- void *info;
- int wait;
-
- data = smp_call_function_data;
- func = data->func;
- info = data->info;
- wait = data->wait;
-
- mb();
- atomic_dec ((atomic_t *)&data->unstarted_count);
-
- /* At this point, *data can't
- * be relied upon.
- */
-
- (*func)(info);
-
- /* Notify the sending CPU that the
- * task is done.
- */
- mb();
- if (wait)
- atomic_dec ((atomic_t *)&data->unfinished_count);
- }
+ generic_smp_call_function_interrupt();
+ break;
+
+ case IPI_CALL_FUNC_SINGLE:
+ smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC_SINGLE\n", this_cpu);
+ generic_smp_call_function_single_interrupt();
break;
case IPI_CPU_START:
@@ -256,6 +227,14 @@ ipi_send(int cpu, enum ipi_message_type op)
spin_unlock_irqrestore(lock, flags);
}
+static void
+send_IPI_mask(cpumask_t mask, enum ipi_message_type op)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, mask)
+ ipi_send(cpu, op);
+}
static inline void
send_IPI_single(int dest_cpu, enum ipi_message_type op)
@@ -295,86 +274,15 @@ smp_send_all_nop(void)
send_IPI_allbutself(IPI_NOP);
}
-
-/**
- * Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <retry> If true, keep retrying until ready.
- * <wait> If true, wait until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func>
- * or have executed.
- */
-
-int
-smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- struct smp_call_struct data;
- unsigned long timeout;
- static DEFINE_SPINLOCK(lock);
- int retries = 0;
-
- if (num_online_cpus() < 2)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* can also deadlock if IPIs are disabled */
- WARN_ON((get_eiem() & (1UL<<(CPU_IRQ_MAX - IPI_IRQ))) == 0);
-
-
- data.func = func;
- data.info = info;
- data.wait = wait;
- atomic_set(&data.unstarted_count, num_online_cpus() - 1);
- atomic_set(&data.unfinished_count, num_online_cpus() - 1);
-
- if (retry) {
- spin_lock (&lock);
- while (smp_call_function_data != 0)
- barrier();
- }
- else {
- spin_lock (&lock);
- if (smp_call_function_data) {
- spin_unlock (&lock);
- return -EBUSY;
- }
- }
-
- smp_call_function_data = &data;
- spin_unlock (&lock);
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(IPI_CALL_FUNC);
-
- retry:
- /* Wait for response */
- timeout = jiffies + HZ;
- while ( (atomic_read (&data.unstarted_count) > 0) &&
- time_before (jiffies, timeout) )
- barrier ();
-
- if (atomic_read (&data.unstarted_count) > 0) {
- printk(KERN_CRIT "SMP CALL FUNCTION TIMED OUT! (cpu=%d), try %d\n",
- smp_processor_id(), ++retries);
- goto retry;
- }
- /* We either got one or timed out. Release the lock */
-
- mb();
- smp_call_function_data = NULL;
-
- while (wait && atomic_read (&data.unfinished_count) > 0)
- barrier ();
-
- return 0;
+ send_IPI_mask(mask, IPI_CALL_FUNC);
}
-EXPORT_SYMBOL(smp_call_function);
+void arch_send_call_function_single_ipi(int cpu)
+{
+ send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}
/*
* Flush all other CPU's tlb and then mine. Do this with on_each_cpu()
--
1.5.6.rc0.40.gd683
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 10/10] sh: convert to generic helpers for IPI function calls
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (8 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 9/10] parisc: " Jens Axboe
@ 2008-05-29 8:58 ` Jens Axboe
2008-06-01 8:57 ` [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Andrew Morton
10 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 8:58 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, npiggin, linux-arch, jeremy, mingo, paulmck, Jens Axboe,
Paul Mundt
This converts sh to use the new helpers for smp_call_function() and
friends, and adds support for smp_call_function_single(). Not tested,
but it compiles.
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
arch/sh/Kconfig | 1 +
arch/sh/kernel/smp.c | 48 ++++++++----------------------------------------
include/asm-sh/smp.h | 12 ++----------
3 files changed, 11 insertions(+), 50 deletions(-)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 9a854c8..3e7384f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -688,6 +688,7 @@ config CRASH_DUMP
config SMP
bool "Symmetric multi-processing support"
depends on SYS_SUPPORTS_SMP
+ select USE_GENERIC_SMP_HELPERS
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 5d039d1..2ed8dce 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -36,13 +36,6 @@ EXPORT_SYMBOL(cpu_possible_map);
cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
-static atomic_t cpus_booted = ATOMIC_INIT(0);
-
-/*
- * Run specified function on a particular processor.
- */
-void __smp_call_function(unsigned int cpu);
-
static inline void __init smp_store_cpu_info(unsigned int cpu)
{
struct sh_cpuinfo *c = cpu_data + cpu;
@@ -178,42 +171,17 @@ void smp_send_stop(void)
smp_call_function(stop_this_cpu, 0, 1, 0);
}
-struct smp_fn_call_struct smp_fn_call = {
- .lock = __SPIN_LOCK_UNLOCKED(smp_fn_call.lock),
- .finished = ATOMIC_INIT(0),
-};
-
-/*
- * The caller of this wants the passed function to run on every cpu. If wait
- * is set, wait until all cpus have finished the function before returning.
- * The lock is here to protect the call structure.
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func)(void *info), void *info, int retry, int wait)
+void arch_send_call_function_ipi(cpumask_t mask)
{
- unsigned int nr_cpus = atomic_read(&cpus_booted);
- int i;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- spin_lock(&smp_fn_call.lock);
-
- atomic_set(&smp_fn_call.finished, 0);
- smp_fn_call.fn = func;
- smp_fn_call.data = info;
-
- for (i = 0; i < nr_cpus; i++)
- if (i != smp_processor_id())
- plat_send_ipi(i, SMP_MSG_FUNCTION);
-
- if (wait)
- while (atomic_read(&smp_fn_call.finished) != (nr_cpus - 1));
+ int cpu;
- spin_unlock(&smp_fn_call.lock);
+ for_each_cpu_mask(cpu, mask)
+ plat_send_ipi(cpu, SMP_MSG_FUNCTION);
+}
- return 0;
+void arch_send_call_function_single_ipi(int cpu)
+{
+ plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
}
/* Not really SMP stuff ... */
diff --git a/include/asm-sh/smp.h b/include/asm-sh/smp.h
index 9c8d34b..7982516 100644
--- a/include/asm-sh/smp.h
+++ b/include/asm-sh/smp.h
@@ -26,18 +26,10 @@ extern int __cpu_logical_map[NR_CPUS];
#define NO_PROC_ID (-1)
-struct smp_fn_call_struct {
- spinlock_t lock;
- atomic_t finished;
- void (*fn)(void *);
- void *data;
-};
-
-extern struct smp_fn_call_struct smp_fn_call;
-
#define SMP_MSG_FUNCTION 0
#define SMP_MSG_RESCHEDULE 1
-#define SMP_MSG_NR 2
+#define SMP_MSG_FUNCTION_SINGLE 2
+#define SMP_MSG_NR 3
void plat_smp_setup(void);
void plat_prepare_cpus(unsigned int max_cpus);
--
1.5.6.rc0.40.gd683
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-05-29 8:58 ` [PATCH 2/10] x86: convert to generic helpers for " Jens Axboe
@ 2008-05-29 12:12 ` Jeremy Fitzhardinge
2008-05-29 12:17 ` Jens Axboe
0 siblings, 1 reply; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-29 12:12 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
Jens Axboe wrote:
> @@ -122,6 +109,17 @@ static int xen_smp_intr_init(unsigned int cpu)
> goto fail;
> per_cpu(debug_irq, cpu) = rc;
>
> + callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
> + rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
> + cpu,
> + xen_call_function_single_interrupt,
> + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
> + callfunc_name,
> + NULL);
> + if (rc < 0)
> + goto fail;
> + per_cpu(callfuncsingle_irq, cpu) = rc;
>
Tiny nit: could you shift this up below the other callfunc registration,
so they appear next to each other in /proc?
> +void xen_smp_send_call_function_ipi(cpumask_t mask)
> +{
> + int cpu;
> +
> + xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> +
> + /* Make sure other vcpus get a chance to run if they need to. */
> + for_each_cpu_mask(cpu, mask) {
> + if (xen_vcpu_stolen(cpu)) {
> + HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> + break;
> + }
> + }
>
I think you should just drop this loop for now; I want to do some
measurements before putting it in.
Thanks,
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-05-29 12:12 ` Jeremy Fitzhardinge
@ 2008-05-29 12:17 ` Jens Axboe
2008-05-29 13:47 ` Jeremy Fitzhardinge
0 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-05-29 12:17 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
On Thu, May 29 2008, Jeremy Fitzhardinge wrote:
> Jens Axboe wrote:
> >@@ -122,6 +109,17 @@ static int xen_smp_intr_init(unsigned int cpu)
> > goto fail;
> > per_cpu(debug_irq, cpu) = rc;
> >
> >+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
> >+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
> >+ cpu,
> >+ xen_call_function_single_interrupt,
> >+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
> >+ callfunc_name,
> >+ NULL);
> >+ if (rc < 0)
> >+ goto fail;
> >+ per_cpu(callfuncsingle_irq, cpu) = rc;
> >
>
> Tiny nit: could you shift this up below the other callfunc
> registration, so they appear next to each other in /proc?
Certainly, will do.
> >+void xen_smp_send_call_function_ipi(cpumask_t mask)
> >+{
> >+ int cpu;
> >+
> >+ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
> >+
> >+ /* Make sure other vcpus get a chance to run if they need to. */
> >+ for_each_cpu_mask(cpu, mask) {
> >+ if (xen_vcpu_stolen(cpu)) {
> >+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
> >+ break;
> >+ }
> >+ }
> >
>
> I think you should just drop this loop for now; I want to do some
> measurements before putting it in.
If you don't mind, I'd like to keep it as-is. Then the patch should
not have a functional change (there, at least), which I think is
important. I can add a later patch removing this stolen bit, it would
be even better if you send me such a patch :-)
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 2/10] x86: convert to generic helpers for IPI function calls
2008-05-29 12:17 ` Jens Axboe
@ 2008-05-29 13:47 ` Jeremy Fitzhardinge
0 siblings, 0 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-05-29 13:47 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
Jens Axboe wrote:
> If you don't mind, I'd like to keep it as-is. Then the patch should
> not have a functional change (there, at least), which I think is
> important. I can add a later patch removing this stolen bit, it would
> be even better if you send me such a patch :-)
>
Sure. BTW, I got it to mush on top of x86.git, and it seems OK. There
were lots of fairly trivial conflicts with things moving between unified
files, and a few missing smp_call_function and on_each_cpu calls.
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 8/10] mips: convert to generic helpers for IPI function calls
2008-05-29 8:58 ` [PATCH 8/10] mips: " Jens Axboe
@ 2008-05-29 14:20 ` Ralf Baechle
2008-05-30 7:23 ` Jens Axboe
0 siblings, 1 reply; 54+ messages in thread
From: Ralf Baechle @ 2008-05-29 14:20 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck
On Thu, May 29, 2008 at 10:58:22AM +0200, Jens Axboe wrote:
> This converts mips to use the new helpers for smp_call_function() and
> friends, and adds support for smp_call_function_single(). Not tested,
> but it compiles.
>
> mips shares the same IPI for smp_call_function() and
> smp_call_function_single(), since not all hardware has enough
> IPIs available to support separate setups.
FYI, the comment isn't quite accurate. The mechanisms for IPIs on MIPS
are platform-specific. The change which I recently objected to was
specific to a code model called VSMP.
Ralf
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 8/10] mips: convert to generic helpers for IPI function calls
2008-05-29 14:20 ` Ralf Baechle
@ 2008-05-30 7:23 ` Jens Axboe
0 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-05-30 7:23 UTC (permalink / raw)
To: Ralf Baechle
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck
On Thu, May 29 2008, Ralf Baechle wrote:
> On Thu, May 29, 2008 at 10:58:22AM +0200, Jens Axboe wrote:
>
> > This converts mips to use the new helpers for smp_call_function() and
> > friends, and adds support for smp_call_function_single(). Not tested,
> > but it compiles.
> >
> > mips shares the same IPI for smp_call_function() and
> > smp_call_function_single(), since not all hardware has enough available
> > IPIs available to support seperate setups.
>
> FYI, the comment isn't quite accurate. The machanisms for IPIs on MIPS
> are platform-specific. The change which I recently objected to was
> specific to a code model called VSMP.
So s/hardware/platform and you are happy?
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
@ 2008-05-30 11:24 ` Paul E. McKenney
2008-06-06 8:44 ` Jens Axboe
2008-06-10 14:51 ` Catalin Marinas
2008-07-06 17:21 ` Jeremy Fitzhardinge
2 siblings, 1 reply; 54+ messages in thread
From: Paul E. McKenney @ 2008-05-30 11:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo
On Thu, May 29, 2008 at 10:58:15AM +0200, Jens Axboe wrote:
> This adds kernel/smp.c which contains helpers for IPI function calls. In
> addition to supporting the existing smp_call_function() in a more efficient
> manner, it also adds a more scalable variant called smp_call_function_single()
> for calling a given function on a single CPU only.
>
> The core of this is based on the x86-64 patch from Nick Piggin, lots of
> changes since then. "Alan D. Brunelle" <Alan.Brunelle@hp.com> has
> contributed lots of fixes and suggestions as well. Also thanks to
> Paul E. McKenney <paulmck@linux.vnet.ibm.com> for reviewing RCU usage
> and getting rid of the data allocation fallback deadlock.
Looks much improved!!! A few suggestions for header comments, and
there appears to be a leftover memory barrier that should now be
removed. With these changes:
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Acked-by: Ingo Molnar <mingo@elte.hu>
> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> ---
> arch/Kconfig | 3 +
> arch/sparc64/kernel/smp.c | 11 +-
> include/linux/smp.h | 34 ++++-
> init/main.c | 2 +
> kernel/Makefile | 1 +
> kernel/smp.c | 362 +++++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 406 insertions(+), 7 deletions(-)
> create mode 100644 kernel/smp.c
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 3ea332b..ad89a33 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -39,3 +39,6 @@ config HAVE_KRETPROBES
>
> config HAVE_DMA_ATTRS
> def_bool n
> +
> +config USE_GENERIC_SMP_HELPERS
> + def_bool n
> diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
> index fa63c68..b82d017 100644
> --- a/arch/sparc64/kernel/smp.c
> +++ b/arch/sparc64/kernel/smp.c
> @@ -816,8 +816,9 @@ extern unsigned long xcall_call_function;
> * You must not call this function with disabled interrupts or from a
> * hardware interrupt handler or from a bottom half handler.
> */
> -static int smp_call_function_mask(void (*func)(void *info), void *info,
> - int nonatomic, int wait, cpumask_t mask)
> +static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info,
> + int nonatomic, int wait,
> + cpumask_t mask)
> {
> struct call_data_struct data;
> int cpus;
> @@ -855,8 +856,8 @@ out_unlock:
> int smp_call_function(void (*func)(void *info), void *info,
> int nonatomic, int wait)
> {
> - return smp_call_function_mask(func, info, nonatomic, wait,
> - cpu_online_map);
> + return sparc64_smp_call_function_mask(func, info, nonatomic, wait,
> + cpu_online_map);
> }
>
> void smp_call_function_client(int irq, struct pt_regs *regs)
> @@ -893,7 +894,7 @@ static void tsb_sync(void *info)
>
> void smp_tsb_sync(struct mm_struct *mm)
> {
> - smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
> + sparc64_smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
> }
>
> extern unsigned long xcall_flush_tlb_mm;
> diff --git a/include/linux/smp.h b/include/linux/smp.h
> index 55232cc..2691bad 100644
> --- a/include/linux/smp.h
> +++ b/include/linux/smp.h
> @@ -7,9 +7,19 @@
> */
>
> #include <linux/errno.h>
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +#include <linux/cpumask.h>
>
> extern void cpu_idle(void);
>
> +struct call_single_data {
> + struct list_head list;
> + void (*func) (void *info);
> + void *info;
> + unsigned int flags;
> +};
> +
> #ifdef CONFIG_SMP
>
> #include <linux/preempt.h>
> @@ -53,9 +63,27 @@ extern void smp_cpus_done(unsigned int max_cpus);
> * Call a function on all other processors
> */
> int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
> -
> +int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
> + int wait);
> int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
> int retry, int wait);
> +void __smp_call_function_single(int cpuid, struct call_single_data *data);
> +
> +/*
> + * Generic and arch helpers
> + */
> +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
> +void generic_smp_call_function_single_interrupt(void);
> +void generic_smp_call_function_interrupt(void);
> +void init_call_single_data(void);
> +void arch_send_call_function_single_ipi(int cpu);
> +void arch_send_call_function_ipi(cpumask_t mask);
> +extern spinlock_t call_function_lock;
> +#else
> +static inline void init_call_single_data(void)
> +{
> +}
> +#endif
>
> /*
> * Call a function on all processors
> @@ -112,7 +140,9 @@ static inline void smp_send_reschedule(int cpu) { }
> })
> #define smp_call_function_mask(mask, func, info, wait) \
> (up_smp_call_function(func, info))
> -
> +static inline void init_call_single_data(void)
> +{
> +}
> #endif /* !SMP */
>
> /*
> diff --git a/init/main.c b/init/main.c
> index f7fb200..1efcccf 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -31,6 +31,7 @@
> #include <linux/kernel_stat.h>
> #include <linux/start_kernel.h>
> #include <linux/security.h>
> +#include <linux/smp.h>
> #include <linux/workqueue.h>
> #include <linux/profile.h>
> #include <linux/rcupdate.h>
> @@ -779,6 +780,7 @@ static void __init do_pre_smp_initcalls(void)
> {
> extern int spawn_ksoftirqd(void);
>
> + init_call_single_data();
> migration_init();
> spawn_ksoftirqd();
> if (!nosoftlockup)
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 1c9938a..9fa5797 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -28,6 +28,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
> obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
> obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
> obj-$(CONFIG_SMP) += cpu.o spinlock.o
> +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
> obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
> obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
> obj-$(CONFIG_UID16) += uid16.o
> diff --git a/kernel/smp.c b/kernel/smp.c
> new file mode 100644
> index 0000000..ef6de3d
> --- /dev/null
> +++ b/kernel/smp.c
> @@ -0,0 +1,362 @@
> +/*
> + * Generic helpers for smp ipi calls
> + *
> + * (C) Jens Axboe <jens.axboe@oracle.com> 2008
> + *
> + */
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/percpu.h>
> +#include <linux/rcupdate.h>
> +#include <linux/smp.h>
> +
> +static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
> +static LIST_HEAD(call_function_queue);
> +__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
> +
> +enum {
> + CSD_FLAG_WAIT = 0x01,
> + CSD_FLAG_ALLOC = 0x02,
> +};
> +
> +struct call_function_data {
> + struct call_single_data csd;
> + spinlock_t lock;
> + unsigned int refs;
> + cpumask_t cpumask;
> + struct rcu_head rcu_head;
> +};
> +
> +struct call_single_queue {
> + struct list_head list;
> + spinlock_t lock;
> +};
> +
> +void __cpuinit init_call_single_data(void)
> +{
> + int i;
> +
> + for_each_possible_cpu(i) {
> + struct call_single_queue *q = &per_cpu(call_single_queue, i);
> +
> + spin_lock_init(&q->lock);
> + INIT_LIST_HEAD(&q->list);
> + }
> +}
> +
> +static void csd_flag_wait(struct call_single_data *data)
> +{
> + /* Wait for response */
> + do {
> + /*
> + * We need to see the flags store in the IPI handler
> + */
> + smp_mb();
> + if (!(data->flags & CSD_FLAG_WAIT))
> + break;
> + cpu_relax();
> + } while (1);
> +}
> +
> +/*
> + * Insert a previously allocated call_single_data element for execution
> + * on the given CPU. data must already have ->func, ->info, and ->flags set.
> + */
> +static void generic_exec_single(int cpu, struct call_single_data *data)
> +{
> + struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
> + int wait = data->flags & CSD_FLAG_WAIT, ipi;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&dst->lock, flags);
> + ipi = list_empty(&dst->list);
> + list_add_tail(&data->list, &dst->list);
> + spin_unlock_irqrestore(&dst->lock, flags);
> +
> + if (ipi)
> + arch_send_call_function_single_ipi(cpu);
> +
> + if (wait)
> + csd_flag_wait(data);
> +}
> +
> +static void rcu_free_call_data(struct rcu_head *head)
> +{
> + struct call_function_data *data;
> +
> + data = container_of(head, struct call_function_data, rcu_head);
> +
> + kfree(data);
> +}
> +
> +/*
> + * Invoked by arch to handle an IPI for call function. Must be called with
> + * interrupts disabled.
> + */
> +void generic_smp_call_function_interrupt(void)
> +{
> + struct call_function_data *data;
> + int cpu = get_cpu();
> +
> + /*
> + * It's ok to use list_for_each_rcu() here even though we may delete
> + * 'pos', since list_del_rcu() doesn't clear ->next
> + */
> + rcu_read_lock();
> + list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
> + int refs;
> +
> + if (!cpu_isset(cpu, data->cpumask))
> + continue;
> +
> + data->csd.func(data->csd.info);
> +
> + spin_lock(&data->lock);
> + cpu_clear(cpu, data->cpumask);
> + WARN_ON(data->refs == 0);
> + data->refs--;
> + refs = data->refs;
> + spin_unlock(&data->lock);
> +
> + if (refs)
> + continue;
> +
> + spin_lock(&call_function_lock);
> + list_del_rcu(&data->csd.list);
> + spin_unlock(&call_function_lock);
> +
> + if (data->csd.flags & CSD_FLAG_WAIT) {
> + /*
> + * serialize stores to data with the flag clear
> + * and wakeup
> + */
> + smp_wmb();
> + data->csd.flags &= ~CSD_FLAG_WAIT;
> + } else
> + call_rcu(&data->rcu_head, rcu_free_call_data);
> + }
> + rcu_read_unlock();
> +
> + put_cpu();
> +}
> +
> +/*
> + * Invoked by arch to handle an IPI for call function single. Must be called
> + * from the arch with interrupts disabled.
> + */
> +void generic_smp_call_function_single_interrupt(void)
> +{
> + struct call_single_queue *q = &__get_cpu_var(call_single_queue);
> + LIST_HEAD(list);
> +
> + /*
> + * Need to see other stores to list head for checking whether
> + * list is empty without holding q->lock
> + */
> + smp_mb();
> + while (!list_empty(&q->list)) {
> + unsigned int data_flags;
> +
> + spin_lock(&q->lock);
> + list_replace_init(&q->list, &list);
> + spin_unlock(&q->lock);
> +
> + while (!list_empty(&list)) {
> + struct call_single_data *data;
> +
> + data = list_entry(list.next, struct call_single_data,
> + list);
> + list_del(&data->list);
> +
> + /*
> + * 'data' can be invalid after this call if
> + * flags == 0 (when called through
> + * generic_exec_single(), so save them away before
> + * making the call.
> + */
> + data_flags = data->flags;
> +
> + data->func(data->info);
> +
> + if (data_flags & CSD_FLAG_WAIT) {
> + smp_wmb();
> + data->flags &= ~CSD_FLAG_WAIT;
> + } else if (data_flags & CSD_FLAG_ALLOC)
> + kfree(data);
> + }
> + /*
> + * See comment on outer loop
> + */
> + smp_mb();
> + }
> +}
> +
> +/*
> + * smp_call_function_single - Run a function on a specific CPU
> + * @func: The function to run. This must be fast and non-blocking.
> + * @info: An arbitrary pointer to pass to the function.
> + * @retry: Unused
> + * @wait: If true, wait until function has completed on other CPUs.
Suggest adding comment to the effect that @wait will be implicitly set
upon memory-allocation failure.
> + *
> + * Returns 0 on success, else a negative status code.
> + */
> +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
> + int retry, int wait)
> +{
> + struct call_single_data d;
> + unsigned long flags;
> + /* prevent preemption and reschedule on another processor */
> + int me = get_cpu();
> +
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON(irqs_disabled());
> +
> + if (cpu == me) {
> + local_irq_save(flags);
> + func(info);
> + local_irq_restore(flags);
> + } else {
> + struct call_single_data *data = NULL;
> +
> + if (!wait) {
> + data = kmalloc(sizeof(*data), GFP_ATOMIC);
> + if (data)
> + data->flags = CSD_FLAG_ALLOC;
> + }
> + if (!data) {
> + data = &d;
> + data->flags = CSD_FLAG_WAIT;
> + }
> +
> + data->func = func;
> + data->info = info;
> + generic_exec_single(cpu, data);
> + }
> +
> + put_cpu();
> + return 0;
> +}
> +EXPORT_SYMBOL(smp_call_function_single);
> +
> +/**
> + * __smp_call_function_single(): Run a function on another CPU
> + * @cpu: The CPU to run on.
> + * @data: Pre-allocated and setup data structure
> + *
> + * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
> + * data structure. Useful for embedding @data inside other structures, for
> + * instance.
> + *
> + */
> +void __smp_call_function_single(int cpu, struct call_single_data *data)
> +{
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
> +
> + generic_exec_single(cpu, data);
> +}
> +
> +/**
> + * smp_call_function_mask(): Run a function on a set of other CPUs.
> + * @mask: The set of cpus to run on.
> + * @func: The function to run. This must be fast and non-blocking.
> + * @info: An arbitrary pointer to pass to the function.
> + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> + *
> + * Returns 0 on success, else a negative status code.
> + *
> + * If @wait is true, then returns once @func has returned.
> + *
> + * You must not call this function with disabled interrupts or from a
> + * hardware interrupt handler or from a bottom half handler.
Suggest adding comment to the effect that @wait will be implicitly set
upon memory-allocation failure.
Also, isn't it necessary to call this function with preemption disabled?
If so, this should be commented as well.
> + */
> +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> + int wait)
> +{
> + struct call_function_data d;
> + struct call_function_data *data = NULL;
> + cpumask_t allbutself;
> + unsigned long flags;
> + int cpu, num_cpus;
> +
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON(irqs_disabled());
> +
> + cpu = smp_processor_id();
> + allbutself = cpu_online_map;
> + cpu_clear(cpu, allbutself);
> + cpus_and(mask, mask, allbutself);
> + num_cpus = cpus_weight(mask);
> +
> + /*
> + * If zero CPUs, return. If just a single CPU, turn this request
> + * into a targetted single call instead since it's faster.
> + */
> + if (!num_cpus)
> + return 0;
> + else if (num_cpus == 1) {
> + cpu = first_cpu(mask);
> + return smp_call_function_single(cpu, func, info, 0, wait);
> + }
> +
> + if (!wait) {
> + data = kmalloc(sizeof(*data), GFP_ATOMIC);
> + if (data)
> + data->csd.flags = CSD_FLAG_ALLOC;
> + }
> + if (!data) {
> + data = &d;
> + data->csd.flags = CSD_FLAG_WAIT;
> + }
> +
> + spin_lock_init(&data->lock);
> + data->csd.func = func;
> + data->csd.info = info;
> + data->refs = num_cpus;
> +
> + /*
> + * need to see above stores before the cpumask is valid for the CPU
> + */
> + smp_wmb();
Given that all call_function_data structs either get run through
call_rcu() or are waited for, I believe we no longer need the above
smp_wmb(). The only reason we needed it before was that there was the
possibility of a call_function_data struct being reused while a reader
was still referencing it. If I understand the code, this can no longer
happen, so this memory barrier is not needed and should be removed.
> + data->cpumask = mask;
> +
> + spin_lock_irqsave(&call_function_lock, flags);
> + list_add_tail_rcu(&data->csd.list, &call_function_queue);
> + spin_unlock_irqrestore(&call_function_lock, flags);
> +
> + /* Send a message to all CPUs in the map */
> + arch_send_call_function_ipi(mask);
> +
> + /* optionally wait for the CPUs to complete */
> + if (wait)
> + csd_flag_wait(&data->csd);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(smp_call_function_mask);
> +
> +/**
> + * smp_call_function(): Run a function on all other CPUs.
> + * @func: The function to run. This must be fast and non-blocking.
> + * @info: An arbitrary pointer to pass to the function.
> + * @natomic: Unused
> + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> + *
> + * Returns 0 on success, else a negative status code.
> + *
> + * If @wait is true, then returns once @func has returned; otherwise
> + * it returns just before the target cpu calls @func.
Suggest adding comment to the effect that @wait will be implicitly set
upon memory-allocation failure.
> + *
> + * You must not call this function with disabled interrupts or from a
> + * hardware interrupt handler or from a bottom half handler.
> + */
> +int smp_call_function(void (*func)(void *), void *info, int natomic, int wait)
> +{
> + int ret;
> +
> + preempt_disable();
> + ret = smp_call_function_mask(cpu_online_map, func, info, wait);
> + preempt_enable();
> + return ret;
> +}
> +EXPORT_SYMBOL(smp_call_function);
> --
> 1.5.6.rc0.40.gd683
>
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-05-29 8:58 ` [PATCH 9/10] parisc: " Jens Axboe
@ 2008-05-31 7:00 ` Kyle McMartin
2008-05-31 7:00 ` Kyle McMartin
2008-06-02 8:17 ` Jens Axboe
0 siblings, 2 replies; 54+ messages in thread
From: Kyle McMartin @ 2008-05-31 7:00 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck,
Kyle McMartin, Matthew Wilcox, Grant Grundler
On Thu, May 29, 2008 at 10:58:23AM +0200, Jens Axboe wrote:
> This converts parisc to use the new helpers for smp_call_function() and
> friends, and adds support for smp_call_function_single(). Not tested,
> not even compiled.
>
> Cc: Kyle McMartin <kyle@parisc-linux.org>
> Cc: Matthew Wilcox <matthew@wil.cx>
> Cc: Grant Grundler <grundler@parisc-linux.org>
> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
>
Do you have all this in an easily accessible git tree that I can merge
to test?
regards, Kyle
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 0/10] Add generic helpers for arch IPI function calls #4
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
` (9 preceding siblings ...)
2008-05-29 8:58 ` [PATCH 10/10] sh: " Jens Axboe
@ 2008-06-01 8:57 ` Andrew Morton
2008-06-01 8:57 ` Andrew Morton
2008-06-01 9:52 ` Jeremy Fitzhardinge
10 siblings, 2 replies; 54+ messages in thread
From: Andrew Morton @ 2008-06-01 8:57 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck
On Thu, 29 May 2008 10:58:14 +0200 Jens Axboe <jens.axboe@oracle.com> wrote:
> Jeremy sent me a ping about this patchset, so here's an updated
> version against the current kernel (2.6.26-rc4'ish).
If we're having 2.6.27 thoughts about this work then we should get it
into linux-next pretty soon, please.
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 0/10] Add generic helpers for arch IPI function calls #4
2008-06-01 8:57 ` [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Andrew Morton
2008-06-01 8:57 ` Andrew Morton
@ 2008-06-01 9:52 ` Jeremy Fitzhardinge
1 sibling, 0 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-06-01 9:52 UTC (permalink / raw)
To: Andrew Morton
Cc: Jens Axboe, linux-kernel, peterz, npiggin, linux-arch, mingo,
paulmck, Thomas Gleixner, x86
Andrew Morton wrote:
> On Thu, 29 May 2008 10:58:14 +0200 Jens Axboe <jens.axboe@oracle.com> wrote:
>
>
>> Jeremy sent me a ping about this patchset, so here's an updated
>> version against the current kernel (2.6.26-rc4'ish).
>>
>
> If we're having 2.6.27 thoughts about this work then we should get it
> into linux-next pretty soon, please.
>
The x86 parts clash fairly thoroughly with the tip.git x86 work.
Nothing deep; just that various files it tries to patch are now
unified. From a purely selfish perspective, I'd like to see this series
end up as a topic branch in tip.git.
J
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-05-31 7:00 ` Kyle McMartin
2008-05-31 7:00 ` Kyle McMartin
@ 2008-06-02 8:17 ` Jens Axboe
2008-06-02 16:09 ` Kyle McMartin
1 sibling, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-06-02 8:17 UTC (permalink / raw)
To: Kyle McMartin
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck,
Kyle McMartin, Matthew Wilcox, Grant Grundler
On Sat, May 31 2008, Kyle McMartin wrote:
> On Thu, May 29, 2008 at 10:58:23AM +0200, Jens Axboe wrote:
> > This converts parisc to use the new helpers for smp_call_function() and
> > friends, and adds support for smp_call_function_single(). Not tested,
> > not even compiled.
> >
> > Cc: Kyle McMartin <kyle@parisc-linux.org>
> > Cc: Matthew Wilcox <matthew@wil.cx>
> > Cc: Grant Grundler <grundler@parisc-linux.org>
> > Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> >
>
> Do you have all this in an easily accessible git tree that I can merge
> to test?
Yep, you can pull:
git://git.kernel.dk/linux-2.6-block.git generic-ipi
to get this patchset. I should have mentioned that in the intro
mail...
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 6/10] arm: convert to generic helpers for IPI function calls
2008-05-29 8:58 ` [PATCH 6/10] arm: " Jens Axboe
@ 2008-06-02 12:29 ` Russell King
2008-06-02 12:29 ` Russell King
2008-06-06 8:47 ` Jens Axboe
0 siblings, 2 replies; 54+ messages in thread
From: Russell King @ 2008-06-02 12:29 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck
On Thu, May 29, 2008 at 10:58:20AM +0200, Jens Axboe wrote:
> This converts arm to use the new helpers for smp_call_function() and
> friends, and adds support for smp_call_function_single().
> +void arch_send_call_function_ipi(cpumask_t mask)
> {
> - return smp_call_function_on_cpu(func, info, retry, wait,
> - cpu_online_map);
> + send_ipi_message(mask, IPI_CALL_FUNC);
> }
Can things like the above be inline functions to be included in the
generic code?
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-06-02 8:17 ` Jens Axboe
@ 2008-06-02 16:09 ` Kyle McMartin
2008-06-06 8:47 ` Jens Axboe
0 siblings, 1 reply; 54+ messages in thread
From: Kyle McMartin @ 2008-06-02 16:09 UTC (permalink / raw)
To: Jens Axboe
Cc: Kyle McMartin, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, paulmck, Kyle McMartin, Matthew Wilcox, Grant Grundler
On Mon, Jun 02, 2008 at 10:17:50AM +0200, Jens Axboe wrote:
> On Sat, May 31 2008, Kyle McMartin wrote:
> > On Thu, May 29, 2008 at 10:58:23AM +0200, Jens Axboe wrote:
> > > This converts parisc to use the new helpers for smp_call_function() and
> > > friends, and adds support for smp_call_function_single(). Not tested,
> > > not even compiled.
> > >
> > > Cc: Kyle McMartin <kyle@parisc-linux.org>
> > > Cc: Matthew Wilcox <matthew@wil.cx>
> > > Cc: Grant Grundler <grundler@parisc-linux.org>
> > > Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> > >
> >
> > Do you have all this in an easily accessible git tree that I can merge
> > to test?
>
> Yep, you can pull:
>
> git://git.kernel.dk/linux-2.6-block.git generic-ipi
>
> to get this patchset. I should have mentioned that in the intro
> mail...
>
Thanks, I'll try it out this evening.
r, Kyle
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-05-30 11:24 ` Paul E. McKenney
@ 2008-06-06 8:44 ` Jens Axboe
0 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-06-06 8:44 UTC (permalink / raw)
To: Paul E. McKenney; +Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo
On Fri, May 30 2008, Paul E. McKenney wrote:
> On Thu, May 29, 2008 at 10:58:15AM +0200, Jens Axboe wrote:
> > This adds kernel/smp.c which contains helpers for IPI function calls. In
> > addition to supporting the existing smp_call_function() in a more efficient
> > manner, it also adds a more scalable variant called smp_call_function_single()
> > for calling a given function on a single CPU only.
> >
> > The core of this is based on the x86-64 patch from Nick Piggin, lots of
> > changes since then. "Alan D. Brunelle" <Alan.Brunelle@hp.com> has
> > contributed lots of fixes and suggestions as well. Also thanks to
> > Paul E. McKenney <paulmck@linux.vnet.ibm.com> for reviewing RCU usage
> > and getting rid of the data allocation fallback deadlock.
>
> Looks much improved!!! A few suggestions for header comments, and
> there appears to be a leftover memory barrier that should now be
> removed. With these changes:
Thanks Paul, I added the three extra comments to the kerneldoc for those
functions. And you are right about the write-ordering memory barrier, we
don't need that anymore.
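For reference, the kind of kerneldoc note being discussed could read roughly
like this (the wording below is a guess at the follow-up, not the actual
committed text):

 * @wait: If true, wait until function has completed on other CPUs. Note
 *        that @wait is also set implicitly if the call data cannot be
 *        allocated, since the on-stack fallback must stay valid until the
 *        target CPU is done with it.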
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 6/10] arm: convert to generic helpers for IPI function calls
2008-06-02 12:29 ` Russell King
2008-06-02 12:29 ` Russell King
@ 2008-06-06 8:47 ` Jens Axboe
1 sibling, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-06-06 8:47 UTC (permalink / raw)
To: Russell King
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck
On Mon, Jun 02 2008, Russell King wrote:
> On Thu, May 29, 2008 at 10:58:20AM +0200, Jens Axboe wrote:
> > This converts arm to use the new helpers for smp_call_function() and
> > friends, and adds support for smp_call_function_single().
>
> > +void arch_send_call_function_ipi(cpumask_t mask)
> > {
> > - return smp_call_function_on_cpu(func, info, retry, wait,
> > - cpu_online_map);
> > + send_ipi_message(mask, IPI_CALL_FUNC);
> > }
>
> Can things like the above be inline functions to be included in the
> generic code?
Sure, it could be put in asm/smp.h for the supported archs.
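As a rough sketch (not part of the posted series), the ARM variant could then
look something like this in include/asm-arm/smp.h, assuming send_ipi_message()
and the IPI message numbers were made visible to the header:

/* Hypothetical inline helpers -- the posted series keeps these in
 * arch/arm/kernel/smp.c instead.
 */
static inline void arch_send_call_function_ipi(cpumask_t mask)
{
	send_ipi_message(mask, IPI_CALL_FUNC);
}

static inline void arch_send_call_function_single_ipi(int cpu)
{
	send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE);
}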
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-06-02 16:09 ` Kyle McMartin
@ 2008-06-06 8:47 ` Jens Axboe
2008-06-06 21:11 ` Kyle McMartin
0 siblings, 1 reply; 54+ messages in thread
From: Jens Axboe @ 2008-06-06 8:47 UTC (permalink / raw)
To: Kyle McMartin
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck,
Kyle McMartin, Matthew Wilcox, Grant Grundler
On Mon, Jun 02 2008, Kyle McMartin wrote:
> On Mon, Jun 02, 2008 at 10:17:50AM +0200, Jens Axboe wrote:
> > On Sat, May 31 2008, Kyle McMartin wrote:
> > > On Thu, May 29, 2008 at 10:58:23AM +0200, Jens Axboe wrote:
> > > > This converts parisc to use the new helpers for smp_call_function() and
> > > > friends, and adds support for smp_call_function_single(). Not tested,
> > > > not even compiled.
> > > >
> > > > Cc: Kyle McMartin <kyle@parisc-linux.org>
> > > > Cc: Matthew Wilcox <matthew@wil.cx>
> > > > Cc: Grant Grundler <grundler@parisc-linux.org>
> > > > Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> > > >
> > >
> > > Do you have all this in an easily accessible git tree that I can merge
> > > to test?
> >
> > Yep, you can pull:
> >
> > git://git.kernel.dk/linux-2.6-block.git generic-ipi
> >
> > to get this patchset. I should have mentioned that in the intro
> > mail...
> >
>
> Thanks, I'll try it out this evening.
How did that go?
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-06-06 8:47 ` Jens Axboe
@ 2008-06-06 21:11 ` Kyle McMartin
2008-06-09 8:47 ` Jens Axboe
0 siblings, 1 reply; 54+ messages in thread
From: Kyle McMartin @ 2008-06-06 21:11 UTC (permalink / raw)
To: Jens Axboe
Cc: Kyle McMartin, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, paulmck, Kyle McMartin, Matthew Wilcox, Grant Grundler
On Fri, Jun 06, 2008 at 10:47:43AM +0200, Jens Axboe wrote:
> > Thanks, I'll try it out this evening.
>
> How did that go?
>
Ah, sorry, I forgot to mention that. Seems to be working fine, at least,
I'm getting jobs scheduled on each cpu, afaict.
Needed this patch though, and from the looks of things, a bunch of other
architectures will need it as well.
Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
diff --git a/include/asm-parisc/smp.h b/include/asm-parisc/smp.h
index 306f495..ad24e7f 100644
--- a/include/asm-parisc/smp.h
+++ b/include/asm-parisc/smp.h
@@ -62,4 +62,7 @@ static inline void __cpu_die (unsigned int cpu) {
}
extern int __cpu_up (unsigned int cpu);
+void arch_send_call_function_ipi(cpumask_t mask);
+void arch_send_call_function_single_ipi(int cpu);
+
#endif /* __ASM_SMP_H */
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 9/10] parisc: convert to generic helpers for IPI function calls
2008-06-06 21:11 ` Kyle McMartin
@ 2008-06-09 8:47 ` Jens Axboe
0 siblings, 0 replies; 54+ messages in thread
From: Jens Axboe @ 2008-06-09 8:47 UTC (permalink / raw)
To: Kyle McMartin
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck,
Kyle McMartin, Matthew Wilcox, Grant Grundler
On Fri, Jun 06 2008, Kyle McMartin wrote:
> On Fri, Jun 06, 2008 at 10:47:43AM +0200, Jens Axboe wrote:
> > > Thanks, I'll try it out this evening.
> >
> > How did that go?
> >
>
> Ah, sorry, I forgot to mention that. Seems to be working fine, at
> least, I'm getting jobs scheduled on each cpu, afaict.
>
> Needed this patch though, and from the looks of things, a bunch of
> other architectures will need it as well.
>
> Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
>
> diff --git a/include/asm-parisc/smp.h b/include/asm-parisc/smp.h
> index 306f495..ad24e7f 100644
> --- a/include/asm-parisc/smp.h
> +++ b/include/asm-parisc/smp.h
> @@ -62,4 +62,7 @@ static inline void __cpu_die (unsigned int cpu) {
> }
> extern int __cpu_up (unsigned int cpu);
>
> +void arch_send_call_function_ipi(cpumask_t mask);
> +void arch_send_call_function_single_ipi(int cpu);
> +
> #endif /* __ASM_SMP_H */
Yeah, the very latest (not posted) needs this, as archs can now put
inline versions in asm/smp.h if they wish. I'll update parisc and
the others, thanks!
--
Jens Axboe
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
2008-05-30 11:24 ` Paul E. McKenney
@ 2008-06-10 14:51 ` Catalin Marinas
2008-06-10 15:44 ` James Bottomley
2008-06-10 15:47 ` Paul E. McKenney
2008-07-06 17:21 ` Jeremy Fitzhardinge
2 siblings, 2 replies; 54+ messages in thread
From: Catalin Marinas @ 2008-06-10 14:51 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-kernel, peterz, npiggin, linux-arch, jeremy, mingo, paulmck,
Russell King
Hi,
On Thu, 2008-05-29 at 10:58 +0200, Jens Axboe wrote:
> This adds kernel/smp.c which contains helpers for IPI function calls. In
> addition to supporting the existing smp_call_function() in a more efficient
> manner, it also adds a more scalable variant called smp_call_function_single()
> for calling a given function on a single CPU only.
[...]
> + * You must not call this function with disabled interrupts or from a
> + * hardware interrupt handler or from a bottom half handler.
> + */
> +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> + int wait)
> +{
> + struct call_function_data d;
> + struct call_function_data *data = NULL;
> + cpumask_t allbutself;
> + unsigned long flags;
> + int cpu, num_cpus;
> +
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON(irqs_disabled());
I was thinking whether this condition can be removed and allow the
smp_call_function*() to be called with IRQs disabled. At a quick look,
it seems to be possible if the csd_flag_wait() function calls the IPI
handlers directly when the IRQs are disabled (see the patch below).
This would be useful on ARM11MPCore based systems where the cache
maintenance operations are not detected by the snoop control unit and
this affects the DMA calls like dma_map_single(). There doesn't seem to
be any restriction on calls to dma_map_single() and hence we cannot
broadcast the cache operation to the other CPUs. I could implement this
in the ARM specific code using spin_try_lock (on an IPI-specific lock
held during the cross-call) and polling for an IPI if a lock cannot be
acquired (meaning that a different CPU is issuing an IPI) but I was
wondering whether this would be possible in a more generic way.
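A rough sketch of that ARM-side idea, purely for illustration (the lock and
wrapper names below are invented for this example, not existing code):

/* Hypothetical: serialise cross-calls behind one lock; a CPU that cannot
 * take it knows another CPU is mid cross-call and services any pending
 * call-function work by hand instead of spinning blind with IRQs off.
 */
static DEFINE_SPINLOCK(ipi_cross_call_lock);

static void broadcast_cache_op(void (*func)(void *), void *info)
{
	while (!spin_trylock(&ipi_cross_call_lock)) {
		generic_smp_call_function_interrupt();
		generic_smp_call_function_single_interrupt();
		cpu_relax();
	}
	smp_call_function(func, info, 0, 1);
	spin_unlock(&ipi_cross_call_lock);
}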
Please let me know what you think or whether deadlocks are still
possible (or any other solution apart from hardware fixes :-)). Thanks.
diff --git a/kernel/smp.c b/kernel/smp.c
index ef6de3d..2c63e81 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -54,6 +54,10 @@ static void csd_flag_wait(struct call_single_data *data)
smp_mb();
if (!(data->flags & CSD_FLAG_WAIT))
break;
+ if (irqs_disabled()) {
+ generic_smp_call_function_single_interrupt();
+ generic_smp_call_function_interrupt();
+ }
cpu_relax();
} while (1);
}
@@ -208,9 +212,6 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
/* prevent preemption and reschedule on another processor */
int me = get_cpu();
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
if (cpu == me) {
local_irq_save(flags);
func(info);
@@ -250,9 +251,6 @@ EXPORT_SYMBOL(smp_call_function_single);
*/
void __smp_call_function_single(int cpu, struct call_single_data *data)
{
- /* Can deadlock when called with interrupts disabled */
- WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
-
generic_exec_single(cpu, data);
}
@@ -279,9 +277,6 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
unsigned long flags;
int cpu, num_cpus;
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
cpu = smp_processor_id();
allbutself = cpu_online_map;
cpu_clear(cpu, allbutself);
--
Catalin
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-10 14:51 ` Catalin Marinas
@ 2008-06-10 15:44 ` James Bottomley
2008-06-10 16:04 ` Catalin Marinas
2008-06-10 15:47 ` Paul E. McKenney
1 sibling, 1 reply; 54+ messages in thread
From: James Bottomley @ 2008-06-10 15:44 UTC (permalink / raw)
To: Catalin Marinas
Cc: Jens Axboe, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, paulmck, Russell King
On Tue, 2008-06-10 at 15:51 +0100, Catalin Marinas wrote:
> Hi,
>
> On Thu, 2008-05-29 at 10:58 +0200, Jens Axboe wrote:
> > This adds kernel/smp.c which contains helpers for IPI function calls. In
> > addition to supporting the existing smp_call_function() in a more efficient
> > manner, it also adds a more scalable variant called smp_call_function_single()
> > for calling a given function on a single CPU only.
> [...]
> > + * You must not call this function with disabled interrupts or from a
> > + * hardware interrupt handler or from a bottom half handler.
> > + */
> > +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> > + int wait)
> > +{
> > + struct call_function_data d;
> > + struct call_function_data *data = NULL;
> > + cpumask_t allbutself;
> > + unsigned long flags;
> > + int cpu, num_cpus;
> > +
> > + /* Can deadlock when called with interrupts disabled */
> > + WARN_ON(irqs_disabled());
>
> I was thinking whether this condition can be removed and allow the
> smp_call_function*() to be called with IRQs disabled. At a quick look,
> it seems to be possible if the csd_flag_wait() function calls the IPI
> handlers directly when the IRQs are disabled (see the patch below).
>
> This would be useful on ARM11MPCore based systems where the cache
> maintenance operations are not detected by the snoop control unit and
> this affects the DMA calls like dma_map_single(). There doesn't seem to
> be any restriction on calls to dma_map_single() and hence we cannot
> broadcast the cache operation to the other CPUs. I could implement this
> in the ARM specific code using spin_try_lock (on an IPI-specific lock
> held during the cross-call) and polling for an IPI if a lock cannot be
> acquired (meaning that a different CPU is issuing an IPI) but I was
> wondering whether this would be possible in a more generic way.
>
> Please let me know what you think or whether deadlocks are still
> possible (or any other solution apart from hardware fixes :-)). Thanks.
I don't see how your proposal fixes the deadlocks. The problem is that
on a lot of arch's IPIs are normal interrupts. If interrupts are
disabled, you don't see them.
The deadlock scenario is that CPU1 enters smp_call_function() with IRQs
disabled while CPU2 does the same thing and spins on the call_lock. Now
CPU1 is waiting for an ack for its IPI to CPU2, but CPU2 will never see
the IPI until it enables interrupts.
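A minimal sketch of that scenario, modelled on the pre-conversion MIPS/x86
style call path quoted earlier (names are illustrative only):

/* CPU1: */
local_irq_disable();
spin_lock(&call_lock);
call_data = &data;
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
while (atomic_read(&data.started) != cpus)
	cpu_relax();		/* stuck: CPU2 never takes the IPI */

/* CPU2, concurrently: */
local_irq_disable();
spin_lock(&call_lock);		/* stuck: CPU1 holds the lock and is spinning
				   above; the pending IPI is never serviced
				   here because interrupts are off */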
One way to mitigate the effects of this is to enable interrupts if the
architecture code finds the call_lock (x86 implementation) held against
it, then re-disable before trying to get the lock again. But really, in
order to make smp_call_function work in interrupt disabled sections, the
interrupt handler has to be modified to bar all non-IPI interrupts until
the critical section is over (otherwise there's no point allowing it
with disabled interrupts because an smp_call_function becomes a de facto
interrupt enable again). If you really want to see how something like
this works, the voyager code has it (because interrupt disabling in the
VIC is expensive). But it's quite a lot of code ...
James
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-10 14:51 ` Catalin Marinas
2008-06-10 15:44 ` James Bottomley
@ 2008-06-10 15:47 ` Paul E. McKenney
2008-06-10 16:53 ` Catalin Marinas
1 sibling, 1 reply; 54+ messages in thread
From: Paul E. McKenney @ 2008-06-10 15:47 UTC (permalink / raw)
To: Catalin Marinas
Cc: Jens Axboe, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, Russell King
On Tue, Jun 10, 2008 at 03:51:25PM +0100, Catalin Marinas wrote:
> Hi,
>
> On Thu, 2008-05-29 at 10:58 +0200, Jens Axboe wrote:
> > This adds kernel/smp.c which contains helpers for IPI function calls. In
> > addition to supporting the existing smp_call_function() in a more efficient
> > manner, it also adds a more scalable variant called smp_call_function_single()
> > for calling a given function on a single CPU only.
> [...]
> > + * You must not call this function with disabled interrupts or from a
> > + * hardware interrupt handler or from a bottom half handler.
> > + */
> > +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> > + int wait)
> > +{
> > + struct call_function_data d;
> > + struct call_function_data *data = NULL;
> > + cpumask_t allbutself;
> > + unsigned long flags;
> > + int cpu, num_cpus;
> > +
> > + /* Can deadlock when called with interrupts disabled */
> > + WARN_ON(irqs_disabled());
>
> I was thinking whether this condition can be removed and allow the
> smp_call_function*() to be called with IRQs disabled. At a quick look,
> it seems to be possible if the csd_flag_wait() function calls the IPI
> handlers directly when the IRQs are disabled (see the patch below).
>
> This would be useful on ARM11MPCore based systems where the cache
> maintenance operations are not detected by the snoop control unit and
> this affects the DMA calls like dma_map_single(). There doesn't seem to
> be any restriction on calls to dma_map_single() and hence we cannot
> broadcast the cache operation to the other CPUs. I could implement this
> in the ARM specific code using spin_try_lock (on an IPI-specific lock
> held during the cross-call) and polling for an IPI if a lock cannot be
> acquired (meaning that a different CPU is issuing an IPI) but I was
> wondering whether this would be possible in a more generic way.
>
> Please let me know what you think or whether deadlocks are still
> possible (or any other solution apart from hardware fixes :-)). Thanks.
There were objections last month: http://lkml.org/lkml/2008/5/3/167
The issue was that this permits some interrupts to arrive despite
interrupts being disabled. There seemed to be less resistance to
doing this in the wait==1 case, however.
Thanx, Paul
> diff --git a/kernel/smp.c b/kernel/smp.c
> index ef6de3d..2c63e81 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -54,6 +54,10 @@ static void csd_flag_wait(struct call_single_data *data)
> smp_mb();
> if (!(data->flags & CSD_FLAG_WAIT))
> break;
> + if (irqs_disabled()) {
> + generic_smp_call_function_single_interrupt();
> + generic_smp_call_function_interrupt();
> + }
> cpu_relax();
> } while (1);
> }
> @@ -208,9 +212,6 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
> /* prevent preemption and reschedule on another processor */
> int me = get_cpu();
>
> - /* Can deadlock when called with interrupts disabled */
> - WARN_ON(irqs_disabled());
> -
> if (cpu == me) {
> local_irq_save(flags);
> func(info);
> @@ -250,9 +251,6 @@ EXPORT_SYMBOL(smp_call_function_single);
> */
> void __smp_call_function_single(int cpu, struct call_single_data *data)
> {
> - /* Can deadlock when called with interrupts disabled */
> - WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
> -
> generic_exec_single(cpu, data);
> }
>
> @@ -279,9 +277,6 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> unsigned long flags;
> int cpu, num_cpus;
>
> - /* Can deadlock when called with interrupts disabled */
> - WARN_ON(irqs_disabled());
> -
> cpu = smp_processor_id();
> allbutself = cpu_online_map;
> cpu_clear(cpu, allbutself);
>
>
> --
> Catalin
>
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-10 15:44 ` James Bottomley
@ 2008-06-10 16:04 ` Catalin Marinas
0 siblings, 0 replies; 54+ messages in thread
From: Catalin Marinas @ 2008-06-10 16:04 UTC (permalink / raw)
To: James Bottomley
Cc: Jens Axboe, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, paulmck, Russell King
On Tue, 2008-06-10 at 10:44 -0500, James Bottomley wrote:
> On Tue, 2008-06-10 at 15:51 +0100, Catalin Marinas wrote:
> > I was thinking whether this condition can be removed and allow the
> > smp_call_function*() to be called with IRQs disabled. At a quick look,
> > it seems to be possible if the csd_flag_wait() function calls the IPI
> > handlers directly when the IRQs are disabled (see the patch below).
[...]
> > Please let me know what you think or whether deadlocks are still
> > possible (or any other solution apart from hardware fixes :-)). Thanks.
>
> I don't see how your proposal fixes the deadlocks. The problem is that
> on a lot of arch's IPIs are normal interrupts. If interrupts are
> disabled, you don't see them.
ARM uses normal interrupts for IPIs as well.
> The deadlock scenario is CPU1 enters smp_call_function() with IRQ's
> disabled as CPU2 does the same thing and spins on the call_lock. Now
> CPU1 is waiting for an ack for its IPI to CPU2, but CPU2 will never see
> the IPI until it enables interrupts.
I can see in the generic IPI patches that the call_function_lock is only
held for list_add_tail_rcu and not while waiting for the other CPU to
complete (both arch_send_call_function_ipi and csd_flag_wait are outside
the locking region).
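For reference, the relevant ordering in the generic
smp_call_function_mask() is roughly this (abridged from the posted
patch):

	spin_lock_irqsave(&call_function_lock, flags);
	list_add_tail_rcu(&data->csd.list, &call_function_queue);
	spin_unlock_irqrestore(&call_function_lock, flags);

	/* the IPI and the wait both happen after the lock is dropped */
	arch_send_call_function_ipi(mask);
	if (wait)
		csd_flag_wait(&data->csd);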
The patch I posted polls for an incoming IPI in the csd_flag_wait()
function if the interrupts are disabled so that it clears the wait flag
even if it doesn't get the IPI. The disadvantage might be a spurious IPI
(but I can live with that). If interrupts are enabled, there is no
drawback, apart from a call to irqs_disabled().
> One way to mitigate the effects of this is to enable interrupts if the
> architecture code finds the call_lock (x86 implementation) held against
> it, then re-disable before trying to get the lock again. But really, in
> order to make smp_call_function work in interrupt disabled sections, the
> interrupt handler has to be modified to bar all non-IPI interrupts until
> the critical section is over (otherwise there's no point allowing it
> with disabled interrupts because an smp_call_function becomes a de facto
> interrupt enable again). If you really want to see how something like
> this works, the voyager code has it (because interrupt disabling in the
> VIC is expensive). But it's quite a lot of code ...
I still think it can be less complicated than this. I'll look at Paul's
post to get some ideas. As I said, I need this functionality on current
ARM SMP systems, even if it means implementing it separately.
Thanks.
--
Catalin
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-10 15:47 ` Paul E. McKenney
@ 2008-06-10 16:53 ` Catalin Marinas
2008-06-11 3:25 ` Nick Piggin
0 siblings, 1 reply; 54+ messages in thread
From: Catalin Marinas @ 2008-06-10 16:53 UTC (permalink / raw)
To: paulmck
Cc: Jens Axboe, linux-kernel, peterz, npiggin, linux-arch, jeremy,
mingo, Russell King
On Tue, 2008-06-10 at 08:47 -0700, Paul E. McKenney wrote:
> On Tue, Jun 10, 2008 at 03:51:25PM +0100, Catalin Marinas wrote:
> > I was thinking whether this condition can be removed and allow the
> > smp_call_function*() to be called with IRQs disabled. At a quick look,
> > it seems to be possible if the csd_flag_wait() function calls the IPI
> > handlers directly when the IRQs are disabled (see the patch below).
[...]
> There were objections last month: http://lkml.org/lkml/2008/5/3/167
Thanks, I missed this discussion.
> The issue was that this permits some interrupts to arrive despite
> interrupts being disabled. There seemed to be less resistance to
> doing this in the wait==1 case, however.
The "(wait == 1) && irqs_disabled()" case is what I would be interested
in. In the patch you proposed, this doesn't seem to be allowed (at least
from the use of WARN_ON). However, from your post in May:
> 5. If you call smp_call_function() with irqs disabled, then you
> are guaranteed that no other CPU's smp_call_function() handler
> will be invoked while smp_call_function() is executing.
this would be possible, but no one needs this functionality yet.
Would one use-case (ARM SMP and DMA cache maintenance) be enough to
implement this, or should I add it to the ARM-specific code?
Thanks.
--
Catalin
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-10 16:53 ` Catalin Marinas
@ 2008-06-11 3:25 ` Nick Piggin
2008-06-11 10:13 ` Catalin Marinas
0 siblings, 1 reply; 54+ messages in thread
From: Nick Piggin @ 2008-06-11 3:25 UTC (permalink / raw)
To: Catalin Marinas
Cc: paulmck, Jens Axboe, linux-kernel, peterz, linux-arch, jeremy,
mingo, Russell King
On Tue, Jun 10, 2008 at 05:53:08PM +0100, Catalin Marinas wrote:
> On Tue, 2008-06-10 at 08:47 -0700, Paul E. McKenney wrote:
> > On Tue, Jun 10, 2008 at 03:51:25PM +0100, Catalin Marinas wrote:
> > > I was thinking whether this condition can be removed and allow the
> > > smp_call_function*() to be called with IRQs disabled. At a quick look,
> > > it seems to be possible if the csd_flag_wait() function calls the IPI
> > > handlers directly when the IRQs are disabled (see the patch below).
> [...]
> > There were objections last month: http://lkml.org/lkml/2008/5/3/167
>
> Thanks, I missed this discussion.
>
> > The issue was that this permits some interrupts to arrive despite
> > interrupts being disabled. There seemed to be less resistance to
> > doing this in the wait==1 case, however.
>
> The "(wait == 1) && irqs_disabled()" case is what I would be interested
> in. In the patch you proposed, this doesn't seem to be allowed (at least
> from the use of WARN_ON). However, from your post in May:
>
> > 5. If you call smp_call_function() with irqs disabled, then you
> > are guaranteed that no other CPU's smp_call_function() handler
> > will be invoked while smp_call_function() is executing.
>
> > this would be possible, but no one needs this functionality yet.
>
> Would one use-case (ARM SMP and DMA cache maintenance) be enough to
> > implement this, or should I add it to the ARM-specific code?
How will you implement it? You have to be able to wait *somewhere*
(either before or after the smp_call_function call) with interrupts
enabled. It is not enough just to eg. use a spinlock around
smp_call_function, because other CPUs might also be trying to call
down the same path also with interrupts disabled, and they'll wait
forever on the spinlock.
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-06-11 3:25 ` Nick Piggin
@ 2008-06-11 10:13 ` Catalin Marinas
2008-06-11 10:13 ` Catalin Marinas
0 siblings, 1 reply; 54+ messages in thread
From: Catalin Marinas @ 2008-06-11 10:13 UTC (permalink / raw)
To: Nick Piggin
Cc: paulmck, Jens Axboe, linux-kernel, peterz, linux-arch, jeremy,
mingo, Russell King
On Wed, 2008-06-11 at 05:25 +0200, Nick Piggin wrote:
> On Tue, Jun 10, 2008 at 05:53:08PM +0100, Catalin Marinas wrote:
> > On Tue, 2008-06-10 at 08:47 -0700, Paul E. McKenney wrote:
> > > On Tue, Jun 10, 2008 at 03:51:25PM +0100, Catalin Marinas wrote:
> > > > I was thinking whether this condition can be removed and allow the
> > > > smp_call_function*() to be called with IRQs disabled. At a quick look,
> > > > it seems to be possible if the csd_flag_wait() function calls the IPI
> > > > handlers directly when the IRQs are disabled (see the patch below).
> > [...]
> > > There were objections last month: http://lkml.org/lkml/2008/5/3/167
> >
> > Thanks, I missed this discussion.
> >
> > > The issue was that this permits some interrupts to arrive despite
> > > interrupts being disabled. There seemed to be less resistance to
> > > doing this in the wait==1 case, however.
> >
> > The "(wait == 1) && irqs_disabled()" case is what I would be interested
> > in. In the patch you proposed, this doesn't seem to be allowed (at least
> > from the use of WARN_ON). However, from your post in May:
> >
> > > 5. If you call smp_call_function() with irqs disabled, then you
> > > are guaranteed that no other CPU's smp_call_function() handler
> > > will be invoked while smp_call_function() is executing.
> >
> > this would be possible, but no one needs this functionality yet.
> >
> > Would one use-case (ARM SMP and DMA cache maintenance) be enough to
> > implement this, or should I add it to the ARM-specific code?
>
> How will you implement it? You have to be able to wait *somewhere*
> (either before or after the smp_call_function call) with interrupts
> enabled. It is not enough just to eg. use a spinlock around
> smp_call_function, because other CPUs might also be trying to call
> down the same path also with interrupts disabled, and they'll wait
> forever on the spinlock.
With the generic IPI patches, I think it is just a matter of polling for
incoming IPIs in the csd_flag_wait() if interrupts are disabled since no
spinlock is held when issuing the IPI (smp_call_function can be called
concurrently).
If I do it in the ARM-specific code, I would duplicate the
smp_call_function (but avoid the call_single_queue, maybe just like the
current ARM implementation) and use a spinlock around the IPI
invocation. If interrupts are allowed to be disabled, the spin_lock()
would actually be a spin_trylock() (or spin_trylock_irqsave()),
something like below (untested). It can be improved for the IRQs-enabled
case to reduce the latency.
smp_call_function(...)
{
...
/*
* disable IRQs so that we can call this function from
* interrupt context
*/
local_irq_save(flags);
while (!spin_trylock(&call_function_lock)) {
/* other CPU is sending an IPI, just poll for it */
smp_call_function_interrupt();
}
/* acquired the lock, do the IPI stuff */
...
/*
* wait for the other CPUs to complete the IPI. No other CPU
* is waiting for completion because of the call_function_lock
*/
...
spin_unlock(&call_function_lock);
local_irq_restore(flags);
}
One issue I saw raised with the polling loop is that it calls an
interrupt handler outside an interrupt context and with IRQs disabled.
There shouldn't be any issue with the existing code since
smp_call_function assumes interrupts enabled anyway. Whoever needs this
functionality should take greater care with IRQs disabled (in my case,
only calling some cache maintenance operations is OK).
Regarding the interrupt latency, it would be higher since the IPI is not
that cheap, but (in the ARM SMP case) calling dma_map_single/sg (especially
with TO_DEVICE) with interrupts disabled is pretty bad already, as it
involves flushing the caches for that range, which is time-consuming.
If dma_map_single/sg were disallowed with interrupts disabled or
from interrupt context, we would no longer need the above workarounds
for smp_call_function on ARM.
Regards.
--
Catalin
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 1/10] Add generic helpers for arch IPI function calls
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
2008-05-30 11:24 ` Paul E. McKenney
2008-06-10 14:51 ` Catalin Marinas
@ 2008-07-06 17:21 ` Jeremy Fitzhardinge
2 siblings, 0 replies; 54+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-06 17:21 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel, peterz, npiggin, linux-arch, mingo, paulmck
Jens Axboe wrote:
> +/**
> + * __smp_call_function_single(): Run a function on another CPU
> + * @cpu: The CPU to run on.
> + * @data: Pre-allocated and setup data structure
> + *
> + * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
> + * data structure. Useful for embedding @data inside other structures, for
> + * instance.
> + *
> + */
> +void __smp_call_function_single(int cpu, struct call_single_data *data)
>
Does this have any users? Proposed users? (A sketch of one hypothetical
user follows this hunk.)
> +{
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
>
Can it reasonably be used without FLAG_WAIT set? If it doesn't have
FLAG_WAIT, it must have FLAG_ALLOC set, but it can't have FLAG_ALLOC if
the csd structure is embedded in something else.
> +
> + generic_exec_single(cpu, data);
> +}
> +
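On possible users: a hypothetical embedded-csd user could look like the
sketch below (all names are made up, and it assumes the no-WAIT/no-ALLOC
case is actually legal -- which is what the FLAG_WAIT question above is
probing):

	struct my_remote_work {
		struct call_single_data csd;
		int arg;
	};

	static void my_remote_func(void *info)
	{
		struct my_remote_work *w = info;
		/* runs on the target CPU; w->arg is valid here */
	}

	static void kick_cpu(int cpu, struct my_remote_work *w)
	{
		w->csd.func = my_remote_func;
		w->csd.info = w;
		w->csd.flags = 0;	/* caller must not reuse w until func has run */
		__smp_call_function_single(cpu, &w->csd);
	}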
> +/**
> + * smp_call_function_mask(): Run a function on a set of other CPUs.
> + * @mask: The set of cpus to run on.
> + * @func: The function to run. This must be fast and non-blocking.
> + * @info: An arbitrary pointer to pass to the function.
> + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> + *
> + * Returns 0 on success, else a negative status code.
> + *
> + * If @wait is true, then returns once @func has returned.
> + *
> + * You must not call this function with disabled interrupts or from a
> + * hardware interrupt handler or from a bottom half handler.
> + */
> +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> + int wait)
> +{
> + struct call_function_data d;
> + struct call_function_data *data = NULL;
> + cpumask_t allbutself;
> + unsigned long flags;
> + int cpu, num_cpus;
> +
> + /* Can deadlock when called with interrupts disabled */
> + WARN_ON(irqs_disabled());
> +
> + cpu = smp_processor_id();
>
What prevents us from changing cpus? Are all callers supposed to
disable preemption? If so, should we WARN for it?
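Caller-side, the pinning would presumably be just (a sketch -- the mask
and function are placeholders; the open question is whether the helper
should do this itself, or at least WARN):

	preempt_disable();
	smp_call_function_mask(some_mask, some_func, NULL, 1);
	preempt_enable();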
> + allbutself = cpu_online_map;
> + cpu_clear(cpu, allbutself);
> + cpus_and(mask, mask, allbutself);
> + num_cpus = cpus_weight(mask);
>
Ditto: What prevents the online map changing under our feet? Is
disabling preemption enough?
> +
> + /*
> + * If zero CPUs, return. If just a single CPU, turn this request
> + * into a targetted single call instead since it's faster.
> + */
> + if (!num_cpus)
> + return 0;
> + else if (num_cpus == 1) {
> + cpu = first_cpu(mask);
> + return smp_call_function_single(cpu, func, info, 0, wait);
> + }
>
It's weirdly inconsistent that smp_call_function_mask will ignore the
current cpu if it's in the mask, but smp_call_function_single will call
the function locally if the specified cpu is the current one. I know
this is a hold-over from the old code, but could we finally fix it now?
I guess it's still reasonable for smp_call_function to call everywhere
but the current cpu, but it can do its own mask manipulation to
sort that out.
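A sketch of that, reusing the existing preempt_disable() wrapper
(2.6.26-era cpumask API; not part of the posted patch):

	int smp_call_function(void (*func)(void *), void *info, int natomic,
			      int wait)
	{
		cpumask_t mask;
		int ret;

		preempt_disable();
		/* keep the "everyone but self" semantics here ... */
		mask = cpu_online_map;
		cpu_clear(smp_processor_id(), mask);
		/* ... so smp_call_function_mask() can treat the mask literally */
		ret = smp_call_function_mask(mask, func, info, wait);
		preempt_enable();
		return ret;
	}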
> +
> + if (!wait) {
> + data = kmalloc(sizeof(*data), GFP_ATOMIC);
> + if (data)
> + data->csd.flags = CSD_FLAG_ALLOC;
> + }
> + if (!data) {
> + data = &d;
> + data->csd.flags = CSD_FLAG_WAIT;
> + }
> +
> + spin_lock_init(&data->lock);
> + data->csd.func = func;
> + data->csd.info = info;
> + data->refs = num_cpus;
> +
> + /*
> + * need to see above stores before the cpumask is valid for the CPU
> + */
> + smp_wmb();
> + data->cpumask = mask;
> +
> + spin_lock_irqsave(&call_function_lock, flags);
> + list_add_tail_rcu(&data->csd.list, &call_function_queue);
> + spin_unlock_irqrestore(&call_function_lock, flags);
> +
> + /* Send a message to all CPUs in the map */
> + arch_send_call_function_ipi(mask);
> +
> + /* optionally wait for the CPUs to complete */
> + if (wait)
>
Should this test for data->csd.flags & CSD_FLAG_WAIT? Or should the "if
(!data)" path above also set "wait"? (A sketch of the latter follows this
quoted hunk.)
> + csd_flag_wait(&data->csd);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(smp_call_function_mask);
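Picking up the second option from the CSD_FLAG_WAIT question above, the
fallback path could simply force the wait it already implies (a sketch,
not what the posted patch does):

	if (!wait) {
		data = kmalloc(sizeof(*data), GFP_ATOMIC);
		if (data)
			data->csd.flags = CSD_FLAG_ALLOC;
	}
	if (!data) {
		/* allocation failed or skipped: fall back to the on-stack csd */
		data = &d;
		data->csd.flags = CSD_FLAG_WAIT;
		wait = 1;	/* so the later "if (wait)" actually waits */
	}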
> +
> +/**
> + * smp_call_function(): Run a function on all other CPUs.
> + * @func: The function to run. This must be fast and non-blocking.
> + * @info: An arbitrary pointer to pass to the function.
> + * @natomic: Unused
> + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> + *
> + * Returns 0 on success, else a negative status code.
> + *
> + * If @wait is true, then returns once @func has returned; otherwise
> + * it returns just before the target cpu calls @func.
> + *
> + * You must not call this function with disabled interrupts or from a
> + * hardware interrupt handler or from a bottom half handler.
> + */
> +int smp_call_function(void (*func)(void *), void *info, int natomic, int wait)
> +{
> + int ret;
> +
> + preempt_disable();
> + ret = smp_call_function_mask(cpu_online_map, func, info, wait);
> + preempt_enable();
> + return ret;
> +}
> +EXPORT_SYMBOL(smp_call_function);
>
^ permalink raw reply [flat|nested] 54+ messages in thread
end of thread, other threads: [~2008-07-06 17:21 UTC | newest]
Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-29 8:58 [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Jens Axboe
2008-05-29 8:58 ` [PATCH 1/10] Add generic helpers for arch IPI function calls Jens Axboe
2008-05-30 11:24 ` Paul E. McKenney
2008-06-06 8:44 ` Jens Axboe
2008-06-10 14:51 ` Catalin Marinas
2008-06-10 15:44 ` James Bottomley
2008-06-10 16:04 ` Catalin Marinas
2008-06-10 15:47 ` Paul E. McKenney
2008-06-10 16:53 ` Catalin Marinas
2008-06-11 3:25 ` Nick Piggin
2008-06-11 10:13 ` Catalin Marinas
2008-07-06 17:21 ` Jeremy Fitzhardinge
2008-05-29 8:58 ` [PATCH 2/10] x86: convert to generic helpers for " Jens Axboe
2008-05-29 12:12 ` Jeremy Fitzhardinge
2008-05-29 12:17 ` Jens Axboe
2008-05-29 13:47 ` Jeremy Fitzhardinge
2008-05-29 8:58 ` [PATCH 3/10] powerpc: " Jens Axboe
2008-05-29 8:58 ` [PATCH 4/10] ia64: " Jens Axboe
2008-05-29 8:58 ` [PATCH 5/10] alpha: " Jens Axboe
2008-05-29 8:58 ` [PATCH 6/10] arm: " Jens Axboe
2008-06-02 12:29 ` Russell King
2008-06-02 12:29 ` Russell King
2008-06-06 8:47 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 7/10] m32r: " Jens Axboe
2008-05-29 8:58 ` [PATCH 8/10] mips: " Jens Axboe
2008-05-29 14:20 ` Ralf Baechle
2008-05-30 7:23 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 9/10] parisc: " Jens Axboe
2008-05-31 7:00 ` Kyle McMartin
2008-05-31 7:00 ` Kyle McMartin
2008-06-02 8:17 ` Jens Axboe
2008-06-02 16:09 ` Kyle McMartin
2008-06-06 8:47 ` Jens Axboe
2008-06-06 21:11 ` Kyle McMartin
2008-06-09 8:47 ` Jens Axboe
2008-05-29 8:58 ` [PATCH 10/10] sh: " Jens Axboe
2008-06-01 8:57 ` [PATCH 0/10] Add generic helpers for arch IPI function calls #4 Andrew Morton
2008-06-01 8:57 ` Andrew Morton
2008-06-01 9:52 ` Jeremy Fitzhardinge
-- strict thread matches above, loose matches on Subject: below --
2008-04-29 7:26 [PATCH 0/10] Add generic helpers for arch IPI function calls #3 Jens Axboe
[not found] ` <1209453990-7735-1-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-29 7:26 ` [PATCH 2/10] x86: convert to generic helpers for IPI function calls Jens Axboe
2008-04-29 7:26 ` Jens Axboe
[not found] ` <1209453990-7735-3-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-29 20:35 ` Jeremy Fitzhardinge
2008-04-29 20:35 ` Jeremy Fitzhardinge
[not found] ` <481786A5.7010604-TSDbQ3PG+2Y@public.gmane.org>
2008-04-30 11:35 ` Jens Axboe
2008-04-30 11:35 ` Jens Axboe
[not found] ` <20080430113542.GZ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-30 12:20 ` Paul E. McKenney
2008-04-30 12:20 ` Paul E. McKenney
[not found] ` <20080430122001.GS11126-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2008-04-30 12:31 ` Jens Axboe
2008-04-30 12:31 ` Jens Axboe
[not found] ` <20080430123136.GB12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-30 14:51 ` Jeremy Fitzhardinge
2008-04-30 14:51 ` Jeremy Fitzhardinge
2008-04-30 21:39 ` Jeremy Fitzhardinge
2008-04-30 21:39 ` Jeremy Fitzhardinge