public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] generic smp function call: add multiple queues for scaling
@ 2008-07-29 23:32 Jeremy Fitzhardinge
  2008-07-30  0:26 ` Andi Kleen
  0 siblings, 1 reply; 3+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-29 23:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List

This patch allows an architecture to have multiple smp_call_function
queues, in order to reduce contention on a single queue and lock.  By
default there is still one queue, so this patch makes no functional
change.

However, an architecture can set CONFIG_GENERIC_SMP_QUEUES to enable a
certain number of queues.  It's expected it will set up an IPI
vector for each queue, and it should pass the appropriate queue number
into generic_smp_call_function_interrupt.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/alpha/kernel/smp.c             |    2 -
 arch/arm/kernel/smp.c               |    2 -
 arch/ia64/kernel/smp.c              |    2 -
 arch/m32r/kernel/smp.c              |    2 -
 arch/mips/kernel/smp.c              |    2 -
 arch/parisc/kernel/smp.c            |    2 -
 arch/powerpc/kernel/smp.c           |    2 -
 arch/sparc64/kernel/smp.c           |    2 -
 arch/x86/kernel/smp.c               |    2 -
 arch/x86/mach-voyager/voyager_smp.c |    2 -
 arch/x86/xen/smp.c                  |    2 -
 include/linux/smp.h                 |    2 -
 kernel/smp.c                        |   70 ++++++++++++++++++++++++++++-------
 13 files changed, 69 insertions(+), 25 deletions(-)

===================================================================
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -588,7 +588,7 @@
 			break;
 
 		case IPI_CALL_FUNC:
-			generic_smp_call_function_interrupt();
+			generic_smp_call_function_interrupt(0);
 			break;
 
 		case IPI_CALL_FUNC_SINGLE:
===================================================================
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -481,7 +481,7 @@
 				break;
 
 			case IPI_CALL_FUNC:
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 
 			case IPI_CALL_FUNC_SINGLE:
===================================================================
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -114,7 +114,7 @@
 				stop_this_cpu();
 				break;
 			case IPI_CALL_FUNC:
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 			case IPI_CALL_FUNC_SINGLE:
 				generic_smp_call_function_single_interrupt();
===================================================================
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -576,7 +576,7 @@
 void smp_call_function_interrupt(void)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	irq_exit();
 }
 
===================================================================
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -151,7 +151,7 @@
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	irq_exit();
 }
 
===================================================================
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -179,7 +179,7 @@
 
 			case IPI_CALL_FUNC:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC\n", this_cpu);
-				generic_smp_call_function_interrupt();
+				generic_smp_call_function_interrupt(0);
 				break;
 
 			case IPI_CALL_FUNC_SINGLE:
===================================================================
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -95,7 +95,7 @@
 {
 	switch(msg) {
 	case PPC_MSG_CALL_FUNCTION:
-		generic_smp_call_function_interrupt();
+		generic_smp_call_function_interrupt(0);
 		break;
 	case PPC_MSG_RESCHEDULE:
 		/* XXX Do we have to do this? */
===================================================================
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -811,7 +811,7 @@
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 }
 
 void smp_call_function_single_client(int irq, struct pt_regs *regs)
===================================================================
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -189,7 +189,7 @@
 {
 	ack_APIC_irq();
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
===================================================================
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -957,7 +957,7 @@
 static void smp_call_function_interrupt(void)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 }
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
-	generic_smp_call_function_interrupt();
+	generic_smp_call_function_interrupt(0);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
===================================================================
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -73,7 +73,7 @@
  */
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 void generic_smp_call_function_single_interrupt(void);
-void generic_smp_call_function_interrupt(void);
+void generic_smp_call_function_interrupt(unsigned queue);
 void ipi_call_lock(void);
 void ipi_call_unlock(void);
 void ipi_call_lock_irq(void);
===================================================================
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -11,9 +11,19 @@
 #include <linux/rculist.h>
 #include <linux/smp.h>
 
+#ifdef CONFIG_GENERIC_SMP_QUEUES
+#define	NQUEUES	CONFIG_GENERIC_SMP_QUEUES
+#else
+#define	NQUEUES	1
+#endif
+
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+struct queue {
+	struct list_head list;
+	spinlock_t lock;
+};
+
+static __cacheline_aligned_in_smp struct queue call_function_queues[NQUEUES];
 
 enum {
 	CSD_FLAG_WAIT		= 0x01,
@@ -96,17 +106,20 @@
  * Invoked by arch to handle an IPI for call function. Must be called with
  * interrupts disabled.
  */
-void generic_smp_call_function_interrupt(void)
+void generic_smp_call_function_interrupt(unsigned queue_no)
 {
 	struct call_function_data *data;
+	struct queue *queue;
 	int cpu = get_cpu();
+
+	queue = &call_function_queues[queue_no];
 
 	/*
 	 * It's ok to use list_for_each_rcu() here even though we may delete
 	 * 'pos', since list_del_rcu() doesn't clear ->next
 	 */
 	rcu_read_lock();
-	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+	list_for_each_entry_rcu(data, &queue->list, csd.list) {
 		int refs;
 
 		if (!cpu_isset(cpu, data->cpumask))
@@ -124,9 +137,9 @@
 		if (refs)
 			continue;
 
-		spin_lock(&call_function_lock);
+		spin_lock(&queue->lock);
 		list_del_rcu(&data->csd.list);
-		spin_unlock(&call_function_lock);
+		spin_unlock(&queue->lock);
 
 		if (data->csd.flags & CSD_FLAG_WAIT) {
 			/*
@@ -285,6 +298,8 @@
 	cpumask_t allbutself;
 	unsigned long flags;
 	int cpu, num_cpus;
+	unsigned queue_no;
+	struct queue *queue;
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
@@ -294,6 +309,9 @@
 	cpu_clear(cpu, allbutself);
 	cpus_and(mask, mask, allbutself);
 	num_cpus = cpus_weight(mask);
+
+	queue_no = cpu % NQUEUES;
+	queue = &call_function_queues[queue_no];
 
 	/*
 	 * If zero CPUs, return. If just a single CPU, turn this request
@@ -323,9 +341,9 @@
 	data->refs = num_cpus;
 	data->cpumask = mask;
 
-	spin_lock_irqsave(&call_function_lock, flags);
-	list_add_tail_rcu(&data->csd.list, &call_function_queue);
-	spin_unlock_irqrestore(&call_function_lock, flags);
+	spin_lock_irqsave(&queue->lock, flags);
+	list_add_tail_rcu(&data->csd.list, &queue->list);
+	spin_unlock_irqrestore(&queue->lock, flags);
 
 	/* Send a message to all CPUs in the map */
 	arch_send_call_function_ipi(mask);
@@ -366,20 +384,46 @@
 
 void ipi_call_lock(void)
 {
-	spin_lock(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_lock(&call_function_queues[i].lock);
 }
 
 void ipi_call_unlock(void)
 {
-	spin_unlock(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_unlock(&call_function_queues[i].lock);
 }
 
 void ipi_call_lock_irq(void)
 {
-	spin_lock_irq(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_lock_irq(&call_function_queues[i].lock);
 }
 
 void ipi_call_unlock_irq(void)
 {
-	spin_unlock_irq(&call_function_lock);
+	int i;
+
+	for(i = 0; i < NQUEUES; i++)
+		spin_unlock_irq(&call_function_queues[i].lock);
 }
+
+
+static int __init init_smp_function_call(void)
+{
+	int i;
+
+	for(i = 0; i < NQUEUES; i++) {
+		INIT_LIST_HEAD(&call_function_queues[i].list);
+		spin_lock_init(&call_function_queues[i].lock);
+	}
+
+	return 0;
+}
+early_initcall(init_smp_function_call);



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] generic smp function call: add multiple queues for scaling
  2008-07-29 23:32 [PATCH 1/2] generic smp function call: add multiple queues for scaling Jeremy Fitzhardinge
@ 2008-07-30  0:26 ` Andi Kleen
  2008-07-30  0:44   ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 3+ messages in thread
From: Andi Kleen @ 2008-07-30  0:26 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Nick Piggin, Andi Kleen, Linux Kernel Mailing List

Ah I see the locking is here. Never mind the earlier comment.

> +#define	NQUEUES	CONFIG_GENERIC_SMP_QUEUES
> +#else
> +#define	NQUEUES	1
> +#endif
> +
> static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
> -static LIST_HEAD(call_function_queue);
> -__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
> +struct queue {
> +	struct list_head list;
> +	spinlock_t lock;
> +};
> +
> +static __cacheline_aligned_in_smp struct queue 
> call_function_queues[NQUEUES];

Hmm are you sure this aligns the individual elements and not the whole
array? 

> void ipi_call_unlock(void)
> {
> -	spin_unlock(&call_function_lock);
> +	int i;
> +
> +	for(i = 0; i < NQUEUES; i++)
> +		spin_unlock(&call_function_queues[i].lock);
> }
> 
> void ipi_call_lock_irq(void)
> {
> -	spin_lock_irq(&call_function_lock);
> +	int i;
> +
> +	for(i = 0; i < NQUEUES; i++)
> +		spin_lock_irq(&call_function_queues[i].lock);
> }
> 
> void ipi_call_unlock_irq(void)
> {
> -	spin_unlock_irq(&call_function_lock);
> +	int i;
> +
> +	for(i = 0; i < NQUEUES; i++)
> +		spin_unlock_irq(&call_function_queues[i].lock);
> }
> +
> +
> +static int __init init_smp_function_call(void)
> +{
> +	int i;
> +
> +	for(i = 0; i < NQUEUES; i++) {
> +		INIT_LIST_HEAD(&call_function_queues[i].list);
> +		spin_lock_init(&call_function_queues[i].lock);
> +	}
> +
> +	return 0;
> +}
> +early_initcall(init_smp_function_call);

You can avoid all that init gunk by using the [0 ... NQUEUES] = ..
gcc extension in the initializer.

-Andi

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/2] generic smp function call: add multiple queues for scaling
  2008-07-30  0:26 ` Andi Kleen
@ 2008-07-30  0:44   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 3+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-30  0:44 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Ingo Molnar, Nick Piggin, Linux Kernel Mailing List

Andi Kleen wrote:
> Ah I see the locking is here. Never mind the earlier comment.
>
>   
>> +#define	NQUEUES	CONFIG_GENERIC_SMP_QUEUES
>> +#else
>> +#define	NQUEUES	1
>> +#endif
>> +
>> static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
>> -static LIST_HEAD(call_function_queue);
>> -__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
>> +struct queue {
>> +	struct list_head list;
>> +	spinlock_t lock;
>> +};
>> +
>> +static __cacheline_aligned_in_smp struct queue 
>> call_function_queues[NQUEUES];
>>     
>
> Hmm are you sure this aligns the individual elements and not the whole
> array? 
>   

Hm, that's a point.  I guess the __cacheline_aligned_in_smp should be on 
the struct definition.

>> +static int __init init_smp_function_call(void)
>> +{
>> +	int i;
>> +
>> +	for(i = 0; i < NQUEUES; i++) {
>> +		INIT_LIST_HEAD(&call_function_queues[i].list);
>> +		spin_lock_init(&call_function_queues[i].lock);
>> +	}
>> +
>> +	return 0;
>> +}
>> +early_initcall(init_smp_function_call);
>>     
>
> You can avoid all that init gunk by using the [0 ... NQUEUES] = ..
> gcc extension in the initializer.
>   

Are you sure?  I tried using it, but couldn't work out how.  Remember 
the list head init needs to point to itself, which means it's not 
constant across the array.

    J

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-07-30  0:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-29 23:32 [PATCH 1/2] generic smp function call: add multiple queues for scaling Jeremy Fitzhardinge
2008-07-30  0:26 ` Andi Kleen
2008-07-30  0:44   ` Jeremy Fitzhardinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox