public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
@ 2008-07-29 23:32 Jeremy Fitzhardinge
  2008-07-29 23:43 ` Jeremy Fitzhardinge
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-29 23:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List

This adds 8 queues for smp_call_function(), in order to avoid a
bottleneck on a single global lock and list for function calls.  When
initiating a function call, the sender chooses a queue based on its
own processor id (if there are more than 8 processors, they hash down
to 8 queues).  It then sends an IPI to the corresponding vector for
that queue to each target CPU.  The target CPUs use the vector number
to determine which queue they should scan for work.

This should give smp_call_function the same performance
characteristics as the original x86-64 cross-cpu tlb flush code, which
used the same scheme.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/Kconfig                          |    4 ++++
 arch/x86/kernel/apic_32.c                 |   11 ++++++++++-
 arch/x86/kernel/entry_32.S                |   25 ++++++++++++++-----------
 arch/x86/kernel/entry_64.S                |   19 ++++++++++++++++---
 arch/x86/kernel/irqinit_64.c              |   11 ++++++++++-
 arch/x86/kernel/smp.c                     |   10 +++++++---
 arch/x86/xen/smp.c                        |   27 ++++++++++++++++++++++++++-
 include/asm-x86/hw_irq.h                  |   20 +++++++++-----------
 include/asm-x86/irq_vectors.h             |    6 ++++--
 include/asm-x86/mach-default/entry_arch.h |   16 +++++++++++++++-
 10 files changed, 115 insertions(+), 34 deletions(-)

===================================================================
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -178,6 +178,10 @@
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
 	select USE_GENERIC_SMP_HELPERS
 	default y
+
+config GENERIC_SMP_QUEUES
+       int
+       default "8"
 
 config X86_32_SMP
 	def_bool y
===================================================================
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1382,7 +1382,16 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
+		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
 
 	/* IPI for single call function */
 	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
===================================================================
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -688,19 +688,22 @@
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
-ENTRY(name)				\
-	RING0_INT_FRAME;		\
-	pushl $~(nr);			\
-	CFI_ADJUST_CFA_OFFSET 4;	\
-	SAVE_ALL;			\
-	TRACE_IRQS_OFF			\
-	movl %esp,%eax;			\
-	call smp_##name;		\
-	jmp ret_from_intr;		\
-	CFI_ENDPROC;			\
+#define __BUILD_INTERRUPT(name, func, nr)	\
+ENTRY(name)					\
+	RING0_INT_FRAME;			\
+	pushl $~(nr);				\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	SAVE_ALL;				\
+	TRACE_IRQS_OFF				\
+	movl %esp,%eax;				\
+	call func;				\
+	jmp ret_from_intr;			\
+	CFI_ENDPROC;				\
 ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	\
+	__BUILD_INTERRUPT(name, smp_##name, nr)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
===================================================================
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -869,9 +869,22 @@
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 END(reschedule_interrupt)
 
-ENTRY(call_function_interrupt)
-	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
+
+	.macro CALLFUNCTION_ENTRY num
+ENTRY(call_function_interrupt\num)
+	apicinterrupt CALL_FUNCTION_VECTOR_START+\num,smp_call_function_interrupt
+END(call_function_interrupt\num)
+	.endm
+
+	CALLFUNCTION_ENTRY 0
+	CALLFUNCTION_ENTRY 1
+	CALLFUNCTION_ENTRY 2
+	CALLFUNCTION_ENTRY 3
+	CALLFUNCTION_ENTRY 4
+	CALLFUNCTION_ENTRY 5
+	CALLFUNCTION_ENTRY 6
+	CALLFUNCTION_ENTRY 7
+
 ENTRY(call_function_single_interrupt)
 	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
 END(call_function_single_interrupt)
===================================================================
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -188,7 +188,16 @@
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
 	/* IPI for generic function call */
-	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+	BUILD_BUG_ON(CONFIG_GENERIC_SMP_QUEUES !=
+		     (CALL_FUNCTION_VECTOR_END - CALL_FUNCTION_VECTOR_START + 1));
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+0, call_function_interrupt0);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+1, call_function_interrupt1);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+2, call_function_interrupt2);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+3, call_function_interrupt3);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+4, call_function_interrupt4);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+5, call_function_interrupt5);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+6, call_function_interrupt6);
+	alloc_intr_gate(CALL_FUNCTION_VECTOR_START+7, call_function_interrupt7);
 
 	/* IPI for generic single function call */
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
===================================================================
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -129,15 +129,16 @@
 void native_send_call_func_ipi(cpumask_t mask)
 {
 	cpumask_t allbutself;
+	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
 
 	allbutself = cpu_online_map;
 	cpu_clear(smp_processor_id(), allbutself);
 
 	if (cpus_equal(mask, allbutself) &&
 	    cpus_equal(cpu_online_map, cpu_callout_map))
-		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+		send_IPI_allbutself(CALL_FUNCTION_VECTOR_START + queue);
 	else
-		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR_START + queue);
 }
 
 static void stop_this_cpu(void *dummy)
@@ -187,9 +188,12 @@
 
 void smp_call_function_interrupt(struct pt_regs *regs)
 {
+	unsigned queue;
+
 	ack_APIC_irq();
 	irq_enter();
-	generic_smp_call_function_interrupt(0);
+	queue = ~regs->orig_ax - CALL_FUNCTION_VECTOR_START;
+	generic_smp_call_function_interrupt(queue);
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -41,6 +41,8 @@
 static DEFINE_PER_CPU(int, callfunc_irq);
 static DEFINE_PER_CPU(int, callfuncsingle_irq);
 static DEFINE_PER_CPU(int, debug_irq) = -1;
+
+static DEFINE_PER_CPU(unsigned, callfunc_queue);
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -381,6 +383,21 @@
 static void xen_smp_send_call_function_ipi(cpumask_t mask)
 {
 	int cpu;
+	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
+
+	/*
+	 * We can't afford to allocate N callfunc vectors * M cpu
+	 * interrupts, so we just need to fake it for now.  We can fix
+	 * this when we integrate event channels at the vector level.
+	 * For now, we just leave a hint for the target cpus for which
+	 * queue to start on, but they still need to search them all.
+	 * (Which is not really much worse than having a single
+	 * queue.)
+	 */
+	for_each_cpu_mask_nr(cpu, mask)
+		per_cpu(callfunc_queue, cpu) = queue;
+
+	wmb();			/* set queues before sending interrupt */
 
 	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
@@ -400,8 +417,16 @@
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
+	unsigned start_queue = __get_cpu_var(callfunc_queue);
+	unsigned queue;
+
 	irq_enter();
-	generic_smp_call_function_interrupt(0);
+	queue = start_queue;
+	do {
+		generic_smp_call_function_interrupt(queue);
+		queue = (queue + 1) % CONFIG_GENERIC_SMP_QUEUES;
+	} while(queue != start_queue);
+
 #ifdef CONFIG_X86_32
 	__get_cpu_var(irq_stat).irq_call_count++;
 #else
===================================================================
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -34,20 +34,18 @@
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
 
-extern void invalidate_interrupt(void);
-extern void invalidate_interrupt0(void);
-extern void invalidate_interrupt1(void);
-extern void invalidate_interrupt2(void);
-extern void invalidate_interrupt3(void);
-extern void invalidate_interrupt4(void);
-extern void invalidate_interrupt5(void);
-extern void invalidate_interrupt6(void);
-extern void invalidate_interrupt7(void);
-
 extern void irq_move_cleanup_interrupt(void);
 extern void threshold_interrupt(void);
 
-extern void call_function_interrupt(void);
+extern void call_function_interrupt0(void);
+extern void call_function_interrupt1(void);
+extern void call_function_interrupt2(void);
+extern void call_function_interrupt3(void);
+extern void call_function_interrupt4(void);
+extern void call_function_interrupt5(void);
+extern void call_function_interrupt6(void);
+extern void call_function_interrupt7(void);
+
 extern void call_function_single_interrupt(void);
 
 /* PIC specific functions */
===================================================================
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -62,8 +62,9 @@
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
 # define RESCHEDULE_VECTOR		0xfc
-# define CALL_FUNCTION_VECTOR		0xfb
 # define CALL_FUNCTION_SINGLE_VECTOR	0xfa
+# define CALL_FUNCTION_VECTOR_END	0xf8
+# define CALL_FUNCTION_VECTOR_START	0xf1 /* f1-f8 multiple callfunction queues */
 # define THERMAL_APIC_VECTOR		0xf0
 
 #else
@@ -71,10 +72,11 @@
 #define SPURIOUS_APIC_VECTOR		0xff
 #define ERROR_APIC_VECTOR		0xfe
 #define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
 #define THRESHOLD_APIC_VECTOR		0xf9
+#define CALL_FUNCTION_VECTOR_END	0xf7
+#define CALL_FUNCTION_VECTOR_START	0xf0 /* f0-f7 multiple callfunction queues */
 
 #endif
 
===================================================================
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -11,8 +11,22 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+
+#define BUILD_CALLFUNCTION(n)					\
+	__BUILD_INTERRUPT(call_function_interrupt##n,		\
+			  smp_call_function_interrupt,		\
+			  CALL_FUNCTION_VECTOR_START + n)
+BUILD_CALLFUNCTION(0)
+BUILD_CALLFUNCTION(1)
+BUILD_CALLFUNCTION(2)
+BUILD_CALLFUNCTION(3)
+BUILD_CALLFUNCTION(4)
+BUILD_CALLFUNCTION(5)
+BUILD_CALLFUNCTION(6)
+BUILD_CALLFUNCTION(7)
+
+#undef BUILD_CALLFUNCTION
 #endif
 
 /*



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-29 23:32 [PATCH 2/2] x86: implement multiple queues for smp function call IPIs Jeremy Fitzhardinge
@ 2008-07-29 23:43 ` Jeremy Fitzhardinge
  2008-07-30  0:13 ` Andi Kleen
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-29 23:43 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List

Jeremy Fitzhardinge wrote:
> This adds 8 queues for smp_call_function(), in order to avoid a
> bottleneck on a single global lock and list for function calls.  When
> initiating a function call, the sender chooses a queue based on its
> own processor id (if there are more than 8 processors, they hash down
> to 8 queues).  It then sends an IPI to the corresponding vector for
> that queue to each target CPU.  The target CPUs use the vector number
> to determine which queue they should scan for work.

I should point out that this patch depends on the earlier tlb.c 
unification patch, since it recycles the IPI vectors that series freed up.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-29 23:32 [PATCH 2/2] x86: implement multiple queues for smp function call IPIs Jeremy Fitzhardinge
  2008-07-29 23:43 ` Jeremy Fitzhardinge
@ 2008-07-30  0:13 ` Andi Kleen
  2008-07-30  0:44   ` Jeremy Fitzhardinge
  2008-07-30  4:55 ` Nick Piggin
  2008-07-31 22:08 ` Ingo Molnar
  3 siblings, 1 reply; 12+ messages in thread
From: Andi Kleen @ 2008-07-30  0:13 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Ingo Molnar, Nick Piggin, Andi Kleen, Linux Kernel Mailing List

On Tue, Jul 29, 2008 at 04:32:57PM -0700, Jeremy Fitzhardinge wrote:
> This adds 8 queues for smp_call_function(), in order to avoid a

Now that we have per CPU IDT and there's no global bottleneck anymore
I think it would be actually fine to use
more than 8 vectors. 32 or 64 might be a better default.

> void native_send_call_func_ipi(cpumask_t mask)
> {
> 	cpumask_t allbutself;
> +	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;

Does this really always run with preemption disabled?

Did I miss it but where is the per vector lock? 

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-30  0:13 ` Andi Kleen
@ 2008-07-30  0:44   ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-30  0:44 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Ingo Molnar, Nick Piggin, Linux Kernel Mailing List

Andi Kleen wrote:
> On Tue, Jul 29, 2008 at 04:32:57PM -0700, Jeremy Fitzhardinge wrote:
>   
>> This adds 8 queues for smp_call_function(), in order to avoid a
>>     
>
> Now that we have per CPU IDT and there's no global bottleneck anymore
> I think it would be actually fine to use
> more than 8 vectors. 32 or 64 might be a better default.
>   

Well, there's no point in having more vectors than CPUs, and a bit of 
doubling up doesn't hurt too much.  So I think 8 is a good default for 
normal sized machines.  But I can see that being able to add more 
vectors for large machines might be helpful.  I guess it really depends 
on what the fan-out is for multicast messages.

I dunno, maybe it makes sense to take numa topology into account, on the 
assumption that 1) most cross-cpu function calls will be tlb flushes now 
(or at least, sending to mm->cpu_vm_mask), and 2) most tlb flushes will 
be between cpus within one node.

>> void native_send_call_func_ipi(cpumask_t mask)
>> {
>> 	cpumask_t allbutself;
>> +	unsigned queue = smp_processor_id() % CONFIG_GENERIC_SMP_QUEUES;
>>     
>
> Does this really always run with preemption disabled?

Think so, but I'll check again.  One of my TODO list items is to check 
whether smp_call_function_mask should disable preemption for itself, or 
at least WARN_ON if its called with preemption enabled.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-29 23:32 [PATCH 2/2] x86: implement multiple queues for smp function call IPIs Jeremy Fitzhardinge
  2008-07-29 23:43 ` Jeremy Fitzhardinge
  2008-07-30  0:13 ` Andi Kleen
@ 2008-07-30  4:55 ` Nick Piggin
  2008-07-31 22:08 ` Ingo Molnar
  3 siblings, 0 replies; 12+ messages in thread
From: Nick Piggin @ 2008-07-30  4:55 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ingo Molnar, Andi Kleen, Linux Kernel Mailing List

On Wednesday 30 July 2008 09:32, Jeremy Fitzhardinge wrote:
> This adds 8 queues for smp_call_function(), in order to avoid a
> bottleneck on a single global lock and list for function calls.  When
> initiating a function call, the sender chooses a queue based on its
> own processor id (if there are more than 8 processors, they hash down
> to 8 queues).  It then sends an IPI to the corresponding vector for
> that queue to each target CPU.  The target CPUs use the vector number
> to determine which queue they should scan for work.
>
> This should give smp_call_function the same performance
> characteristics as the original x86-64 cross-cpu tlb flush code, which
> used the same scheme.

Yep, I'm much happier if you go this way to do it. This way hopefully
eventually we can extend the call function infrastructure to also
generalise the UV type payload-IPIs and fold all that in here too.

I don't _think_ there should any longer be any reason why it should
be slower than the special case code (at least nothing fundamental
that I can see).

Actually it should have a chance to be faster because we should be able
to queue up multiple tlb flushes into each global call queue, rather
than executing them strictly one at a time under the tlbstate lock like
we do now.

Anyway it looks like Andi is reviewing the fine details, so I'm happy
with this if he is :) Thanks!

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-29 23:32 [PATCH 2/2] x86: implement multiple queues for smp function call IPIs Jeremy Fitzhardinge
                   ` (2 preceding siblings ...)
  2008-07-30  4:55 ` Nick Piggin
@ 2008-07-31 22:08 ` Ingo Molnar
  2008-07-31 22:12   ` Andi Kleen
  2008-07-31 22:23   ` Jeremy Fitzhardinge
  3 siblings, 2 replies; 12+ messages in thread
From: Ingo Molnar @ 2008-07-31 22:08 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> This adds 8 queues for smp_call_function(), in order to avoid a 
> bottleneck on a single global lock and list for function calls.  When 
> initiating a function call, the sender chooses a queue based on its 
> own processor id (if there are more than 8 processors, they hash down 
> to 8 queues).  It then sends an IPI to the corresponding vector for 
> that queue to each target CPU.  The target CPUs use the vector number 
> to determine which queue they should scan for work.
>
> This should give smp_call_function the same performance 
> characteristics as the original x86-64 cross-cpu tlb flush code, which 
> used the same scheme.

heh, nice :-)

Before going into all the fine details and trying our luck in tip/master 
QA, i'm a bit worried about hw compatibility in general though. APICs 
have been flaky since the beginnings of times. We had erratas in the 
area of local timer IRQs (IPIs) overlapping with IPIs, etc. - so i'd not 
bet the farm on all APICs being able to handle a _lot_ more overlapped 
inter-CPU IPIs than we do currently. (which basically was just three of 
them until now, and now four with the new SMP cross-call IPIs)

So this _has_ to be approached defensively. It _should_ work, and i'm 
all in favor of utilizing hardware resources more fully, but it's an 
entirely new mode of operation for the hardware. I think a Kconfig 
option (which defaults to off), and a boot option to disable it would be 
nice, so that we can introduce this gently, at least initially. Then 
when we see that it's 100% trouble-free we can flip around the default.

Plus, would it be possible to shape this a bit more dynamically? I like 
8 as a nice round number, but i bet some folks would like to have 16, 
some would like to have 4 ... Perhaps even making it dynamic (so that we 
can turn it all off in the case of trouble with certain CPU/APIC 
versions).

Hm?

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-31 22:08 ` Ingo Molnar
@ 2008-07-31 22:12   ` Andi Kleen
  2008-07-31 22:23   ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 12+ messages in thread
From: Andi Kleen @ 2008-07-31 22:12 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, Nick Piggin, Andi Kleen,
	Linux Kernel Mailing List

> Before going into all the fine details an trying our luck in tip/master 
> QA, i'm a bit worried about hw compatibility in general though. APICs 
> have been flaky since the beginnings of times. We had erratas in the 
> area of local timer IRQs(IPIs) overlapping with IPIs, etc. - so i'd not 
> bet the farm on all APICs being able to handle a _lot_ more overlapped 
> inter-CPU IPIs than we do currently. (which basically was just three of 
> them until now, and now four with the new SMP cross-call IPIs)

At least on 64bit systems this configuration has been already tested
for years with the 8 vector TLB flush. I am not aware
of any problems caused by it. Using them there should be fine.

So your cautious approach would only be potentially useful on older 32bit only
systems.

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-31 22:08 ` Ingo Molnar
  2008-07-31 22:12   ` Andi Kleen
@ 2008-07-31 22:23   ` Jeremy Fitzhardinge
  2008-07-31 22:42     ` Ingo Molnar
  1 sibling, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-07-31 22:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List

Ingo Molnar wrote:
>
> heh, nice :-)
>
> Before going into all the fine details an trying our luck in tip/master 
> QA, i'm a bit worried about hw compatibility in general though. APICs 
> have been flaky since the beginnings of times. We had erratas in the 
> area of local timer IRQs(IPIs) overlapping with IPIs, etc. - so i'd not 
> bet the farm on all APICs being able to handle a _lot_ more overlapped 
> inter-CPU IPIs than we do currently. (which basically was just three of 
> them until now, and now four with the new SMP cross-call IPIs)
>
> So this _has_ to be approached defensively. It _should_ work, and i'm 
> all in favor of utilizing hardware resources more fully, but it's an 
> entirely new mode of operation for the hardware. I think a Kconfig 
> option (which defaults to off), and a boot option to disable it would be 
> nice, so that we can introduce this gently, at least initially. Then 
> when we see that it's 100% trouble-free we can flip around the default.
>   

As Andi pointed out, this is more or less functionally identical to the 
code I ripped out of tlb_64.c, so this mode of operation has had lots of 
exposure on the 64-bit side.  Because the number of queues is a CONFIG 
variable, it would be relatively easy to make it a real config option, 
and/or use different numbers for 32 and 64-bit.  Choosing 1 as the 
number of queues will make it behave exactly as the current code does.

I'm not really familiar with all the ins and outs of apic bugs.  What's 
the issue you're concerned about?

> Plus, would it be possible to shape this a bit more dynamically? I like 
> 8 as a nice round number, but i bet some folks would like to have 16, 
> some would like to have 4 ... Perhaps even making it dynamic (so that we 
> can turn it all off in the case of trouble with certain CPU/APIC 
> versions).
>
> Hm?
>   

Sure, that's possible in a followup patch.  There's a pile of repeated 
boilerplate code which would need to be cleaned up to make it 
configurable and/or runtime changable.  Also, it would need some way to 
allocate a contiguous block of vectors; I'm not sure if the just-posted 
SGI patch allows that...

It also occurred to me that it might be more interesting to parameterise 
the queues - and the mapping of cpus->queues - in a more topology-aware 
way than simply NQUEUES=NCPUS/x, queue=cpuid % NQUEUES.  But I haven't 
given it much thought.

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-31 22:23   ` Jeremy Fitzhardinge
@ 2008-07-31 22:42     ` Ingo Molnar
  2008-08-01  4:58       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2008-07-31 22:42 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

>> So this _has_ to be approached defensively. It _should_ work, and i'm 
>> all in favor of utilizing hardware resources more fully, but it's an 
>> entirely new mode of operation for the hardware. I think a Kconfig 
>> option (which defaults to off), and a boot option to disable it would 
>> be nice, so that we can introduce this gently, at least initially. 
>> Then when we see that it's 100% trouble-free we can flip around the 
>> default.
>
> As Andi pointed out, this is more or less functionally identical to 
> the code I ripped out of tlb_64.c, so this mode of operation has had 
> lots of exposure on the 64-bit side.  Because the number of queues is 
> a CONFIG variable, it would be relatively easy to make it a real 
> config option, and/or use different numbers for 32 and 64-bit.  
> Choosing 1 as the number of queues will make it behave exactly as the 
> current code does.
>
> I'm not really familiar with all the ins and outs of apic bugs.  
> What's the issue you're concerned about?

Yes on the 64-bit side we've had NUM_INVALIDATE_TLB_VECTORS (==8) for a 
long time, but note that 64-bit is obviously for more modern CPUs. What 
i'm mindful about (i'm not _that_ worried) are fragile APICs and unknown 
erratas.

The other issue is that the concurrency pattern changes somewhat and 
becomes more aggressive. The existing 64-bit special-purpose TLB flush 
code uses in essence synchronous waiting for that specific IPI that 
belongs to that CPU, it sends the IPI then waits for the acknowledgement 
by polling the flush mask:

        send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);

        while (!cpus_empty(f->flush_cpumask))
                cpu_relax();

while with generic IPIs we could have asynchronous IPIs as well.

So with the TLB flush code there's never any true "multiple outstanding 
IPIs" scenario for the same IPI, for 8 CPUs and fewer. (which is the 
predominant majority of existing hardware)

> It also occurred to me that it might be more interesting to 
> parameterise the queues - and the mapping of cpus->queues - in a more 
> topology-aware way than simply NQUEUES=NCPUS/x, queue=cpuid % NQUEUES.  
> But I haven't given it much thought.

i'd suggest we keep it at the current simple modulo rule.

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-07-31 22:42     ` Ingo Molnar
@ 2008-08-01  4:58       ` Jeremy Fitzhardinge
  2008-08-01  9:09         ` Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Jeremy Fitzhardinge @ 2008-08-01  4:58 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List

Ingo Molnar wrote:
> Yes on the 64-bit side we've had NUM_INVALIDATE_TLB_VECTORS (==8) for a 
> long time, but note that 64-bit is obviously for more modern CPUs. What 
> i'm mindful about (i'm not _that_ worried) are fragile APICs and unknown 
> erratas.
>   

Well, the whole exercise is only useful if you have a relatively large 
number of CPUs, which presumably means you have relatively modern 
APICs.  If we set the number of queues to 1 for < 4 CPUs, would that 
avoid the problem APICs?

    J

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-08-01  4:58       ` Jeremy Fitzhardinge
@ 2008-08-01  9:09         ` Ingo Molnar
  2008-08-01 14:17           ` Andi Kleen
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2008-08-01  9:09 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Nick Piggin, Andi Kleen, Linux Kernel Mailing List


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> Ingo Molnar wrote:
>> Yes on the 64-bit side we've had NUM_INVALIDATE_TLB_VECTORS (==8) for a 
>> long time, but note that 64-bit is obviously for more modern CPUs. What 
>> i'm mindful about (i'm not _that_ worried) are fragile APICs and 
>> unknown erratas.
>>   
>
> Well, the whole exercise is only useful if you have a relatively large 
> number of CPUs, which presumably means you have relatively modern 
> APICs.  If we set the number of queues to 1 for < 4 CPUs, would that 
> avoid the problem APICs?

hm, but such special casing would reduce testing exposure and would also 
needlessly penalize boxes that work perfectly fine.

So ... how about my original suggestion, to add a (default-off) boot 
option that reduces the queues to 1? I.e. lets have something in place 
from day 1 on that could be quirk-handler-driven, should the need arise. 
"apic=simpleipi" or something like that.

In a year or two, once this all has proven itself fine on a broad range 
of systems, we'll laugh out loud at being so cautious and can remove the 
boot option for good ;-)

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] x86: implement multiple queues for smp function call IPIs
  2008-08-01  9:09         ` Ingo Molnar
@ 2008-08-01 14:17           ` Andi Kleen
  0 siblings, 0 replies; 12+ messages in thread
From: Andi Kleen @ 2008-08-01 14:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeremy Fitzhardinge, Nick Piggin, Andi Kleen,
	Linux Kernel Mailing List

> So ... how about my original suggestion, to add a (default-off) boot 
> option that reduces the queues to 1? I.e. lets have something in place 
> from day 1 on that could be quirk-handler-driven, should the need arise. 
> "apic=simpleipi" or something like that.
> 
> In a year or two, once this all has proven itself fine on a broad range 
> of systems, we'll laugh out loud at being so cautious and can remove the 
> boot option for good ;-)

I agree that adding a boot option to turn it off is a good idea (assuming
it doesn't take too much code to implement). Adding such paranoid boot options early 
has saved my ass a few times in the past.

Only tricky bit is use if () not "modulo variable", because the if ()
will be much faster than a division through an arbitrary number.

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2008-08-01 14:17 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-29 23:32 [PATCH 2/2] x86: implement multiple queues for smp function call IPIs Jeremy Fitzhardinge
2008-07-29 23:43 ` Jeremy Fitzhardinge
2008-07-30  0:13 ` Andi Kleen
2008-07-30  0:44   ` Jeremy Fitzhardinge
2008-07-30  4:55 ` Nick Piggin
2008-07-31 22:08 ` Ingo Molnar
2008-07-31 22:12   ` Andi Kleen
2008-07-31 22:23   ` Jeremy Fitzhardinge
2008-07-31 22:42     ` Ingo Molnar
2008-08-01  4:58       ` Jeremy Fitzhardinge
2008-08-01  9:09         ` Ingo Molnar
2008-08-01 14:17           ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox