All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] stack overflow during pv-guest restore
@ 2008-01-31 20:05 Don Dutile
  2008-02-01  9:33 ` Jan Beulich
  0 siblings, 1 reply; 4+ messages in thread
From: Don Dutile @ 2008-01-31 20:05 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1223 bytes --]


When secondary cpus are initialized during an i386 pv-guest restore
(due to save/restore or live migration), and the guest has
a load that generates a fair number of interrupts (e.g., parallel kernel make),
a stack overflow can occur because cpu_initialize_context() has
a 2800 byte structure it declares on its stack.  linux-i386 has 4K stacks, by default.
Using 2800 bytes out of 4K by a single function in a call list isn't nice;
add the beginning of interrupt handling at just the right point, before the
switch to the interrupt stack is made... and... the scale tips...

Simple fix: malloc & free structure as needed.

Would fail save/restore testing of an i386-guest running a parallel, kernel make after
50->100 save/restores;  with the fix, haven't seen it fail after 650 save/restores.

Note: this is a basic port of this function in Jeremy Fitzharinge's Xen implementation of pv-ops
       in upstream Linux, part of 15/32 patch, Xen SMP guest support.

Original patch done on rhel5; did a simple diff & merge to xen-unstable's version
of smpboot.c to generate the attached patch, so it cleanly applies; but
haven't built/run/tested the xen-unstable version.

Signed-off-by: Donald Dutile <ddutile@redhat.com>



[-- Attachment #2: hotplug-vcpu-stack-overflow.patch --]
[-- Type: text/x-patch, Size: 4204 bytes --]

--- linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c.orig	2008-01-31 13:45:10.000000000 -0500
+++ linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c	2008-01-31 13:56:07.000000000 -0500
@@ -180,9 +180,9 @@
 	cpu_idle();
 }
 
-static void __cpuinit cpu_initialize_context(unsigned int cpu)
+static int cpu_initialize_context(unsigned int cpu)
 {
-	vcpu_guest_context_t ctxt;
+	vcpu_guest_context_t *ctxt;
 	struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
 	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
@@ -191,58 +191,65 @@
 #endif
 
 	if (cpu_test_and_set(cpu, cpu_initialized_map))
-		return;
+		return 0;
 
-	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (ctxt == NULL)
+		return -ENOMEM;
 
-	ctxt.flags = VGCF_IN_KERNEL;
-	ctxt.user_regs.ds = __USER_DS;
-	ctxt.user_regs.es = __USER_DS;
-	ctxt.user_regs.fs = 0;
-	ctxt.user_regs.gs = 0;
-	ctxt.user_regs.ss = __KERNEL_DS;
-	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
+	memset(ctxt, 0, sizeof(*ctxt));
 
-	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+	ctxt->flags = VGCF_IN_KERNEL;
+	ctxt->user_regs.ds = __USER_DS;
+	ctxt->user_regs.es = __USER_DS;
+	ctxt->user_regs.fs = 0;
+	ctxt->user_regs.gs = 0;
+	ctxt->user_regs.ss = __KERNEL_DS;
+	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+	ctxt->user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
 
-	smp_trap_init(ctxt.trap_ctxt);
+	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
-	ctxt.ldt_ents = 0;
+	smp_trap_init(ctxt->trap_ctxt);
 
-	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
-	ctxt.gdt_ents      = gdt_descr->size / 8;
+	ctxt->ldt_ents = 0;
+
+	ctxt->gdt_frames[0] = virt_to_mfn(gdt_descr->address);
+	ctxt->gdt_ents      = gdt_descr->size / 8;
 
 #ifdef __i386__
-	ctxt.user_regs.cs = __KERNEL_CS;
-	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+	ctxt->user_regs.cs = __KERNEL_CS;
+	ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
 
-	ctxt.kernel_ss = __KERNEL_DS;
-	ctxt.kernel_sp = idle->thread.esp0;
+	ctxt->kernel_ss = __KERNEL_DS;
+	ctxt->kernel_sp = idle->thread.esp0;
 
-	ctxt.event_callback_cs     = __KERNEL_CS;
-	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-	ctxt.failsafe_callback_cs  = __KERNEL_CS;
-	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+	ctxt->event_callback_cs     = __KERNEL_CS;
+	ctxt->event_callback_eip    = (unsigned long)hypervisor_callback;
+	ctxt->failsafe_callback_cs  = __KERNEL_CS;
+	ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
 #else /* __x86_64__ */
-	ctxt.user_regs.cs = __KERNEL_CS;
-	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+	ctxt->user_regs.cs = __KERNEL_CS;
+	ctxt->user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
 
-	ctxt.kernel_ss = __KERNEL_DS;
-	ctxt.kernel_sp = idle->thread.rsp0;
+	ctxt->kernel_ss = __KERNEL_DS;
+	ctxt->kernel_sp = idle->thread.rsp0;
 
-	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-	ctxt.syscall_callback_eip  = (unsigned long)system_call;
+	ctxt->event_callback_eip    = (unsigned long)hypervisor_callback;
+	ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback;
+	ctxt->syscall_callback_eip  = (unsigned long)system_call;
 
-	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
+	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
 
-	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+	ctxt->gs_base_kernel = (unsigned long)(cpu_pda(cpu));
 #endif
 
-	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
+	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt));
+
+	kfree(ctxt);
+	return 0;
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -400,7 +407,9 @@
 	if (rc)
 		return rc;
 
-	cpu_initialize_context(cpu);
+	rc = cpu_initialize_context(cpu);
+	if (rc)
+		return rc;
 
 	if (num_online_cpus() == 1)
 		alternatives_smp_switch(1);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stack overflow during pv-guest restore
  2008-01-31 20:05 [PATCH] stack overflow during pv-guest restore Don Dutile
@ 2008-02-01  9:33 ` Jan Beulich
  2008-02-01 10:34   ` Keir Fraser
  2008-02-01 14:32   ` Don Dutile
  0 siblings, 2 replies; 4+ messages in thread
From: Jan Beulich @ 2008-02-01  9:33 UTC (permalink / raw)
  To: ddutile; +Cc: xen-devel

I think this can be fixed with a much smaller change: Since the only caller
of cpu_initialize_context() is __cpu_up(), and since __cpu_up() invocation
is serialized, the variable in question could simply be static.

Jan

>>> Don Dutile <ddutile@redhat.com> 31.01.08 21:05 >>>

When secondary cpus are initialized during an i386 pv-guest restore
(due to save/restore or live migration), and the guest has
a load that generates a fair number of interrupts (e.g., parallel kernel make),
a stack overflow can occur because cpu_initialize_context() has
a 2800 byte structure it declares on its stack.  linux-i386 has 4K stacks, by default.
Using 2800 bytes out of 4K by a single function in a call list isn't nice;
add the beginning of interrupt handling at just the right point, before the
switch to the interrupt stack is made... and... the scale tips...

Simple fix: malloc & free structure as needed.

Would fail save/restore testing of an i386-guest running a parallel, kernel make after
50->100 save/restores;  with the fix, haven't seen it fail after 650 save/restores.

Note: this is a basic port of this function in Jeremy Fitzharinge's Xen implementation of pv-ops
       in upstream Linux, part of 15/32 patch, Xen SMP guest support.

Original patch done on rhel5; did a simple diff & merge to xen-unstable's version
of smpboot.c to generate the attached patch, so it cleanly applies; but
haven't built/run/tested the xen-unstable version.

Signed-off-by: Donald Dutile <ddutile@redhat.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stack overflow during pv-guest restore
  2008-02-01  9:33 ` Jan Beulich
@ 2008-02-01 10:34   ` Keir Fraser
  2008-02-01 14:32   ` Don Dutile
  1 sibling, 0 replies; 4+ messages in thread
From: Keir Fraser @ 2008-02-01 10:34 UTC (permalink / raw)
  To: Jan Beulich, ddutile; +Cc: xen-devel

Good point, I'll confirm that and take that approach instead, protected with
some kind of BUG_ON(!spin_trylock) on a private lock for sanity's sake.

 -- Keir

On 1/2/08 09:33, "Jan Beulich" <jbeulich@novell.com> wrote:

> I think this can be fixed with a much smaller change: Since the only caller
> of cpu_initialize_context() is __cpu_up(), and since __cpu_up() invocation
> is serialized, the variable in question could simply be static.
> 
> Jan
> 
>>>> Don Dutile <ddutile@redhat.com> 31.01.08 21:05 >>>
> 
> When secondary cpus are initialized during an i386 pv-guest restore
> (due to save/restore or live migration), and the guest has
> a load that generates a fair number of interrupts (e.g., parallel kernel
> make),
> a stack overflow can occur because cpu_initialize_context() has
> a 2800 byte structure it declares on its stack.  linux-i386 has 4K stacks, by
> default.
> Using 2800 bytes out of 4K by a single function in a call list isn't nice;
> add the beginning of interrupt handling at just the right point, before the
> switch to the interrupt stack is made... and... the scale tips...
> 
> Simple fix: malloc & free structure as needed.
> 
> Would fail save/restore testing of an i386-guest running a parallel, kernel
> make after
> 50->100 save/restores;  with the fix, haven't seen it fail after 650
> save/restores.
> 
> Note: this is a basic port of this function in Jeremy Fitzharinge's Xen
> implementation of pv-ops
>        in upstream Linux, part of 15/32 patch, Xen SMP guest support.
> 
> Original patch done on rhel5; did a simple diff & merge to xen-unstable's
> version
> of smpboot.c to generate the attached patch, so it cleanly applies; but
> haven't built/run/tested the xen-unstable version.
> 
> Signed-off-by: Donald Dutile <ddutile@redhat.com>
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] stack overflow during pv-guest restore
  2008-02-01  9:33 ` Jan Beulich
  2008-02-01 10:34   ` Keir Fraser
@ 2008-02-01 14:32   ` Don Dutile
  1 sibling, 0 replies; 4+ messages in thread
From: Don Dutile @ 2008-02-01 14:32 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

Jan (& Keir)

That's what I did the first time (used a simple static) I generated
a fix & tested it, because a lock is taken out (long) before __cpu_up() is invoked,
to ensure only one cpu is in that code path anyhow.

*But* as I explained below, a modified version of cpu_initialize_context()
was already upstream and it went with a kzalloc-kfree implementation, so
maybe you want to ask Jeremy why he didn't go with a simple static as well.

So, the bottom line: the fix tries to follow upstream... typically that's
what we do (or change upstream, so all of us can follow it).

- Don

Jan Beulich wrote:
> I think this can be fixed with a much smaller change: Since the only caller
> of cpu_initialize_context() is __cpu_up(), and since __cpu_up() invocation
> is serialized, the variable in question could simply be static.
> 
> Jan
> 
>>>> Don Dutile <ddutile@redhat.com> 31.01.08 21:05 >>>
> 
> When secondary cpus are initialized during an i386 pv-guest restore
> (due to save/restore or live migration), and the guest has
> a load that generates a fair number of interrupts (e.g., parallel kernel make),
> a stack overflow can occur because cpu_initialize_context() has
> a 2800 byte structure it declares on its stack.  linux-i386 has 4K stacks, by default.
> Using 2800 bytes out of 4K by a single function in a call list isn't nice;
> add the beginning of interrupt handling at just the right point, before the
> switch to the interrupt stack is made... and... the scale tips...
> 
> Simple fix: malloc & free structure as needed.
> 
> Would fail save/restore testing of an i386-guest running a parallel, kernel make after
> 50->100 save/restores;  with the fix, haven't seen it fail after 650 save/restores.
> 
> Note: this is a basic port of this function in Jeremy Fitzharinge's Xen implementation of pv-ops
>        in upstream Linux, part of 15/32 patch, Xen SMP guest support.
> 
> Original patch done on rhel5; did a simple diff & merge to xen-unstable's version
> of smpboot.c to generate the attached patch, so it cleanly applies; but
> haven't built/run/tested the xen-unstable version.
> 
> Signed-off-by: Donald Dutile <ddutile@redhat.com>
> 
> 
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2008-02-01 14:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-31 20:05 [PATCH] stack overflow during pv-guest restore Don Dutile
2008-02-01  9:33 ` Jan Beulich
2008-02-01 10:34   ` Keir Fraser
2008-02-01 14:32   ` Don Dutile

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.