* [RFC PATCH] Bug during kexec...not all cpus are stopped
@ 2010-10-08 20:34 Alok Kataria
2010-10-11 17:09 ` Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-08 20:34 UTC (permalink / raw)
To: kexec, the arch/x86 maintainers; +Cc: LKML, Dan Hecht
Before starting the new kernel kexec calls machine_shutdown to stop all
the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
expects that all the cpus are now halted after that call returns.
Now, looking at the code for native_smp_send_stop, it assumes that all
the processors have processed the REBOOT ipi in 1 second after the IPI
was sent.
native_smp_send_stop()
---------------------------------------------------------
apic->send_IPI_allbutself(REBOOT_VECTOR);
/* Don't wait longer than a second */
wait = USEC_PER_SEC;
while (num_online_cpus() > 1 && wait--)
udelay(1);
---------------------------------------------------------
It just returns after that 1 second irrespective of whether all cpus
were halted or not. This brings up a issue in the kexec case, since we
can have the BSP starting the new kernel and AP's still processing the
REBOOT IPI simultaneously.
Many distribution kernels use kexec to load the newly installed kernel
during the installation phase, in virtualized environment with the host
heavily overcommitted, we have seen some instances when vcpu fails to
process the IPI in the allotted 1 sec and as a result the AP's end up
accessing uninitialized state (the BSP has already gone ahead with
setting up the new state) and causing GPF's.
IMO, kexec expects machine_shutdown to return only after all cpus are
stopped.
The patch below should fix the issue, comments ??
--
machine_shutdown now takes a parameter "wait", if it is true, it waits
until all the cpus are halted. All the callers except kexec still
fallback to the earlier version of the shutdown call, where it just
waited for max 1 sec before returning.
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Index: linux-2.6/arch/x86/include/asm/reboot.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/reboot.h 2010-03-26 16:57:18.000000000 -0700
+++ linux-2.6/arch/x86/include/asm/reboot.h 2010-10-07 16:41:58.000000000 -0700
@@ -9,7 +9,7 @@ struct machine_ops {
void (*restart)(char *cmd);
void (*halt)(void);
void (*power_off)(void);
- void (*shutdown)(void);
+ void (*shutdown)(int wait);
void (*crash_shutdown)(struct pt_regs *);
void (*emergency_restart)(void);
};
@@ -17,7 +17,7 @@ struct machine_ops {
extern struct machine_ops machine_ops;
void native_machine_crash_shutdown(struct pt_regs *regs);
-void native_machine_shutdown(void);
+void native_machine_shutdown(int wait);
void machine_real_restart(const unsigned char *code, int length);
typedef void (*nmi_shootdown_cb)(int, struct die_args*);
Index: linux-2.6/arch/x86/include/asm/smp.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-08-03 12:43:47.000000000 -0700
+++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-07 16:37:41.000000000 -0700
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*smp_send_stop)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -71,9 +71,9 @@ extern void set_cpu_sibling_map(int cpu)
#endif
extern struct smp_ops smp_ops;
-static inline void smp_send_stop(void)
+static inline void smp_send_stop(int wait)
{
- smp_ops.smp_send_stop();
+ smp_ops.smp_send_stop(wait);
}
static inline void smp_prepare_boot_cpu(void)
Index: linux-2.6/arch/x86/kernel/kvmclock.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/kvmclock.c 2010-08-03 12:43:47.000000000 -0700
+++ linux-2.6/arch/x86/kernel/kvmclock.c 2010-10-07 16:43:28.000000000 -0700
@@ -174,10 +174,10 @@ static void kvm_crash_shutdown(struct pt
}
#endif
-static void kvm_shutdown(void)
+static void kvm_shutdown(int wait)
{
native_write_msr(msr_kvm_system_time, 0, 0);
- native_machine_shutdown();
+ native_machine_shutdown(wait);
}
void __init kvmclock_init(void)
Index: linux-2.6/arch/x86/kernel/reboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-08-03 12:43:47.000000000 -0700
+++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-07 16:45:24.000000000 -0700
@@ -616,7 +616,7 @@ static void native_machine_emergency_res
}
}
-void native_machine_shutdown(void)
+void native_machine_shutdown(int wait)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -641,7 +641,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ smp_send_stop(wait);
#endif
lapic_shutdown();
@@ -670,14 +670,14 @@ static void native_machine_restart(char
printk("machine restart\n");
if (!reboot_force)
- machine_shutdown();
+ machine_shutdown(0);
__machine_emergency_restart(0);
}
static void native_machine_halt(void)
{
/* stop other cpus and apics */
- machine_shutdown();
+ machine_shutdown(0);
tboot_shutdown(TB_SHUTDOWN_HALT);
@@ -689,7 +689,7 @@ static void native_machine_power_off(voi
{
if (pm_power_off) {
if (!reboot_force)
- machine_shutdown();
+ machine_shutdown(0);
pm_power_off();
}
/* a fallback in case there is no PM info available */
@@ -712,9 +712,9 @@ void machine_power_off(void)
machine_ops.power_off();
}
-void machine_shutdown(void)
+void machine_shutdown(int wait)
{
- machine_ops.shutdown();
+ machine_ops.shutdown(wait);
}
void machine_emergency_restart(void)
Index: linux-2.6/arch/x86/kernel/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-07 16:30:34.000000000 -0700
+++ linux-2.6/arch/x86/kernel/smp.c 2010-10-07 16:34:16.000000000 -0700
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_smp_send_stop(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,9 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /* Don't wait longer than a second if this not a synchronous call */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
Index: linux-2.6/arch/x86/xen/enlighten.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-08-30 16:20:50.000000000 -0700
+++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-07 16:49:00.000000000 -0700
@@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
struct sched_shutdown r = { .reason = reason };
#ifdef CONFIG_SMP
- smp_send_stop();
+ smp_send_stop(0);
#endif
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
Index: linux-2.6/arch/x86/xen/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/smp.c 2010-08-30 16:20:50.000000000 -0700
+++ linux-2.6/arch/x86/xen/smp.c 2010-10-07 16:46:27.000000000 -0700
@@ -400,7 +400,7 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_smp_send_stop(int wait)
{
smp_call_function(stop_self, NULL, 0);
}
Index: linux-2.6/drivers/s390/char/sclp_quiesce.c
===================================================================
--- linux-2.6.orig/drivers/s390/char/sclp_quiesce.c 2010-08-03 12:45:04.000000000 -0700
+++ linux-2.6/drivers/s390/char/sclp_quiesce.c 2010-10-07 16:49:12.000000000 -0700
@@ -29,7 +29,7 @@ static void do_machine_quiesce(void)
{
psw_t quiesce_psw;
- smp_send_stop();
+ smp_send_stop(0);
quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT;
quiesce_psw.addr = 0xfff;
__load_psw(quiesce_psw);
Index: linux-2.6/include/linux/reboot.h
===================================================================
--- linux-2.6.orig/include/linux/reboot.h 2010-08-03 12:46:08.000000000 -0700
+++ linux-2.6/include/linux/reboot.h 2010-10-07 16:44:21.000000000 -0700
@@ -51,7 +51,7 @@ extern void machine_restart(char *cmd);
extern void machine_halt(void);
extern void machine_power_off(void);
-extern void machine_shutdown(void);
+extern void machine_shutdown(int wait);
struct pt_regs;
extern void machine_crash_shutdown(struct pt_regs *);
Index: linux-2.6/include/linux/smp.h
===================================================================
--- linux-2.6.orig/include/linux/smp.h 2010-08-03 12:46:08.000000000 -0700
+++ linux-2.6/include/linux/smp.h 2010-10-07 16:49:41.000000000 -0700
@@ -43,7 +43,7 @@ int smp_call_function_single(int cpuid,
/*
* stops all CPUs but the current one:
*/
-extern void smp_send_stop(void);
+extern void smp_send_stop(int wait);
/*
* sends a 'reschedule' event to another CPU:
@@ -116,7 +116,7 @@ extern unsigned int setup_max_cpus;
#else /* !SMP */
-static inline void smp_send_stop(void) { }
+static inline void smp_send_stop(int wait) { }
/*
* These macros fold the SMP functionality into a single CPU system
Index: linux-2.6/kernel/kexec.c
===================================================================
--- linux-2.6.orig/kernel/kexec.c 2010-10-07 14:33:57.000000000 -0700
+++ linux-2.6/kernel/kexec.c 2010-10-07 16:45:38.000000000 -0700
@@ -1538,7 +1538,7 @@ int kernel_kexec(void)
{
kernel_restart_prepare(NULL);
printk(KERN_EMERG "Starting new kernel\n");
- machine_shutdown();
+ machine_shutdown(1);
}
machine_kexec(kexec_image);
Index: linux-2.6/kernel/panic.c
===================================================================
--- linux-2.6.orig/kernel/panic.c 2010-08-30 16:22:03.000000000 -0700
+++ linux-2.6/kernel/panic.c 2010-10-07 16:50:05.000000000 -0700
@@ -94,7 +94,7 @@ NORET_TYPE void panic(const char * fmt,
* unfortunately means it may not be hardened to work in a panic
* situation.
*/
- smp_send_stop();
+ smp_send_stop(0);
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-08 20:34 [RFC PATCH] Bug during kexec...not all cpus are stopped Alok Kataria
@ 2010-10-11 17:09 ` Alok Kataria
2010-10-11 18:07 ` Eric W. Biederman
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-11 17:09 UTC (permalink / raw)
To: kexec, Eric Biederman, Vivek Goyal, Haren Myneni
Cc: the arch/x86 maintainers, LKML, Dan Hecht
Copying a few more maintainers on the thread...
On Fri, 2010-10-08 at 13:34 -0700, Alok Kataria wrote:
> Before starting the new kernel kexec calls machine_shutdown to stop all
> the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
> expects that all the cpus are now halted after that call returns.
> Now, looking at the code for native_smp_send_stop, it assumes that all
> the processors have processed the REBOOT ipi in 1 second after the IPI
> was sent.
> native_smp_send_stop()
> ---------------------------------------------------------
> apic->send_IPI_allbutself(REBOOT_VECTOR);
>
> /* Don't wait longer than a second */
> wait = USEC_PER_SEC;
> while (num_online_cpus() > 1 && wait--)
> udelay(1);
> ---------------------------------------------------------
>
> It just returns after that 1 second irrespective of whether all cpus
> were halted or not. This brings up a issue in the kexec case, since we
> can have the BSP starting the new kernel and AP's still processing the
> REBOOT IPI simultaneously.
>
> Many distribution kernels use kexec to load the newly installed kernel
> during the installation phase, in virtualized environment with the host
> heavily overcommitted, we have seen some instances when vcpu fails to
> process the IPI in the allotted 1 sec and as a result the AP's end up
> accessing uninitialized state (the BSP has already gone ahead with
> setting up the new state) and causing GPF's.
>
> IMO, kexec expects machine_shutdown to return only after all cpus are
> stopped.
>
> The patch below should fix the issue, comments ??
>
> --
> machine_shutdown now takes a parameter "wait", if it is true, it waits
> until all the cpus are halted. All the callers except kexec still
> fallback to the earlier version of the shutdown call, where it just
> waited for max 1 sec before returning.
>
> Signed-off-by: Alok N Kataria <akataria@vmware.com>
>
> Index: linux-2.6/arch/x86/include/asm/reboot.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/reboot.h 2010-03-26 16:57:18.000000000 -0700
> +++ linux-2.6/arch/x86/include/asm/reboot.h 2010-10-07 16:41:58.000000000 -0700
> @@ -9,7 +9,7 @@ struct machine_ops {
> void (*restart)(char *cmd);
> void (*halt)(void);
> void (*power_off)(void);
> - void (*shutdown)(void);
> + void (*shutdown)(int wait);
> void (*crash_shutdown)(struct pt_regs *);
> void (*emergency_restart)(void);
> };
> @@ -17,7 +17,7 @@ struct machine_ops {
> extern struct machine_ops machine_ops;
>
> void native_machine_crash_shutdown(struct pt_regs *regs);
> -void native_machine_shutdown(void);
> +void native_machine_shutdown(int wait);
> void machine_real_restart(const unsigned char *code, int length);
>
> typedef void (*nmi_shootdown_cb)(int, struct die_args*);
> Index: linux-2.6/arch/x86/include/asm/smp.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-08-03 12:43:47.000000000 -0700
> +++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-07 16:37:41.000000000 -0700
> @@ -50,7 +50,7 @@ struct smp_ops {
> void (*smp_prepare_cpus)(unsigned max_cpus);
> void (*smp_cpus_done)(unsigned max_cpus);
>
> - void (*smp_send_stop)(void);
> + void (*smp_send_stop)(int wait);
> void (*smp_send_reschedule)(int cpu);
>
> int (*cpu_up)(unsigned cpu);
> @@ -71,9 +71,9 @@ extern void set_cpu_sibling_map(int cpu)
> #endif
> extern struct smp_ops smp_ops;
>
> -static inline void smp_send_stop(void)
> +static inline void smp_send_stop(int wait)
> {
> - smp_ops.smp_send_stop();
> + smp_ops.smp_send_stop(wait);
> }
>
> static inline void smp_prepare_boot_cpu(void)
> Index: linux-2.6/arch/x86/kernel/kvmclock.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/kvmclock.c 2010-08-03 12:43:47.000000000 -0700
> +++ linux-2.6/arch/x86/kernel/kvmclock.c 2010-10-07 16:43:28.000000000 -0700
> @@ -174,10 +174,10 @@ static void kvm_crash_shutdown(struct pt
> }
> #endif
>
> -static void kvm_shutdown(void)
> +static void kvm_shutdown(int wait)
> {
> native_write_msr(msr_kvm_system_time, 0, 0);
> - native_machine_shutdown();
> + native_machine_shutdown(wait);
> }
>
> void __init kvmclock_init(void)
> Index: linux-2.6/arch/x86/kernel/reboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-08-03 12:43:47.000000000 -0700
> +++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-07 16:45:24.000000000 -0700
> @@ -616,7 +616,7 @@ static void native_machine_emergency_res
> }
> }
>
> -void native_machine_shutdown(void)
> +void native_machine_shutdown(int wait)
> {
> /* Stop the cpus and apics */
> #ifdef CONFIG_SMP
> @@ -641,7 +641,7 @@ void native_machine_shutdown(void)
> /* O.K Now that I'm on the appropriate processor,
> * stop all of the others.
> */
> - smp_send_stop();
> + smp_send_stop(wait);
> #endif
>
> lapic_shutdown();
> @@ -670,14 +670,14 @@ static void native_machine_restart(char
> printk("machine restart\n");
>
> if (!reboot_force)
> - machine_shutdown();
> + machine_shutdown(0);
> __machine_emergency_restart(0);
> }
>
> static void native_machine_halt(void)
> {
> /* stop other cpus and apics */
> - machine_shutdown();
> + machine_shutdown(0);
>
> tboot_shutdown(TB_SHUTDOWN_HALT);
>
> @@ -689,7 +689,7 @@ static void native_machine_power_off(voi
> {
> if (pm_power_off) {
> if (!reboot_force)
> - machine_shutdown();
> + machine_shutdown(0);
> pm_power_off();
> }
> /* a fallback in case there is no PM info available */
> @@ -712,9 +712,9 @@ void machine_power_off(void)
> machine_ops.power_off();
> }
>
> -void machine_shutdown(void)
> +void machine_shutdown(int wait)
> {
> - machine_ops.shutdown();
> + machine_ops.shutdown(wait);
> }
>
> void machine_emergency_restart(void)
> Index: linux-2.6/arch/x86/kernel/smp.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-07 16:30:34.000000000 -0700
> +++ linux-2.6/arch/x86/kernel/smp.c 2010-10-07 16:34:16.000000000 -0700
> @@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
> irq_exit();
> }
>
> -static void native_smp_send_stop(void)
> +static void native_smp_send_stop(int wait)
> {
> unsigned long flags;
> - unsigned long wait;
> + unsigned long timeout;
>
> if (reboot_force)
> return;
> @@ -179,9 +179,9 @@ static void native_smp_send_stop(void)
> if (num_online_cpus() > 1) {
> apic->send_IPI_allbutself(REBOOT_VECTOR);
>
> - /* Don't wait longer than a second */
> - wait = USEC_PER_SEC;
> - while (num_online_cpus() > 1 && wait--)
> + /* Don't wait longer than a second if this not a synchronous call */
> + timeout = USEC_PER_SEC;
> + while (num_online_cpus() > 1 && (wait || timeout--))
> udelay(1);
> }
>
> Index: linux-2.6/arch/x86/xen/enlighten.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-08-30 16:20:50.000000000 -0700
> +++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-07 16:49:00.000000000 -0700
> @@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
> struct sched_shutdown r = { .reason = reason };
>
> #ifdef CONFIG_SMP
> - smp_send_stop();
> + smp_send_stop(0);
> #endif
>
> if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
> Index: linux-2.6/arch/x86/xen/smp.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/xen/smp.c 2010-08-30 16:20:50.000000000 -0700
> +++ linux-2.6/arch/x86/xen/smp.c 2010-10-07 16:46:27.000000000 -0700
> @@ -400,7 +400,7 @@ static void stop_self(void *v)
> BUG();
> }
>
> -static void xen_smp_send_stop(void)
> +static void xen_smp_send_stop(int wait)
> {
> smp_call_function(stop_self, NULL, 0);
> }
> Index: linux-2.6/drivers/s390/char/sclp_quiesce.c
> ===================================================================
> --- linux-2.6.orig/drivers/s390/char/sclp_quiesce.c 2010-08-03 12:45:04.000000000 -0700
> +++ linux-2.6/drivers/s390/char/sclp_quiesce.c 2010-10-07 16:49:12.000000000 -0700
> @@ -29,7 +29,7 @@ static void do_machine_quiesce(void)
> {
> psw_t quiesce_psw;
>
> - smp_send_stop();
> + smp_send_stop(0);
> quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT;
> quiesce_psw.addr = 0xfff;
> __load_psw(quiesce_psw);
> Index: linux-2.6/include/linux/reboot.h
> ===================================================================
> --- linux-2.6.orig/include/linux/reboot.h 2010-08-03 12:46:08.000000000 -0700
> +++ linux-2.6/include/linux/reboot.h 2010-10-07 16:44:21.000000000 -0700
> @@ -51,7 +51,7 @@ extern void machine_restart(char *cmd);
> extern void machine_halt(void);
> extern void machine_power_off(void);
>
> -extern void machine_shutdown(void);
> +extern void machine_shutdown(int wait);
> struct pt_regs;
> extern void machine_crash_shutdown(struct pt_regs *);
>
> Index: linux-2.6/include/linux/smp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/smp.h 2010-08-03 12:46:08.000000000 -0700
> +++ linux-2.6/include/linux/smp.h 2010-10-07 16:49:41.000000000 -0700
> @@ -43,7 +43,7 @@ int smp_call_function_single(int cpuid,
> /*
> * stops all CPUs but the current one:
> */
> -extern void smp_send_stop(void);
> +extern void smp_send_stop(int wait);
>
> /*
> * sends a 'reschedule' event to another CPU:
> @@ -116,7 +116,7 @@ extern unsigned int setup_max_cpus;
>
> #else /* !SMP */
>
> -static inline void smp_send_stop(void) { }
> +static inline void smp_send_stop(int wait) { }
>
> /*
> * These macros fold the SMP functionality into a single CPU system
> Index: linux-2.6/kernel/kexec.c
> ===================================================================
> --- linux-2.6.orig/kernel/kexec.c 2010-10-07 14:33:57.000000000 -0700
> +++ linux-2.6/kernel/kexec.c 2010-10-07 16:45:38.000000000 -0700
> @@ -1538,7 +1538,7 @@ int kernel_kexec(void)
> {
> kernel_restart_prepare(NULL);
> printk(KERN_EMERG "Starting new kernel\n");
> - machine_shutdown();
> + machine_shutdown(1);
> }
>
> machine_kexec(kexec_image);
> Index: linux-2.6/kernel/panic.c
> ===================================================================
> --- linux-2.6.orig/kernel/panic.c 2010-08-30 16:22:03.000000000 -0700
> +++ linux-2.6/kernel/panic.c 2010-10-07 16:50:05.000000000 -0700
> @@ -94,7 +94,7 @@ NORET_TYPE void panic(const char * fmt,
> * unfortunately means it may not be hardened to work in a panic
> * situation.
> */
> - smp_send_stop();
> + smp_send_stop(0);
>
> atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
>
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 17:09 ` Alok Kataria
@ 2010-10-11 18:07 ` Eric W. Biederman
2010-10-11 19:41 ` Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: Eric W. Biederman @ 2010-10-11 18:07 UTC (permalink / raw)
To: akataria
Cc: kexec, Vivek Goyal, Haren Myneni, the arch/x86 maintainers, LKML,
Dan Hecht
Alok Kataria <akataria@vmware.com> writes:
> Copying a few more maintainers on the thread...
> On Fri, 2010-10-08 at 13:34 -0700, Alok Kataria wrote:
>> Before starting the new kernel kexec calls machine_shutdown to stop all
>> the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
>> expects that all the cpus are now halted after that call returns.
>> Now, looking at the code for native_smp_send_stop, it assumes that all
>> the processors have processed the REBOOT ipi in 1 second after the IPI
>> was sent.
>> native_smp_send_stop()
>> ---------------------------------------------------------
>> apic->send_IPI_allbutself(REBOOT_VECTOR);
>>
>> /* Don't wait longer than a second */
>> wait = USEC_PER_SEC;
>> while (num_online_cpus() > 1 && wait--)
>> udelay(1);
>> ---------------------------------------------------------
>>
>> It just returns after that 1 second irrespective of whether all cpus
>> were halted or not. This brings up a issue in the kexec case, since we
>> can have the BSP starting the new kernel and AP's still processing the
>> REBOOT IPI simultaneously.
>>
>> Many distribution kernels use kexec to load the newly installed kernel
>> during the installation phase, in virtualized environment with the host
>> heavily overcommitted, we have seen some instances when vcpu fails to
>> process the IPI in the allotted 1 sec and as a result the AP's end up
>> accessing uninitialized state (the BSP has already gone ahead with
>> setting up the new state) and causing GPF's.
>>
>> IMO, kexec expects machine_shutdown to return only after all cpus are
>> stopped.
>>
>> The patch below should fix the issue, comments ??
>>
>> --
>> machine_shutdown now takes a parameter "wait", if it is true, it waits
>> until all the cpus are halted. All the callers except kexec still
>> fallback to the earlier version of the shutdown call, where it just
>> waited for max 1 sec before returning.
The approach seems reasonable. However on every path except for the
panic path I would wait for all of the cpus to stop, as that is what
those code paths are expecting. Until last year smp_send_stop always
waited until all of the cpus stopped. So I think changing
machine_shutdown should not need to happen.
This patch cannot be used as is because it changes the parameters to
smp_send_stop() and there are more than just x86 implementations out
there.
Let me propose a slightly different tactic. We need to separate
the panic path from the normal path to avoid confusion. At the
generic level smp_send_stop is exclusively used for the panic
path. So we should keep that, and rename the x86 non-panic path
function something else.
Can you rename the x86 smp_send_stop function say stop_all_other_cpus().
At which point we could implement smp_send_stop as:
stop_all_other_cpus(0);
And everywhere else would call stop_all_other_cpus(1) waiting for
the cpus to actually stop.
I really think we want to wait for the cpus to stop in all cases except
for panic/kdump where we expect things to be broken and we are doing
our best to make things work anyway.
Eric
>> Signed-off-by: Alok N Kataria <akataria@vmware.com>
>>
>> Index: linux-2.6/arch/x86/include/asm/reboot.h
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/include/asm/reboot.h 2010-03-26 16:57:18.000000000 -0700
>> +++ linux-2.6/arch/x86/include/asm/reboot.h 2010-10-07 16:41:58.000000000 -0700
>> @@ -9,7 +9,7 @@ struct machine_ops {
>> void (*restart)(char *cmd);
>> void (*halt)(void);
>> void (*power_off)(void);
>> - void (*shutdown)(void);
>> + void (*shutdown)(int wait);
>> void (*crash_shutdown)(struct pt_regs *);
>> void (*emergency_restart)(void);
>> };
>> @@ -17,7 +17,7 @@ struct machine_ops {
>> extern struct machine_ops machine_ops;
>>
>> void native_machine_crash_shutdown(struct pt_regs *regs);
>> -void native_machine_shutdown(void);
>> +void native_machine_shutdown(int wait);
>> void machine_real_restart(const unsigned char *code, int length);
>>
>> typedef void (*nmi_shootdown_cb)(int, struct die_args*);
>> Index: linux-2.6/arch/x86/include/asm/smp.h
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-08-03 12:43:47.000000000 -0700
>> +++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-07 16:37:41.000000000 -0700
>> @@ -50,7 +50,7 @@ struct smp_ops {
>> void (*smp_prepare_cpus)(unsigned max_cpus);
>> void (*smp_cpus_done)(unsigned max_cpus);
>>
>> - void (*smp_send_stop)(void);
>> + void (*smp_send_stop)(int wait);
>> void (*smp_send_reschedule)(int cpu);
>>
>> int (*cpu_up)(unsigned cpu);
>> @@ -71,9 +71,9 @@ extern void set_cpu_sibling_map(int cpu)
>> #endif
>> extern struct smp_ops smp_ops;
>>
>> -static inline void smp_send_stop(void)
>> +static inline void smp_send_stop(int wait)
>> {
>> - smp_ops.smp_send_stop();
>> + smp_ops.smp_send_stop(wait);
>> }
>>
>> static inline void smp_prepare_boot_cpu(void)
>> Index: linux-2.6/arch/x86/kernel/kvmclock.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/kvmclock.c 2010-08-03 12:43:47.000000000 -0700
>> +++ linux-2.6/arch/x86/kernel/kvmclock.c 2010-10-07 16:43:28.000000000 -0700
>> @@ -174,10 +174,10 @@ static void kvm_crash_shutdown(struct pt
>> }
>> #endif
>>
>> -static void kvm_shutdown(void)
>> +static void kvm_shutdown(int wait)
>> {
>> native_write_msr(msr_kvm_system_time, 0, 0);
>> - native_machine_shutdown();
>> + native_machine_shutdown(wait);
>> }
>>
>> void __init kvmclock_init(void)
>> Index: linux-2.6/arch/x86/kernel/reboot.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-08-03 12:43:47.000000000 -0700
>> +++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-07 16:45:24.000000000 -0700
>> @@ -616,7 +616,7 @@ static void native_machine_emergency_res
>> }
>> }
>>
>> -void native_machine_shutdown(void)
>> +void native_machine_shutdown(int wait)
>> {
>> /* Stop the cpus and apics */
>> #ifdef CONFIG_SMP
>> @@ -641,7 +641,7 @@ void native_machine_shutdown(void)
>> /* O.K Now that I'm on the appropriate processor,
>> * stop all of the others.
>> */
>> - smp_send_stop();
>> + smp_send_stop(wait);
>> #endif
>>
>> lapic_shutdown();
>> @@ -670,14 +670,14 @@ static void native_machine_restart(char
>> printk("machine restart\n");
>>
>> if (!reboot_force)
>> - machine_shutdown();
>> + machine_shutdown(0);
>> __machine_emergency_restart(0);
>> }
>>
>> static void native_machine_halt(void)
>> {
>> /* stop other cpus and apics */
>> - machine_shutdown();
>> + machine_shutdown(0);
>>
>> tboot_shutdown(TB_SHUTDOWN_HALT);
>>
>> @@ -689,7 +689,7 @@ static void native_machine_power_off(voi
>> {
>> if (pm_power_off) {
>> if (!reboot_force)
>> - machine_shutdown();
>> + machine_shutdown(0);
>> pm_power_off();
>> }
>> /* a fallback in case there is no PM info available */
>> @@ -712,9 +712,9 @@ void machine_power_off(void)
>> machine_ops.power_off();
>> }
>>
>> -void machine_shutdown(void)
>> +void machine_shutdown(int wait)
>> {
>> - machine_ops.shutdown();
>> + machine_ops.shutdown(wait);
>> }
>>
>> void machine_emergency_restart(void)
>> Index: linux-2.6/arch/x86/kernel/smp.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-07 16:30:34.000000000 -0700
>> +++ linux-2.6/arch/x86/kernel/smp.c 2010-10-07 16:34:16.000000000 -0700
>> @@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
>> irq_exit();
>> }
>>
>> -static void native_smp_send_stop(void)
>> +static void native_smp_send_stop(int wait)
>> {
>> unsigned long flags;
>> - unsigned long wait;
>> + unsigned long timeout;
>>
>> if (reboot_force)
>> return;
>> @@ -179,9 +179,9 @@ static void native_smp_send_stop(void)
>> if (num_online_cpus() > 1) {
>> apic->send_IPI_allbutself(REBOOT_VECTOR);
>>
>> - /* Don't wait longer than a second */
>> - wait = USEC_PER_SEC;
>> - while (num_online_cpus() > 1 && wait--)
>> + /* Don't wait longer than a second if this not a synchronous call */
>> + timeout = USEC_PER_SEC;
>> + while (num_online_cpus() > 1 && (wait || timeout--))
>> udelay(1);
>> }
>>
>> Index: linux-2.6/arch/x86/xen/enlighten.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-08-30 16:20:50.000000000 -0700
>> +++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-07 16:49:00.000000000 -0700
>> @@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
>> struct sched_shutdown r = { .reason = reason };
>>
>> #ifdef CONFIG_SMP
>> - smp_send_stop();
>> + smp_send_stop(0);
>> #endif
>>
>> if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
>> Index: linux-2.6/arch/x86/xen/smp.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/xen/smp.c 2010-08-30 16:20:50.000000000 -0700
>> +++ linux-2.6/arch/x86/xen/smp.c 2010-10-07 16:46:27.000000000 -0700
>> @@ -400,7 +400,7 @@ static void stop_self(void *v)
>> BUG();
>> }
>>
>> -static void xen_smp_send_stop(void)
>> +static void xen_smp_send_stop(int wait)
>> {
>> smp_call_function(stop_self, NULL, 0);
>> }
>> Index: linux-2.6/drivers/s390/char/sclp_quiesce.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/s390/char/sclp_quiesce.c 2010-08-03 12:45:04.000000000 -0700
>> +++ linux-2.6/drivers/s390/char/sclp_quiesce.c 2010-10-07 16:49:12.000000000 -0700
>> @@ -29,7 +29,7 @@ static void do_machine_quiesce(void)
>> {
>> psw_t quiesce_psw;
>>
>> - smp_send_stop();
>> + smp_send_stop(0);
>> quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT;
>> quiesce_psw.addr = 0xfff;
>> __load_psw(quiesce_psw);
>> Index: linux-2.6/include/linux/reboot.h
>> ===================================================================
>> --- linux-2.6.orig/include/linux/reboot.h 2010-08-03 12:46:08.000000000 -0700
>> +++ linux-2.6/include/linux/reboot.h 2010-10-07 16:44:21.000000000 -0700
>> @@ -51,7 +51,7 @@ extern void machine_restart(char *cmd);
>> extern void machine_halt(void);
>> extern void machine_power_off(void);
>>
>> -extern void machine_shutdown(void);
>> +extern void machine_shutdown(int wait);
>> struct pt_regs;
>> extern void machine_crash_shutdown(struct pt_regs *);
>>
>> Index: linux-2.6/include/linux/smp.h
>> ===================================================================
>> --- linux-2.6.orig/include/linux/smp.h 2010-08-03 12:46:08.000000000 -0700
>> +++ linux-2.6/include/linux/smp.h 2010-10-07 16:49:41.000000000 -0700
>> @@ -43,7 +43,7 @@ int smp_call_function_single(int cpuid,
>> /*
>> * stops all CPUs but the current one:
>> */
>> -extern void smp_send_stop(void);
>> +extern void smp_send_stop(int wait);
>>
>> /*
>> * sends a 'reschedule' event to another CPU:
>> @@ -116,7 +116,7 @@ extern unsigned int setup_max_cpus;
>>
>> #else /* !SMP */
>>
>> -static inline void smp_send_stop(void) { }
>> +static inline void smp_send_stop(int wait) { }
>>
>> /*
>> * These macros fold the SMP functionality into a single CPU system
>> Index: linux-2.6/kernel/kexec.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/kexec.c 2010-10-07 14:33:57.000000000 -0700
>> +++ linux-2.6/kernel/kexec.c 2010-10-07 16:45:38.000000000 -0700
>> @@ -1538,7 +1538,7 @@ int kernel_kexec(void)
>> {
>> kernel_restart_prepare(NULL);
>> printk(KERN_EMERG "Starting new kernel\n");
>> - machine_shutdown();
>> + machine_shutdown(1);
>> }
>>
>> machine_kexec(kexec_image);
>> Index: linux-2.6/kernel/panic.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/panic.c 2010-08-30 16:22:03.000000000 -0700
>> +++ linux-2.6/kernel/panic.c 2010-10-07 16:50:05.000000000 -0700
>> @@ -94,7 +94,7 @@ NORET_TYPE void panic(const char * fmt,
>> * unfortunately means it may not be hardened to work in a panic
>> * situation.
>> */
>> - smp_send_stop();
>> + smp_send_stop(0);
>>
>> atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
>>
>>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 18:07 ` Eric W. Biederman
@ 2010-10-11 19:41 ` Alok Kataria
2010-10-11 21:17 ` Eric W. Biederman
2010-10-11 21:39 ` [RFC PATCH] Bug during kexec...not all cpus are stopped Vivek Goyal
0 siblings, 2 replies; 16+ messages in thread
From: Alok Kataria @ 2010-10-11 19:41 UTC (permalink / raw)
To: Eric W. Biederman
Cc: kexec@lists.infradead.org, Vivek Goyal, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht, jeremy
Hi Eric,
Thanks for taking a look.
On Mon, 2010-10-11 at 11:07 -0700, Eric W. Biederman wrote:
> Alok Kataria <akataria@vmware.com> writes:
>
> > Copying a few more maintainers on the thread...
> > On Fri, 2010-10-08 at 13:34 -0700, Alok Kataria wrote:
> >> Before starting the new kernel kexec calls machine_shutdown to stop all
> >> the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
> >> expects that all the cpus are now halted after that call returns.
> >> Now, looking at the code for native_smp_send_stop, it assumes that all
> >> the processors have processed the REBOOT ipi in 1 second after the IPI
> >> was sent.
> >> native_smp_send_stop()
> >> ---------------------------------------------------------
> >> apic->send_IPI_allbutself(REBOOT_VECTOR);
> >>
> >> /* Don't wait longer than a second */
> >> wait = USEC_PER_SEC;
> >> while (num_online_cpus() > 1 && wait--)
> >> udelay(1);
> >> ---------------------------------------------------------
> >>
> >> It just returns after that 1 second irrespective of whether all cpus
> >> were halted or not. This brings up a issue in the kexec case, since we
> >> can have the BSP starting the new kernel and AP's still processing the
> >> REBOOT IPI simultaneously.
> >>
> >> Many distribution kernels use kexec to load the newly installed kernel
> >> during the installation phase, in virtualized environment with the host
> >> heavily overcommitted, we have seen some instances when vcpu fails to
> >> process the IPI in the allotted 1 sec and as a result the AP's end up
> >> accessing uninitialized state (the BSP has already gone ahead with
> >> setting up the new state) and causing GPF's.
> >>
> >> IMO, kexec expects machine_shutdown to return only after all cpus are
> >> stopped.
> >>
> >> The patch below should fix the issue, comments ??
> >>
> >> --
> >> machine_shutdown now takes a parameter "wait", if it is true, it waits
> >> until all the cpus are halted. All the callers except kexec still
> >> fallback to the earlier version of the shutdown call, where it just
> >> waited for max 1 sec before returning.
>
> The approach seems reasonable. However on every path except for the
> panic path I would wait for all of the cpus to stop, as that is what
> those code paths are expecting. Until last year smp_send_stop always
> waited until all of the cpus stopped. So I think changing
> machine_shutdown should not need to happen.
>
> This patch cannot be used as is because it changes the parameters to
> smp_send_stop() and there are more than just x86 implementations out
> there.
>
> Let me propose a slightly different tactic. We need to separate
> the panic path from the normal path to avoid confusion. At the
> generic level smp_send_stop is exclusively used for the panic
> path. So we should keep that, and rename the x86 non-panic path
> function something else.
>
> Can you rename the x86 smp_send_stop function say stop_all_other_cpus().
>
> At which point we could implement smp_send_stop as:
> stop_all_other_cpus(0);
>
> And everywhere else would call stop_all_other_cpus(1) waiting for
> the cpus to actually stop.
Done, I have added a stop_other_cpus function which calls
smp_ops.stop_other_cpus(1)
>
> I really think we want to wait for the cpus to stop in all cases except
> for panic/kdump where we expect things to be broken and we are doing
> our best to make things work anyway.
Now it does, except in the panic case.
Jeremy, I have modified the xen_reboot bits too so that it now waits
until all cpus are actually stopped, please take a look.
--
x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
this allows the caller to specify if it wants to stop untill all the cpus
have processed the stop IPI. This is required specifically for the kexec case
where we should wait for all the cpus to be stopped before starting the new
kernel.
We now wait for the cpus to stop in all cases except for panic/kdump where
we expect things to be broken and we are doing our best to make things
work anyway.
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Index: linux-2.6/arch/x86/include/asm/smp.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-11 12:00:22.000000000 -0700
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
Index: linux-2.6/arch/x86/kernel/reboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-11 12:05:02.000000000 -0700
@@ -641,7 +641,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
Index: linux-2.6/arch/x86/kernel/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/kernel/smp.c 2010-10-11 12:16:42.000000000 -0700
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
Index: linux-2.6/arch/x86/xen/enlighten.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-11 11:58:53.000000000 -0700
@@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
struct sched_shutdown r = { .reason = reason };
#ifdef CONFIG_SMP
- smp_send_stop();
+ stop_other_cpus();
#endif
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
Index: linux-2.6/arch/x86/xen/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/smp.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/xen/smp.c 2010-10-11 11:58:53.000000000 -0700
@@ -400,9 +400,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
Index: linux-2.6/include/linux/smp.h
===================================================================
--- linux-2.6.orig/include/linux/smp.h 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/include/linux/smp.h 2010-10-11 12:07:19.000000000 -0700
@@ -116,7 +116,8 @@ extern unsigned int setup_max_cpus;
#else /* !SMP */
-static inline void smp_send_stop(void) { }
+static inline void smp_send_stop() { }
+static inline void stop_other_cpus() { }
/*
* These macros fold the SMP functionality into a single CPU system
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 19:41 ` Alok Kataria
@ 2010-10-11 21:17 ` Eric W. Biederman
2010-10-11 21:37 ` Alok Kataria
2010-10-11 21:39 ` [RFC PATCH] Bug during kexec...not all cpus are stopped Vivek Goyal
1 sibling, 1 reply; 16+ messages in thread
From: Eric W. Biederman @ 2010-10-11 21:17 UTC (permalink / raw)
To: akataria
Cc: kexec@lists.infradead.org, Vivek Goyal, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht, jeremy
Alok Kataria <akataria@vmware.com> writes:
> Hi Eric,
>
> Thanks for taking a look.
>
> On Mon, 2010-10-11 at 11:07 -0700, Eric W. Biederman wrote:
>> Alok Kataria <akataria@vmware.com> writes:
>>
>> > Copying a few more maintainers on the thread...
>> > On Fri, 2010-10-08 at 13:34 -0700, Alok Kataria wrote:
>> >> Before starting the new kernel kexec calls machine_shutdown to stop all
>> >> the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
>> >> expects that all the cpus are now halted after that call returns.
>> >> Now, looking at the code for native_smp_send_stop, it assumes that all
>> >> the processors have processed the REBOOT ipi in 1 second after the IPI
>> >> was sent.
>> >> native_smp_send_stop()
>> >> ---------------------------------------------------------
>> >> apic->send_IPI_allbutself(REBOOT_VECTOR);
>> >>
>> >> /* Don't wait longer than a second */
>> >> wait = USEC_PER_SEC;
>> >> while (num_online_cpus() > 1 && wait--)
>> >> udelay(1);
>> >> ---------------------------------------------------------
>> >>
>> >> It just returns after that 1 second irrespective of whether all cpus
>> >> were halted or not. This brings up a issue in the kexec case, since we
>> >> can have the BSP starting the new kernel and AP's still processing the
>> >> REBOOT IPI simultaneously.
>> >>
>> >> Many distribution kernels use kexec to load the newly installed kernel
>> >> during the installation phase, in virtualized environment with the host
>> >> heavily overcommitted, we have seen some instances when vcpu fails to
>> >> process the IPI in the allotted 1 sec and as a result the AP's end up
>> >> accessing uninitialized state (the BSP has already gone ahead with
>> >> setting up the new state) and causing GPF's.
>> >>
>> >> IMO, kexec expects machine_shutdown to return only after all cpus are
>> >> stopped.
>> >>
>> >> The patch below should fix the issue, comments ??
>> >>
>> >> --
>> >> machine_shutdown now takes a parameter "wait", if it is true, it waits
>> >> until all the cpus are halted. All the callers except kexec still
>> >> fallback to the earlier version of the shutdown call, where it just
>> >> waited for max 1 sec before returning.
>>
>> The approach seems reasonable. However on every path except for the
>> panic path I would wait for all of the cpus to stop, as that is what
>> those code paths are expecting. Until last year smp_send_stop always
>> waited until all of the cpus stopped. So I think changing
>> machine_shutdown should not need to happen.
>>
>> This patch cannot be used as is because it changes the parameters to
>> smp_send_stop() and there are more than just x86 implementations out
>> there.
>>
>> Let me propose a slightly different tactic. We need to separate
>> the panic path from the normal path to avoid confusion. At the
>> generic level smp_send_stop is exclusively used for the panic
>> path. So we should keep that, and rename the x86 non-panic path
>> function something else.
>>
>> Can you rename the x86 smp_send_stop function say stop_all_other_cpus().
>>
>> At which point we could implement smp_send_stop as:
>> stop_all_other_cpus(0);
>>
>> And everywhere else would call stop_all_other_cpus(1) waiting for
>> the cpus to actually stop.
>
> Done, I have added a stop_other_cpus function which calls
> smp_ops.stop_other_cpus(1)
>
>>
>> I really think we want to wait for the cpus to stop in all cases except
>> for panic/kdump where we expect things to be broken and we are doing
>> our best to make things work anyway.
> Now it does, except in the panic case.
>
> Jeremy, I have modified the xen_reboot bits too so that it now waits
> until all cpus are actually stopped, please take a look.
>
> --
>
> x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
> this allows the caller to specify if it wants to stop untill all the cpus
> have processed the stop IPI. This is required specifically for the kexec case
> where we should wait for all the cpus to be stopped before starting the new
> kernel.
> We now wait for the cpus to stop in all cases except for panic/kdump where
> we expect things to be broken and we are doing our best to make things
> work anyway.
One little nit.
> Signed-off-by: Alok N Kataria <akataria@vmware.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
>
> Index: linux-2.6/arch/x86/include/asm/smp.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-11 12:00:22.000000000 -0700
> @@ -50,7 +50,7 @@ struct smp_ops {
> void (*smp_prepare_cpus)(unsigned max_cpus);
> void (*smp_cpus_done)(unsigned max_cpus);
>
> - void (*smp_send_stop)(void);
> + void (*stop_other_cpus)(int wait);
> void (*smp_send_reschedule)(int cpu);
>
> int (*cpu_up)(unsigned cpu);
> @@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
>
> static inline void smp_send_stop(void)
> {
> - smp_ops.smp_send_stop();
> + smp_ops.stop_other_cpus(0);
> +}
> +
> +static inline void stop_other_cpus(void)
> +{
> + smp_ops.stop_other_cpus(1);
> }
>
> static inline void smp_prepare_boot_cpu(void)
> Index: linux-2.6/arch/x86/kernel/reboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-11 12:05:02.000000000 -0700
> @@ -641,7 +641,7 @@ void native_machine_shutdown(void)
> /* O.K Now that I'm on the appropriate processor,
> * stop all of the others.
> */
> - smp_send_stop();
> + stop_other_cpus();
> #endif
>
> lapic_shutdown();
> Index: linux-2.6/arch/x86/kernel/smp.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/arch/x86/kernel/smp.c 2010-10-11 12:16:42.000000000 -0700
> @@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
> irq_exit();
> }
>
> -static void native_smp_send_stop(void)
> +static void native_stop_other_cpus(int wait)
> {
> unsigned long flags;
> - unsigned long wait;
> + unsigned long timeout;
>
> if (reboot_force)
> return;
> @@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
> if (num_online_cpus() > 1) {
> apic->send_IPI_allbutself(REBOOT_VECTOR);
>
> - /* Don't wait longer than a second */
> - wait = USEC_PER_SEC;
> - while (num_online_cpus() > 1 && wait--)
> + /*
> + * Don't wait longer than a second if the caller
> + * didn't ask us to wait.
> + */
> + timeout = USEC_PER_SEC;
> + while (num_online_cpus() > 1 && (wait || timeout--))
> udelay(1);
> }
>
> @@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
> .smp_prepare_cpus = native_smp_prepare_cpus,
> .smp_cpus_done = native_smp_cpus_done,
>
> - .smp_send_stop = native_smp_send_stop,
> + .stop_other_cpus = native_stop_other_cpus,
> .smp_send_reschedule = native_smp_send_reschedule,
>
> .cpu_up = native_cpu_up,
> Index: linux-2.6/arch/x86/xen/enlighten.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-11 11:58:53.000000000 -0700
> @@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
> struct sched_shutdown r = { .reason = reason };
>
> #ifdef CONFIG_SMP
> - smp_send_stop();
> + stop_other_cpus();
> #endif
>
> if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
> Index: linux-2.6/arch/x86/xen/smp.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/xen/smp.c 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/arch/x86/xen/smp.c 2010-10-11 11:58:53.000000000 -0700
> @@ -400,9 +400,9 @@ static void stop_self(void *v)
> BUG();
> }
>
> -static void xen_smp_send_stop(void)
> +static void xen_stop_other_cpus(int wait)
> {
> - smp_call_function(stop_self, NULL, 0);
> + smp_call_function(stop_self, NULL, wait);
> }
>
> static void xen_smp_send_reschedule(int cpu)
> @@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops
> .cpu_disable = xen_cpu_disable,
> .play_dead = xen_play_dead,
>
> - .smp_send_stop = xen_smp_send_stop,
> + .stop_other_cpus = xen_stop_other_cpus,
> .smp_send_reschedule = xen_smp_send_reschedule,
>
> .send_call_func_ipi = xen_smp_send_call_function_ipi,
> Index: linux-2.6/include/linux/smp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/smp.h 2010-10-11 11:56:40.000000000 -0700
> +++ linux-2.6/include/linux/smp.h 2010-10-11 12:07:19.000000000 -0700
> @@ -116,7 +116,8 @@ extern unsigned int setup_max_cpus;
>
> #else /* !SMP */
>
> -static inline void smp_send_stop(void) { }
> +static inline void smp_send_stop() { }
> +static inline void stop_other_cpus() { }
You shouldn't change smp.h.
For the moment stop_other_cpus() is x86 specific so we shouldn't
impose it in other architectures.
Also note removing the explicit void is a bad idea in C because that is
equivalent to saying: smp_send_stop(...) { } Which allows any set
of parameters you can think of.
Eric
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 21:17 ` Eric W. Biederman
@ 2010-10-11 21:37 ` Alok Kataria
2010-10-21 21:40 ` [tip:x86/urgent] x86, kexec: Make sure to stop all CPUs before exiting the kernel tip-bot for Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-11 21:37 UTC (permalink / raw)
To: Eric W. Biederman
Cc: kexec@lists.infradead.org, Vivek Goyal, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht,
jeremy@xensource.com
On Mon, 2010-10-11 at 14:17 -0700, Eric W. Biederman wrote:
> Alok Kataria <akataria@vmware.com> writes:
<snip>
...
> > Index: linux-2.6/include/linux/smp.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/smp.h 2010-10-11 11:56:40.000000000 -0700
> > +++ linux-2.6/include/linux/smp.h 2010-10-11 12:07:19.000000000 -0700
> > @@ -116,7 +116,8 @@ extern unsigned int setup_max_cpus;
> >
> > #else /* !SMP */
> >
> > -static inline void smp_send_stop(void) { }
> > +static inline void smp_send_stop() { }
> > +static inline void stop_other_cpus() { }
>
> You shouldn't change smp.h.
>
> For the moment stop_other_cpus() is x86 specific so we shouldn't
> impose it in other architectures.
>
> Also note removing the explicit void is a bad idea in C because that is
> equivalent to saying: smp_send_stop(...) { } Which allows any set
> of parameters you can think of.
>
You are right, that hunk was leftover from an earlier change. Thanks for
the eagle eyes. Below is the refreshed patch.
Alok
--
x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
this allows the caller to specify if it wants to stop untill all the cpus
have processed the stop IPI. This is required specifically for the kexec case
where we should wait for all the cpus to be stopped before starting the new
kernel.
We now wait for the cpus to stop in all cases except for panic/kdump where
we expect things to be broken and we are doing our best to make things
work anyway.
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Index: linux-2.6/arch/x86/include/asm/smp.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/smp.h 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/include/asm/smp.h 2010-10-11 12:00:22.000000000 -0700
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
Index: linux-2.6/arch/x86/kernel/reboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/reboot.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/kernel/reboot.c 2010-10-11 12:05:02.000000000 -0700
@@ -641,7 +641,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
Index: linux-2.6/arch/x86/kernel/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smp.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/kernel/smp.c 2010-10-11 12:16:42.000000000 -0700
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
Index: linux-2.6/arch/x86/xen/enlighten.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/enlighten.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/xen/enlighten.c 2010-10-11 12:17:45.000000000 -0700
@@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
struct sched_shutdown r = { .reason = reason };
#ifdef CONFIG_SMP
- smp_send_stop();
+ stop_other_cpus();
#endif
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
Index: linux-2.6/arch/x86/xen/smp.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/smp.c 2010-10-11 11:56:40.000000000 -0700
+++ linux-2.6/arch/x86/xen/smp.c 2010-10-11 11:58:53.000000000 -0700
@@ -400,9 +400,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 19:41 ` Alok Kataria
2010-10-11 21:17 ` Eric W. Biederman
@ 2010-10-11 21:39 ` Vivek Goyal
2010-10-11 21:47 ` Alok Kataria
2010-10-11 22:10 ` Eric W. Biederman
1 sibling, 2 replies; 16+ messages in thread
From: Vivek Goyal @ 2010-10-11 21:39 UTC (permalink / raw)
To: Alok Kataria
Cc: Eric W. Biederman, kexec@lists.infradead.org, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht, jeremy
On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
> Hi Eric,
>
> Thanks for taking a look.
>
> On Mon, 2010-10-11 at 11:07 -0700, Eric W. Biederman wrote:
> > Alok Kataria <akataria@vmware.com> writes:
> >
> > > Copying a few more maintainers on the thread...
> > > On Fri, 2010-10-08 at 13:34 -0700, Alok Kataria wrote:
> > >> Before starting the new kernel kexec calls machine_shutdown to stop all
> > >> the cpus, which internally calls native_smp_send_stop. AFAIU, kexec
> > >> expects that all the cpus are now halted after that call returns.
> > >> Now, looking at the code for native_smp_send_stop, it assumes that all
> > >> the processors have processed the REBOOT ipi in 1 second after the IPI
> > >> was sent.
> > >> native_smp_send_stop()
> > >> ---------------------------------------------------------
> > >> apic->send_IPI_allbutself(REBOOT_VECTOR);
> > >>
> > >> /* Don't wait longer than a second */
> > >> wait = USEC_PER_SEC;
> > >> while (num_online_cpus() > 1 && wait--)
> > >> udelay(1);
> > >> ---------------------------------------------------------
> > >>
> > >> It just returns after that 1 second irrespective of whether all cpus
> > >> were halted or not. This brings up a issue in the kexec case, since we
> > >> can have the BSP starting the new kernel and AP's still processing the
> > >> REBOOT IPI simultaneously.
> > >>
> > >> Many distribution kernels use kexec to load the newly installed kernel
> > >> during the installation phase, in virtualized environment with the host
> > >> heavily overcommitted, we have seen some instances when vcpu fails to
> > >> process the IPI in the allotted 1 sec and as a result the AP's end up
> > >> accessing uninitialized state (the BSP has already gone ahead with
> > >> setting up the new state) and causing GPF's.
> > >>
> > >> IMO, kexec expects machine_shutdown to return only after all cpus are
> > >> stopped.
> > >>
> > >> The patch below should fix the issue, comments ??
> > >>
> > >> --
> > >> machine_shutdown now takes a parameter "wait", if it is true, it waits
> > >> until all the cpus are halted. All the callers except kexec still
> > >> fallback to the earlier version of the shutdown call, where it just
> > >> waited for max 1 sec before returning.
> >
> > The approach seems reasonable. However on every path except for the
> > panic path I would wait for all of the cpus to stop, as that is what
> > those code paths are expecting. Until last year smp_send_stop always
> > waited until all of the cpus stopped. So I think changing
> > machine_shutdown should not need to happen.
> >
> > This patch cannot be used as is because it changes the parameters to
> > smp_send_stop() and there are more than just x86 implementations out
> > there.
> >
> > Let me propose a slightly different tactic. We need to separate
> > the panic path from the normal path to avoid confusion. At the
> > generic level smp_send_stop is exclusively used for the panic
> > path. So we should keep that, and rename the x86 non-panic path
> > function something else.
> >
> > Can you rename the x86 smp_send_stop function say stop_all_other_cpus().
> >
> > At which point we could implement smp_send_stop as:
> > stop_all_other_cpus(0);
> >
> > And everywhere else would call stop_all_other_cpus(1) waiting for
> > the cpus to actually stop.
>
> Done, I have added a stop_other_cpus function which calls
> smp_ops.stop_other_cpus(1)
>
> >
> > I really think we want to wait for the cpus to stop in all cases except
> > for panic/kdump where we expect things to be broken and we are doing
> > our best to make things work anyway.
> Now it does, except in the panic case.
>
> Jeremy, I have modified the xen_reboot bits too so that it now waits
> until all cpus are actually stopped, please take a look.
>
> --
>
> x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
> this allows the caller to specify if it wants to stop untill all the cpus
> have processed the stop IPI. This is required specifically for the kexec case
> where we should wait for all the cpus to be stopped before starting the new
> kernel.
> We now wait for the cpus to stop in all cases except for panic/kdump where
> we expect things to be broken and we are doing our best to make things
> work anyway.
I don't think that kdump path uses smp_send_stop().
IIUC, on x86, we directly send NMI to other cpus.
native_machine_crash_shutdown()
kdump_nmi_shootdown_cpus()
nmi_shootdown_cpus()
smp_send_nmi_allbutself
apic->send_IPI_allbutself(NMI_VECTOR);
So above description should be limited to only panic() path.
On a side note, I am wondering why panic() and kdump path can't share the
shutdown routine.
Vivek
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 21:39 ` [RFC PATCH] Bug during kexec...not all cpus are stopped Vivek Goyal
@ 2010-10-11 21:47 ` Alok Kataria
2010-10-11 22:10 ` Eric W. Biederman
1 sibling, 0 replies; 16+ messages in thread
From: Alok Kataria @ 2010-10-11 21:47 UTC (permalink / raw)
To: Vivek Goyal
Cc: Eric W. Biederman, kexec@lists.infradead.org, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht,
jeremy@xensource.com
On Mon, 2010-10-11 at 14:39 -0700, Vivek Goyal wrote:
> On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
> > --
> >
> > x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
> > this allows the caller to specify if it wants to stop untill all the cpus
> > have processed the stop IPI. This is required specifically for the kexec case
> > where we should wait for all the cpus to be stopped before starting the new
> > kernel.
>
> > We now wait for the cpus to stop in all cases except for panic/kdump where
> > we expect things to be broken and we are doing our best to make things
> > work anyway.
>
> I don't think that kdump path uses smp_send_stop().
Right, this should be removed from the patch description.
Belows is the new patch description
----------------------------------------------
x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
this allows the caller to specify if it wants to stop untill all the cpus
have processed the stop IPI. This is required specifically for the kexec case
where we should wait for all the cpus to be stopped before starting the new
kernel.
We now wait for the cpus to stop in all cases except for panic where
we expect things to be broken and we are doing our best to make things
work anyway.
----------------------------------------------
>
> IIUC, on x86, we directly send NMI to other cpus.
>
> native_machine_crash_shutdown()
> kdump_nmi_shootdown_cpus()
> nmi_shootdown_cpus()
> smp_send_nmi_allbutself
> apic->send_IPI_allbutself(NMI_VECTOR);
>
> So above description should be limited to only panic() path.
>
> On a side note, I am wondering why panic() and kdump path can't share the
> shutdown routine.
The comment in native_stop_other_cpus (native_smp_send_stop earlier)
explains why
/*
* Use an own vector here because smp_call_function
* does lots of things not suitable in a panic situation.
* On most systems we could also use an NMI here,
* but there are a few systems around where NMI
* is problematic so stay with an non NMI for now
* (this implies we cannot stop CPUs spinning with irq off
* currently)
*/
Thanks,
Alok
>
> Vivek
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 21:39 ` [RFC PATCH] Bug during kexec...not all cpus are stopped Vivek Goyal
2010-10-11 21:47 ` Alok Kataria
@ 2010-10-11 22:10 ` Eric W. Biederman
2010-10-12 22:17 ` Vivek Goyal
1 sibling, 1 reply; 16+ messages in thread
From: Eric W. Biederman @ 2010-10-11 22:10 UTC (permalink / raw)
To: Vivek Goyal
Cc: Alok Kataria, kexec@lists.infradead.org, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht, jeremy
Vivek Goyal <vgoyal@redhat.com> writes:
> On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
> I don't think that kdump path uses smp_send_stop().
It doesn't.
> IIUC, on x86, we directly send NMI to other cpus.
>
> native_machine_crash_shutdown()
> kdump_nmi_shootdown_cpus()
> nmi_shootdown_cpus()
> smp_send_nmi_allbutself
> apic->send_IPI_allbutself(NMI_VECTOR);
>
> So above description should be limited to only panic() path.
Is it actually confusing? With respect to documenting the line
of thinking it seems reasonable.
> On a side note, I am wondering why panic() and kdump path can't share the
> shutdown routine.
Hysterical raisins. Andi's change to smp_send_stop says that NMIs not
working on some boxes. When someone wants to weed through all of the
insanity it would probably be good to get the panic and the kdump paths
sharing code. For now simply separating panic and reboot should be
enough, and it lets the code evolve where it needs to.
Eric
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-11 22:10 ` Eric W. Biederman
@ 2010-10-12 22:17 ` Vivek Goyal
2010-10-13 0:23 ` Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: Vivek Goyal @ 2010-10-12 22:17 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Alok Kataria, kexec@lists.infradead.org, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht, jeremy
On Mon, Oct 11, 2010 at 03:10:11PM -0700, Eric W. Biederman wrote:
> Vivek Goyal <vgoyal@redhat.com> writes:
>
> > On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
>
> > I don't think that kdump path uses smp_send_stop().
>
> It doesn't.
>
> > IIUC, on x86, we directly send NMI to other cpus.
> >
> > native_machine_crash_shutdown()
> > kdump_nmi_shootdown_cpus()
> > nmi_shootdown_cpus()
> > smp_send_nmi_allbutself
> > apic->send_IPI_allbutself(NMI_VECTOR);
> >
> > So above description should be limited to only panic() path.
>
> Is it actually confusing? With respect to documenting the line
> of thinking it seems reasonable.
>
No, just wanted to point out that let us modify the changelog to remove
keyword "kdump" from it.
> > On a side note, I am wondering why panic() and kdump path can't share the
> > shutdown routine.
>
> Hysterical raisins. Andi's change to smp_send_stop says that NMIs not
> working on some boxes. When someone wants to weed through all of the
> insanity it would probably be good to get the panic and the kdump paths
> sharing code. For now simply separating panic and reboot should be
> enough, and it lets the code evolve where it needs to.
>
Ok. Agreed that atleast conceptually kdump and panic() path should share
the code. But that's a different problem altogether and this patch can go in.
Thanks
Vivek
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-12 22:17 ` Vivek Goyal
@ 2010-10-13 0:23 ` Alok Kataria
2010-10-21 19:09 ` Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-13 0:23 UTC (permalink / raw)
To: Vivek Goyal
Cc: Eric W. Biederman, kexec@lists.infradead.org, Haren Myneni,
the arch/x86 maintainers, LKML, Daniel Hecht,
jeremy@xensource.com
On Tue, 2010-10-12 at 15:17 -0700, Vivek Goyal wrote:
> On Mon, Oct 11, 2010 at 03:10:11PM -0700, Eric W. Biederman wrote:
> > Vivek Goyal <vgoyal@redhat.com> writes:
> >
> > > On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
> >
> > > I don't think that kdump path uses smp_send_stop().
> >
> > It doesn't.
> >
> > > IIUC, on x86, we directly send NMI to other cpus.
> > >
> > > native_machine_crash_shutdown()
> > > kdump_nmi_shootdown_cpus()
> > > nmi_shootdown_cpus()
> > > smp_send_nmi_allbutself
> > > apic->send_IPI_allbutself(NMI_VECTOR);
> > >
> > > So above description should be limited to only panic() path.
> >
> > Is it actually confusing? With respect to documenting the line
> > of thinking it seems reasonable.
> >
>
> No, just wanted to point out that let us modify the changelog to remove
> keyword "kdump" from it.
>
> > > On a side note, I am wondering why panic() and kdump path can't share the
> > > shutdown routine.
> >
> > Hysterical raisins. Andi's change to smp_send_stop says that NMIs not
> > working on some boxes. When someone wants to weed through all of the
> > insanity it would probably be good to get the panic and the kdump paths
> > sharing code. For now simply separating panic and reboot should be
> > enough, and it lets the code evolve where it needs to.
> >
>
> Ok. Agreed that atleast conceptually kdump and panic() path should share
> the code. But that's a different problem altogether and this patch can go in.
Okay now that we all agree, let me repost a patch with the updated
changelog, this fits on top of tip/master.
--
x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
this allows the caller to specify if it wants to stop until all the cpus
have processed the stop IPI. This is required specifically for the kexec case
where we should wait for all the cpus to be stopped before starting the new
kernel.
We now wait for the cpus to stop in all cases except for panic where we expect
things to be broken and we are doing our best to make things work anyway.
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Index: linux-x86-tree.git/arch/x86/include/asm/smp.h
===================================================================
--- linux-x86-tree.git.orig/arch/x86/include/asm/smp.h 2010-02-07 16:37:26.000000000 -0800
+++ linux-x86-tree.git/arch/x86/include/asm/smp.h 2010-10-12 16:37:04.000000000 -0700
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
Index: linux-x86-tree.git/arch/x86/kernel/reboot.c
===================================================================
--- linux-x86-tree.git.orig/arch/x86/kernel/reboot.c 2010-08-17 12:09:51.000000000 -0700
+++ linux-x86-tree.git/arch/x86/kernel/reboot.c 2010-10-12 16:37:04.000000000 -0700
@@ -641,7 +641,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
Index: linux-x86-tree.git/arch/x86/kernel/smp.c
===================================================================
--- linux-x86-tree.git.orig/arch/x86/kernel/smp.c 2010-07-08 13:53:34.000000000 -0700
+++ linux-x86-tree.git/arch/x86/kernel/smp.c 2010-10-12 16:37:04.000000000 -0700
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
Index: linux-x86-tree.git/arch/x86/xen/enlighten.c
===================================================================
--- linux-x86-tree.git.orig/arch/x86/xen/enlighten.c 2010-10-12 16:36:28.000000000 -0700
+++ linux-x86-tree.git/arch/x86/xen/enlighten.c 2010-10-12 16:37:04.000000000 -0700
@@ -1019,7 +1019,7 @@ static void xen_reboot(int reason)
struct sched_shutdown r = { .reason = reason };
#ifdef CONFIG_SMP
- smp_send_stop();
+ stop_other_cpus();
#endif
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
Index: linux-x86-tree.git/arch/x86/xen/smp.c
===================================================================
--- linux-x86-tree.git.orig/arch/x86/xen/smp.c 2010-08-17 12:09:51.000000000 -0700
+++ linux-x86-tree.git/arch/x86/xen/smp.c 2010-10-12 16:37:04.000000000 -0700
@@ -400,9 +400,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-13 0:23 ` Alok Kataria
@ 2010-10-21 19:09 ` Alok Kataria
2010-10-21 20:26 ` H. Peter Anvin
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-21 19:09 UTC (permalink / raw)
To: Ingo Molnar, H. Peter Anvin
Cc: Vivek Goyal, Eric W. Biederman, kexec@lists.infradead.org,
Haren Myneni, the arch/x86 maintainers, LKML, Daniel Hecht,
jeremy@xensource.com
On Tue, 2010-10-12 at 17:23 -0700, Alok Kataria wrote:
> On Tue, 2010-10-12 at 15:17 -0700, Vivek Goyal wrote:
> > On Mon, Oct 11, 2010 at 03:10:11PM -0700, Eric W. Biederman wrote:
> > > Vivek Goyal <vgoyal@redhat.com> writes:
> > >
> > > > On Mon, Oct 11, 2010 at 12:41:23PM -0700, Alok Kataria wrote:
> > >
> > > > I don't think that kdump path uses smp_send_stop().
> > >
> > > It doesn't.
> > >
> > > > IIUC, on x86, we directly send NMI to other cpus.
> > > >
> > > > native_machine_crash_shutdown()
> > > > kdump_nmi_shootdown_cpus()
> > > > nmi_shootdown_cpus()
> > > > smp_send_nmi_allbutself
> > > > apic->send_IPI_allbutself(NMI_VECTOR);
> > > >
> > > > So above description should be limited to only panic() path.
> > >
> > > Is it actually confusing? With respect to documenting the line
> > > of thinking it seems reasonable.
> > >
> >
> > No, just wanted to point out that let us modify the changelog to remove
> > keyword "kdump" from it.
> >
> > > > On a side note, I am wondering why panic() and kdump path can't share the
> > > > shutdown routine.
> > >
> > > Hysterical raisins. Andi's change to smp_send_stop says that NMIs not
> > > working on some boxes. When someone wants to weed through all of the
> > > insanity it would probably be good to get the panic and the kdump paths
> > > sharing code. For now simply separating panic and reboot should be
> > > enough, and it lets the code evolve where it needs to.
> > >
> >
> > Ok. Agreed that atleast conceptually kdump and panic() path should share
> > the code. But that's a different problem altogether and this patch can go in.
>
> Okay now that we all agree, let me repost a patch with the updated
> changelog, this fits on top of tip/master.
Hi Ingo, HPA
I don't think this patch was picked up for tip, now that the 2.6.37
merge window is open can you please pick this up push it upstream.
This patch fixes a legitimate regression, which was introduced during
2.6.30, by commit id 4ef702c10b5df18ab04921fc252c26421d4d6c75.
Thanks,
Alok
>
> --
>
> x86 smp_ops now has a new op, stop_other_cpus which takes a parameter "wait"
> this allows the caller to specify if it wants to stop until all the cpus
> have processed the stop IPI. This is required specifically for the kexec case
> where we should wait for all the cpus to be stopped before starting the new
> kernel.
> We now wait for the cpus to stop in all cases except for panic where we expect
> things to be broken and we are doing our best to make things work anyway.
>
>
> Signed-off-by: Alok N Kataria <akataria@vmware.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
>
> Index: linux-x86-tree.git/arch/x86/include/asm/smp.h
> ===================================================================
> --- linux-x86-tree.git.orig/arch/x86/include/asm/smp.h 2010-02-07 16:37:26.000000000 -0800
> +++ linux-x86-tree.git/arch/x86/include/asm/smp.h 2010-10-12 16:37:04.000000000 -0700
> @@ -50,7 +50,7 @@ struct smp_ops {
> void (*smp_prepare_cpus)(unsigned max_cpus);
> void (*smp_cpus_done)(unsigned max_cpus);
>
> - void (*smp_send_stop)(void);
> + void (*stop_other_cpus)(int wait);
> void (*smp_send_reschedule)(int cpu);
>
> int (*cpu_up)(unsigned cpu);
> @@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
>
> static inline void smp_send_stop(void)
> {
> - smp_ops.smp_send_stop();
> + smp_ops.stop_other_cpus(0);
> +}
> +
> +static inline void stop_other_cpus(void)
> +{
> + smp_ops.stop_other_cpus(1);
> }
>
> static inline void smp_prepare_boot_cpu(void)
> Index: linux-x86-tree.git/arch/x86/kernel/reboot.c
> ===================================================================
> --- linux-x86-tree.git.orig/arch/x86/kernel/reboot.c 2010-08-17 12:09:51.000000000 -0700
> +++ linux-x86-tree.git/arch/x86/kernel/reboot.c 2010-10-12 16:37:04.000000000 -0700
> @@ -641,7 +641,7 @@ void native_machine_shutdown(void)
> /* O.K Now that I'm on the appropriate processor,
> * stop all of the others.
> */
> - smp_send_stop();
> + stop_other_cpus();
> #endif
>
> lapic_shutdown();
> Index: linux-x86-tree.git/arch/x86/kernel/smp.c
> ===================================================================
> --- linux-x86-tree.git.orig/arch/x86/kernel/smp.c 2010-07-08 13:53:34.000000000 -0700
> +++ linux-x86-tree.git/arch/x86/kernel/smp.c 2010-10-12 16:37:04.000000000 -0700
> @@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(voi
> irq_exit();
> }
>
> -static void native_smp_send_stop(void)
> +static void native_stop_other_cpus(int wait)
> {
> unsigned long flags;
> - unsigned long wait;
> + unsigned long timeout;
>
> if (reboot_force)
> return;
> @@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
> if (num_online_cpus() > 1) {
> apic->send_IPI_allbutself(REBOOT_VECTOR);
>
> - /* Don't wait longer than a second */
> - wait = USEC_PER_SEC;
> - while (num_online_cpus() > 1 && wait--)
> + /*
> + * Don't wait longer than a second if the caller
> + * didn't ask us to wait.
> + */
> + timeout = USEC_PER_SEC;
> + while (num_online_cpus() > 1 && (wait || timeout--))
> udelay(1);
> }
>
> @@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
> .smp_prepare_cpus = native_smp_prepare_cpus,
> .smp_cpus_done = native_smp_cpus_done,
>
> - .smp_send_stop = native_smp_send_stop,
> + .stop_other_cpus = native_stop_other_cpus,
> .smp_send_reschedule = native_smp_send_reschedule,
>
> .cpu_up = native_cpu_up,
> Index: linux-x86-tree.git/arch/x86/xen/enlighten.c
> ===================================================================
> --- linux-x86-tree.git.orig/arch/x86/xen/enlighten.c 2010-10-12 16:36:28.000000000 -0700
> +++ linux-x86-tree.git/arch/x86/xen/enlighten.c 2010-10-12 16:37:04.000000000 -0700
> @@ -1019,7 +1019,7 @@ static void xen_reboot(int reason)
> struct sched_shutdown r = { .reason = reason };
>
> #ifdef CONFIG_SMP
> - smp_send_stop();
> + stop_other_cpus();
> #endif
>
> if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
> Index: linux-x86-tree.git/arch/x86/xen/smp.c
> ===================================================================
> --- linux-x86-tree.git.orig/arch/x86/xen/smp.c 2010-08-17 12:09:51.000000000 -0700
> +++ linux-x86-tree.git/arch/x86/xen/smp.c 2010-10-12 16:37:04.000000000 -0700
> @@ -400,9 +400,9 @@ static void stop_self(void *v)
> BUG();
> }
>
> -static void xen_smp_send_stop(void)
> +static void xen_stop_other_cpus(int wait)
> {
> - smp_call_function(stop_self, NULL, 0);
> + smp_call_function(stop_self, NULL, wait);
> }
>
> static void xen_smp_send_reschedule(int cpu)
> @@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops
> .cpu_disable = xen_cpu_disable,
> .play_dead = xen_play_dead,
>
> - .smp_send_stop = xen_smp_send_stop,
> + .stop_other_cpus = xen_stop_other_cpus,
> .smp_send_reschedule = xen_smp_send_reschedule,
>
> .send_call_func_ipi = xen_smp_send_call_function_ipi,
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-21 19:09 ` Alok Kataria
@ 2010-10-21 20:26 ` H. Peter Anvin
2010-10-21 21:10 ` Alok Kataria
0 siblings, 1 reply; 16+ messages in thread
From: H. Peter Anvin @ 2010-10-21 20:26 UTC (permalink / raw)
To: akataria
Cc: Ingo Molnar, Vivek Goyal, Eric W. Biederman,
kexec@lists.infradead.org, Haren Myneni, the arch/x86 maintainers,
LKML, Daniel Hecht, jeremy@xensource.com
On 10/21/2010 12:09 PM, Alok Kataria wrote:
>
> I don't think this patch was picked up for tip, now that the 2.6.37
> merge window is open can you please pick this up push it upstream.
> This patch fixes a legitimate regression, which was introduced during
> 2.6.30, by commit id 4ef702c10b5df18ab04921fc252c26421d4d6c75.
>
It probably would have helped if the patch had had a proper patch header
and so on, and *in particular* not buried in a tree with [RFC PATCH].
RFC strongly implies that the patch is intended as a base for
discussion, and is explicitly not intended to be committed.
-hpa
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-21 20:26 ` H. Peter Anvin
@ 2010-10-21 21:10 ` Alok Kataria
2010-10-21 21:24 ` H. Peter Anvin
0 siblings, 1 reply; 16+ messages in thread
From: Alok Kataria @ 2010-10-21 21:10 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Ingo Molnar, Vivek Goyal, Eric W. Biederman,
kexec@lists.infradead.org, Haren Myneni, the arch/x86 maintainers,
LKML, Daniel Hecht, jeremy@xensource.com
On Thu, 2010-10-21 at 13:26 -0700, H. Peter Anvin wrote:
> On 10/21/2010 12:09 PM, Alok Kataria wrote:
> >
> > I don't think this patch was picked up for tip, now that the 2.6.37
> > merge window is open can you please pick this up push it upstream.
> > This patch fixes a legitimate regression, which was introduced during
> > 2.6.30, by commit id 4ef702c10b5df18ab04921fc252c26421d4d6c75.
> >
>
> It probably would have helped if the patch had had a proper patch header
> and so on, and *in particular* not buried in a tree with [RFC PATCH].
> RFC strongly implies that the patch is intended as a base for
> discussion, and is explicitly not intended to be committed.
I see, I have sent another mail with the patch.
Thanks,
Alok
>
> -hpa
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC PATCH] Bug during kexec...not all cpus are stopped
2010-10-21 21:10 ` Alok Kataria
@ 2010-10-21 21:24 ` H. Peter Anvin
0 siblings, 0 replies; 16+ messages in thread
From: H. Peter Anvin @ 2010-10-21 21:24 UTC (permalink / raw)
To: akataria
Cc: Ingo Molnar, Vivek Goyal, Eric W. Biederman,
kexec@lists.infradead.org, Haren Myneni, the arch/x86 maintainers,
LKML, Daniel Hecht, jeremy@xensource.com
On 10/21/2010 02:10 PM, Alok Kataria wrote:
>
> On Thu, 2010-10-21 at 13:26 -0700, H. Peter Anvin wrote:
>> On 10/21/2010 12:09 PM, Alok Kataria wrote:
>>>
>>> I don't think this patch was picked up for tip, now that the 2.6.37
>>> merge window is open can you please pick this up push it upstream.
>>> This patch fixes a legitimate regression, which was introduced during
>>> 2.6.30, by commit id 4ef702c10b5df18ab04921fc252c26421d4d6c75.
>>>
>>
>> It probably would have helped if the patch had had a proper patch header
>> and so on, and *in particular* not buried in a tree with [RFC PATCH].
>> RFC strongly implies that the patch is intended as a base for
>> discussion, and is explicitly not intended to be committed.
>
> I see, I have sent another mail with the patch.
>
I already took the patch and am running it through compile tests, but
that was mostly for future reference.
-hpa
^ permalink raw reply [flat|nested] 16+ messages in thread
* [tip:x86/urgent] x86, kexec: Make sure to stop all CPUs before exiting the kernel
2010-10-11 21:37 ` Alok Kataria
@ 2010-10-21 21:40 ` tip-bot for Alok Kataria
0 siblings, 0 replies; 16+ messages in thread
From: tip-bot for Alok Kataria @ 2010-10-21 21:40 UTC (permalink / raw)
To: linux-tip-commits
Cc: jeremy, linux-kernel, hpa, mingo, akataria, tglx, hpa, ebiederm
Commit-ID: 76fac077db6b34e2c6383a7b4f3f4f7b7d06d8ce
Gitweb: http://git.kernel.org/tip/76fac077db6b34e2c6383a7b4f3f4f7b7d06d8ce
Author: Alok Kataria <akataria@vmware.com>
AuthorDate: Mon, 11 Oct 2010 14:37:08 -0700
Committer: H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Thu, 21 Oct 2010 13:30:44 -0700
x86, kexec: Make sure to stop all CPUs before exiting the kernel
x86 smp_ops now has a new op, stop_other_cpus which takes a parameter
"wait" this allows the caller to specify if it wants to stop until all
the cpus have processed the stop IPI. This is required specifically
for the kexec case where we should wait for all the cpus to be stopped
before starting the new kernel. We now wait for the cpus to stop in
all cases except for panic/kdump where we expect things to be broken
and we are doing our best to make things work anyway.
This patch fixes a legitimate regression, which was introduced during
2.6.30, by commit id 4ef702c10b5df18ab04921fc252c26421d4d6c75.
Signed-off-by: Alok N Kataria <akataria@vmware.com>
LKML-Reference: <1286833028.1372.20.camel@ank32.eng.vmware.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: <stable@kernel.org> v2.6.30-36
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/include/asm/smp.h | 9 +++++++--
arch/x86/kernel/reboot.c | 2 +-
arch/x86/kernel/smp.c | 15 +++++++++------
arch/x86/xen/enlighten.c | 2 +-
arch/x86/xen/smp.c | 6 +++---
5 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4cfc908..4c2f63c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -50,7 +50,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -73,7 +73,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342..76a0d71 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -641,7 +641,7 @@ void native_machine_shutdown(void)
/* O.K Now that I'm on the appropriate processor,
* stop all of the others.
*/
- smp_send_stop();
+ stop_other_cpus();
#endif
lapic_shutdown();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210..513deac 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,10 +159,10 @@ asmlinkage void smp_reboot_interrupt(void)
irq_exit();
}
-static void native_smp_send_stop(void)
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
if (reboot_force)
return;
@@ -179,9 +179,12 @@ static void native_smp_send_stop(void)
if (num_online_cpus() > 1) {
apic->send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
@@ -227,7 +230,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .smp_send_stop = native_smp_send_stop,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7d46c84..44f8086 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1018,7 +1018,7 @@ static void xen_reboot(int reason)
struct sched_shutdown r = { .reason = reason };
#ifdef CONFIG_SMP
- smp_send_stop();
+ stop_other_cpus();
#endif
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 25f232b..f4d0100 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -400,9 +400,9 @@ static void stop_self(void *v)
BUG();
}
-static void xen_smp_send_stop(void)
+static void xen_stop_other_cpus(int wait)
{
- smp_call_function(stop_self, NULL, 0);
+ smp_call_function(stop_self, NULL, wait);
}
static void xen_smp_send_reschedule(int cpu)
@@ -470,7 +470,7 @@ static const struct smp_ops xen_smp_ops __initdata = {
.cpu_disable = xen_cpu_disable,
.play_dead = xen_play_dead,
- .smp_send_stop = xen_smp_send_stop,
+ .stop_other_cpus = xen_stop_other_cpus,
.smp_send_reschedule = xen_smp_send_reschedule,
.send_call_func_ipi = xen_smp_send_call_function_ipi,
^ permalink raw reply related [flat|nested] 16+ messages in thread
end of thread, other threads:[~2010-10-21 21:40 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-08 20:34 [RFC PATCH] Bug during kexec...not all cpus are stopped Alok Kataria
2010-10-11 17:09 ` Alok Kataria
2010-10-11 18:07 ` Eric W. Biederman
2010-10-11 19:41 ` Alok Kataria
2010-10-11 21:17 ` Eric W. Biederman
2010-10-11 21:37 ` Alok Kataria
2010-10-21 21:40 ` [tip:x86/urgent] x86, kexec: Make sure to stop all CPUs before exiting the kernel tip-bot for Alok Kataria
2010-10-11 21:39 ` [RFC PATCH] Bug during kexec...not all cpus are stopped Vivek Goyal
2010-10-11 21:47 ` Alok Kataria
2010-10-11 22:10 ` Eric W. Biederman
2010-10-12 22:17 ` Vivek Goyal
2010-10-13 0:23 ` Alok Kataria
2010-10-21 19:09 ` Alok Kataria
2010-10-21 20:26 ` H. Peter Anvin
2010-10-21 21:10 ` Alok Kataria
2010-10-21 21:24 ` H. Peter Anvin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox