public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH -mm][resend] Disable CPU hotplug during suspend
@ 2006-07-28  8:15 Rafael J. Wysocki
  2006-07-28 18:20 ` Nathan Lynch
  2006-07-29  5:15 ` Andrew Morton
  0 siblings, 2 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-28  8:15 UTC (permalink / raw)
  To: Andrew Morton; +Cc: LKML, Pavel Machek

The current suspend code has to be run on one CPU, so we use the CPU
hotplug to take the non-boot CPUs offline on SMP machines.  However, we
should also make sure that these CPUs will not be enabled by someone
else after we have disabled them.

The functions disable_nonboot_cpus() and enable_nonboot_cpus() are
moved to kernel/cpu.c, because they now refer to some stuff in there that
should better be static.  Also it's better if disable_nonboot_cpus() returns
an error instead of panicking if something goes wrong, and
enable_nonboot_cpus() has no reason to panic(), because the CPUs may have
been enabled by the userland before it tries to take them online.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 include/linux/cpu.h     |    8 +++
 include/linux/suspend.h |    8 ---
 kernel/cpu.c            |  104 ++++++++++++++++++++++++++++++++++++++++++------
 kernel/power/Makefile   |    2 
 kernel/power/disk.c     |    7 ++-
 kernel/power/main.c     |   10 +---
 kernel/power/smp.c      |   62 ----------------------------
 kernel/power/user.c     |   14 ++++--
 8 files changed, 118 insertions(+), 97 deletions(-)

Index: linux-2.6.18-rc2-mm1/include/linux/suspend.h
===================================================================
--- linux-2.6.18-rc2-mm1.orig/include/linux/suspend.h	2006-07-28 08:34:46.000000000 +0200
+++ linux-2.6.18-rc2-mm1/include/linux/suspend.h	2006-07-28 08:37:11.000000000 +0200
@@ -57,14 +57,6 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
Index: linux-2.6.18-rc2-mm1/kernel/power/smp.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/smp.c	2006-06-18 03:49:35.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	error = 0;
-	cpus_clear(frozen_cpus);
-	printk("Freezing cpus ...\n");
-	for_each_online_cpu(cpu) {
-		if (cpu == 0)
-			continue;
-		error = cpu_down(cpu);
-		if (!error) {
-			cpu_set(cpu, frozen_cpus);
-			printk("CPU%d is down\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d down: %d\n", cpu, error);
-	}
-	BUG_ON(raw_smp_processor_id() != 0);
-	if (error)
-		panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	printk("Thawing cpus ...\n");
-	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
-		if (!error) {
-			printk("CPU%d is up\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d up: %d\n", cpu, error);
-		panic("Not enough cpus");
-	}
-	cpus_clear(frozen_cpus);
-}
-
Index: linux-2.6.18-rc2-mm1/kernel/power/disk.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/disk.c	2006-07-28 08:36:24.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/disk.c	2006-07-28 10:17:02.000000000 +0200
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -72,7 +73,10 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	disable_nonboot_cpus();
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto enable_cpus;
 
 	if (freeze_processes()) {
 		error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
 		return 0;
 thaw:
 	thaw_processes();
+enable_cpus:
 	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
Index: linux-2.6.18-rc2-mm1/kernel/power/main.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/main.c	2006-07-28 08:36:24.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/main.c	2006-07-28 08:37:11.000000000 +0200
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/console.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -51,7 +52,7 @@ void pm_set_ops(struct pm_ops * ops)
 
 static int suspend_prepare(suspend_state_t state)
 {
-	int error = 0;
+	int error;
 	unsigned int free_pages;
 
 	if (!pm_ops || !pm_ops->enter)
@@ -63,12 +64,9 @@ static int suspend_prepare(suspend_state
 
 	pm_prepare_console();
 
-	disable_nonboot_cpus();
-
-	if (num_online_cpus() != 1) {
-		error = -EPERM;
+	error = disable_nonboot_cpus();
+	if (error)
 		goto Enable_cpu;
-	}
 
 	if (freeze_processes()) {
 		error = -EAGAIN;
Index: linux-2.6.18-rc2-mm1/kernel/power/user.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/user.c	2006-07-28 08:36:24.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/user.c	2006-07-28 08:37:11.000000000 +0200
@@ -19,6 +19,7 @@
 #include <linux/swapops.h>
 #include <linux/pm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 
 #include <asm/uaccess.h>
 
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *
 		if (data->frozen)
 			break;
 		down(&pm_sem);
-		disable_nonboot_cpus();
-		if (freeze_processes()) {
-			thaw_processes();
-			enable_nonboot_cpus();
-			error = -EBUSY;
+		error = disable_nonboot_cpus();
+		if (!error) {
+			error = freeze_processes();
+			if (error) {
+				thaw_processes();
+				error = -EBUSY;
+			}
 		}
+		enable_nonboot_cpus();
 		up(&pm_sem);
 		if (!error)
 			data->frozen = 1;
Index: linux-2.6.18-rc2-mm1/include/linux/cpu.h
===================================================================
--- linux-2.6.18-rc2-mm1.orig/include/linux/cpu.h	2006-07-28 08:36:23.000000000 +0200
+++ linux-2.6.18-rc2-mm1/include/linux/cpu.h	2006-07-28 08:37:11.000000000 +0200
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
 #endif /* _LINUX_CPU_H_ */
Index: linux-2.6.18-rc2-mm1/kernel/cpu.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/cpu.c	2006-07-28 08:36:24.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/cpu.c	2006-07-28 08:51:17.000000000 +0200
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/* If set, cpu_up and cpu_down will return -EPERM and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
 	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __cpu_down(unsigned int cpu)
 {
 	int err;
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (num_online_cpus() == 1) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (num_online_cpus() == 1)
+		return -EBUSY;
 
-	if (!cpu_online(cpu)) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cpu_online(cpu))
+		return -EINVAL;
 
 	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
 				__FUNCTION__, cpu);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
@@ -179,7 +179,19 @@ out_thread:
 	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
-out:
+	return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EPERM;
+	else
+		err = __cpu_down(cpu);
+
 	mutex_unlock(&cpu_add_remove_lock);
 	return err;
 }
@@ -191,6 +203,11 @@ int __devinit cpu_up(unsigned int cpu)
 	void *hcpu = (void *)(long)cpu;
 
 	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled) {
+		ret = -EPERM;
+		goto out;
+	}
+
 	if (cpu_online(cpu) || !cpu_present(cpu)) {
 		ret = -EINVAL;
 		goto out;
@@ -223,3 +240,64 @@ out:
 	mutex_unlock(&cpu_add_remove_lock);
 	return ret;
 }
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+	int cpu, error = 0;
+
+	/* We take all of the non-boot CPUs down in one shot to avoid races
+	 * with the userspace trying to use the CPU hotplug at the same time
+	 */
+	mutex_lock(&cpu_add_remove_lock);
+	cpus_clear(frozen_cpus);
+	printk("Disabling non-boot CPUs ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == 0)
+			continue;
+		error = __cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+		} else {
+			printk(KERN_ERR "Error taking CPU%d down: %d\n",
+				cpu, error);
+			break;
+		}
+	}
+	if (!error) {
+		BUG_ON(num_online_cpus() > 1);
+		BUG_ON(raw_smp_processor_id() != 0);
+		/* Make sure the CPUs won't be enabled by someone else */
+		cpu_hotplug_disabled = 1;
+	} else {
+		printk(KERN_ERR "Non-boot CPUs are not disabled");
+	}
+	mutex_unlock(&cpu_add_remove_lock);
+	return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	/* Allow everyone to use the CPU hotplug again */
+	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_disabled = 0;
+	mutex_unlock(&cpu_add_remove_lock);
+
+	printk("Enabling non-boot CPUs ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+			cpu, error);
+	}
+	cpus_clear(frozen_cpus);
+}
+#endif
Index: linux-2.6.18-rc2-mm1/kernel/power/Makefile
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/Makefile	2006-06-18 03:49:35.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/Makefile	2006-07-28 08:37:11.000000000 +0200
@@ -7,6 +7,4 @@ obj-y				:= main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
-obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
-
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28  8:15 [PATCH -mm][resend] Disable CPU hotplug during suspend Rafael J. Wysocki
@ 2006-07-28 18:20 ` Nathan Lynch
  2006-07-28 19:15   ` Rafael J. Wysocki
  2006-07-29  5:15 ` Andrew Morton
  1 sibling, 1 reply; 10+ messages in thread
From: Nathan Lynch @ 2006-07-28 18:20 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, LKML, Pavel Machek

Hi Rafael-

A couple of minor comments:


> +int cpu_down(unsigned int cpu)
> +{
> +	int err = 0;
> +
> +	mutex_lock(&cpu_add_remove_lock);
> +	if (cpu_hotplug_disabled)
> +		err = -EPERM;
> +	else
> +		err = __cpu_down(cpu);
> +
>  	mutex_unlock(&cpu_add_remove_lock);
>  	return err;
>  }
> @@ -191,6 +203,11 @@ int __devinit cpu_up(unsigned int cpu)
>  	void *hcpu = (void *)(long)cpu;
>  
>  	mutex_lock(&cpu_add_remove_lock);
> +	if (cpu_hotplug_disabled) {
> +		ret = -EPERM;
> +		goto out;
> +	}

I think -EBUSY would be more appropriate than -EPERM, perhaps?


> +#ifdef CONFIG_SUSPEND_SMP
> +static cpumask_t frozen_cpus;
> +
> +int disable_nonboot_cpus(void)
> +{
> +	int cpu, error = 0;
> +
> +	/* We take all of the non-boot CPUs down in one shot to avoid races
> +	 * with the userspace trying to use the CPU hotplug at the same time
> +	 */
> +	mutex_lock(&cpu_add_remove_lock);
> +	cpus_clear(frozen_cpus);
> +	printk("Disabling non-boot CPUs ...\n");
> +	for_each_online_cpu(cpu) {
> +		if (cpu == 0)
> +			continue;

Assuming cpu 0 is online is not okay in generic code.  This should be
something like:

	int cpu, first_cpu, error = 0;

	/* We take all of the non-boot CPUs down in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	mutex_lock(&cpu_add_remove_lock);
	cpus_clear(frozen_cpus);
	first_cpu = first_cpu(cpu_online_mask);
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;


> +		error = __cpu_down(cpu);
> +		if (!error) {
> +			cpu_set(cpu, frozen_cpus);
> +			printk("CPU%d is down\n", cpu);
> +		} else {
> +			printk(KERN_ERR "Error taking CPU%d down: %d\n",
> +				cpu, error);
> +			break;
> +		}
> +	}
> +	if (!error) {
> +		BUG_ON(num_online_cpus() > 1);
> +		BUG_ON(raw_smp_processor_id() != 0);

Same problem here.

Otherwise, I think the patch looks okay.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28 18:20 ` Nathan Lynch
@ 2006-07-28 19:15   ` Rafael J. Wysocki
  2006-07-28 21:23     ` Rafael J. Wysocki
  2006-07-28 22:40     ` Nathan Lynch
  0 siblings, 2 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-28 19:15 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Andrew Morton, LKML, Pavel Machek

Hi Nathan,

On Friday 28 July 2006 20:20, Nathan Lynch wrote:
> Hi Rafael-
> 
> A couple of minor comments:
> 
> 
> > +int cpu_down(unsigned int cpu)
> > +{
> > +	int err = 0;
> > +
> > +	mutex_lock(&cpu_add_remove_lock);
> > +	if (cpu_hotplug_disabled)
> > +		err = -EPERM;
> > +	else
> > +		err = __cpu_down(cpu);
> > +
> >  	mutex_unlock(&cpu_add_remove_lock);
> >  	return err;
> >  }
> > @@ -191,6 +203,11 @@ int __devinit cpu_up(unsigned int cpu)
> >  	void *hcpu = (void *)(long)cpu;
> >  
> >  	mutex_lock(&cpu_add_remove_lock);
> > +	if (cpu_hotplug_disabled) {
> > +		ret = -EPERM;
> > +		goto out;
> > +	}
> 
> I think -EBUSY would be more appropriate than -EPERM, perhaps?

Sure, why not.
 
> > +#ifdef CONFIG_SUSPEND_SMP
> > +static cpumask_t frozen_cpus;
> > +
> > +int disable_nonboot_cpus(void)
> > +{
> > +	int cpu, error = 0;
> > +
> > +	/* We take all of the non-boot CPUs down in one shot to avoid races
> > +	 * with the userspace trying to use the CPU hotplug at the same time
> > +	 */
> > +	mutex_lock(&cpu_add_remove_lock);
> > +	cpus_clear(frozen_cpus);
> > +	printk("Disabling non-boot CPUs ...\n");
> > +	for_each_online_cpu(cpu) {
> > +		if (cpu == 0)
> > +			continue;
> 
> Assuming cpu 0 is online is not okay in generic code.

Absolutely.  Thanks for pointing this out.

> This should be something like:
> 
> 	int cpu, first_cpu, error = 0;
> 
> 	/* We take all of the non-boot CPUs down in one shot to avoid races
> 	 * with the userspace trying to use the CPU hotplug at the same time
> 	 */
> 	mutex_lock(&cpu_add_remove_lock);
> 	cpus_clear(frozen_cpus);
> 	first_cpu = first_cpu(cpu_online_mask);
> 	printk("Disabling non-boot CPUs ...\n");
> 	for_each_online_cpu(cpu) {
> 		if (cpu == first_cpu)
> 			continue;

I'm not quite sure if we can finish with CPU0 offline.  Perhaps it's better to
check if CPU0 is online and bring it up if not and then continue or return
an error if that fails?

Rafael

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28 19:15   ` Rafael J. Wysocki
@ 2006-07-28 21:23     ` Rafael J. Wysocki
  2006-07-28 22:40     ` Nathan Lynch
  1 sibling, 0 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-28 21:23 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Andrew Morton, LKML, Pavel Machek

On Friday 28 July 2006 21:15, Rafael J. Wysocki wrote:
> Hi Nathan,
> 
> On Friday 28 July 2006 20:20, Nathan Lynch wrote:
> > Hi Rafael-
> > 
]--snip--[
> I'm not quite sure if we can finish with CPU0 offline.  Perhaps it's better to
> check if CPU0 is online and bring it up if not and then continue or return
> an error if that fails?

I've tried to implement this idea in the appended patch (I'm omitting the
changes outside of kernel/cpu.c as they are not relevant here).

Please have a look.

Rafael


--- linux-2.6.18-rc2-mm1.orig/kernel/cpu.c	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/cpu.c	2006-07-28 22:59:03.000000000 +0200
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
 	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
 {
 	int err;
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (num_online_cpus() == 1) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (num_online_cpus() == 1)
+		return -EBUSY;
 
-	if (!cpu_online(cpu)) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cpu_online(cpu))
+		return -EINVAL;
 
 	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
 				__FUNCTION__, cpu);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,31 @@ out_thread:
 	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
-out:
+	return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_down(cpu);
+
 	mutex_unlock(&cpu_add_remove_lock);
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-int __devinit cpu_up(unsigned int cpu)
+static int __devinit _cpu_up(unsigned int cpu)
 {
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (cpu_online(cpu) || !cpu_present(cpu)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (cpu_online(cpu) || !cpu_present(cpu))
+		return -EINVAL;
 
 	ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
@@ -219,7 +228,95 @@ out_notify:
 	if (ret != 0)
 		blocking_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED, hcpu);
+
+	return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_up(cpu);
+
+	mutex_unlock(&cpu_add_remove_lock);
+	return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+	int cpu, first_cpu, error;
+
+	mutex_lock(&cpu_add_remove_lock);
+	first_cpu = first_cpu(cpu_present_map);
+	if (!cpu_online(first_cpu)) {
+		error = _cpu_up(first_cpu);
+		if (error) {
+			printk(KERN_ERR "Could not bring CPU%d up.\n",
+				first_cpu);
+			goto out;
+		}
+	}
+	error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+	if (error) {
+		printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+		goto out;
+	}
+	/* We take down all of the non-boot CPUs in one shot to avoid races
+	 * with the userspace trying to use the CPU hotplug at the same time
+	 */
+	cpus_clear(frozen_cpus);
+	printk("Disabling non-boot CPUs ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		error = _cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+		} else {
+			printk(KERN_ERR "Error taking CPU%d down: %d\n",
+				cpu, error);
+			break;
+		}
+	}
+	if (!error) {
+		BUG_ON(num_online_cpus() > 1);
+		/* Make sure the CPUs won't be enabled by someone else */
+		cpu_hotplug_disabled = 1;
+	} else {
+		printk(KERN_ERR "Non-boot CPUs are not disabled");
+	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);
-	return ret;
+	return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	/* Allow everyone to use the CPU hotplug again */
+	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_disabled = 0;
+	mutex_unlock(&cpu_add_remove_lock);
+
+	printk("Enabling non-boot CPUs ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+			cpu, error);
+	}
+	cpus_clear(frozen_cpus);
 }
+#endif

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28 19:15   ` Rafael J. Wysocki
  2006-07-28 21:23     ` Rafael J. Wysocki
@ 2006-07-28 22:40     ` Nathan Lynch
  2006-07-29 12:18       ` Rafael J. Wysocki
  1 sibling, 1 reply; 10+ messages in thread
From: Nathan Lynch @ 2006-07-28 22:40 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, LKML, Pavel Machek

Rafael J. Wysocki wrote:
> On Friday 28 July 2006 20:20, Nathan Lynch wrote:
> > > +#ifdef CONFIG_SUSPEND_SMP
> > > +static cpumask_t frozen_cpus;
> > > +
> > > +int disable_nonboot_cpus(void)
> > > +{
> > > +	int cpu, error = 0;
> > > +
> > > +	/* We take all of the non-boot CPUs down in one shot to avoid races
> > > +	 * with the userspace trying to use the CPU hotplug at the same time
> > > +	 */
> > > +	mutex_lock(&cpu_add_remove_lock);
> > > +	cpus_clear(frozen_cpus);
> > > +	printk("Disabling non-boot CPUs ...\n");
> > > +	for_each_online_cpu(cpu) {
> > > +		if (cpu == 0)
> > > +			continue;
> > 
> > Assuming cpu 0 is online is not okay in generic code.
> 
> Absolutely.  Thanks for pointing this out.
> 
> > This should be something like:
> > 
> > 	int cpu, first_cpu, error = 0;
> > 
> > 	/* We take all of the non-boot CPUs down in one shot to avoid races
> > 	 * with the userspace trying to use the CPU hotplug at the same time
> > 	 */
> > 	mutex_lock(&cpu_add_remove_lock);
> > 	cpus_clear(frozen_cpus);
> > 	first_cpu = first_cpu(cpu_online_mask);
> > 	printk("Disabling non-boot CPUs ...\n");
> > 	for_each_online_cpu(cpu) {
> > 		if (cpu == first_cpu)
> > 			continue;
>
> 
> I'm not quite sure if we can finish with CPU0 offline.  Perhaps it's
> better to check if CPU0 is online and bring it up if not and then
> continue or return an error if that fails?

You can't assume that cpu 0 is even present in generic code. :-)

But maybe I'm misunderstanding the motivation for using cpu 0 here.  I
had assumed it was because on i386 (and others?) the BSP can't be
offlined.  Is there some other reason?


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28  8:15 [PATCH -mm][resend] Disable CPU hotplug during suspend Rafael J. Wysocki
  2006-07-28 18:20 ` Nathan Lynch
@ 2006-07-29  5:15 ` Andrew Morton
  2006-07-29 12:27   ` Rafael J. Wysocki
  1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2006-07-29  5:15 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-kernel, pavel

On Fri, 28 Jul 2006 10:15:29 +0200
"Rafael J. Wysocki" <rjw@sisk.pl> wrote:

> int disable_nonboot_cpus(void)
> +{
> +	int cpu, error = 0;
> +
> +	/* We take all of the non-boot CPUs down in one shot to avoid races
> +	 * with the userspace trying to use the CPU hotplug at the same time
> +	 */
> +	mutex_lock(&cpu_add_remove_lock);
> +	cpus_clear(frozen_cpus);
> +	printk("Disabling non-boot CPUs ...\n");
> +	for_each_online_cpu(cpu) {
> +		if (cpu == 0)
> +			continue;

This is presumably only called on cpu 0, yes?

How can we guarantee that, given that preemption is enabled?

What happens if cpu 0 isn't online?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-28 22:40     ` Nathan Lynch
@ 2006-07-29 12:18       ` Rafael J. Wysocki
  2006-07-29 21:40         ` Nathan Lynch
  0 siblings, 1 reply; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-29 12:18 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: Andrew Morton, LKML, Pavel Machek

On Saturday 29 July 2006 00:40, Nathan Lynch wrote:
> Rafael J. Wysocki wrote:
> > On Friday 28 July 2006 20:20, Nathan Lynch wrote:
> > > > +#ifdef CONFIG_SUSPEND_SMP
> > > > +static cpumask_t frozen_cpus;
> > > > +
> > > > +int disable_nonboot_cpus(void)
> > > > +{
> > > > +	int cpu, error = 0;
> > > > +
> > > > +	/* We take all of the non-boot CPUs down in one shot to avoid races
> > > > +	 * with the userspace trying to use the CPU hotplug at the same time
> > > > +	 */
> > > > +	mutex_lock(&cpu_add_remove_lock);
> > > > +	cpus_clear(frozen_cpus);
> > > > +	printk("Disabling non-boot CPUs ...\n");
> > > > +	for_each_online_cpu(cpu) {
> > > > +		if (cpu == 0)
> > > > +			continue;
> > > 
> > > Assuming cpu 0 is online is not okay in generic code.
> > 
> > Absolutely.  Thanks for pointing this out.
> > 
> > > This should be something like:
> > > 
> > > 	int cpu, first_cpu, error = 0;
> > > 
> > > 	/* We take all of the non-boot CPUs down in one shot to avoid races
> > > 	 * with the userspace trying to use the CPU hotplug at the same time
> > > 	 */
> > > 	mutex_lock(&cpu_add_remove_lock);
> > > 	cpus_clear(frozen_cpus);
> > > 	first_cpu = first_cpu(cpu_online_mask);
> > > 	printk("Disabling non-boot CPUs ...\n");
> > > 	for_each_online_cpu(cpu) {
> > > 		if (cpu == first_cpu)
> > > 			continue;
> >
> > 
> > I'm not quite sure if we can finish with CPU0 offline.  Perhaps it's
> > better to check if CPU0 is online and bring it up if not and then
> > continue or return an error if that fails?
> 
> You can't assume that cpu 0 is even present in generic code. :-)

Yes.  I should have said "the first present CPU" instead of "CPU0".

> But maybe I'm misunderstanding the motivation for using cpu 0 here.  I
> had assumed it was because on i386 (and others?) the BSP can't be
> offlined.  Is there some other reason?

Yes.

First, the arch-dependent suspend code assumes implicitly that it will be
running on the BSP, so some strange things may happen if it doesn't.

Second, we have to make sure that this function will always leaves the
same CPU online.  It's a bit difficult to explain, but I'll do my best.
Suppose that disable_nonboot_cpus() exits running on CPU1, assuming it's
possible.  Then the system memory state saved in the suspend image will
reflect this situation.  Now the resume code will almost certainly run on the
BSP (say it's CPU0), but when the system memory is restored from the suspend
image the kernel will think it's running on CPU1.

In the last patch I send yesterday I made disable_nonboot_cpus() check if the
first present CPU, first_cpu(cpu_present_map), is online, try to bring it up
if not and migrate itself to it before the loop over all online CPUs is run.

I think that's general enough.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-29  5:15 ` Andrew Morton
@ 2006-07-29 12:27   ` Rafael J. Wysocki
  0 siblings, 0 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-29 12:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, pavel, Nathan Lynch

On Saturday 29 July 2006 07:15, Andrew Morton wrote:
> On Fri, 28 Jul 2006 10:15:29 +0200
> "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> 
> > int disable_nonboot_cpus(void)
> > +{
> > +	int cpu, error = 0;
> > +
> > +	/* We take all of the non-boot CPUs down in one shot to avoid races
> > +	 * with the userspace trying to use the CPU hotplug at the same time
> > +	 */
> > +	mutex_lock(&cpu_add_remove_lock);
> > +	cpus_clear(frozen_cpus);
> > +	printk("Disabling non-boot CPUs ...\n");
> > +	for_each_online_cpu(cpu) {
> > +		if (cpu == 0)
> > +			continue;
> 
> This is presumably only called on cpu 0, yes?
> 
> How can we guarantee that, given that preemption is enabled?

If cpu 0 is online, it will end up running on it when all of the other cpus
are down.

> What happens if cpu 0 isn't online?

Fortunately, on x86_64 and i386 it cannot be offline (I'm not sure about ppc,
though), but of course in general we shouldn't assume that it's online or even
present here.

I'm discussing the issue with Nathan right now.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-29 12:18       ` Rafael J. Wysocki
@ 2006-07-29 21:40         ` Nathan Lynch
  2006-07-29 23:07           ` Rafael J. Wysocki
  0 siblings, 1 reply; 10+ messages in thread
From: Nathan Lynch @ 2006-07-29 21:40 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, LKML, Pavel Machek

Rafael J. Wysocki wrote:
> On Saturday 29 July 2006 00:40, Nathan Lynch wrote:
> 
> > But maybe I'm misunderstanding the motivation for using cpu 0 here.  I
> > had assumed it was because on i386 (and others?) the BSP can't be
> > offlined.  Is there some other reason?
> 
> Yes.
> 
> First, the arch-dependent suspend code assumes implicitly that it will be
> running on the BSP, so some strange things may happen if it doesn't.
> 
> Second, we have to make sure that this function will always leaves the
> same CPU online.  It's a bit difficult to explain, but I'll do my best.
> Suppose that disable_nonboot_cpus() exits running on CPU1, assuming it's
> possible.  Then the system memory state saved in the suspend image will
> reflect this situation.  Now the resume code will almost certainly run on the
> BSP (say it's CPU0), but when the system memory is restored from the suspend
> image the kernel will think it's running on CPU1.
> 
> In the last patch I send yesterday I made disable_nonboot_cpus() check if the
> first present CPU, first_cpu(cpu_present_map), is online, try to bring it up
> if not and migrate itself to it before the loop over all online CPUs is run.
> 
> I think that's general enough.

I see, thanks for the explanation.

It doesn't look like SMP swsusp would work reliably on platforms where
there's a possibility of the cpu maps in the resume and saved images
not matching (e.g. ppc64 logical partitions, where cpu 0 could be
removed before suspending).  But I guess that's largely a theoretical
concern at this time. ;)


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH -mm][resend] Disable CPU hotplug during suspend
  2006-07-29 21:40         ` Nathan Lynch
@ 2006-07-29 23:07           ` Rafael J. Wysocki
  0 siblings, 0 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2006-07-29 23:07 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Nathan Lynch, LKML, Pavel Machek

On Saturday 29 July 2006 23:40, Nathan Lynch wrote:
> Rafael J. Wysocki wrote:
> > On Saturday 29 July 2006 00:40, Nathan Lynch wrote:
> > 
> > > But maybe I'm misunderstanding the motivation for using cpu 0 here.  I
> > > had assumed it was because on i386 (and others?) the BSP can't be
> > > offlined.  Is there some other reason?
> > 
> > Yes.
> > 
> > First, the arch-dependent suspend code assumes implicitly that it will be
> > running on the BSP, so some strange things may happen if it doesn't.
> > 
> > Second, we have to make sure that this function will always leaves the
> > same CPU online.  It's a bit difficult to explain, but I'll do my best.
> > Suppose that disable_nonboot_cpus() exits running on CPU1, assuming it's
> > possible.  Then the system memory state saved in the suspend image will
> > reflect this situation.  Now the resume code will almost certainly run on the
> > BSP (say it's CPU0), but when the system memory is restored from the suspend
> > image the kernel will think it's running on CPU1.
> > 
> > In the last patch I send yesterday I made disable_nonboot_cpus() check if the
> > first present CPU, first_cpu(cpu_present_map), is online, try to bring it up
> > if not and migrate itself to it before the loop over all online CPUs is run.
> > 
> > I think that's general enough.
> 
> I see, thanks for the explanation.
> 
> It doesn't look like SMP swsusp would work reliably on platforms where
> there's a possibility of the cpu maps in the resume and saved images
> not matching (e.g. ppc64 logical partitions, where cpu 0 could be
> removed before suspending).  But I guess that's largely a theoretical
> concern at this time. ;)

I think so. :-)

Appended is the full patch with corrections resulting from our discussion
here.

Andrew, could you please replace disable-cpu-hotplug-during-suspend.patch
with this one?

---
The current suspend code has to be run on one CPU, so we use the CPU
hotplug to take the non-boot CPUs offline on SMP machines.  However, we
should also make sure that these CPUs will not be enabled by someone else
after we have disabled them.

The functions disable_nonboot_cpus() and enable_nonboot_cpus() are moved to
kernel/cpu.c, because they now refer to some stuff in there that should
better be static.  Also it's better if disable_nonboot_cpus() returns an
error instead of panicking if something goes wrong, and
enable_nonboot_cpus() has no reason to panic(), because the CPUs may have
been enabled by the userland before it tries to take them online.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 include/linux/cpu.h     |    8 ++
 include/linux/suspend.h |    8 --
 kernel/cpu.c            |  138 +++++++++++++++++++++++++++++++++++++++++-------
 kernel/power/Makefile   |    2 
 kernel/power/disk.c     |    7 ++
 kernel/power/main.c     |   10 +--
 kernel/power/smp.c      |   62 ---------------------
 kernel/power/user.c     |   14 +++-
 8 files changed, 145 insertions(+), 104 deletions(-)

Index: linux-2.6.18-rc2-mm1/include/linux/suspend.h
===================================================================
--- linux-2.6.18-rc2-mm1.orig/include/linux/suspend.h	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/include/linux/suspend.h	2006-07-28 21:03:25.000000000 +0200
@@ -57,14 +57,6 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
Index: linux-2.6.18-rc2-mm1/kernel/power/smp.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/smp.c	2006-07-28 21:03:21.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,62 +0,0 @@
-/*
- * drivers/power/smp.c - Functions for stopping other CPUs.
- *
- * Copyright 2004 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz>
- *
- * This file is released under the GPLv2.
- */
-
-#undef DEBUG
-
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <asm/atomic.h>
-#include <asm/tlbflush.h>
-
-/* This is protected by pm_sem semaphore */
-static cpumask_t frozen_cpus;
-
-void disable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	error = 0;
-	cpus_clear(frozen_cpus);
-	printk("Freezing cpus ...\n");
-	for_each_online_cpu(cpu) {
-		if (cpu == 0)
-			continue;
-		error = cpu_down(cpu);
-		if (!error) {
-			cpu_set(cpu, frozen_cpus);
-			printk("CPU%d is down\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d down: %d\n", cpu, error);
-	}
-	BUG_ON(raw_smp_processor_id() != 0);
-	if (error)
-		panic("cpus not sleeping");
-}
-
-void enable_nonboot_cpus(void)
-{
-	int cpu, error;
-
-	printk("Thawing cpus ...\n");
-	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
-		if (!error) {
-			printk("CPU%d is up\n", cpu);
-			continue;
-		}
-		printk("Error taking cpu %d up: %d\n", cpu, error);
-		panic("Not enough cpus");
-	}
-	cpus_clear(frozen_cpus);
-}
-
Index: linux-2.6.18-rc2-mm1/kernel/power/disk.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/disk.c	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/disk.c	2006-07-28 23:25:13.000000000 +0200
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -72,7 +73,10 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	disable_nonboot_cpus();
+
+	error = disable_nonboot_cpus();
+	if (error)
+		goto enable_cpus;
 
 	if (freeze_processes()) {
 		error = -EBUSY;
@@ -84,6 +88,7 @@ static int prepare_processes(void)
 		return 0;
 thaw:
 	thaw_processes();
+enable_cpus:
 	enable_nonboot_cpus();
 	pm_restore_console();
 	return error;
Index: linux-2.6.18-rc2-mm1/kernel/power/main.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/main.c	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/main.c	2006-07-28 21:03:25.000000000 +0200
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/console.h>
+#include <linux/cpu.h>
 
 #include "power.h"
 
@@ -51,7 +52,7 @@ void pm_set_ops(struct pm_ops * ops)
 
 static int suspend_prepare(suspend_state_t state)
 {
-	int error = 0;
+	int error;
 	unsigned int free_pages;
 
 	if (!pm_ops || !pm_ops->enter)
@@ -63,12 +64,9 @@ static int suspend_prepare(suspend_state
 
 	pm_prepare_console();
 
-	disable_nonboot_cpus();
-
-	if (num_online_cpus() != 1) {
-		error = -EPERM;
+	error = disable_nonboot_cpus();
+	if (error)
 		goto Enable_cpu;
-	}
 
 	if (freeze_processes()) {
 		error = -EAGAIN;
Index: linux-2.6.18-rc2-mm1/kernel/power/user.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/user.c	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/user.c	2006-07-28 21:03:25.000000000 +0200
@@ -19,6 +19,7 @@
 #include <linux/swapops.h>
 #include <linux/pm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 
 #include <asm/uaccess.h>
 
@@ -139,12 +140,15 @@ static int snapshot_ioctl(struct inode *
 		if (data->frozen)
 			break;
 		down(&pm_sem);
-		disable_nonboot_cpus();
-		if (freeze_processes()) {
-			thaw_processes();
-			enable_nonboot_cpus();
-			error = -EBUSY;
+		error = disable_nonboot_cpus();
+		if (!error) {
+			error = freeze_processes();
+			if (error) {
+				thaw_processes();
+				error = -EBUSY;
+			}
 		}
+		enable_nonboot_cpus();
 		up(&pm_sem);
 		if (!error)
 			data->frozen = 1;
Index: linux-2.6.18-rc2-mm1/include/linux/cpu.h
===================================================================
--- linux-2.6.18-rc2-mm1.orig/include/linux/cpu.h	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/include/linux/cpu.h	2006-07-28 21:03:25.000000000 +0200
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
 #endif /* _LINUX_CPU_H_ */
Index: linux-2.6.18-rc2-mm1/kernel/cpu.c
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/cpu.c	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/cpu.c	2006-07-28 23:25:08.000000000 +0200
@@ -21,6 +21,11 @@ static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * Should always be manipulated under cpu_add_remove_lock
+ */
+static int cpu_hotplug_disabled;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
@@ -108,30 +113,25 @@ static int take_cpu_down(void *unused)
 	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int _cpu_down(unsigned int cpu)
 {
 	int err;
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (num_online_cpus() == 1) {
-		err = -EBUSY;
-		goto out;
-	}
+	if (num_online_cpus() == 1)
+		return -EBUSY;
 
-	if (!cpu_online(cpu)) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!cpu_online(cpu))
+		return -EINVAL;
 
 	err = blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
 		printk("%s: attempt to take down CPU %u failed\n",
 				__FUNCTION__, cpu);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Ensure that we are not runnable on dying cpu */
@@ -179,22 +179,32 @@ out_thread:
 	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
-out:
+	return err;
+}
+
+int cpu_down(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_down(cpu);
+
 	mutex_unlock(&cpu_add_remove_lock);
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-int __devinit cpu_up(unsigned int cpu)
+/* Requires cpu_add_remove_lock to be held */
+static int __devinit _cpu_up(unsigned int cpu)
 {
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	mutex_lock(&cpu_add_remove_lock);
-	if (cpu_online(cpu) || !cpu_present(cpu)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (cpu_online(cpu) || !cpu_present(cpu))
+		return -EINVAL;
 
 	ret = blocking_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
@@ -219,7 +229,95 @@ out_notify:
 	if (ret != 0)
 		blocking_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED, hcpu);
+
+	return ret;
+}
+
+int __devinit cpu_up(unsigned int cpu)
+{
+	int err = 0;
+
+	mutex_lock(&cpu_add_remove_lock);
+	if (cpu_hotplug_disabled)
+		err = -EBUSY;
+	else
+		err = _cpu_up(cpu);
+
+	mutex_unlock(&cpu_add_remove_lock);
+	return err;
+}
+
+#ifdef CONFIG_SUSPEND_SMP
+static cpumask_t frozen_cpus;
+
+int disable_nonboot_cpus(void)
+{
+	int cpu, first_cpu, error;
+
+	mutex_lock(&cpu_add_remove_lock);
+	first_cpu = first_cpu(cpu_present_map);
+	if (!cpu_online(first_cpu)) {
+		error = _cpu_up(first_cpu);
+		if (error) {
+			printk(KERN_ERR "Could not bring CPU%d up.\n",
+				first_cpu);
+			goto out;
+		}
+	}
+	error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
+	if (error) {
+		printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
+		goto out;
+	}
+	/* We take down all of the non-boot CPUs in one shot to avoid races
+	 * with the userspace trying to use the CPU hotplug at the same time
+	 */
+	cpus_clear(frozen_cpus);
+	printk("Disabling non-boot CPUs ...\n");
+	for_each_online_cpu(cpu) {
+		if (cpu == first_cpu)
+			continue;
+		error = _cpu_down(cpu);
+		if (!error) {
+			cpu_set(cpu, frozen_cpus);
+			printk("CPU%d is down\n", cpu);
+		} else {
+			printk(KERN_ERR "Error taking CPU%d down: %d\n",
+				cpu, error);
+			break;
+		}
+	}
+	if (!error) {
+		BUG_ON(num_online_cpus() > 1);
+		/* Make sure the CPUs won't be enabled by someone else */
+		cpu_hotplug_disabled = 1;
+	} else {
+		printk(KERN_ERR "Non-boot CPUs are not disabled");
+	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);
-	return ret;
+	return error;
+}
+
+void enable_nonboot_cpus(void)
+{
+	int cpu, error;
+
+	/* Allow everyone to use the CPU hotplug again */
+	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_disabled = 0;
+	mutex_unlock(&cpu_add_remove_lock);
+
+	printk("Enabling non-boot CPUs ...\n");
+	for_each_cpu_mask(cpu, frozen_cpus) {
+		error = cpu_up(cpu);
+		if (!error) {
+			printk("CPU%d is up\n", cpu);
+			continue;
+		}
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
+			cpu, error);
+	}
+	cpus_clear(frozen_cpus);
 }
+#endif
Index: linux-2.6.18-rc2-mm1/kernel/power/Makefile
===================================================================
--- linux-2.6.18-rc2-mm1.orig/kernel/power/Makefile	2006-07-28 21:03:21.000000000 +0200
+++ linux-2.6.18-rc2-mm1/kernel/power/Makefile	2006-07-28 21:03:25.000000000 +0200
@@ -7,6 +7,4 @@ obj-y				:= main.o process.o console.o
 obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
-obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
-
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2006-07-29 23:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-28  8:15 [PATCH -mm][resend] Disable CPU hotplug during suspend Rafael J. Wysocki
2006-07-28 18:20 ` Nathan Lynch
2006-07-28 19:15   ` Rafael J. Wysocki
2006-07-28 21:23     ` Rafael J. Wysocki
2006-07-28 22:40     ` Nathan Lynch
2006-07-29 12:18       ` Rafael J. Wysocki
2006-07-29 21:40         ` Nathan Lynch
2006-07-29 23:07           ` Rafael J. Wysocki
2006-07-29  5:15 ` Andrew Morton
2006-07-29 12:27   ` Rafael J. Wysocki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox