All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lai Jiangshan <laijs@cn.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: paulmck@linux.vnet.ibm.com, Gautham R Shenoy <ego@in.ibm.com>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Ingo Molnar <mingo@elte.hu>, LKML <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Oleg Nesterov <oleg@redhat.com>,
	dipankar@in.ibm.com
Subject: [PATCH -mm] cpuhotplug: introduce try_get_online_cpus() take 3
Date: Tue, 09 Jun 2009 20:07:09 +0800	[thread overview]
Message-ID: <4A2E506D.9090107@cn.fujitsu.com> (raw)
In-Reply-To: <20090608142520.GA6961@linux.vnet.ibm.com>

It's for -mm tree.

It also works for mainline if you apply this at first:
http://lkml.org/lkml/2009/2/17/58

Subject: [PATCH -mm] cpuhotplug: introduce try_get_online_cpus() take 3

get_online_cpus() is a typically coarsely granular lock.
It's a source of ABBA deadlock.

Thanks to the CPU notifiers, Some subsystem's global lock will
be required after cpu_hotplug.lock. Subsystem's global lock
is coarsely granular lock too, thus a lot's of lock in kernel
should be required after cpu_hotplug.lock(if we need
cpu_hotplug.lock held too)

Otherwise it may come to a ABBA deadlock like this:

thread 1                                      |        thread 2
_cpu_down()                                   |  Lock a-kernel-lock.
  cpu_hotplug_begin()                         |
    down_write(&cpu_hotplug.lock)             |
  __raw_notifier_call_chain(CPU_DOWN_PREPARE) |  get_online_cpus()
------------------------------------------------------------------------
    Lock a-kernel-lock.(wait thread2)         |    down_read(&cpu_hotplug.lock)
                                                   (wait thread 1)

But CPU online/offline are happened very rarely, get_online_cpus()
returns success quickly in all probability.
So it's an asinine behavior that get_online_cpus() is not allowed
to be required after we had held "a-kernel-lock".

To dispel the ABBA deadlock, this patch introduces
try_get_online_cpus(). It returns fail very rarely. It gives the
caller a chance to select an alternative way to finish works,
instead of sleeping or deadlock.

Changed from V1
Lockless for get_online_cpus()'s fast path

Changed from V2
Fix patch as Oleg Nesterov valuable suggestions.

Suggested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 4d668e0..eeb9ca5 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -99,6 +99,7 @@ extern struct sysdev_class cpu_sysdev_class;
 
 extern void get_online_cpus(void);
 extern void put_online_cpus(void);
+extern int try_get_online_cpus(void);
 #define hotcpu_notifier(fn, pri) {				\
 	static struct notifier_block fn##_nb __cpuinitdata =	\
 		{ .notifier_call = fn, .priority = pri };	\
@@ -112,6 +113,7 @@ int cpu_down(unsigned int cpu);
 
 #define get_online_cpus()	do { } while (0)
 #define put_online_cpus()	do { } while (0)
+static inline int try_get_online_cpus(void) { return 1; }
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 609fae1..afecc95 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -14,6 +14,8 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <asm/atomic.h>
+#include <linux/wait.h>
 
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@ -26,16 +28,23 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
  */
 static int cpu_hotplug_disabled;
 
+/*
+ * @cpu_hotplug is a special read-write semaphore with these semantics:
+ * 1) It is read-preference and allows reader-in-reader recursion.
+ * 2) It allows writer to downgrade to a reader when required.
+ *    (allows reader-in-writer recursion.)
+ * 3) It allows only one thread to require the write-side lock at most.
+ *    (cpu_add_remove_lock ensures it.)
+ */
 static struct {
 	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	int refcount;
+	wait_queue_head_t sleeping_readers;
+	/* refcount = 0 means the writer owns the lock. */
+	atomic_t refcount;
 } cpu_hotplug = {
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+	NULL,
+	__WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.sleeping_readers),
+	ATOMIC_INIT(1),
 };
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -45,10 +54,9 @@ void get_online_cpus(void)
 	might_sleep();
 	if (cpu_hotplug.active_writer == current)
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	cpu_hotplug.refcount++;
-	mutex_unlock(&cpu_hotplug.lock);
 
+	wait_event(cpu_hotplug.sleeping_readers,
+		   likely(atomic_inc_not_zero(&cpu_hotplug.refcount)));
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
@@ -56,14 +64,27 @@ void put_online_cpus(void)
 {
 	if (cpu_hotplug.active_writer == current)
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
-		wake_up_process(cpu_hotplug.active_writer);
-	mutex_unlock(&cpu_hotplug.lock);
 
+	if (unlikely(atomic_dec_and_test(&cpu_hotplug.refcount))) {
+		/* atomic_dec_and_test() implies smp_mb__after_atomic_dec() */
+		BUG_ON(!cpu_hotplug.active_writer);
+		wake_up_process(cpu_hotplug.active_writer);
+	}
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
+int try_get_online_cpus(void)
+{
+	if (cpu_hotplug.active_writer == current)
+		return 1;
+
+	if (likely(atomic_inc_not_zero(&cpu_hotplug.refcount)))
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
+
 #endif	/* CONFIG_HOTPLUG_CPU */
 
 /*
@@ -81,46 +102,42 @@ void cpu_maps_update_done(void)
 }
 
 /*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
+ * This ensures that the hotplug operation can begin only when
+ * there is no ongoing reader.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
+ * will be blocked and queued at cpu_hotplug.sleeping_readers.
  *
  * Since cpu_hotplug_begin() is always called after invoking
  * cpu_maps_update_begin(), we can be sure that only one writer is active.
  *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
  */
 static void cpu_hotplug_begin(void)
 {
 	cpu_hotplug.active_writer = current;
 
+	/* atomic_dec_and_test() implies smp_mb__before_atomic_dec() */
+	if (atomic_dec_and_test(&cpu_hotplug.refcount))
+		return;
+
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		if (likely(!cpu_hotplug.refcount))
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!atomic_read(&cpu_hotplug.refcount))
 			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
 	}
+
+	__set_current_state(TASK_RUNNING);
 }
 
 static void cpu_hotplug_done(void)
 {
+	atomic_inc(&cpu_hotplug.refcount);
+	wake_up_all(&cpu_hotplug.sleeping_readers);
+
 	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
 }
+
 /* Need to know about CPUs going up/down? */
 int __ref register_cpu_notifier(struct notifier_block *nb)
 {


  reply	other threads:[~2009-06-09 12:06 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-29  8:29 [PATCH 1/2] cpuhotplug: use rw_semaphore for cpu_hotplug Lai Jiangshan
2009-05-29 20:23 ` Andrew Morton
2009-05-29 21:07   ` Oleg Nesterov
2009-05-29 21:17     ` Oleg Nesterov
2009-06-01  1:04       ` Lai Jiangshan
2009-06-01  0:52     ` Lai Jiangshan
2009-06-01  2:22       ` Lai Jiangshan
2009-05-30  1:53 ` Paul E. McKenney
2009-05-30  4:37   ` Gautham R Shenoy
2009-06-04  6:58     ` [PATCH] cpuhotplug: introduce try_get_online_cpus() take 2 Lai Jiangshan
2009-06-04 20:49       ` Oleg Nesterov
2009-06-05  1:32         ` Lai Jiangshan
2009-06-05  2:14           ` Oleg Nesterov
2009-06-05 15:37       ` Paul E. McKenney
2009-06-08  2:36         ` Lai Jiangshan
2009-06-08  4:19         ` Gautham R Shenoy
2009-06-08 14:25           ` Paul E. McKenney
2009-06-09 12:07             ` Lai Jiangshan [this message]
2009-06-09 19:34               ` [PATCH -mm] cpuhotplug: introduce try_get_online_cpus() take 3 Andrew Morton
2009-06-09 23:47                 ` Paul E. McKenney
2009-06-10  1:13                   ` [PATCH -mm resend] " Lai Jiangshan
2009-06-10  1:42                     ` Andrew Morton
2009-06-11  8:41                       ` Lai Jiangshan
2009-06-11 18:50                         ` Paul E. McKenney
2009-06-15  4:04                           ` Gautham R Shenoy
2009-06-10  0:57                 ` [PATCH -mm] " Lai Jiangshan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4A2E506D.9090107@cn.fujitsu.com \
    --to=laijs@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=dipankar@in.ibm.com \
    --cc=ego@in.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.