linux-kernel.vger.kernel.org archive mirror
* [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint
@ 2009-10-07  4:47 Paul E. McKenney
  2009-10-07  4:48 ` [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree Paul E. McKenney
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07  4:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
	niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells

This patchset fixes a couple of issues with TREE_PREEMPT_RCU:

1.	The current implementation of rcu_barrier() waits for any
	prior CPU-hotplug operation to complete.  This code path has
	resulted in some hangs, so first move this code from rcupdate.c
	to rcutree.c and then use a simpler algorithm that avoids the
	waiting, eliminating the possibility of such hangs.

2.	The lockdep facility complains from time to time due to the
	fact that certain combinations of CPU-hotplug operations can
	cause the TREE_PREEMPT_RCU code to acquire the root rcu_node
	structure's lock while holding a leaf rcu_node structure's
	lock.  This patch places the root rcu_node structure's lock
	into its own lockdep class to prevent this false positive.
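
As background for both fixes, every variant of rcu_barrier() depends on
the same counting invariant: the completion count starts at 1 so that no
callback can drive it to zero before every CPU has enqueued its own
increment, and the caller drops that initial reference only after
on_each_cpu() has returned.  A rough userspace analogue of the scheme
(an illustrative sketch only, not kernel code; the toy names and the
pthread plumbing are stand-ins):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS_TOY 4

static atomic_int barrier_count;
static pthread_barrier_t enqueued;	/* models on_each_cpu() returning */
static pthread_mutex_t done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cond = PTHREAD_COND_INITIALIZER;

static void barrier_callback(void)	/* models rcu_barrier_callback() */
{
	if (atomic_fetch_sub(&barrier_count, 1) == 1) {
		pthread_mutex_lock(&done_mutex);
		pthread_cond_signal(&done_cond);
		pthread_mutex_unlock(&done_mutex);
	}
}

static void *cpu_thread(void *unused)	/* models rcu_barrier_func() */
{
	atomic_fetch_add(&barrier_count, 1);	/* "enqueue" a callback */
	pthread_barrier_wait(&enqueued);
	barrier_callback();	/* the callback "fires" some time later */
	return NULL;
}

int main(void)
{
	pthread_t tid[NR_CPUS_TOY];
	int i;

	atomic_store(&barrier_count, 1);	/* caller's own reference */
	pthread_barrier_init(&enqueued, NULL, NR_CPUS_TOY + 1);
	for (i = 0; i < NR_CPUS_TOY; i++)
		pthread_create(&tid[i], NULL, cpu_thread, NULL);
	pthread_barrier_wait(&enqueued);	/* every CPU has enqueued */
	barrier_callback();	/* only now safe to drop the initial count */
	pthread_mutex_lock(&done_mutex);
	while (atomic_load(&barrier_count) > 0)
		pthread_cond_wait(&done_cond, &done_mutex);
	pthread_mutex_unlock(&done_mutex);
	for (i = 0; i < NR_CPUS_TOY; i++)
		pthread_join(tid[i], NULL);
	printf("all toy callbacks completed\n");
	return 0;
}

Patch 1 moves the kernel form of this logic from rcupdate.c to rcutree.c
unchanged; patch 2 then simplifies what surrounds it.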

Ingo, please rewind tip/core/rcu to commit #135c8aea before applying this
series, as these patches really need to go into 2.6.32.  Reworked and
retested rcutiny (for 2.6.33, as Linus requested) will follow in a
few days.  Or more patches for more bug fixes, as the case may be.  ;-)

 b/kernel/rcupdate.c       |  120 -----------------------------------
 b/kernel/rcutree.c        |  120 +++++++++++++++++++++++++++++++++++
 b/kernel/rcutree.h        |   11 ++-
 b/kernel/rcutree_plugin.h |   34 ++++++++++
 b/kernel/rcutree_trace.c  |    5 -
 kernel/rcutree.c          |  156 ++++++++++++++++++++++++----------------------
 6 files changed, 248 insertions(+), 198 deletions(-)


* [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree
  2009-10-07  4:47 [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Paul E. McKenney
@ 2009-10-07  4:48 ` Paul E. McKenney
  2009-10-07  6:16   ` [tip:core/rcu] rcu: Move " tip-bot for Paul E. McKenney
  2009-10-07  4:48 ` [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Paul E. McKenney
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07  4:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
	niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
	Paul E. McKenney

Move the existing rcu_barrier() implementation to rcutree.c, consistent
with the fact that the rcu_barrier() implementation is tied quite tightly
to the RCU implementation.  This opens the way to simplify rcutree.c's
rcu_barrier() implementation in a later patch.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcupdate.c |  120 +----------------------------------------------------
 kernel/rcutree.c  |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+), 119 deletions(-)

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index e432422..4001833 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,16 +53,8 @@ struct lockdep_map rcu_lock_map =
 EXPORT_SYMBOL_GPL(rcu_lock_map);
 #endif
 
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
 int rcu_scheduler_active __read_mostly;
 
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
-
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
@@ -165,120 +157,10 @@ void synchronize_rcu_bh(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
-static void rcu_barrier_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
-}
-
-/*
- * Called with preemption disabled, and from cross-cpu IRQ context.
- */
-static void rcu_barrier_func(void *type)
-{
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
-
-	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
-}
-
-static inline void wait_migrated_callbacks(void)
-{
-	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-	smp_mb(); /* In case we didn't sleep. */
-}
-
-/*
- * Orchestrate the specified type of RCU barrier, waiting for all
- * RCU callbacks of the specified type to complete.
- */
-static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
-					       void (*func)(struct rcu_head *head)))
-{
-	BUG_ON(in_interrupt());
-	/* Take cpucontrol mutex to protect against CPU hotplug */
-	mutex_lock(&rcu_barrier_mutex);
-	init_completion(&rcu_barrier_completion);
-	/*
-	 * Initialize rcu_barrier_cpu_count to 1, then invoke
-	 * rcu_barrier_func() on each CPU, so that each CPU also has
-	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
-	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
-	 * might complete its grace period before all of the other CPUs
-	 * did their increment, causing this function to return too
-	 * early.
-	 */
-	atomic_set(&rcu_barrier_cpu_count, 1);
-	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
-	wait_for_completion(&rcu_barrier_completion);
-	mutex_unlock(&rcu_barrier_mutex);
-	wait_migrated_callbacks();
-}
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(call_rcu);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/**
- * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
- */
-void rcu_barrier_bh(void)
-{
-	_rcu_barrier(call_rcu_bh);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-
-/**
- * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
- */
-void rcu_barrier_sched(void)
-{
-	_rcu_barrier(call_rcu_sched);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-
-static void rcu_migrate_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_migrate_type_count))
-		wake_up(&rcu_migrate_wq);
-}
-
 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
 		unsigned long action, void *hcpu)
 {
-	rcu_cpu_notify(self, action, hcpu);
-	if (action == CPU_DYING) {
-		/*
-		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
-		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-		 * returns, all online cpus have queued rcu_barrier_func(),
-		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
-		 *
-		 * These callbacks ensure _rcu_barrier() waits for all
-		 * RCU callbacks of the specified type to complete.
-		 */
-		atomic_set(&rcu_migrate_type_count, 3);
-		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
-		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
-		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
-	} else if (action == CPU_DOWN_PREPARE) {
-		/* Don't need to wait until next removal operation. */
-		/* rcu_migrate_head is protected by cpu_add_remove_lock */
-		wait_migrated_callbacks();
-	}
-
-	return NOTIFY_OK;
+	return rcu_cpu_notify(self, action, hcpu);
 }
 
 void __init rcu_init(void)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e2e272b..0108570 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1363,6 +1363,103 @@ int rcu_needs_cpu(int cpu)
 	       rcu_preempt_needs_cpu(cpu);
 }
 
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
+static atomic_t rcu_barrier_cpu_count;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+static struct completion rcu_barrier_completion;
+static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
+static struct rcu_head rcu_migrate_head[3];
+static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
+
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *type)
+{
+	int cpu = smp_processor_id();
+	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
+	void (*call_rcu_func)(struct rcu_head *head,
+			      void (*func)(struct rcu_head *head));
+
+	atomic_inc(&rcu_barrier_cpu_count);
+	call_rcu_func = type;
+	call_rcu_func(head, rcu_barrier_callback);
+}
+
+static inline void wait_migrated_callbacks(void)
+{
+	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+	smp_mb(); /* In case we didn't sleep. */
+}
+
+/*
+ * Orchestrate the specified type of RCU barrier, waiting for all
+ * RCU callbacks of the specified type to complete.
+ */
+static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
+					       void (*func)(struct rcu_head *head)))
+{
+	BUG_ON(in_interrupt());
+	/* Take cpucontrol mutex to protect against CPU hotplug */
+	mutex_lock(&rcu_barrier_mutex);
+	init_completion(&rcu_barrier_completion);
+	/*
+	 * Initialize rcu_barrier_cpu_count to 1, then invoke
+	 * rcu_barrier_func() on each CPU, so that each CPU also has
+	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
+	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
+	 * might complete its grace period before all of the other CPUs
+	 * did their increment, causing this function to return too
+	 * early.
+	 */
+	atomic_set(&rcu_barrier_cpu_count, 1);
+	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+	wait_for_completion(&rcu_barrier_completion);
+	mutex_unlock(&rcu_barrier_mutex);
+	wait_migrated_callbacks();
+}
+
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(call_rcu);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/**
+ * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
+ */
+void rcu_barrier_bh(void)
+{
+	_rcu_barrier(call_rcu_bh);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+/**
+ * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ */
+void rcu_barrier_sched(void)
+{
+	_rcu_barrier(call_rcu_sched);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
+static void rcu_migrate_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_migrate_type_count))
+		wake_up(&rcu_migrate_wq);
+}
+
 /*
  * Do boot-time initialization of a CPU's per-CPU RCU data.
  */
@@ -1459,6 +1556,28 @@ int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		/* Don't need to wait until next removal operation. */
+		/* rcu_migrate_head is protected by cpu_add_remove_lock */
+		wait_migrated_callbacks();
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		/*
+		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
+		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
+		 * returns, all online cpus have queued rcu_barrier_func(),
+		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
+		 *
+		 * These callbacks ensure _rcu_barrier() waits for all
+		 * RCU callbacks of the specified type to complete.
+		 */
+		atomic_set(&rcu_migrate_type_count, 3);
+		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
+		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
+		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
-- 
1.5.2.5



* [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks
  2009-10-07  4:47 [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Paul E. McKenney
  2009-10-07  4:48 ` [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree Paul E. McKenney
@ 2009-10-07  4:48 ` Paul E. McKenney
  2009-10-07  6:17   ` [tip:core/rcu] rcu: Make " tip-bot for Paul E. McKenney
  2009-10-07 13:16   ` [PATCH tip/core/rcu 2/3] rcu: make " Steven Rostedt
  2009-10-07  4:48 ` [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class Paul E. McKenney
  2009-10-07  6:14 ` [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Ingo Molnar
  3 siblings, 2 replies; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07  4:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
	niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
	Paul E. McKenney

From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

The current interaction between RCU and CPU hotplug requires that
RCU block in CPU notifiers waiting for callbacks to drain.  This can
be greatly simplified by having each CPU relinquish its own callbacks
and by having both _rcu_barrier() and the CPU_DEAD notifier adopt all
callbacks that were previously relinquished.  This change also eliminates
the possibility of certain types of hangs due to the previous practice of
waiting for callbacks to be invoked from within CPU notifiers.  If you
don't ever wait, you cannot hang.
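
The ->orphan_cbs_list/->orphan_cbs_tail pair introduced below is a
singly linked list tracked by a pointer to its final ->next field, so a
dying CPU can donate, and _rcu_barrier() or the CPU_DEAD notifier can
adopt, an entire callback list in O(1) while preserving callback order,
which rcu_barrier() depends on.  A minimal userspace sketch of that
splice (the names here are illustrative, not the kernel's):

#include <stdio.h>

struct cb {
	struct cb *next;
	int id;
};

struct cblist {
	struct cb *head;
	struct cb **tail;	/* &head, or &(last cb)->next */
};

static void cblist_init(struct cblist *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *cb)
{
	cb->next = NULL;
	*l->tail = cb;
	l->tail = &cb->next;
}

/* Donate all of @from's callbacks to @to, as in rcu_send_cbs_to_orphanage(). */
static void cblist_splice(struct cblist *to, struct cblist *from)
{
	if (from->head == NULL)
		return;
	*to->tail = from->head;
	to->tail = from->tail;
	cblist_init(from);	/* donor is left empty */
}

int main(void)
{
	struct cblist cpu0, orphanage;
	struct cb cbs[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
	struct cb *p;
	int i;

	cblist_init(&cpu0);
	cblist_init(&orphanage);
	for (i = 0; i < 3; i++)
		cblist_enqueue(&cpu0, &cbs[i]);
	cblist_splice(&orphanage, &cpu0);	/* models CPU_DYING */
	for (p = orphanage.head; p; p = p->next)
		printf("adopted cb %d\n", p->id);	/* order preserved */
	return 0;
}

cblist_splice() is the whole trick: one pointer store links the two
lists and one assignment moves the tail, so no waiting is ever needed.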

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c        |  151 ++++++++++++++++++++++++----------------------
 kernel/rcutree.h        |   11 +++-
 kernel/rcutree_plugin.h |   34 +++++++++++
 kernel/rcutree_trace.c  |    4 +-
 4 files changed, 125 insertions(+), 75 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0108570..d8d9865 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -63,6 +63,9 @@
 	.gpnum = -300, \
 	.completed = -300, \
 	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
+	.orphan_cbs_list = NULL, \
+	.orphan_cbs_tail = &name.orphan_cbs_list, \
+	.orphan_qlen = 0, \
 	.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
 	.n_force_qs = 0, \
 	.n_force_qs_ngp = 0, \
@@ -838,17 +841,63 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
+ * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
+ * specified flavor of RCU.  The callbacks will be adopted by the next
+ * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
+ * comes first.  Because this is invoked from the CPU_DYING notifier,
+ * irqs are already disabled.
+ */
+static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+{
+	int i;
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+
+	if (rdp->nxtlist == NULL)
+		return;  /* irqs disabled, so comparison is stable. */
+	spin_lock(&rsp->onofflock);  /* irqs already disabled. */
+	*rsp->orphan_cbs_tail = rdp->nxtlist;
+	rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+	rsp->orphan_qlen += rdp->qlen;
+	rdp->qlen = 0;
+	spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
+}
+
+/*
+ * Adopt previously orphaned RCU callbacks.
+ */
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	spin_lock_irqsave(&rsp->onofflock, flags);
+	rdp = rsp->rda[smp_processor_id()];
+	if (rsp->orphan_cbs_list == NULL) {
+		spin_unlock_irqrestore(&rsp->onofflock, flags);
+		return;
+	}
+	*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
+	rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
+	rdp->qlen += rsp->orphan_qlen;
+	rsp->orphan_cbs_list = NULL;
+	rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
+	rsp->orphan_qlen = 0;
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
+}
+
+/*
  * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
  * and move all callbacks from the outgoing CPU to the current one.
  */
 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 {
-	int i;
 	unsigned long flags;
 	long lastcomp;
 	unsigned long mask;
 	struct rcu_data *rdp = rsp->rda[cpu];
-	struct rcu_data *rdp_me;
 	struct rcu_node *rnp;
 
 	/* Exclude any attempts to start a new grace period. */
@@ -871,32 +920,9 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	} while (rnp != NULL);
 	lastcomp = rsp->completed;
 
-	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/*
-	 * Move callbacks from the outgoing CPU to the running CPU.
-	 * Note that the outgoing CPU is now quiescent, so it is now
-	 * (uncharacteristically) safe to access its rcu_data structure.
-	 * Note also that we must carefully retain the order of the
-	 * outgoing CPU's callbacks in order for rcu_barrier() to work
-	 * correctly.  Finally, note that we start all the callbacks
-	 * afresh, even those that have passed through a grace period
-	 * and are therefore ready to invoke.  The theory is that hotplug
-	 * events are rare, and that if they are frequent enough to
-	 * indefinitely delay callbacks, you have far worse things to
-	 * be worrying about.
-	 */
-	if (rdp->nxtlist != NULL) {
-		rdp_me = rsp->rda[smp_processor_id()];
-		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtlist = NULL;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			rdp->nxttail[i] = &rdp->nxtlist;
-		rdp_me->qlen += rdp->qlen;
-		rdp->qlen = 0;
-	}
-	local_irq_restore(flags);
+	rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -914,6 +940,14 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
+static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+{
+}
+
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+}
+
 static void rcu_offline_cpu(int cpu)
 {
 }
@@ -1367,9 +1401,6 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
 
 static void rcu_barrier_callback(struct rcu_head *notused)
 {
@@ -1392,21 +1423,16 @@ static void rcu_barrier_func(void *type)
 	call_rcu_func(head, rcu_barrier_callback);
 }
 
-static inline void wait_migrated_callbacks(void)
-{
-	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-	smp_mb(); /* In case we didn't sleep. */
-}
-
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
+static void _rcu_barrier(struct rcu_state *rsp,
+			 void (*call_rcu_func)(struct rcu_head *head,
 					       void (*func)(struct rcu_head *head)))
 {
 	BUG_ON(in_interrupt());
-	/* Take cpucontrol mutex to protect against CPU hotplug */
+	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rcu_barrier_mutex);
 	init_completion(&rcu_barrier_completion);
 	/*
@@ -1419,29 +1445,22 @@ static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
 	 * early.
 	 */
 	atomic_set(&rcu_barrier_cpu_count, 1);
+	preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
+	rcu_adopt_orphan_cbs(rsp);
 	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
+	preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
 		complete(&rcu_barrier_completion);
 	wait_for_completion(&rcu_barrier_completion);
 	mutex_unlock(&rcu_barrier_mutex);
-	wait_migrated_callbacks();
-}
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(call_rcu);
 }
-EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /**
  * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -1450,16 +1469,10 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-static void rcu_migrate_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_migrate_type_count))
-		wake_up(&rcu_migrate_wq);
-}
-
 /*
  * Do boot-time initialization of a CPU's per-CPU RCU data.
  */
@@ -1556,27 +1569,21 @@ int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
 		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		/* Don't need to wait until next removal operation. */
-		/* rcu_migrate_head is protected by cpu_add_remove_lock */
-		wait_migrated_callbacks();
-		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
 		/*
-		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
+		 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
 		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-		 * returns, all online cpus have queued rcu_barrier_func(),
-		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
-		 *
-		 * These callbacks ensure _rcu_barrier() waits for all
-		 * RCU callbacks of the specified type to complete.
+		 * returns, all online cpus have queued rcu_barrier_func().
+		 * The dying CPU clears its cpu_online_mask bit and
+		 * moves all of its RCU callbacks to ->orphan_cbs_list
+		 * in the context of stop_machine(), so subsequent calls
+		 * to _rcu_barrier() will adopt these callbacks and only
+		 * then queue rcu_barrier_func() on all remaining CPUs.
 		 */
-		atomic_set(&rcu_migrate_type_count, 3);
-		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
-		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
-		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
+		rcu_send_cbs_to_orphanage(&rcu_bh_state);
+		rcu_send_cbs_to_orphanage(&rcu_sched_state);
+		rcu_preempt_send_cbs_to_orphanage();
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 676eecd..b40ac57 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -244,7 +244,15 @@ struct rcu_state {
 	/* End  of fields guarded by root rcu_node's lock. */
 
 	spinlock_t onofflock;			/* exclude on/offline and */
-						/*  starting new GP. */
+						/*  starting new GP.  Also */
+						/*  protects the following */
+						/*  orphan_cbs fields. */
+	struct rcu_head *orphan_cbs_list;	/* list of rcu_head structs */
+						/*  orphaned by all CPUs in */
+						/*  a given leaf rcu_node */
+						/*  going offline. */
+	struct rcu_head **orphan_cbs_tail;	/* And tail pointer. */
+	long orphan_qlen;			/* Number of orphaned cbs. */
 	spinlock_t fqslock;			/* Only one task forcing */
 						/*  quiescent states. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
@@ -305,6 +313,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
+static void rcu_preempt_send_cbs_to_orphanage(void);
 static void __init __rcu_init_preempt(void);
 
 #endif /* #else #ifdef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 57200fe..c0cb783 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -410,6 +410,15 @@ static int rcu_preempt_needs_cpu(int cpu)
 	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 }
 
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(&rcu_preempt_state, call_rcu);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Initialize preemptable RCU's per-CPU data.
  */
@@ -419,6 +428,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
+ * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+	rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+}
+
+/*
  * Initialize preemptable RCU's state structures.
  */
 static void __init __rcu_init_preempt(void)
@@ -564,6 +581,16 @@ static int rcu_preempt_needs_cpu(int cpu)
 }
 
 /*
+ * Because preemptable RCU does not exist, rcu_barrier() is just
+ * another name for rcu_barrier_sched().
+ */
+void rcu_barrier(void)
+{
+	rcu_barrier_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/*
  * Because preemptable RCU does not exist, there is no per-CPU
  * data to initialize.
  */
@@ -572,6 +599,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
+ * Because there is no preemptable RCU, there are no callbacks to move.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+}
+
+/*
  * Because preemptable RCU does not exist, it need not be initialized.
  */
 static void __init __rcu_init_preempt(void)
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index f09af28..4b31c77 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -159,13 +159,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	struct rcu_node *rnp;
 
 	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
-		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
+		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
 		   rsp->completed, rsp->gpnum, rsp->signaled,
 		   (long)(rsp->jiffies_force_qs - jiffies),
 		   (int)(jiffies & 0xffff),
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   rsp->n_force_qs_lh);
+		   rsp->n_force_qs_lh, rsp->orphan_qlen);
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");
-- 
1.5.2.5



* [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class
  2009-10-07  4:47 [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Paul E. McKenney
  2009-10-07  4:48 ` [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree Paul E. McKenney
  2009-10-07  4:48 ` [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Paul E. McKenney
@ 2009-10-07  4:48 ` Paul E. McKenney
  2009-10-07  6:17   ` [tip:core/rcu] rcu: Place " tip-bot for Paul E. McKenney
  2009-10-08  9:31   ` [PATCH tip/core/rcu 3/3] rcu: place " Peter Zijlstra
  2009-10-07  6:14 ` [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Ingo Molnar
  3 siblings, 2 replies; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07  4:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, dvhltc,
	niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells,
	Paul E. McKenney

From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Before this patch, all of the rcu_node structures were in the same lockdep
class, so that lockdep would complain when rcu_preempt_offline_tasks()
acquired the root rcu_node structure's lock while holding one of the leaf
rcu_nodes' locks.  This patch changes rcu_init_one() to initialize the
root rcu_node structure's lock with a separate spin_lock_init() invocation
from the one used for all of the rest of the rcu_node structures, which
puts the root rcu_node structure's lock in its own lockdep class.
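
Underlying this fix: lockdep allocates one static lock-class key per
spin_lock_init() call site, so every lock initialized by the same
statement lands in the same class.  A stripped-down kernel-style sketch
of the idea (illustration only; the toy_* names are invented and this
builds only inside a kernel tree):

#include <linux/spinlock.h>

struct toy_node {
	spinlock_t lock;
};

static struct toy_node toy_leaves[4];
static struct toy_node toy_root;

static void toy_init_locks(void)
{
	int i;

	for (i = 0; i < 4; i++)
		spin_lock_init(&toy_leaves[i].lock);	/* one site: shared class */
	spin_lock_init(&toy_root.lock);			/* second site: own class */
}

/*
 * With a single shared class, lockdep models the nesting below as
 * "acquire A while holding A" and complains.  With the root's lock in
 * its own class, leaf-then-root is an ordinary two-class ordering
 * that lockdep can verify.
 */
static void toy_leaf_then_root(int i)
{
	spin_lock(&toy_leaves[i].lock);
	spin_lock(&toy_root.lock);
	spin_unlock(&toy_root.lock);
	spin_unlock(&toy_leaves[i].lock);
}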

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcutree.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d8d9865..705f02a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1647,7 +1647,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 		cpustride *= rsp->levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
-			spin_lock_init(&rnp->lock);
+			if (rnp != rcu_get_root(rsp))
+				spin_lock_init(&rnp->lock);
 			rnp->gpnum = 0;
 			rnp->qsmask = 0;
 			rnp->qsmaskinit = 0;
@@ -1670,6 +1671,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
 		}
 	}
+	spin_lock_init(&rcu_get_root(rsp)->lock);
 }
 
 /*
-- 
1.5.2.5



* Re: [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint
  2009-10-07  4:47 [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Paul E. McKenney
                   ` (2 preceding siblings ...)
  2009-10-07  4:48 ` [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class Paul E. McKenney
@ 2009-10-07  6:14 ` Ingo Molnar
  2009-10-07 13:52   ` Paul E. McKenney
  3 siblings, 1 reply; 13+ messages in thread
From: Ingo Molnar @ 2009-10-07  6:14 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, laijs, dipankar, akpm, mathieu.desnoyers, josh,
	dvhltc, niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells


* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> This patchset fixes a couple of issues with TREE_PREEMPT_RCU:
> 
> 1.	The current implementation of rcu_barrier() waits for any
> 	prior CPU-hotplug operation to complete.  This code path has
> 	resulted in some hangs, so first move this code from rcupdate.c
> 	to rcutree.c and then use a simpler algorithm that avoids the
> 	waiting, eliminating the possibility of such hangs.
> 
> 2.	The lockdep facility complains from time to time due to the
> 	fact that certain combinations of CPU-hotplug operations can
> 	cause the TREE_PREEMPT_RCU code to acquire the root rcu_node
> 	structure's lock while holding a leaf rcu_node structure's
> 	lock.  This patch places the root rcu_node structure's lock
> 	into its own lockdep class to prevent this false positive.
> 
> Ingo, please rewind tip/core/rcu to commit #135c8aea before applying this
> series, as these patches really need to go into 2.6.32.  Reworked and
> retested rcutiny (for 2.6.33, as Linus requested) will follow in a
> few days.  Or more patches for more bug fixes, as the case may be.  ;-)
> 
>  b/kernel/rcupdate.c       |  120 -----------------------------------
>  b/kernel/rcutree.c        |  120 +++++++++++++++++++++++++++++++++++
>  b/kernel/rcutree.h        |   11 ++-
>  b/kernel/rcutree_plugin.h |   34 ++++++++++
>  b/kernel/rcutree_trace.c  |    5 -
>  kernel/rcutree.c          |  156 ++++++++++++++++++++++++----------------------
>  6 files changed, 248 insertions(+), 198 deletions(-)

Applied, thanks Paul!

I dropped these commits for now:

 3ffea79: rcu: Make hot-unplugged CPU relinquish its own RCU callbacks
 eddd962: rcu: Move rcu_barrier() to rcutree, make lightweight rcu_barrier() for rcutiny
 a39e7d5: rcu-tiny: The Bloatwatch Edition, v6

We can apply rcu-tiny for .33 again, once the dust has settled.

	Ingo


* [tip:core/rcu] rcu: Move rcu_barrier() to rcutree
  2009-10-07  4:48 ` [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree Paul E. McKenney
@ 2009-10-07  6:16   ` tip-bot for Paul E. McKenney
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Paul E. McKenney @ 2009-10-07  6:16 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, paulmck, hpa, mingo, tglx, mingo

Commit-ID:  d0ec774cb2599c858be9d923bb873cf6697520d8
Gitweb:     http://git.kernel.org/tip/d0ec774cb2599c858be9d923bb873cf6697520d8
Author:     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
AuthorDate: Tue, 6 Oct 2009 21:48:16 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 7 Oct 2009 08:11:20 +0200

rcu: Move rcu_barrier() to rcutree

Move the existing rcu_barrier() implementation to rcutree.c,
consistent with the fact that the rcu_barrier() implementation is
tied quite tightly to the RCU implementation.

This opens the way to simplify and fix rcutree.c's rcu_barrier()
implementation in a later patch.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12548908982563-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/rcupdate.c |  120 +----------------------------------------------------
 kernel/rcutree.c  |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+), 119 deletions(-)

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index e432422..4001833 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,16 +53,8 @@ struct lockdep_map rcu_lock_map =
 EXPORT_SYMBOL_GPL(rcu_lock_map);
 #endif
 
-static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
-static atomic_t rcu_barrier_cpu_count;
-static DEFINE_MUTEX(rcu_barrier_mutex);
-static struct completion rcu_barrier_completion;
 int rcu_scheduler_active __read_mostly;
 
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
-
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
@@ -165,120 +157,10 @@ void synchronize_rcu_bh(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
-static void rcu_barrier_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
-}
-
-/*
- * Called with preemption disabled, and from cross-cpu IRQ context.
- */
-static void rcu_barrier_func(void *type)
-{
-	int cpu = smp_processor_id();
-	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
-	void (*call_rcu_func)(struct rcu_head *head,
-			      void (*func)(struct rcu_head *head));
-
-	atomic_inc(&rcu_barrier_cpu_count);
-	call_rcu_func = type;
-	call_rcu_func(head, rcu_barrier_callback);
-}
-
-static inline void wait_migrated_callbacks(void)
-{
-	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-	smp_mb(); /* In case we didn't sleep. */
-}
-
-/*
- * Orchestrate the specified type of RCU barrier, waiting for all
- * RCU callbacks of the specified type to complete.
- */
-static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
-					       void (*func)(struct rcu_head *head)))
-{
-	BUG_ON(in_interrupt());
-	/* Take cpucontrol mutex to protect against CPU hotplug */
-	mutex_lock(&rcu_barrier_mutex);
-	init_completion(&rcu_barrier_completion);
-	/*
-	 * Initialize rcu_barrier_cpu_count to 1, then invoke
-	 * rcu_barrier_func() on each CPU, so that each CPU also has
-	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
-	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
-	 * might complete its grace period before all of the other CPUs
-	 * did their increment, causing this function to return too
-	 * early.
-	 */
-	atomic_set(&rcu_barrier_cpu_count, 1);
-	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
-		complete(&rcu_barrier_completion);
-	wait_for_completion(&rcu_barrier_completion);
-	mutex_unlock(&rcu_barrier_mutex);
-	wait_migrated_callbacks();
-}
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(call_rcu);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/**
- * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
- */
-void rcu_barrier_bh(void)
-{
-	_rcu_barrier(call_rcu_bh);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-
-/**
- * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
- */
-void rcu_barrier_sched(void)
-{
-	_rcu_barrier(call_rcu_sched);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-
-static void rcu_migrate_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_migrate_type_count))
-		wake_up(&rcu_migrate_wq);
-}
-
 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
 		unsigned long action, void *hcpu)
 {
-	rcu_cpu_notify(self, action, hcpu);
-	if (action == CPU_DYING) {
-		/*
-		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
-		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-		 * returns, all online cpus have queued rcu_barrier_func(),
-		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
-		 *
-		 * These callbacks ensure _rcu_barrier() waits for all
-		 * RCU callbacks of the specified type to complete.
-		 */
-		atomic_set(&rcu_migrate_type_count, 3);
-		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
-		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
-		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
-	} else if (action == CPU_DOWN_PREPARE) {
-		/* Don't need to wait until next removal operation. */
-		/* rcu_migrate_head is protected by cpu_add_remove_lock */
-		wait_migrated_callbacks();
-	}
-
-	return NOTIFY_OK;
+	return rcu_cpu_notify(self, action, hcpu);
 }
 
 void __init rcu_init(void)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e2e272b..0108570 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1363,6 +1363,103 @@ int rcu_needs_cpu(int cpu)
 	       rcu_preempt_needs_cpu(cpu);
 }
 
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
+static atomic_t rcu_barrier_cpu_count;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+static struct completion rcu_barrier_completion;
+static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
+static struct rcu_head rcu_migrate_head[3];
+static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
+
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *type)
+{
+	int cpu = smp_processor_id();
+	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
+	void (*call_rcu_func)(struct rcu_head *head,
+			      void (*func)(struct rcu_head *head));
+
+	atomic_inc(&rcu_barrier_cpu_count);
+	call_rcu_func = type;
+	call_rcu_func(head, rcu_barrier_callback);
+}
+
+static inline void wait_migrated_callbacks(void)
+{
+	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+	smp_mb(); /* In case we didn't sleep. */
+}
+
+/*
+ * Orchestrate the specified type of RCU barrier, waiting for all
+ * RCU callbacks of the specified type to complete.
+ */
+static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
+					       void (*func)(struct rcu_head *head)))
+{
+	BUG_ON(in_interrupt());
+	/* Take cpucontrol mutex to protect against CPU hotplug */
+	mutex_lock(&rcu_barrier_mutex);
+	init_completion(&rcu_barrier_completion);
+	/*
+	 * Initialize rcu_barrier_cpu_count to 1, then invoke
+	 * rcu_barrier_func() on each CPU, so that each CPU also has
+	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
+	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
+	 * might complete its grace period before all of the other CPUs
+	 * did their increment, causing this function to return too
+	 * early.
+	 */
+	atomic_set(&rcu_barrier_cpu_count, 1);
+	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+	wait_for_completion(&rcu_barrier_completion);
+	mutex_unlock(&rcu_barrier_mutex);
+	wait_migrated_callbacks();
+}
+
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(call_rcu);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/**
+ * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
+ */
+void rcu_barrier_bh(void)
+{
+	_rcu_barrier(call_rcu_bh);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_bh);
+
+/**
+ * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ */
+void rcu_barrier_sched(void)
+{
+	_rcu_barrier(call_rcu_sched);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_sched);
+
+static void rcu_migrate_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_migrate_type_count))
+		wake_up(&rcu_migrate_wq);
+}
+
 /*
  * Do boot-time initialization of a CPU's per-CPU RCU data.
  */
@@ -1459,6 +1556,28 @@ int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		/* Don't need to wait until next removal operation. */
+		/* rcu_migrate_head is protected by cpu_add_remove_lock */
+		wait_migrated_callbacks();
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		/*
+		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
+		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
+		 * returns, all online cpus have queued rcu_barrier_func(),
+		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
+		 *
+		 * These callbacks ensure _rcu_barrier() waits for all
+		 * RCU callbacks of the specified type to complete.
+		 */
+		atomic_set(&rcu_migrate_type_count, 3);
+		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
+		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
+		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:


* [tip:core/rcu] rcu: Make hot-unplugged CPU relinquish its own RCU callbacks
  2009-10-07  4:48 ` [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Paul E. McKenney
@ 2009-10-07  6:17   ` tip-bot for Paul E. McKenney
  2009-10-07 13:16   ` [PATCH tip/core/rcu 2/3] rcu: make " Steven Rostedt
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Paul E. McKenney @ 2009-10-07  6:17 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, paulmck, hpa, mingo, tglx, mingo

Commit-ID:  e74f4c4564455c91a3b4075bb1721993c2a95dda
Gitweb:     http://git.kernel.org/tip/e74f4c4564455c91a3b4075bb1721993c2a95dda
Author:     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
AuthorDate: Tue, 6 Oct 2009 21:48:17 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 7 Oct 2009 08:11:20 +0200

rcu: Make hot-unplugged CPU relinquish its own RCU callbacks

The current interaction between RCU and CPU hotplug requires that
RCU block in CPU notifiers waiting for callbacks to drain.

This can be greatly simplified by having each CPU relinquish its
own callbacks, and by having both _rcu_barrier() and the CPU_DEAD
notifier adopt all callbacks that were previously relinquished.

This change also eliminates the possibility of certain types of
hangs due to the previous practice of waiting for callbacks to be
invoked from within CPU notifiers.  If you don't ever wait, you
cannot hang.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1254890898456-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/rcutree.c        |  151 ++++++++++++++++++++++++----------------------
 kernel/rcutree.h        |   11 +++-
 kernel/rcutree_plugin.h |   34 +++++++++++
 kernel/rcutree_trace.c  |    4 +-
 4 files changed, 125 insertions(+), 75 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0108570..d8d9865 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -63,6 +63,9 @@
 	.gpnum = -300, \
 	.completed = -300, \
 	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
+	.orphan_cbs_list = NULL, \
+	.orphan_cbs_tail = &name.orphan_cbs_list, \
+	.orphan_qlen = 0, \
 	.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
 	.n_force_qs = 0, \
 	.n_force_qs_ngp = 0, \
@@ -838,17 +841,63 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
+ * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
+ * specified flavor of RCU.  The callbacks will be adopted by the next
+ * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
+ * comes first.  Because this is invoked from the CPU_DYING notifier,
+ * irqs are already disabled.
+ */
+static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+{
+	int i;
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+
+	if (rdp->nxtlist == NULL)
+		return;  /* irqs disabled, so comparison is stable. */
+	spin_lock(&rsp->onofflock);  /* irqs already disabled. */
+	*rsp->orphan_cbs_tail = rdp->nxtlist;
+	rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+	rsp->orphan_qlen += rdp->qlen;
+	rdp->qlen = 0;
+	spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
+}
+
+/*
+ * Adopt previously orphaned RCU callbacks.
+ */
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	spin_lock_irqsave(&rsp->onofflock, flags);
+	rdp = rsp->rda[smp_processor_id()];
+	if (rsp->orphan_cbs_list == NULL) {
+		spin_unlock_irqrestore(&rsp->onofflock, flags);
+		return;
+	}
+	*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
+	rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
+	rdp->qlen += rsp->orphan_qlen;
+	rsp->orphan_cbs_list = NULL;
+	rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
+	rsp->orphan_qlen = 0;
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
+}
+
+/*
  * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
  * and move all callbacks from the outgoing CPU to the current one.
  */
 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 {
-	int i;
 	unsigned long flags;
 	long lastcomp;
 	unsigned long mask;
 	struct rcu_data *rdp = rsp->rda[cpu];
-	struct rcu_data *rdp_me;
 	struct rcu_node *rnp;
 
 	/* Exclude any attempts to start a new grace period. */
@@ -871,32 +920,9 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	} while (rnp != NULL);
 	lastcomp = rsp->completed;
 
-	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/*
-	 * Move callbacks from the outgoing CPU to the running CPU.
-	 * Note that the outgoing CPU is now quiescent, so it is now
-	 * (uncharacteristically) safe to access its rcu_data structure.
-	 * Note also that we must carefully retain the order of the
-	 * outgoing CPU's callbacks in order for rcu_barrier() to work
-	 * correctly.  Finally, note that we start all the callbacks
-	 * afresh, even those that have passed through a grace period
-	 * and are therefore ready to invoke.  The theory is that hotplug
-	 * events are rare, and that if they are frequent enough to
-	 * indefinitely delay callbacks, you have far worse things to
-	 * be worrying about.
-	 */
-	if (rdp->nxtlist != NULL) {
-		rdp_me = rsp->rda[smp_processor_id()];
-		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-		rdp->nxtlist = NULL;
-		for (i = 0; i < RCU_NEXT_SIZE; i++)
-			rdp->nxttail[i] = &rdp->nxtlist;
-		rdp_me->qlen += rdp->qlen;
-		rdp->qlen = 0;
-	}
-	local_irq_restore(flags);
+	rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -914,6 +940,14 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
+static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+{
+}
+
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+{
+}
+
 static void rcu_offline_cpu(int cpu)
 {
 }
@@ -1367,9 +1401,6 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
 
 static void rcu_barrier_callback(struct rcu_head *notused)
 {
@@ -1392,21 +1423,16 @@ static void rcu_barrier_func(void *type)
 	call_rcu_func(head, rcu_barrier_callback);
 }
 
-static inline void wait_migrated_callbacks(void)
-{
-	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-	smp_mb(); /* In case we didn't sleep. */
-}
-
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
  */
-static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
+static void _rcu_barrier(struct rcu_state *rsp,
+			 void (*call_rcu_func)(struct rcu_head *head,
 					       void (*func)(struct rcu_head *head)))
 {
 	BUG_ON(in_interrupt());
-	/* Take cpucontrol mutex to protect against CPU hotplug */
+	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rcu_barrier_mutex);
 	init_completion(&rcu_barrier_completion);
 	/*
@@ -1419,29 +1445,22 @@ static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
 	 * early.
 	 */
 	atomic_set(&rcu_barrier_cpu_count, 1);
+	preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
+	rcu_adopt_orphan_cbs(rsp);
 	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
+	preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
 		complete(&rcu_barrier_completion);
 	wait_for_completion(&rcu_barrier_completion);
 	mutex_unlock(&rcu_barrier_mutex);
-	wait_migrated_callbacks();
-}
-
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(call_rcu);
 }
-EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /**
  * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(call_rcu_bh);
+	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -1450,16 +1469,10 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(call_rcu_sched);
+	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-static void rcu_migrate_callback(struct rcu_head *notused)
-{
-	if (atomic_dec_and_test(&rcu_migrate_type_count))
-		wake_up(&rcu_migrate_wq);
-}
-
 /*
  * Do boot-time initialization of a CPU's per-CPU RCU data.
  */
@@ -1556,27 +1569,21 @@ int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	case CPU_UP_PREPARE_FROZEN:
 		rcu_online_cpu(cpu);
 		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		/* Don't need to wait until next removal operation. */
-		/* rcu_migrate_head is protected by cpu_add_remove_lock */
-		wait_migrated_callbacks();
-		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
 		/*
-		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
+		 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
 		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-		 * returns, all online cpus have queued rcu_barrier_func(),
-		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
-		 *
-		 * These callbacks ensure _rcu_barrier() waits for all
-		 * RCU callbacks of the specified type to complete.
+		 * returns, all online cpus have queued rcu_barrier_func().
+		 * The dying CPU clears its cpu_online_mask bit and
+		 * moves all of its RCU callbacks to ->orphan_cbs_list
+		 * in the context of stop_machine(), so subsequent calls
+		 * to _rcu_barrier() will adopt these callbacks and only
+		 * then queue rcu_barrier_func() on all remaining CPUs.
 		 */
-		atomic_set(&rcu_migrate_type_count, 3);
-		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
-		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
-		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
+		rcu_send_cbs_to_orphanage(&rcu_bh_state);
+		rcu_send_cbs_to_orphanage(&rcu_sched_state);
+		rcu_preempt_send_cbs_to_orphanage();
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 676eecd..b40ac57 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -244,7 +244,15 @@ struct rcu_state {
 	/* End  of fields guarded by root rcu_node's lock. */
 
 	spinlock_t onofflock;			/* exclude on/offline and */
-						/*  starting new GP. */
+						/*  starting new GP.  Also */
+						/*  protects the following */
+						/*  orphan_cbs fields. */
+	struct rcu_head *orphan_cbs_list;	/* list of rcu_head structs */
+						/*  orphaned by all CPUs in */
+						/*  a given leaf rcu_node */
+						/*  going offline. */
+	struct rcu_head **orphan_cbs_tail;	/* And tail pointer. */
+	long orphan_qlen;			/* Number of orphaned cbs. */
 	spinlock_t fqslock;			/* Only one task forcing */
 						/*  quiescent states. */
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
@@ -305,6 +313,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
+static void rcu_preempt_send_cbs_to_orphanage(void);
 static void __init __rcu_init_preempt(void);
 
 #endif /* #else #ifdef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 57200fe..c0cb783 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -410,6 +410,15 @@ static int rcu_preempt_needs_cpu(int cpu)
 	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
 }
 
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(&rcu_preempt_state, call_rcu);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Initialize preemptable RCU's per-CPU data.
  */
@@ -419,6 +428,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
+ * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+	rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+}
+
+/*
  * Initialize preemptable RCU's state structures.
  */
 static void __init __rcu_init_preempt(void)
@@ -564,6 +581,16 @@ static int rcu_preempt_needs_cpu(int cpu)
 }
 
 /*
+ * Because preemptable RCU does not exist, rcu_barrier() is just
+ * another name for rcu_barrier_sched().
+ */
+void rcu_barrier(void)
+{
+	rcu_barrier_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/*
  * Because preemptable RCU does not exist, there is no per-CPU
  * data to initialize.
  */
@@ -572,6 +599,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
+ * Because there is no preemptable RCU, there are no callbacks to move.
+ */
+static void rcu_preempt_send_cbs_to_orphanage(void)
+{
+}
+
+/*
  * Because preemptable RCU does not exist, it need not be initialized.
  */
 static void __init __rcu_init_preempt(void)
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index f09af28..4b31c77 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -159,13 +159,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	struct rcu_node *rnp;
 
 	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
-		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
+		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
 		   rsp->completed, rsp->gpnum, rsp->signaled,
 		   (long)(rsp->jiffies_force_qs - jiffies),
 		   (int)(jiffies & 0xffff),
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   rsp->n_force_qs_lh);
+		   rsp->n_force_qs_lh, rsp->orphan_qlen);
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");


* [tip:core/rcu] rcu: Place root rcu_node structure in separate lockdep class
  2009-10-07  4:48 ` [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class Paul E. McKenney
@ 2009-10-07  6:17   ` tip-bot for Paul E. McKenney
  2009-10-08  9:31   ` [PATCH tip/core/rcu 3/3] rcu: place " Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Paul E. McKenney @ 2009-10-07  6:17 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, paulmck, hpa, mingo, peterz, tglx, mingo

Commit-ID:  978c0b88146a7f9b364b71b5b83c5b12e7b413d7
Gitweb:     http://git.kernel.org/tip/978c0b88146a7f9b364b71b5b83c5b12e7b413d7
Author:     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
AuthorDate: Tue, 6 Oct 2009 21:48:18 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 7 Oct 2009 08:11:21 +0200

rcu: Place root rcu_node structure in separate lockdep class

Before this patch, all of the rcu_node structures were in the same lockdep
class, so that lockdep would complain when rcu_preempt_offline_tasks()
acquired the root rcu_node structure's lock while holding one of the leaf
rcu_nodes' locks.

This patch changes rcu_init_one() to initialize the root rcu_node
structure's lock with a spin_lock_init() call separate from the one
used for the rest of the rcu_node structures, which puts the root
rcu_node structure's lock in its own lockdep class.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12548908983277-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/rcutree.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d8d9865..705f02a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1647,7 +1647,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 		cpustride *= rsp->levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
-			spin_lock_init(&rnp->lock);
+			if (rnp != rcu_get_root(rsp))
+				spin_lock_init(&rnp->lock);
 			rnp->gpnum = 0;
 			rnp->qsmask = 0;
 			rnp->qsmaskinit = 0;
@@ -1670,6 +1671,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
 		}
 	}
+	spin_lock_init(&rcu_get_root(rsp)->lock);
 }
 
 /*

^ permalink raw reply related	[flat|nested] 13+ messages in thread
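
Why re-running spin_lock_init() on the root lock is sufficient: with
lockdep enabled, each spin_lock_init() call site expands its own static
struct lock_class_key, so locks initialized at different call sites land
in different lockdep classes.  A toy kernel-style sketch of the effect,
with invented names and not taken from the patch:

#include <linux/spinlock.h>

static spinlock_t toy_leaf_lock;	/* stands in for a leaf rcu_node lock */
static spinlock_t toy_root_lock;	/* stands in for the root rcu_node lock */

static void toy_init(void)
{
	/*
	 * Each spin_lock_init() invocation carries its own static
	 * lock_class_key, so these locks get two distinct classes.
	 */
	spin_lock_init(&toy_leaf_lock);
	spin_lock_init(&toy_root_lock);
}

static void toy_leaf_then_root(void)
{
	/*
	 * With distinct classes, lockdep sees ordinary leaf->root
	 * nesting rather than a same-class (apparently recursive)
	 * acquisition, so it stays quiet.
	 */
	spin_lock(&toy_leaf_lock);
	spin_lock(&toy_root_lock);
	spin_unlock(&toy_root_lock);
	spin_unlock(&toy_leaf_lock);
}

After this change, the leaf-then-root acquisition in
rcu_preempt_offline_tasks() involves two distinct classes, so lockdep
no longer reports it as a false-positive deadlock.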

* Re: [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks
@ 2009-10-07 12:55 Mathieu Desnoyers
  0 siblings, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2009-10-07 12:55 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mingo, laijs, dipankar, akpm, josh, dvhltc, niv,
	tglx, peterz, rostedt, Valdis.Kletnieks, dhowells

* Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> The current interaction between RCU and CPU hotplug requires that
> RCU block in CPU notifiers waiting for callbacks to drain.  This can
> be greatly simplified by haing each CPU relinquish its own callbacks,
> and for both _rcu_barrier() and CPU_DEAD notifiers to adopt all callbacks
> that were previously relinquished.  This change also eliminates the
> possibility of certain types of hangs due to the previous practice of
> waiting for callbacks to be invoked from within CPU notifiers.  If you
> don't every wait, you cannot hang.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>

> ---
>  kernel/rcutree.c        |  151 ++++++++++++++++++++++++----------------------
>  kernel/rcutree.h        |   11 +++-
>  kernel/rcutree_plugin.h |   34 +++++++++++
>  kernel/rcutree_trace.c  |    4 +-
>  4 files changed, 125 insertions(+), 75 deletions(-)
> 
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 0108570..d8d9865 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -63,6 +63,9 @@
>  	.gpnum = -300, \
>  	.completed = -300, \
>  	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
> +	.orphan_cbs_list = NULL, \
> +	.orphan_cbs_tail = &name.orphan_cbs_list, \
> +	.orphan_qlen = 0, \
>  	.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
>  	.n_force_qs = 0, \
>  	.n_force_qs_ngp = 0, \
> @@ -838,17 +841,63 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
>  #ifdef CONFIG_HOTPLUG_CPU
>  
>  /*
> + * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
> + * specified flavor of RCU.  The callbacks will be adopted by the next
> + * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
> + * comes first.  Because this is invoked from the CPU_DYING notifier,
> + * irqs are already disabled.
> + */
> +static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
> +{
> +	int i;
> +	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
> +
> +	if (rdp->nxtlist == NULL)
> +		return;  /* irqs disabled, so comparison is stable. */
> +	spin_lock(&rsp->onofflock);  /* irqs already disabled. */
> +	*rsp->orphan_cbs_tail = rdp->nxtlist;
> +	rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
> +	rdp->nxtlist = NULL;
> +	for (i = 0; i < RCU_NEXT_SIZE; i++)
> +		rdp->nxttail[i] = &rdp->nxtlist;
> +	rsp->orphan_qlen += rdp->qlen;
> +	rdp->qlen = 0;
> +	spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
> +}
> +
> +/*
> + * Adopt previously orphaned RCU callbacks.
> + */
> +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
> +{
> +	unsigned long flags;
> +	struct rcu_data *rdp;
> +
> +	spin_lock_irqsave(&rsp->onofflock, flags);
> +	rdp = rsp->rda[smp_processor_id()];
> +	if (rsp->orphan_cbs_list == NULL) {
> +		spin_unlock_irqrestore(&rsp->onofflock, flags);
> +		return;
> +	}
> +	*rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
> +	rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
> +	rdp->qlen += rsp->orphan_qlen;
> +	rsp->orphan_cbs_list = NULL;
> +	rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
> +	rsp->orphan_qlen = 0;
> +	spin_unlock_irqrestore(&rsp->onofflock, flags);
> +}
> +
> +/*
>   * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
>   * and move all callbacks from the outgoing CPU to the current one.
>   */
>  static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
>  {
> -	int i;
>  	unsigned long flags;
>  	long lastcomp;
>  	unsigned long mask;
>  	struct rcu_data *rdp = rsp->rda[cpu];
> -	struct rcu_data *rdp_me;
>  	struct rcu_node *rnp;
>  
>  	/* Exclude any attempts to start a new grace period. */
> @@ -871,32 +920,9 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
>  	} while (rnp != NULL);
>  	lastcomp = rsp->completed;
>  
> -	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
> +	spin_unlock_irqrestore(&rsp->onofflock, flags);
>  
> -	/*
> -	 * Move callbacks from the outgoing CPU to the running CPU.
> -	 * Note that the outgoing CPU is now quiescent, so it is now
> -	 * (uncharacteristically) safe to access its rcu_data structure.
> -	 * Note also that we must carefully retain the order of the
> -	 * outgoing CPU's callbacks in order for rcu_barrier() to work
> -	 * correctly.  Finally, note that we start all the callbacks
> -	 * afresh, even those that have passed through a grace period
> -	 * and are therefore ready to invoke.  The theory is that hotplug
> -	 * events are rare, and that if they are frequent enough to
> -	 * indefinitely delay callbacks, you have far worse things to
> -	 * be worrying about.
> -	 */
> -	if (rdp->nxtlist != NULL) {
> -		rdp_me = rsp->rda[smp_processor_id()];
> -		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
> -		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
> -		rdp->nxtlist = NULL;
> -		for (i = 0; i < RCU_NEXT_SIZE; i++)
> -			rdp->nxttail[i] = &rdp->nxtlist;
> -		rdp_me->qlen += rdp->qlen;
> -		rdp->qlen = 0;
> -	}
> -	local_irq_restore(flags);
> +	rcu_adopt_orphan_cbs(rsp);
>  }
>  
>  /*
> @@ -914,6 +940,14 @@ static void rcu_offline_cpu(int cpu)
>  
>  #else /* #ifdef CONFIG_HOTPLUG_CPU */
>  
> +static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
> +{
> +}
> +
> +static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
> +{
> +}
> +
>  static void rcu_offline_cpu(int cpu)
>  {
>  }
> @@ -1367,9 +1401,6 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
>  static atomic_t rcu_barrier_cpu_count;
>  static DEFINE_MUTEX(rcu_barrier_mutex);
>  static struct completion rcu_barrier_completion;
> -static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
> -static struct rcu_head rcu_migrate_head[3];
> -static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
>  
>  static void rcu_barrier_callback(struct rcu_head *notused)
>  {
> @@ -1392,21 +1423,16 @@ static void rcu_barrier_func(void *type)
>  	call_rcu_func(head, rcu_barrier_callback);
>  }
>  
> -static inline void wait_migrated_callbacks(void)
> -{
> -	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
> -	smp_mb(); /* In case we didn't sleep. */
> -}
> -
>  /*
>   * Orchestrate the specified type of RCU barrier, waiting for all
>   * RCU callbacks of the specified type to complete.
>   */
> -static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
> +static void _rcu_barrier(struct rcu_state *rsp,
> +			 void (*call_rcu_func)(struct rcu_head *head,
>  					       void (*func)(struct rcu_head *head)))
>  {
>  	BUG_ON(in_interrupt());
> -	/* Take cpucontrol mutex to protect against CPU hotplug */
> +	/* Take mutex to serialize concurrent rcu_barrier() requests. */
>  	mutex_lock(&rcu_barrier_mutex);
>  	init_completion(&rcu_barrier_completion);
>  	/*
> @@ -1419,29 +1445,22 @@ static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
>  	 * early.
>  	 */
>  	atomic_set(&rcu_barrier_cpu_count, 1);
> +	preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
> +	rcu_adopt_orphan_cbs(rsp);
>  	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
> +	preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
>  	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
>  		complete(&rcu_barrier_completion);
>  	wait_for_completion(&rcu_barrier_completion);
>  	mutex_unlock(&rcu_barrier_mutex);
> -	wait_migrated_callbacks();
> -}
> -
> -/**
> - * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
> - */
> -void rcu_barrier(void)
> -{
> -	_rcu_barrier(call_rcu);
>  }
> -EXPORT_SYMBOL_GPL(rcu_barrier);
>  
>  /**
>   * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
>   */
>  void rcu_barrier_bh(void)
>  {
> -	_rcu_barrier(call_rcu_bh);
> +	_rcu_barrier(&rcu_bh_state, call_rcu_bh);
>  }
>  EXPORT_SYMBOL_GPL(rcu_barrier_bh);
>  
> @@ -1450,16 +1469,10 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
>   */
>  void rcu_barrier_sched(void)
>  {
> -	_rcu_barrier(call_rcu_sched);
> +	_rcu_barrier(&rcu_sched_state, call_rcu_sched);
>  }
>  EXPORT_SYMBOL_GPL(rcu_barrier_sched);
>  
> -static void rcu_migrate_callback(struct rcu_head *notused)
> -{
> -	if (atomic_dec_and_test(&rcu_migrate_type_count))
> -		wake_up(&rcu_migrate_wq);
> -}
> -
>  /*
>   * Do boot-time initialization of a CPU's per-CPU RCU data.
>   */
> @@ -1556,27 +1569,21 @@ int __cpuinit rcu_cpu_notify(struct notifier_block *self,
>  	case CPU_UP_PREPARE_FROZEN:
>  		rcu_online_cpu(cpu);
>  		break;
> -	case CPU_DOWN_PREPARE:
> -	case CPU_DOWN_PREPARE_FROZEN:
> -		/* Don't need to wait until next removal operation. */
> -		/* rcu_migrate_head is protected by cpu_add_remove_lock */
> -		wait_migrated_callbacks();
> -		break;
>  	case CPU_DYING:
>  	case CPU_DYING_FROZEN:
>  		/*
> -		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
> +		 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
>  		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
> -		 * returns, all online cpus have queued rcu_barrier_func(),
> -		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
> -		 *
> -		 * These callbacks ensure _rcu_barrier() waits for all
> -		 * RCU callbacks of the specified type to complete.
> +		 * returns, all online cpus have queued rcu_barrier_func().
> +		 * The dying CPU clears its cpu_online_mask bit and
> +		 * moves all of its RCU callbacks to ->orphan_cbs_list
> +		 * in the context of stop_machine(), so subsequent calls
> +		 * to _rcu_barrier() will adopt these callbacks and only
> +		 * then queue rcu_barrier_func() on all remaining CPUs.
>  		 */
> -		atomic_set(&rcu_migrate_type_count, 3);
> -		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
> -		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
> -		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
> +		rcu_send_cbs_to_orphanage(&rcu_bh_state);
> +		rcu_send_cbs_to_orphanage(&rcu_sched_state);
> +		rcu_preempt_send_cbs_to_orphanage();
>  		break;
>  	case CPU_DEAD:
>  	case CPU_DEAD_FROZEN:
> diff --git a/kernel/rcutree.h b/kernel/rcutree.h
> index 676eecd..b40ac57 100644
> --- a/kernel/rcutree.h
> +++ b/kernel/rcutree.h
> @@ -244,7 +244,15 @@ struct rcu_state {
>  	/* End  of fields guarded by root rcu_node's lock. */
>  
>  	spinlock_t onofflock;			/* exclude on/offline and */
> -						/*  starting new GP. */
> +						/*  starting new GP.  Also */
> +						/*  protects the following */
> +						/*  orphan_cbs fields. */
> +	struct rcu_head *orphan_cbs_list;	/* list of rcu_head structs */
> +						/*  orphaned by all CPUs in */
> +						/*  a given leaf rcu_node */
> +						/*  going offline. */
> +	struct rcu_head **orphan_cbs_tail;	/* And tail pointer. */
> +	long orphan_qlen;			/* Number of orphaned cbs. */
>  	spinlock_t fqslock;			/* Only one task forcing */
>  						/*  quiescent states. */
>  	unsigned long jiffies_force_qs;		/* Time at which to invoke */
> @@ -305,6 +313,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
>  static int rcu_preempt_pending(int cpu);
>  static int rcu_preempt_needs_cpu(int cpu);
>  static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
> +static void rcu_preempt_send_cbs_to_orphanage(void);
>  static void __init __rcu_init_preempt(void);
>  
>  #endif /* #else #ifdef RCU_TREE_NONCORE */
> diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> index 57200fe..c0cb783 100644
> --- a/kernel/rcutree_plugin.h
> +++ b/kernel/rcutree_plugin.h
> @@ -410,6 +410,15 @@ static int rcu_preempt_needs_cpu(int cpu)
>  	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
>  }
>  
> +/**
> + * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
> + */
> +void rcu_barrier(void)
> +{
> +	_rcu_barrier(&rcu_preempt_state, call_rcu);
> +}
> +EXPORT_SYMBOL_GPL(rcu_barrier);
> +
>  /*
>   * Initialize preemptable RCU's per-CPU data.
>   */
> @@ -419,6 +428,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
>  }
>  
>  /*
> + * Move preemptable RCU's callbacks to ->orphan_cbs_list.
> + */
> +static void rcu_preempt_send_cbs_to_orphanage(void)
> +{
> +	rcu_send_cbs_to_orphanage(&rcu_preempt_state);
> +}
> +
> +/*
>   * Initialize preemptable RCU's state structures.
>   */
>  static void __init __rcu_init_preempt(void)
> @@ -564,6 +581,16 @@ static int rcu_preempt_needs_cpu(int cpu)
>  }
>  
>  /*
> + * Because preemptable RCU does not exist, rcu_barrier() is just
> + * another name for rcu_barrier_sched().
> + */
> +void rcu_barrier(void)
> +{
> +	rcu_barrier_sched();
> +}
> +EXPORT_SYMBOL_GPL(rcu_barrier);
> +
> +/*
>   * Because preemptable RCU does not exist, there is no per-CPU
>   * data to initialize.
>   */
> @@ -572,6 +599,13 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
>  }
>  
>  /*
> + * Because there is no preemptable RCU, there are no callbacks to move.
> + */
> +static void rcu_preempt_send_cbs_to_orphanage(void)
> +{
> +}
> +
> +/*
>   * Because preemptable RCU does not exist, it need not be initialized.
>   */
>  static void __init __rcu_init_preempt(void)
> diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
> index f09af28..4b31c77 100644
> --- a/kernel/rcutree_trace.c
> +++ b/kernel/rcutree_trace.c
> @@ -159,13 +159,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
>  	struct rcu_node *rnp;
>  
>  	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
> -		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
> +		      "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
>  		   rsp->completed, rsp->gpnum, rsp->signaled,
>  		   (long)(rsp->jiffies_force_qs - jiffies),
>  		   (int)(jiffies & 0xffff),
>  		   rsp->n_force_qs, rsp->n_force_qs_ngp,
>  		   rsp->n_force_qs - rsp->n_force_qs_ngp,
> -		   rsp->n_force_qs_lh);
> +		   rsp->n_force_qs_lh, rsp->orphan_qlen);
>  	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
>  		if (rnp->level != level) {
>  			seq_puts(m, "\n");
> -- 
> 1.5.2.5
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 13+ messages in thread
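
The counting scheme in the _rcu_barrier() code quoted above (bias the
counter to one, take a reference for each queued barrier callback, drop
the bias, then wait for the count to hit zero) maps directly onto
ordinary threads.  Below is a user-space sketch with invented names, in
which a thread stands in for a CPU invoking its queued
rcu_barrier_callback(); it illustrates the shape of the algorithm and
is not kernel code:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int barrier_count;
static pthread_mutex_t done_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done_cv  = PTHREAD_COND_INITIALIZER;
static int done;

/* Counterpart of rcu_barrier_callback(): drop one reference. */
static void barrier_callback(void)
{
	if (atomic_fetch_sub(&barrier_count, 1) == 1) {
		pthread_mutex_lock(&done_mtx);
		done = 1;	/* "complete(&rcu_barrier_completion)" */
		pthread_cond_signal(&done_cv);
		pthread_mutex_unlock(&done_mtx);
	}
}

/* Stands in for one CPU running its queued barrier callback. */
static void *cpu_thread(void *unused)
{
	barrier_callback();
	return NULL;
}

int main(void)
{
	pthread_t tid[NCPUS];
	int i;

	/* Bias the count so no callback can reach zero prematurely. */
	atomic_store(&barrier_count, 1);

	/* "on_each_cpu()": take a reference, then queue the callback. */
	for (i = 0; i < NCPUS; i++) {
		atomic_fetch_add(&barrier_count, 1);
		pthread_create(&tid[i], NULL, cpu_thread, NULL);
	}

	barrier_callback();	/* drop the bias */

	pthread_mutex_lock(&done_mtx);	/* "wait_for_completion()" */
	while (!done)
		pthread_cond_wait(&done_cv, &done_mtx);
	pthread_mutex_unlock(&done_mtx);

	for (i = 0; i < NCPUS; i++)
		pthread_join(tid[i], NULL);
	puts("barrier complete");
	return 0;
}

The preempt_disable() in the kernel version serves a separate purpose
with no analogue here: it keeps stop_machine(), and therefore the
CPU_DYING notifier, from moving callbacks to ->orphan_cbs_list between
rcu_adopt_orphan_cbs() and on_each_cpu().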

* Re: [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks
  2009-10-07  4:48 ` [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Paul E. McKenney
  2009-10-07  6:17   ` [tip:core/rcu] rcu: Make " tip-bot for Paul E. McKenney
@ 2009-10-07 13:16   ` Steven Rostedt
  2009-10-07 13:50     ` Paul E. McKenney
  1 sibling, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2009-10-07 13:16 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mingo, laijs, dipankar, akpm, mathieu.desnoyers,
	josh, dvhltc, niv, tglx, peterz, Valdis.Kletnieks, dhowells

Paul,

Still suffering jetlag? ;-)

On Tue, 2009-10-06 at 21:48 -0700, Paul E. McKenney wrote:
> From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> The current interaction between RCU and CPU hotplug requires that
> RCU block in CPU notifiers waiting for callbacks to drain.  This can
> be greatly simplified by haing each CPU relinquish its own callbacks,
                           having

> and for both _rcu_barrier() and CPU_DEAD notifiers to adopt all callbacks
> that were previously relinquished.  This change also eliminates the
> possibility of certain types of hangs due to the previous practice of
> waiting for callbacks to be invoked from within CPU notifiers.  If you
> don't every wait, you cannot hang.
        ever

;-)

> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

-- Steve



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks
  2009-10-07 13:16   ` [PATCH tip/core/rcu 2/3] rcu: make " Steven Rostedt
@ 2009-10-07 13:50     ` Paul E. McKenney
  0 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07 13:50 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, mingo, laijs, dipankar, akpm, mathieu.desnoyers,
	josh, dvhltc, niv, tglx, peterz, Valdis.Kletnieks, dhowells

On Wed, Oct 07, 2009 at 09:16:12AM -0400, Steven Rostedt wrote:
> Paul,
> 
> Still suffering jetlag? ;-)

As a matter of fact, yes.  ;-)

> On Tue, 2009-10-06 at 21:48 -0700, Paul E. McKenney wrote:
> > From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > 
> > The current interaction between RCU and CPU hotplug requires that
> > RCU block in CPU notifiers waiting for callbacks to drain.  This can
> > be greatly simplified by haing each CPU relinquish its own callbacks,
>                            having
> 
> > and for both _rcu_barrier() and CPU_DEAD notifiers to adopt all callbacks
> > that were previously relinquished.  This change also eliminates the
> > possibility of certain types of hangs due to the previous practice of
> > waiting for callbacks to be invoked from within CPU notifiers.  If you
> > don't every wait, you cannot hang.
>         ever
> 
> ;-)

Courtesy of "git cherry-pick" and my being out of it.  ;-)

							Thanx, Paul

> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> -- Steve
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint
  2009-10-07  6:14 ` [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Ingo Molnar
@ 2009-10-07 13:52   ` Paul E. McKenney
  0 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2009-10-07 13:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, laijs, dipankar, akpm, mathieu.desnoyers, josh,
	dvhltc, niv, tglx, peterz, rostedt, Valdis.Kletnieks, dhowells

On Wed, Oct 07, 2009 at 08:14:48AM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> > This patchset fixes a couple of issues with TREE_PREEMPT_RCU:
> > 
> > 1.	The current implementation of rcu_barrier() waits for any
> > 	prior CPU-hotplug operation to complete.  This code path has
> > 	resulted in some hangs, so first move this code from rcupdate.c
> > 	to rcutree.c and then use a simpler algorithm that avoids the
> > 	waiting, eliminating the possibility of such hangs.
> > 
> > 2.	The lockdep facility complains from time to time due to the
> > 	fact that certain combinations of CPU-hotplug operations can
> > 	cause the TREE_PREEMPT_RCU code to acquire the root rcu_node
> > 	structure's lock while holding a leaf rcu_node structure's
> > 	lock.  This patch places the root rcu_node structure's lock
> > 	into its own lockdep class to prevent this false positive.
> > 
> > Ingo, please rewind tip/core/rcu to commit #135c8aea before applying this
> > series, as these patches really need to go into 2.6.32.  Reworked and
> > retested rcutiny (for 2.6.33, as Linus requested) will follow in a
> > few days.  Or more patches for more bug fixes, as the case may be.  ;-)
> > 
> >  b/kernel/rcupdate.c       |  120 -----------------------------------
> >  b/kernel/rcutree.c        |  120 +++++++++++++++++++++++++++++++++++
> >  b/kernel/rcutree.h        |   11 ++-
> >  b/kernel/rcutree_plugin.h |   34 ++++++++++
> >  b/kernel/rcutree_trace.c  |    5 -
> >  kernel/rcutree.c          |  156 ++++++++++++++++++++++++----------------------
> >  6 files changed, 248 insertions(+), 198 deletions(-)
> 
> Applied, thanks Paul!
> 
> I dropped these commits for now:
> 
>  3ffea79: rcu: Make hot-unplugged CPU relinquish its own RCU callbacks
>  eddd962: rcu: Move rcu_barrier() to rcutree, make lightweight rcu_barrier() for rcutiny
>  a39e7d5: rcu-tiny: The Bloatwatch Edition, v6
> 
> We can apply rcu-tiny for .33 again, once the dust has settled.

Sounds good!  I am placing rcu-tiny and synchronize_srcu_expedited()
on a separate branch in my local git tree for 2.6.33.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class
  2009-10-07  4:48 ` [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class Paul E. McKenney
  2009-10-07  6:17   ` [tip:core/rcu] rcu: Place " tip-bot for Paul E. McKenney
@ 2009-10-08  9:31   ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-10-08  9:31 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mingo, laijs, dipankar, akpm, mathieu.desnoyers,
	josh, dvhltc, niv, tglx, rostedt, Valdis.Kletnieks, dhowells

On Tue, 2009-10-06 at 21:48 -0700, Paul E. McKenney wrote:
> From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> Before this patch, all of the rcu_node structures were in the same lockdep
> class, so that lockdep would complain when rcu_preempt_offline_tasks()
> acquired the root rcu_node structure's lock while holding one of the leaf
> rcu_nodes' locks.  This patch changes rcu_init_one() to initialize the
> root rcu_node structure's lock with a spin_lock_init() call separate
> from the one used for the rest of the rcu_node structures, which puts
> the root rcu_node structure's lock in its own lockdep class.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
>  kernel/rcutree.c |    4 +++-
>  1 files changed, 3 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index d8d9865..705f02a 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -1647,7 +1647,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
>  		cpustride *= rsp->levelspread[i];
>  		rnp = rsp->level[i];
>  		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
> -			spin_lock_init(&rnp->lock);
> +			if (rnp != rcu_get_root(rsp))
> +				spin_lock_init(&rnp->lock);
>  			rnp->gpnum = 0;
>  			rnp->qsmask = 0;
>  			rnp->qsmaskinit = 0;
> @@ -1670,6 +1671,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
>  			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
>  		}
>  	}
> +	spin_lock_init(&rcu_get_root(rsp)->lock);
>  }
>  
>  /*


An alternative version that would not change any generated code in the
!LOCKDEP case would read as follows:

---
 kernel/rcutree.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d8d9865..113f274 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1625,6 +1625,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 }
 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
 
+static struct lock_class_key rcu_root_class;
+
 /*
  * Helper function for rcu_init() that initializes one rcu_state structure.
  */
@@ -1670,6 +1672,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
 		}
 	}
+	lockdep_set_class(&rcu_get_root(rsp)->lock, &rcu_root_class);
 }
 
 /*


^ permalink raw reply related	[flat|nested] 13+ messages in thread
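
The practical difference between the two versions: the patch that was
applied re-runs spin_lock_init() on the root lock and adds a comparison
to the per-node init loop in every configuration, whereas this
lockdep_set_class() variant leaves the !LOCKDEP build untouched,
because the annotation compiles to nothing there.  lockdep_set_class()
is also the usual way to reclassify a single lock after initialization;
a minimal sketch with invented names:

#include <linux/spinlock.h>

static struct lock_class_key toy_root_key;
static spinlock_t toy_root_lock;

static void toy_init(void)
{
	spin_lock_init(&toy_root_lock);
	/* Move just this lock into its own class; no-op without lockdep. */
	lockdep_set_class(&toy_root_lock, &toy_root_key);
}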

end of thread, other threads:[~2009-10-08  9:29 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-07  4:47 [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Paul E. McKenney
2009-10-07  4:48 ` [PATCH tip/core/rcu 1/3] rcu: move rcu_barrier() to rcutree Paul E. McKenney
2009-10-07  6:16   ` [tip:core/rcu] rcu: Move " tip-bot for Paul E. McKenney
2009-10-07  4:48 ` [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Paul E. McKenney
2009-10-07  6:17   ` [tip:core/rcu] rcu: Make " tip-bot for Paul E. McKenney
2009-10-07 13:16   ` [PATCH tip/core/rcu 2/3] rcu: make " Steven Rostedt
2009-10-07 13:50     ` Paul E. McKenney
2009-10-07  4:48 ` [PATCH tip/core/rcu 3/3] rcu: place root rcu_node structure in separate lockdep class Paul E. McKenney
2009-10-07  6:17   ` [tip:core/rcu] rcu: Place " tip-bot for Paul E. McKenney
2009-10-08  9:31   ` [PATCH tip/core/rcu 3/3] rcu: place " Peter Zijlstra
2009-10-07  6:14 ` [PATCH tip/core/rcu 0/3] rcu: prevent hangs by simplifying rcu_barrier/CPU-hotplug, fix lockdep complaint Ingo Molnar
2009-10-07 13:52   ` Paul E. McKenney
  -- strict thread matches above, loose matches on Subject: below --
2009-10-07 12:55 [PATCH tip/core/rcu 2/3] rcu: make hot-unplugged CPU relinquish its own RCU callbacks Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).