public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* rcu: endless stalls
@ 2012-06-11 10:06 Mike Galbraith
  2012-06-11 13:39 ` Paul E. McKenney
  0 siblings, 1 reply; 15+ messages in thread
From: Mike Galbraith @ 2012-06-11 10:06 UTC (permalink / raw)
  To: LKML; +Cc: Paul E. McKenney

Greetings,

I received a report of a 48-core UV box hitting an RCU stall warning and
taking longer than the stall timeout to emit it, so the box griped
endlessly, forcing a reboot.

The patch below might prevent that... and break other stuff for free :)

rcu: one gripe at a time please

Not-compiled-by:
Not-signed-off-by:
Not-etc-by:

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88..6462056d6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -818,10 +818,25 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	set_need_resched();  /* kick ourselves to get things going. */
 }
 
+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+	rcu_preempt_stall_reset();
+}
+
 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	unsigned long j;
-	unsigned long js;
+	unsigned long j, js, flags;
 	struct rcu_node *rnp;
 
 	if (rcu_cpu_stall_suppress)
@@ -832,13 +847,23 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
 
 		/* We haven't checked in, so go dump stack. */
+		rcu_cpu_stall_suppress = 1;
 		print_cpu_stall(rsp);
+		local_irq_save(flags);
+		rcu_cpu_stall_reset();
+		local_irq_restore(flags);
+		rcu_cpu_stall_suppress = 0;
 
 	} else if (rcu_gp_in_progress(rsp) &&
 		   ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
 
 		/* They had a few time units to dump stack, so complain. */
+		rcu_cpu_stall_suppress = 1;
 		print_other_cpu_stall(rsp);
+		local_irq_save(flags);
+		rcu_cpu_stall_reset();
+		local_irq_restore(flags);
+		rcu_cpu_stall_suppress = 0;
 	}
 }
 
@@ -848,22 +873,6 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
 	return NOTIFY_DONE;
 }
 
-/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
- *
- * The caller must disable hard irqs.
- */
-void rcu_cpu_stall_reset(void)
-{
-	rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
-	rcu_preempt_stall_reset();
-}
-
 static struct notifier_block rcu_panic_block = {
 	.notifier_call = rcu_panic,
 };



^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2012-06-15  7:49 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-11 10:06 rcu: endless stalls Mike Galbraith
2012-06-11 13:39 ` Paul E. McKenney
2012-06-11 14:22   ` Mike Galbraith
2012-06-11 16:54     ` Paul E. McKenney
2012-06-11 17:20     ` Mike Galbraith
2012-06-11 18:01       ` Paul E. McKenney
2012-06-11 18:10         ` Mike Galbraith
2012-06-13  3:35           ` Mike Galbraith
2012-06-13  4:31             ` Hugh Dickins
2012-06-13  5:56               ` Mike Galbraith
2012-06-13  7:12                 ` Mike Galbraith
2012-06-14  7:45                   ` Mike Galbraith
2012-06-14 16:47                     ` Paul E. McKenney
2012-06-14 21:34                       ` Mike Galbraith
2012-06-15  7:49                       ` Mike Galbraith

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox