From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@elte.hu>,
mathieu.desnoyers@polymtl.ca, akpm@linux-foundation.org,
laijs@cn.fujitsu.com, manfred@colorfullife.com,
dipankar@in.ibm.com, niv@us.ibm.com, dvhltc@us.ibm.com,
josht@linux.vnet.ibm.com, ltt-dev@lists.casi.polymtl.ca
Subject: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
Date: Thu, 2 Oct 2008 16:06:39 -0700 [thread overview]
Message-ID: <20081002230639.GA7522@linux.vnet.ibm.com> (raw)
In-Reply-To: <20081002225537.GA6706@linux.vnet.ibm.com>
Hello!
This patch adds stalled-CPU detection to Classic RCU. This capability
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
defaults disabled. This is a debugging feature to detect infinite loops
in kernel code, not something that non-kernel-hackers would be expected
to care about. This feature can detect looping CPUs in !PREEMPT builds
and looping CPUs with preemption disabled in PREEMPT builds. This is
essentially a port of this functionality from the treercu patch, replacing
the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).
The changes from the patch in tip/core/rcu include making the config
variable name match that in treercu, changing from seconds to jiffies to
avoid spurious warnings, and printing a boot message when this feature
is enabled.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
include/linux/rcuclassic.h | 12 ++-
kernel/rcuclassic.c | 166 +++++++++++++++++++++++----------------------
lib/Kconfig.debug | 2
3 files changed, 96 insertions(+), 84 deletions(-)
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 29bf528..2d72d20 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,15 +40,21 @@
#include <linux/cpumask.h>
#include <linux/seqlock.h>
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
long cur; /* Current batch number. */
long completed; /* Number of the last completed batch */
long pending; /* Number of the last pending batch */
-#ifdef CONFIG_DEBUG_RCU_STALL
- unsigned long gp_check; /* Time grace period should end, in seconds. */
-#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+ unsigned long gp_start; /* Time at which GP started in jiffies. */
+ unsigned long jiffies_stall;
+ /* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
int signaled;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index ed15128..eae2fb6 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
}
}
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+ rcp->gp_start = jiffies;
+ rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ int cpu;
+ long delta;
+ unsigned long flags;
+
+ /* Only let one CPU complain about others per time interval. */
+
+ spin_lock_irqsave(&rcp->lock, flags);
+ delta = jiffies - rcp->jiffies_stall;
+ if (delta < 2 || rcp->cur != rcp->completed) {
+ spin_unlock_irqrestore(&rcp->lock, flags);
+ return;
+ }
+ rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ spin_unlock_irqrestore(&rcp->lock, flags);
+
+ /* OK, time to rat on our buddy... */
+
+ printk(KERN_ERR "RCU detected CPU stalls:");
+ for_each_possible_cpu(cpu) {
+ if (cpu_isset(cpu, rcp->cpumask))
+ printk(" %d", cpu);
+ }
+ printk(" (detected by %d, t=%ld jiffies)\n",
+ smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ unsigned long flags;
+
+ printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+ smp_processor_id(), jiffies,
+ jiffies - rcp->gp_start);
+ dump_stack();
+ spin_lock_irqsave(&rcp->lock, flags);
+ if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+ rcp->jiffies_stall =
+ jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ spin_unlock_irqrestore(&rcp->lock, flags);
+ set_need_resched(); /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ long delta;
+
+ delta = jiffies - rcp->jiffies_stall;
+ if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+
+ /* We haven't checked in, so go dump stack. */
+ print_cpu_stall(rcp);
+
+ } else if (rcp->cur != rcp->completed && delta >= 2) {
+
+ /* They had two seconds to dump stack, so complain. */
+ print_other_cpu_stall(rcp);
+ }
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
/**
* call_rcu - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
* period (if necessary).
*/
-#ifdef CONFIG_DEBUG_RCU_STALL
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
- rcp->gp_check = get_seconds() + 3;
-}
-
-static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
-{
- int cpu;
- long delta;
- unsigned long flags;
-
- /* Only let one CPU complain about others per time interval. */
-
- spin_lock_irqsave(&rcp->lock, flags);
- delta = get_seconds() - rcp->gp_check;
- if (delta < 2L || cpus_empty(rcp->cpumask)) {
- spin_unlock(&rcp->lock);
- return;
- }
- rcp->gp_check = get_seconds() + 30;
- spin_unlock_irqrestore(&rcp->lock, flags);
-
- /* OK, time to rat on our buddy... */
-
- printk(KERN_ERR "RCU detected CPU stalls:");
- for_each_cpu_mask(cpu, rcp->cpumask)
- printk(" %d", cpu);
- printk(" (detected by %d, t=%lu/%lu)\n",
- smp_processor_id(), get_seconds(), rcp->gp_check);
-}
-
-static void print_cpu_stall(struct rcu_ctrlblk *rcp)
-{
- unsigned long flags;
-
- printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
- smp_processor_id(), get_seconds(), rcp->gp_check);
- dump_stack();
- spin_lock_irqsave(&rcp->lock, flags);
- if ((long)(get_seconds() - rcp->gp_check) >= 0L)
- rcp->gp_check = get_seconds() + 30;
- spin_unlock_irqrestore(&rcp->lock, flags);
-}
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
- long delta;
-
- delta = get_seconds() - rcp->gp_check;
- if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
-
- /* We haven't checked in, so go dump stack. */
-
- print_cpu_stall(rcp);
-
- } else {
- if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
- /* They had two seconds to dump stack, so complain. */
- print_other_cpu_stall(rcp);
- }
- }
-}
-
-#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-}
-
-static inline void
-check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
-
/*
* Register a new batch of callbacks, and start it up if there is currently no
* active batch and the batch to be registered has not already occurred.
@@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
if (rcp->cur != rcp->pending &&
rcp->completed == rcp->cur) {
rcp->cur++;
- record_gp_check_time(rcp);
+ record_gp_stall_check_time(rcp);
/*
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
/* Check for CPU stalls, if enabled. */
- check_cpu_stall(rcp, rdp);
+ check_cpu_stall(rcp);
if (rdp->nxtlist) {
long completed_snap = ACCESS_ONCE(rcp->completed);
@@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
*/
void __init __rcu_init(void)
{
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+ printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
/* Register notifier for non-boot CPUs */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4e921a8..e0e0582 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
Say N here if you want the RCU torture tests to start only
after being manually enabled via /proc.
-config RCU_CPU_STALL
+config RCU_CPU_STALL_DETECTOR
bool "Check for stalled CPUs delaying RCU grace periods"
depends on CLASSIC_RCU
default n
next prev parent reply other threads:[~2008-10-02 23:06 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-02 0:36 [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02 8:07 ` Ingo Molnar
2008-10-02 14:05 ` Paul E. McKenney
2008-10-02 15:10 ` Mathieu Desnoyers
2008-10-02 17:43 ` Paul E. McKenney
2008-10-02 18:51 ` [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support) Mathieu Desnoyers
2008-10-02 22:55 ` Paul E. McKenney
2008-10-02 23:06 ` Paul E. McKenney [this message]
2008-10-03 4:41 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Mathieu Desnoyers
2008-10-03 5:02 ` Paul E. McKenney
2008-10-03 8:39 ` Ingo Molnar
2008-10-03 8:42 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
2008-10-03 15:14 ` Paul E. McKenney
2008-10-03 15:13 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02 17:39 ` [PATCH,RFC] " Paul E. McKenney
2008-10-03 10:12 ` Lai Jiangshan
2008-10-03 15:12 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081002230639.GA7522@linux.vnet.ibm.com \
--to=paulmck@linux.vnet.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=dipankar@in.ibm.com \
--cc=dvhltc@us.ibm.com \
--cc=josht@linux.vnet.ibm.com \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=ltt-dev@lists.casi.polymtl.ca \
--cc=manfred@colorfullife.com \
--cc=mathieu.desnoyers@polymtl.ca \
--cc=mingo@elte.hu \
--cc=niv@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.