All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lai Jiangshan <laijs@cn.fujitsu.com>
To: paulmck@linux.vnet.ibm.com
Cc: Ingo Molnar <mingo@elte.hu>,
	linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
	mathieu.desnoyers@polymtl.ca, manfred@colorfullife.com,
	dipankar@in.ibm.com, niv@us.ibm.com, dvhltc@us.ibm.com,
	josht@linux.vnet.ibm.com
Subject: Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
Date: Fri, 03 Oct 2008 18:12:28 +0800	[thread overview]
Message-ID: <48E5F00C.3040002@cn.fujitsu.com> (raw)
In-Reply-To: <20081002173920.GA9430@linux.vnet.ibm.com>

I found a magic number in it.

Paul E. McKenney wrote:
> Hello again!
> 
> This patch adds stalled-CPU detection to Classic RCU.  This capability 
> is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> which defaults disabled.  This is a debugging feature, not something 
> that non-kernel-hackers would be expected to care about.  This feature 
> can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> preemption disabled in PREEMPT builds.  This is essentially a port of 
> this functionality from the treercu patch.
> 
> This version uses jiffies rather than get_seconds(), which eliminates
> the spurious boot-time CPU stall warnings seen on some systems with
> the previous patch.
> 
> This is still against v2.6.27-rc8 -- I will do a version against tip
> this evening (Pacific Time) when I get back to the combination of better
> bandwidth and AC power.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
> 
>  include/linux/rcuclassic.h |    9 ++++
>  kernel/rcuclassic.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/Kconfig.debug          |   13 ++++++
>  3 files changed, 110 insertions(+)
> 
> diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> index 4ab8436..cab055b 100644
> --- a/include/linux/rcuclassic.h
> +++ b/include/linux/rcuclassic.h
> @@ -40,6 +40,10 @@
>  #include <linux/cpumask.h>
>  #include <linux/seqlock.h>
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  
>  /* Global control variables for rcupdate callback mechanism. */
>  struct rcu_ctrlblk {
> @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
>  	spinlock_t	lock	____cacheline_internodealigned_in_smp;
>  	cpumask_t	cpumask; /* CPUs that need to switch in order    */
>  				 /* for current batch to proceed.        */
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +	unsigned long gp_start;	 /* Time at which GP started in jiffies. */
> +	unsigned long jiffies_stall;
> +				 /* Time at which to check for CPU stalls. */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  } ____cacheline_internodealigned_in_smp;
>  
>  /* Is batch a before batch b ? */
> diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> index aad93cd..a299876 100644
> --- a/kernel/rcuclassic.c
> +++ b/kernel/rcuclassic.c
> @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
>  }
>  #endif
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +	rcp->gp_start = jiffies;
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> +}
> +
> +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	int cpu;
> +	long delta;
> +	unsigned long flags;
> +
> +	/* Only let one CPU complain about others per time interval. */
> +
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (delta < 2 || rcp->cur != rcp->completed) {

Is it (2 * HZ)?
should it be defined as macro?

> +		spin_unlock_irqrestore(&rcp->lock, flags);
> +		return;
> +	}
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +
> +	/* OK, time to rat on our buddy... */
> +
> +	printk(KERN_ERR "RCU detected CPU stalls:");
> +	for_each_possible_cpu(cpu) {
> +		if (cpu_isset(cpu, rcp->cpumask))
> +			printk(" %d", cpu);
> +	}
> +	printk(" (detected by %d, t=%ld jiffies)\n",
> +	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
> +}
> +
> +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	unsigned long flags;
> +
> +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> +			smp_processor_id(), jiffies,
> +			jiffies - rcp->gp_start);
> +	dump_stack();
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> +		rcp->jiffies_stall =
> +			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +	set_need_resched();  /* kick ourselves to get things going. */
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	long delta;
> +
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> +		
> +		/* We haven't checked in, so go dump stack. */
> +		print_cpu_stall(rcp);
> +
> +	} else if (rcp->cur != rcp->completed && delta >= 2) {
> +
> +		/* They had two seconds to dump stack, so complain. */

appear here again! and it's inconsistent with comment.

> +		print_other_cpu_stall(rcp);
> +	}
> +}
> +
> +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> +{
> +}
> +
> +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
>  /**
>   * call_rcu - Queue an RCU callback for invocation after a grace period.
>   * @head: structure to be used for queueing the RCU updates.
> @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
>  		 */
>  		smp_wmb();
>  		rcp->cur++;
> +		record_gp_stall_check_time(rcp);
>  
>  		/*
>  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
>  
>  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
>  {
> +	/* Check for CPU stalls, if enabled. */
> +	check_cpu_stall(rcp);
> +
>  	/* This cpu has pending rcu entries and the grace period
>  	 * for them has completed.
>  	 */
> @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
>  static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
>  						struct rcu_data *rdp)
>  {
> +#ifdef CONFIG_DEBUG_RCU_STALL
> +	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
>  	memset(rdp, 0, sizeof(*rdp));
>  	rdp->curtail = &rdp->curlist;
>  	rdp->nxttail = &rdp->nxtlist;
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 0b50481..9fee969 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
>  	  Say N here if you want the RCU torture tests to start only
>  	  after being manually enabled via /proc.
>  
> +config RCU_CPU_STALL_DETECTOR
> +	bool "Check for stalled CPUs delaying RCU grace periods"
> +	depends on CLASSIC_RCU
> +	default n
> +	help
> +	  This option causes RCU to printk information on which
> +	  CPUs are delaying the current grace period, but only when
> +	  the grace period extends for excessive time periods.
> +
> +	  Say Y if you want RCU to perform such checks.
> +
> +	  Say N if you are unsure.
> +
>  config KPROBES_SANITY_TEST
>  	bool "Kprobes sanity tests"
>  	depends on DEBUG_KERNEL
> 
> 
> 



  reply	other threads:[~2008-10-03 10:14 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-02  0:36 [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02  8:07 ` Ingo Molnar
2008-10-02 14:05   ` Paul E. McKenney
2008-10-02 15:10     ` Mathieu Desnoyers
2008-10-02 17:43       ` Paul E. McKenney
2008-10-02 18:51         ` [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support) Mathieu Desnoyers
2008-10-02 22:55           ` Paul E. McKenney
2008-10-02 23:06             ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-03  4:41               ` Mathieu Desnoyers
2008-10-03  5:02                 ` Paul E. McKenney
2008-10-03  8:39               ` Ingo Molnar
2008-10-03  8:42                 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
2008-10-03 15:14                   ` Paul E. McKenney
2008-10-03 15:13                 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02 17:39     ` [PATCH,RFC] " Paul E. McKenney
2008-10-03 10:12       ` Lai Jiangshan [this message]
2008-10-03 15:12         ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48E5F00C.3040002@cn.fujitsu.com \
    --to=laijs@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=dipankar@in.ibm.com \
    --cc=dvhltc@us.ibm.com \
    --cc=josht@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mingo@elte.hu \
    --cc=niv@us.ibm.com \
    --cc=paulmck@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.