[PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-10 15:15 ` Paul E. McKenney
@ 2008-08-11  1:35   ` Paul E. McKenney
  2008-08-11  1:38     ` Paul E. McKenney
  2008-08-11 11:38     ` Ingo Molnar
  0 siblings, 2 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11  1:35 UTC (permalink / raw)
  To: David Witbrodt
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

On Sun, Aug 10, 2008 at 08:15:20AM -0700, Paul E. McKenney wrote:
> I will see about putting together a diagnostic patch for Classic RCU.
> The approach will be to record jiffies (or some such) at the beginning
> of the grace period (in rcu_start_batch()), then have
> rcu_check_callbacks() complain if:
> 
> 1.	it is running on a CPU that has holding up grace periods for
> 	a long time (say one second).  This will identify the culprit
> 	assuming that the culprit has not disabled hardware irqs,
> 	instruction execution, or some such.
> 
> 2.	it is running on a CPU that is not holding up grace periods,
> 	but grace periods have been held up for an even longer time
> 	(say two seconds).
> 
> In either case, some sort of exponential backoff would be needed to
> avoid multi-gigabyte log files.  Of course, all of this assumes that
> the machine remains healthy enough to actually get any such messages
> somewhere that you can see them, but so it goes...

And here is the patch.  It is still a bit raw, so the results should
be viewed with some suspicion.  It adds a default-off kernel parameter
CONFIG_RCU_CPU_STALL which must be enabled.

Rather than exponential backoff, it backs off to once per 30 seconds.
My feeling upon thinking on it was that if you have stalled RCU grace
periods for that long, a few extra printk() messages are probably the
least of your worries...

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/rcuclassic.h |    3 +
 kernel/rcuclassic.c        |   80 +++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug          |   13 +++++++
 3 files changed, 96 insertions(+)

diff -urpNa -X dontdiff linux-2.6.27-rc1/include/linux/rcuclassic.h linux-2.6.27-rc1-cpustall/include/linux/rcuclassic.h
--- linux-2.6.27-rc1/include/linux/rcuclassic.h	2008-07-30 08:48:16.000000000 -0700
+++ linux-2.6.27-rc1-cpustall/include/linux/rcuclassic.h	2008-08-10 12:21:22.000000000 -0700
@@ -46,6 +46,9 @@ struct rcu_ctrlblk {
 	long	cur;		/* Current batch number.                      */
 	long	completed;	/* Number of the last completed batch         */
 	int	next_pending;	/* Is the next batch already waiting?         */
+#ifdef CONFIG_RCU_CPU_STALL
+	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL */
 
 	int	signaled;
 
diff -urpNa -X dontdiff linux-2.6.27-rc1/kernel/rcuclassic.c linux-2.6.27-rc1-cpustall/kernel/rcuclassic.c
--- linux-2.6.27-rc1/kernel/rcuclassic.c	2008-07-30 08:48:17.000000000 -0700
+++ linux-2.6.27-rc1-cpustall/kernel/rcuclassic.c	2008-08-10 17:51:32.000000000 -0700
@@ -47,6 +47,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/time.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -269,6 +270,81 @@ static void rcu_do_batch(struct rcu_data
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
  */
+
+#ifdef CONFIG_RCU_CPU_STALL
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_check = get_seconds() + 3;
+}
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock(&rcp->lock);
+	delta = get_seconds() - rcp->gp_check;
+	if (delta < 2L ||
+	    cpus_empty(rcp->cpumask)) {
+		spin_unlock(&rcp->lock);
+		return;
+	rcp->gp_check = get_seconds() + 30;
+	}
+	spin_unlock(&rcp->lock);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_cpu_mask(cpu, rcp->cpumask)
+		printk(" %d", cpu);
+	printk(" (detected by %d, t=%lu/%lu)\n",
+	       smp_processor_id(), get_seconds(), rcp->gp_check);
+}
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
+			smp_processor_id(), get_seconds(), rcp->gp_check);
+	dump_stack();
+	spin_lock(&rcp->lock);
+	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
+		rcp->gp_check = get_seconds() + 30;
+	spin_unlock(&rcp->lock);
+}
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+				   struct rcu_data *rdp)
+{
+	long delta;
+
+	delta = get_seconds() - rcp->gp_check;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
+
+		/* We haven't checked in, so go dump stack. */
+
+		print_cpu_stall(rcp);
+
+	} else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
+
+		/* They had two seconds to dump stack, so complain. */
+
+		print_other_cpu_stall(rcp);
+
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL */
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+				   struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL */
+
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -285,6 +361,7 @@ static void rcu_start_batch(struct rcu_c
 		 */
 		smp_wmb();
 		rcp->cur++;
+		record_gp_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -468,6 +545,9 @@ static void rcu_process_callbacks(struct
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rcp, rdp);
+
 	/* This cpu has pending rcu entries and the grace period
 	 * for them has completed.
 	 */
diff -urpNa -X dontdiff linux-2.6.27-rc1/lib/Kconfig.debug linux-2.6.27-rc1-cpustall/lib/Kconfig.debug
--- linux-2.6.27-rc1/lib/Kconfig.debug	2008-07-30 08:48:17.000000000 -0700
+++ linux-2.6.27-rc1-cpustall/lib/Kconfig.debug	2008-08-10 12:14:18.000000000 -0700
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_CPU_STALL
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11  1:35   ` [PATCH diagnostic] " Paul E. McKenney
@ 2008-08-11  1:38     ` Paul E. McKenney
  2008-08-11 11:38     ` Ingo Molnar
  1 sibling, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11  1:38 UTC (permalink / raw)
  To: David Witbrodt
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

[-- Attachment #1: Type: text/plain, Size: 6949 bytes --]

On Sun, Aug 10, 2008 at 06:35:38PM -0700, Paul E. McKenney wrote:
> On Sun, Aug 10, 2008 at 08:15:20AM -0700, Paul E. McKenney wrote:
> > I will see about putting together a diagnostic patch for Classic RCU.
> > The approach will be to record jiffies (or some such) at the beginning
> > of the grace period (in rcu_start_batch()), then have
> > rcu_check_callbacks() complain if:
> > 
> > 1.	it is running on a CPU that has holding up grace periods for
> > 	a long time (say one second).  This will identify the culprit
> > 	assuming that the culprit has not disabled hardware irqs,
> > 	instruction execution, or some such.
> > 
> > 2.	it is running on a CPU that is not holding up grace periods,
> > 	but grace periods have been held up for an even longer time
> > 	(say two seconds).
> > 
> > In either case, some sort of exponential backoff would be needed to
> > avoid multi-gigabyte log files.  Of course, all of this assumes that
> > the machine remains healthy enough to actually get any such messages
> > somewhere that you can see them, but so it goes...

And attached is the kernel module I used to test the patch, for whatever
that might be worth.

							Thanx, Paul

> And here is the patch.  It is still a bit raw, so the results should
> be viewed with some suspicion.  It adds a default-off kernel parameter
> CONFIG_RCU_CPU_STALL which must be enabled.
> 
> Rather than exponential backoff, it backs off to once per 30 seconds.
> My feeling upon thinking on it was that if you have stalled RCU grace
> periods for that long, a few extra printk() messages are probably the
> least of your worries...
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
> 
>  include/linux/rcuclassic.h |    3 +
>  kernel/rcuclassic.c        |   80 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/Kconfig.debug          |   13 +++++++
>  3 files changed, 96 insertions(+)
> 
> diff -urpNa -X dontdiff linux-2.6.27-rc1/include/linux/rcuclassic.h linux-2.6.27-rc1-cpustall/include/linux/rcuclassic.h
> --- linux-2.6.27-rc1/include/linux/rcuclassic.h	2008-07-30 08:48:16.000000000 -0700
> +++ linux-2.6.27-rc1-cpustall/include/linux/rcuclassic.h	2008-08-10 12:21:22.000000000 -0700
> @@ -46,6 +46,9 @@ struct rcu_ctrlblk {
>  	long	cur;		/* Current batch number.                      */
>  	long	completed;	/* Number of the last completed batch         */
>  	int	next_pending;	/* Is the next batch already waiting?         */
> +#ifdef CONFIG_RCU_CPU_STALL
> +	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL */
>  
>  	int	signaled;
>  
> diff -urpNa -X dontdiff linux-2.6.27-rc1/kernel/rcuclassic.c linux-2.6.27-rc1-cpustall/kernel/rcuclassic.c
> --- linux-2.6.27-rc1/kernel/rcuclassic.c	2008-07-30 08:48:17.000000000 -0700
> +++ linux-2.6.27-rc1-cpustall/kernel/rcuclassic.c	2008-08-10 17:51:32.000000000 -0700
> @@ -47,6 +47,7 @@
>  #include <linux/notifier.h>
>  #include <linux/cpu.h>
>  #include <linux/mutex.h>
> +#include <linux/time.h>
>  
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  static struct lock_class_key rcu_lock_key;
> @@ -269,6 +270,81 @@ static void rcu_do_batch(struct rcu_data
>   *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
>   *   period (if necessary).
>   */
> +
> +#ifdef CONFIG_RCU_CPU_STALL
> +
> +static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> +{
> +	rcp->gp_check = get_seconds() + 3;
> +}
> +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	int cpu;
> +	long delta;
> +
> +	/* Only let one CPU complain about others per time interval. */
> +
> +	spin_lock(&rcp->lock);
> +	delta = get_seconds() - rcp->gp_check;
> +	if (delta < 2L ||
> +	    cpus_empty(rcp->cpumask)) {
> +		spin_unlock(&rcp->lock);
> +		return;
> +	rcp->gp_check = get_seconds() + 30;
> +	}
> +	spin_unlock(&rcp->lock);
> +
> +	/* OK, time to rat on our buddy... */
> +
> +	printk(KERN_ERR "RCU detected CPU stalls:");
> +	for_each_cpu_mask(cpu, rcp->cpumask)
> +		printk(" %d", cpu);
> +	printk(" (detected by %d, t=%lu/%lu)\n",
> +	       smp_processor_id(), get_seconds(), rcp->gp_check);
> +}
> +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
> +			smp_processor_id(), get_seconds(), rcp->gp_check);
> +	dump_stack();
> +	spin_lock(&rcp->lock);
> +	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
> +		rcp->gp_check = get_seconds() + 30;
> +	spin_unlock(&rcp->lock);
> +}
> +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
> +				   struct rcu_data *rdp)
> +{
> +	long delta;
> +
> +	delta = get_seconds() - rcp->gp_check;
> +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
> +
> +		/* We haven't checked in, so go dump stack. */
> +
> +		print_cpu_stall(rcp);
> +
> +	} else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
> +
> +		/* They had two seconds to dump stack, so complain. */
> +
> +		print_other_cpu_stall(rcp);
> +
> +	}
> +}
> +
> +#else /* #ifdef CONFIG_RCU_CPU_STALL */
> +
> +static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> +{
> +}
> +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
> +				   struct rcu_data *rdp)
> +{
> +}
> +
> +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL */
> +
>  /*
>   * Register a new batch of callbacks, and start it up if there is currently no
>   * active batch and the batch to be registered has not already occurred.
> @@ -285,6 +361,7 @@ static void rcu_start_batch(struct rcu_c
>  		 */
>  		smp_wmb();
>  		rcp->cur++;
> +		record_gp_check_time(rcp);
>  
>  		/*
>  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> @@ -468,6 +545,9 @@ static void rcu_process_callbacks(struct
>  
>  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
>  {
> +	/* Check for CPU stalls, if enabled. */
> +	check_cpu_stall(rcp, rdp);
> +
>  	/* This cpu has pending rcu entries and the grace period
>  	 * for them has completed.
>  	 */
> diff -urpNa -X dontdiff linux-2.6.27-rc1/lib/Kconfig.debug linux-2.6.27-rc1-cpustall/lib/Kconfig.debug
> --- linux-2.6.27-rc1/lib/Kconfig.debug	2008-07-30 08:48:17.000000000 -0700
> +++ linux-2.6.27-rc1-cpustall/lib/Kconfig.debug	2008-08-10 12:14:18.000000000 -0700
> @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
>  	  Say N here if you want the RCU torture tests to start only
>  	  after being manually enabled via /proc.
>  
> +config RCU_CPU_STALL
> +	bool "Check for stalled CPUs delaying RCU grace periods"
> +	depends on CLASSIC_RCU
> +	default n
> +	help
> +	  This option causes RCU to printk information on which
> +	  CPUs are delaying the current grace period, but only when
> +	  the grace period extends for excessive time periods.
> +
> +	  Say Y if you want RCU to perform such checks.
> +
> +	  Say N if you are unsure.
> +
>  config KPROBES_SANITY_TEST
>  	bool "Kprobes sanity tests"
>  	depends on DEBUG_KERNEL

[-- Attachment #2: stall.c --]
[-- Type: text/x-csrc, Size: 1719 bytes --]

/*
 * Taken from Linux Device Drivers, 3rd Edition.
 * Corbet, Rubini, and Kroah-Hartman.
 *
 * Modified by Paul E. McKenney (c) 2008 to test CPU-stall detection.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/time.h>
MODULE_LICENSE("Dual BSD/GPL");

int stall_secs = 0;
module_param(stall_secs, int, 0);
MODULE_PARM_DESC(stall_secs, "Number of seconds to stall");

static struct task_struct *kstall_task;

static int stall_thread(void *arg)
{
	unsigned long stop_at;

	printk(KERN_ALERT "stall_thread %ld\n", (long)arg);
	do {
		stop_at = get_seconds();
		printk(KERN_ALERT "stall_thread at %ld seconds\n", stop_at);
		stop_at += stall_secs;
		printk(KERN_ALERT "stall_thread wait until %ld seconds\n", stop_at);
		while ((long)(get_seconds() - stop_at) < 0)
			continue;
		printk(KERN_ALERT "stall_thread ended wait at %ld seconds\n", get_seconds());
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(10 * HZ);
		printk(KERN_ALERT "stall_thread %ld awakening\n", (long)arg);
	} while (!kthread_should_stop());
	printk(KERN_ALERT "stall_thread %ld exiting\n", (long)arg);
	return 0;
}

static int stall_init(void)
{
	int err;

	printk(KERN_ALERT "Hello, world!  stall_secs = %d\n", stall_secs);
	kstall_task = kthread_run(stall_thread, (void *)(long)stall_secs,
				  "stall_thread");
	if (IS_ERR(kstall_task)) {
		err = PTR_ERR(kstall_task);
		kstall_task = NULL;
		return err;
	}
	return 0;
}

static void stall_exit(void)
{
	if (kstall_task != NULL) {
		kthread_stop(kstall_task);
	}
	kstall_task = NULL;
	printk(KERN_ALERT "Goodbye, cruel world\n");
}

module_init(stall_init);
module_exit(stall_exit);

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11  1:35   ` [PATCH diagnostic] " Paul E. McKenney
  2008-08-11  1:38     ` Paul E. McKenney
@ 2008-08-11 11:38     ` Ingo Molnar
  2008-08-11 13:17       ` Paul E. McKenney
  1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-08-11 11:38 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: David Witbrodt, Peter Zijlstra, linux-kernel, Yinghai Lu,
	Thomas Gleixner, H. Peter Anvin, netdev

* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> And here is the patch.  It is still a bit raw, so the results should 
> be viewed with some suspicion.  It adds a default-off kernel parameter 
> CONFIG_RCU_CPU_STALL which must be enabled.
> 
> Rather than exponential backoff, it backs off to once per 30 seconds. 
> My feeling upon thinking on it was that if you have stalled RCU grace 
> periods for that long, a few extra printk() messages are probably the 
> least of your worries...

while this wont debug problems were timer irqs are genuinely stuck for 
long periods of time, it should find problems with RCU completion logic 
itself in the presence of correct timer irqs - and the lack of any 
messages from this debug option should point the finger more firmly in 
the direction of stalled timer irqs.

So i find this debug feature rather useful and have applied it to 
tip/core/rcu (and cleaned it up a bit). I renamed the config option to 
CONFIG_DEBUG_RCU_STALL to make it more in line with usual debug option 
names. Lets see whether -tip testing finds any false positives.

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11 11:38     ` Ingo Molnar
@ 2008-08-11 13:17       ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11 13:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: David Witbrodt, Peter Zijlstra, linux-kernel, Yinghai Lu,
	Thomas Gleixner, H. Peter Anvin, netdev

On Mon, Aug 11, 2008 at 01:38:17PM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> > And here is the patch.  It is still a bit raw, so the results should 
> > be viewed with some suspicion.  It adds a default-off kernel parameter 
> > CONFIG_RCU_CPU_STALL which must be enabled.
> > 
> > Rather than exponential backoff, it backs off to once per 30 seconds. 
> > My feeling upon thinking on it was that if you have stalled RCU grace 
> > periods for that long, a few extra printk() messages are probably the 
> > least of your worries...
> 
> while this wont debug problems were timer irqs are genuinely stuck for 
> long periods of time, it should find problems with RCU completion logic 
> itself in the presence of correct timer irqs - and the lack of any 
> messages from this debug option should point the finger more firmly in 
> the direction of stalled timer irqs.
> 
> So i find this debug feature rather useful and have applied it to 
> tip/core/rcu (and cleaned it up a bit). I renamed the config option to 
> CONFIG_DEBUG_RCU_STALL to make it more in line with usual debug option 
> names. Lets see whether -tip testing finds any false positives.

Sounds good!

For whatever it is worth, this diagnostic can also locate latency issues
in non-CONFIG_PREEMPT kernels, even when those problems are outside of
preempt_disable() regions.  Latency tracer is of course a better tool
for things -inside- of preempt_disable() regions.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
@ 2008-08-11 16:04 David Witbrodt
  2008-08-11 16:22 ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: David Witbrodt @ 2008-08-11 16:04 UTC (permalink / raw)
  To: paulmck
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

I didn't check my email yesterday -- sorry about that, but sometimes life
intervenes -- so I'm a bit late replying.

Also, I see several messages relevant to this thread in my inbox:  I have
decided to address each in order, so that I don't mix something up and do
something foolish.

> And here is the patch.  It is still a bit raw, so the results should
> be viewed with some suspicion.  It adds a default-off kernel parameter
> CONFIG_RCU_CPU_STALL which must be enabled.

Thanks for the patch.  I had a problem applying the patch because I
have not yet transitioned my email system from my old machine to my new
3-system home network setup.  (I used to share a data partition between
Windows and Linux so that my archives would stay in sync; my new setup
will allow keeping the POP downloads on one machine, and sharing the
archives via IMAP, but even since May I still haven't gotten around to 
it.)

My ISP's webmail interface altered the whitespace, and I'm so new to
git that I couldn't figure out how to keep it from rejecting the
patch.  I had updated Linus' git tree to 2.6.27-rc2, and when I saw
that your patch was against something in 2.6.27-rc1 I thought this
might be the problem.  Visually inspecting the files, I saw that the
lines matched perfectly, other than whitespace, so I just gave up and
applied the patches manually.

I ran 'make menuconfig', but nothing about your new feature was asked.
Then I realized that I had changed the .config to CONFIG_PREEMPT because
of an experiment you had my try a few days ago.  When I disabled that,
I was able to see the new option and enable it.

The kernel built fine, so I installed and rebooted...

> Rather than exponential backoff, it backs off to once per 30 seconds.
> My feeling upon thinking on it was that if you have stalled RCU grace
> periods for that long, a few extra printk() messages are probably the
> least of your worries...

Well, I was hoping to see something interesting.  I ran it with parameters
"debug initcall_debug", and it locked up at the same place.  I let it for
15 minutes, in case of some delayed reaction.  Nada.

The output was nearly identical to what I posted last Tuesday (see
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0808.0/2224.html).
Here are the last few lines:
==================================
[snip]
calling pci_bios_assign_resources+0x0/0x8b
pci 0000:00:01.0: PCI bridge, secondary bus 0000:01
pci 0000:00:01.0:   IO window: 0xe000-0xefff
pci 0000:00:01.0:   MEM window: 0xfdd00000-0xfdefffff
pci 0000:00:01.0:   PREFETCH window: 0x000000d8000000-0x000000dfffffff
pci 0000:00:14.4: PCI bridge, secondary bus 0000:02
pci 0000:00:14.4:   IO window: 0xd000-0xdfff
pci 0000:00:14.4:   MEM window: 0xfdc00000-0xfdcfffff
pci 0000:00:14.4:   PREFETCH window: 0x000000fdf00000-0x000000fdffffff
initcall pci_bios_assign_resources returned 0 after 285702 msecs
calling inet_init+0x0/0x250
NET: Registered protocol family 2
===== END OUTPUT =================

The only difference in the output was trivial:  "285696 msecs" became
"285702 msecs".  None of the printk()'s from your driver were executed.

(As I mentioned on Tuesday, that number of milliseconds is WAY off, and
it still bothers me.  The total time from the GRUB screen disappearing
to the last line printed is < 5 secs (maybe < 3 secs), not 285 secs!)

Moving on to the other LKML messages....

Thanks,
Dave W.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
@ 2008-08-11 16:22 David Witbrodt
  2008-08-11 16:29 ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: David Witbrodt @ 2008-08-11 16:22 UTC (permalink / raw)
  To: paulmck
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

> And attached is the kernel module I used to test the patch, for whatever
> that might be worth.

Thank you for that, too.  I have some questions and a comment:

1.  This module can only be loaded from a working system, right?  You
mean that _you_ tested the kernel code on _your_ system to see if it
was working correctly, and did not mean that I should use this with
the problematic kernels on my system?  I could easily run this with
something like 'modprobe stall' on my working machine, or on the
problem machine if I boot with the "hpet=disabled" parameter, but
doing so would not help us find the problem, right?

2.  Coincidentally, I had started reading Understanding the Linux Kernel
(first edition, 2000) about a week before I got socked with the kernel
freeze issue.  I was reading it at work -- I work as a tutor, and there
are sometimes slow times without students to help -- but haven't had a
chance to do so in the best 10 days while I've been struggling with my
own kernel problem.  It is also quite out-of-date -- published in 2000,
and discussing the 2.2 kernel line.  I know that there have been a
couple of new editions since then, but I just happened to have this first
edition on my shelf... so I took it into work for a little light reading.

Anyway, I have a lot on my personal TODO list for the fall, but maybe
early next year I will have a chance to play with the kernel for fun.
When I do so, I will have your new, experimental module to look at...
among tons of other things.

Thanks,
Dave W.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11 16:04 David Witbrodt
@ 2008-08-11 16:22 ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11 16:22 UTC (permalink / raw)
  To: David Witbrodt
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

On Mon, Aug 11, 2008 at 09:04:12AM -0700, David Witbrodt wrote:
> 
> I didn't check my email yesterday -- sorry about that, but sometimes life
> intervenes -- so I'm a bit late replying.
> 
> Also, I see several messages relevant to this thread in my inbox:  I have
> decided to address each in order, so that I don't mix something up and do
> something foolish.
> 
> 
> 
> > And here is the patch.  It is still a bit raw, so the results should
> > be viewed with some suspicion.  It adds a default-off kernel parameter
> > CONFIG_RCU_CPU_STALL which must be enabled.
> 
> Thanks for the patch.  I had a problem applying the patch because I
> have not yet transitioned my email system from my old machine to my new
> 3-system home network setup.  (I used to share a data partition between
> Windows and Linux so that my archives would stay in sync; my new setup
> will allow keeping the POP downloads on one machine, and sharing the
> archives via IMAP, but even since May I still haven't gotten around to 
> it.)
> 
> My ISP's webmail interface altered the whitespace, and I'm so new to
> git that I couldn't figure out how to keep it from rejecting the
> patch.  I had updated Linus' git tree to 2.6.27-rc2, and when I saw
> that your patch was against something in 2.6.27-rc1 I thought this
> might be the problem.  Visually inspecting the files, I saw that the
> lines matched perfectly, other than whitespace, so I just gave up and
> applied the patches manually.
> 
> I ran 'make menuconfig', but nothing about your new feature was asked.
> Then I realized that I had changed the .config to CONFIG_PREEMPT because
> of an experiment you had my try a few days ago.  When I disabled that,
> I was able to see the new option and enable it.
> 
> The kernel built fine, so I installed and rebooted...
> 
> 
> > Rather than exponential backoff, it backs off to once per 30 seconds.
> > My feeling upon thinking on it was that if you have stalled RCU grace
> > periods for that long, a few extra printk() messages are probably the
> > least of your worries...
> 
> Well, I was hoping to see something interesting.  I ran it with parameters
> "debug initcall_debug", and it locked up at the same place.  I let it for
> 15 minutes, in case of some delayed reaction.  Nada.

Interesting.  The causes could be:

o	Scheduling-clock interrupts aren't happening, as Ingo suggested.

o	All the CPUs are spinning with hard irqs disabled, or are
	otherwise AWOL.

And perhaps other issues as well.

							Thanx, Paul

> The output was nearly identical to what I posted last Tuesday (see
> http://www.uwsg.indiana.edu/hypermail/linux/kernel/0808.0/2224.html).
> Here are the last few lines:
> ==================================
> [snip]
> calling pci_bios_assign_resources+0x0/0x8b
> pci 0000:00:01.0: PCI bridge, secondary bus 0000:01
> pci 0000:00:01.0:   IO window: 0xe000-0xefff
> pci 0000:00:01.0:   MEM window: 0xfdd00000-0xfdefffff
> pci 0000:00:01.0:   PREFETCH window: 0x000000d8000000-0x000000dfffffff
> pci 0000:00:14.4: PCI bridge, secondary bus 0000:02
> pci 0000:00:14.4:   IO window: 0xd000-0xdfff
> pci 0000:00:14.4:   MEM window: 0xfdc00000-0xfdcfffff
> pci 0000:00:14.4:   PREFETCH window: 0x000000fdf00000-0x000000fdffffff
> initcall pci_bios_assign_resources returned 0 after 285702 msecs
> calling inet_init+0x0/0x250
> NET: Registered protocol family 2
> ===== END OUTPUT =================
> 
> The only difference in the output was trivial:  "285696 msecs" became
> "285702 msecs".  None of the printk()'s from your driver were executed.
> 
> (As I mentioned on Tuesday, that number of milliseconds is WAY off, and
> it still bothers me.  The total time from the GRUB screen disappearing
> to the last line printed is < 5 secs (maybe < 3 secs), not 285 secs!)
> 
> Moving on to the other LKML messages....
> 
> Thanks,
> Dave W.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11 16:22 David Witbrodt
@ 2008-08-11 16:29 ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11 16:29 UTC (permalink / raw)
  To: David Witbrodt
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

On Mon, Aug 11, 2008 at 09:22:01AM -0700, David Witbrodt wrote:
> 
> > And attached is the kernel module I used to test the patch, for whatever
> > that might be worth.
> 
> Thank you for that, too.  I have some questions and a comment:
> 
> 1.  This module can only be loaded from a working system, right?  You
> mean that _you_ tested the kernel code on _your_ system to see if it
> was working correctly, and did not mean that I should use this with
> the problematic kernels on my system?  I could easily run this with
> something like 'modprobe stall' on my working machine, or on the
> problem machine if I boot with the "hpet=disabled" parameter, but
> doing so would not help us find the problem, right?

Yep.  This module was intended strictly to allow me to debug the
diagnostic code.

> 2.  Coincidentally, I had started reading Understanding the Linux Kernel
> (first edition, 2000) about a week before I got socked with the kernel
> freeze issue.  I was reading it at work -- I work as a tutor, and there
> are sometimes slow times without students to help -- but haven't had a
> chance to do so in the best 10 days while I've been struggling with my
> own kernel problem.  It is also quite out-of-date -- published in 2000,
> and discussing the 2.2 kernel line.  I know that there have been a
> couple of new editions since then, but I just happened to have this first
> edition on my shelf... so I took it into work for a little light reading.
> 
> Anyway, I have a lot on my personal TODO list for the fall, but maybe
> early next year I will have a chance to play with the kernel for fun.
> When I do so, I will have your new, experimental module to look at...
> among tons of other things.

Well, the stall module derives pretty directly from some example code
in Corbet, Rubini, and Kroah-Hartman's "Linux Device Drivers" book.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
@ 2008-08-11 18:13 David Witbrodt
  0 siblings, 0 replies; 11+ messages in thread
From: David Witbrodt @ 2008-08-11 18:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Paul E. McKenney, Peter Zijlstra, linux-kernel, Yinghai Lu,
	Thomas Gleixner, H. Peter Anvin, netdev




> So i find this debug feature rather useful and have applied it to 
> tip/core/rcu (and cleaned it up a bit). I renamed the config option to 
> CONFIG_DEBUG_RCU_STALL to make it more in line with usual debug option 
> names. Lets see whether -tip testing finds any false positives.

Hmmm, should have read ALL of the emails first!  Drat...

Just for kicks, I rebuilt using Ingo's version -- since I had manually
pasted Paul's patch, there was the possibility of PEBKAC interference:
no additional output at the lock up.


Dave W.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
@ 2008-08-11 18:22 David Witbrodt
  2008-08-11 19:27 ` Paul E. McKenney
  0 siblings, 1 reply; 11+ messages in thread
From: David Witbrodt @ 2008-08-11 18:22 UTC (permalink / raw)
  To: paulmck
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev



> > Well, I was hoping to see something interesting.  I ran it with parameters
> > "debug initcall_debug", and it locked up at the same place.  I let it for
> > 15 minutes, in case of some delayed reaction.  Nada.
> 
> Interesting.  The causes could be:
> 
> o    Scheduling-clock interrupts aren't happening, as Ingo suggested.

Does anyone have a short answer to this question:  Were the changes between
2.6.25 and 2.6.26 so major that interrupts are NOW being used that were not
being used before?

Again, I don't even pretend to understand the kernel's inner workings, but
2.6.25 _did_ work on this hardware... and with HPET enabled.


DW

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem
  2008-08-11 18:22 [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem David Witbrodt
@ 2008-08-11 19:27 ` Paul E. McKenney
  0 siblings, 0 replies; 11+ messages in thread
From: Paul E. McKenney @ 2008-08-11 19:27 UTC (permalink / raw)
  To: David Witbrodt
  Cc: Peter Zijlstra, linux-kernel, Yinghai Lu, Ingo Molnar,
	Thomas Gleixner, H. Peter Anvin, netdev

On Mon, Aug 11, 2008 at 11:22:21AM -0700, David Witbrodt wrote:
> 
> 
> > > Well, I was hoping to see something interesting.  I ran it with parameters
> > > "debug initcall_debug", and it locked up at the same place.  I let it for
> > > 15 minutes, in case of some delayed reaction.  Nada.
> > 
> > Interesting.  The causes could be:
> > 
> > o    Scheduling-clock interrupts aren't happening, as Ingo suggested.
> 
> Does anyone have a short answer to this question:  Were the changes between
> 2.6.25 and 2.6.26 so major that interrupts are NOW being used that were not
> being used before?

Not that I am aware of, but I must defer to others who know more about
Linux's timer interrupts than do I.

							Thanx, Paul

> Again, I don't even pretend to understand the kernel's inner workings, but
> 2.6.25 _did_ work on this hardware... and with HPET enabled.
> 
> 
> DW

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-08-11 19:35 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-11 18:22 [PATCH diagnostic] Re: HPET regression in 2.6.26 versus 2.6.25 -- RCU problem David Witbrodt
2008-08-11 19:27 ` Paul E. McKenney
  -- strict thread matches above, loose matches on Subject: below --
2008-08-11 18:13 David Witbrodt
2008-08-11 16:22 David Witbrodt
2008-08-11 16:29 ` Paul E. McKenney
2008-08-11 16:04 David Witbrodt
2008-08-11 16:22 ` Paul E. McKenney
2008-08-09 22:35 David Witbrodt
2008-08-10 15:15 ` Paul E. McKenney
2008-08-11  1:35   ` [PATCH diagnostic] " Paul E. McKenney
2008-08-11  1:38     ` Paul E. McKenney
2008-08-11 11:38     ` Ingo Molnar
2008-08-11 13:17       ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).