[PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
@ 2008-10-02  0:36 Paul E. McKenney
  2008-10-02  8:07 ` Ingo Molnar
  0 siblings, 1 reply; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02  0:36 UTC (permalink / raw)
  To: linux-kernel
  Cc: mingo, akpm, mathieu.desnoyers, laijs, manfred, dipankar, niv,
	dvhltc, josht

Hello!

This patch adds stalled-CPU detection to Classic RCU.  This capability
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
which defaults disabled.  This is a debugging feature, not something
that non-kernel-hackers would be expected to care about.  This feature
can detect looping CPUs in !PREEMPT builds and looping CPUs with
preemption disabled in PREEMPT builds.  This is essentially a port of
this functionality from the treercu patch.

One current shortcoming: on some systems, stalls are detected during
early boot, when we normally would not care about them.  My thought is
to add a call from late initialization to suppress stall detection until
the system is well along its way to being booted, but thought I should
check to see if there might already be something for this purpose.

(Currently against 2.6.27-rc8, FYI.)

Thoughts?

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/rcuclassic.h |    9 ++++
 kernel/rcuclassic.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug          |   13 ++++++
 3 files changed, 110 insertions(+)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 4ab8436..9b62e9a 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,6 +40,10 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK	3	/* for rcp->seconds_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK	30	/* for rcp->seconds_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
@@ -52,6 +56,11 @@ struct rcu_ctrlblk {
 	spinlock_t	lock	____cacheline_internodealigned_in_smp;
 	cpumask_t	cpumask; /* CPUs that need to switch in order    */
 				 /* for current batch to proceed.        */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;	 /* Time at which GP started in jiffies. */
+	unsigned long seconds_stall;
+				 /* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 } ____cacheline_internodealigned_in_smp;
 
 /* Is batch a before batch b ? */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index aad93cd..c092ba9 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
 }
 #endif
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_start = jiffies;
+	rcp->seconds_stall = get_seconds() + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rcp->lock, flags);
+	delta = get_seconds() - rcp->seconds_stall;
+	if (delta < 2 || rcp->cur != rcp->completed) {
+		spin_unlock_irqrestore(&rcp->lock, flags);
+		return;
+	}
+	rcp->seconds_stall = get_seconds() + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_possible_cpu(cpu) {
+		if (cpu_isset(cpu, rcp->cpumask))
+			printk(" %d", cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+			smp_processor_id(), get_seconds(),
+			jiffies - rcp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rcp->lock, flags);
+	if ((long)(get_seconds() - rcp->seconds_stall) >= 0)
+		rcp->seconds_stall =
+			get_seconds() + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	long delta;
+
+	delta = get_seconds() - rcp->seconds_stall;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+		
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rcp);
+
+	} else if (rcp->cur != rcp->completed && delta >= 2) {
+
+		/* They had two seconds to dump stack, so complain. */
+		print_other_cpu_stall(rcp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 */
 		smp_wmb();
 		rcp->cur++;
+		record_gp_stall_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rcp);
+
 	/* This cpu has pending rcu entries and the grace period
 	 * for them has completed.
 	 */
@@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
 						struct rcu_data *rdp)
 {
+#ifdef CONFIG_DEBUG_RCU_STALL
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
 	memset(rdp, 0, sizeof(*rdp));
 	rdp->curtail = &rdp->curlist;
 	rdp->nxttail = &rdp->nxtlist;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b50481..9fee969 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_CPU_STALL_DETECTOR
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02  0:36 [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
@ 2008-10-02  8:07 ` Ingo Molnar
  2008-10-02 14:05   ` Paul E. McKenney
  0 siblings, 1 reply; 17+ messages in thread
From: Ingo Molnar @ 2008-10-02  8:07 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, akpm, mathieu.desnoyers, laijs, manfred, dipankar,
	niv, dvhltc, josht


* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> Hello!
> 
> This patch adds stalled-CPU detection to Classic RCU.  This capability 
> is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> which defaults disabled.  This is a debugging feature, not something 
> that non-kernel-hackers would be expected to care about.  This feature 
> can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> preemption disabled in PREEMPT builds.  This is essentially a port of 
> this functionality from the treercu patch.
> 
> One current shortcoming: on some systems, stalls are detected during 
> early boot, when we normally would not care about them.  My thought is 
> to add a call from late initialization to suppress stall detection 
> until the system is well along its way to being booted, but thought I 
> should check to see if there might already be something for this 
> purpose.

could you be a bit more specific, why do those warnings show up and why 
dont we care about them? There are things like networking that 
occasionally do an rcu_sync() and a stall could mean a bootup hang.

> (Currently against 2.6.27-rc8, FYI.)
> 
> Thoughts?
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

i think this is a very good idea in general - often the question comes 
up whether a hang seen in the RCU code is indeed caused by RCU or other 
factors. Could you perhaps rebase it against tip/core/rcu ? [or 
tip/master for convenience]

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02  8:07 ` Ingo Molnar
@ 2008-10-02 14:05   ` Paul E. McKenney
  2008-10-02 15:10     ` Mathieu Desnoyers
  2008-10-02 17:39     ` [PATCH,RFC] " Paul E. McKenney
  0 siblings, 2 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02 14:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, akpm, mathieu.desnoyers, laijs, manfred, dipankar,
	niv, dvhltc, josht

On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> > Hello!
> > 
> > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > which defaults disabled.  This is a debugging feature, not something 
> > that non-kernel-hackers would be expected to care about.  This feature 
> > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > this functionality from the treercu patch.
> > 
> > One current shortcoming: on some systems, stalls are detected during 
> > early boot, when we normally would not care about them.  My thought is 
> > to add a call from late initialization to suppress stall detection 
> > until the system is well along its way to being booted, but thought I 
> > should check to see if there might already be something for this 
> > purpose.
> 
> could you be a bit more specific, why do those warnings show up and why 
> dont we care about them? There are things like networking that 
> occasionally do an rcu_sync() and a stall could mean a bootup hang.

Hmmm...

Good point, I was just falling back on my old "we don't care about RCU
stalls in boot-time code" rule from long ago.  It is entirely possible
(in fact reasonably likely) that this rule no longer applies to Linux as
it exists today.  So please see below for the console output.  Not all
systems report this stall.  I have recently been running only on Power
systems, will fire off on some x86s.  My kneejerk reaction was that the
"stall" was really due to the clock-setting operation -- RCU stalls are
based on get_seconds().

So maybe I need to change the stall-detection code to use jiffies
instead.

Thoughts?

> > (Currently against 2.6.27-rc8, FYI.)
> > 
> > Thoughts?
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> i think this is a very good idea in general - often the question comes 
> up whether a hang seen in the RCU code is indeed caused by RCU or other 
> factors. Could you perhaps rebase it against tip/core/rcu ? [or 
> tip/master for convenience]

Will do!

							Thanx, Paul

------------------------------------------------------------------------
Console output: search for "RCU detected CPU 5 stall" near the end.
------------------------------------------------------------------------

Elapsed time since release of system processors: 7742 mins 58 secs

Config file read, 2048 bytes
Welcome
Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
Enter "help" to get some basic usage information
boot: autobench
Please wait, loading kernel...
   Elf64 kernel loaded...
Loading ramdisk...
ramdisk loaded at 02b00000, size: 2331 Kbytes
OF stdout device is: /vdevice/vty@30000000
Hypertas detected, assuming LPAR !
    -\|/-\|/-\|/-\|
Elapsed time since release of system processors: 7743 mins 29 secs

Config file read, 2048 bytes
Welcome
Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
Enter "help" to get some basic usage information
boot: autobench 
Please wait, loading kernel...
   Elf64 kernel loaded...
Loading ramdisk...
ramdisk loaded at 02b00000, size: 2331 Kbytes
OF stdout device is: /vdevice/vty@30000000
Hypertas detected, assuming LPAR !
command line: ro console=hvc0 autobench_args: root=/dev/sda6
ABAT:1222922465 
memory layout at init:
  alloc_bottom : 0000000002d47000
  alloc_top    : 0000000008000000
  alloc_top_hi : 0000000008000000
  rmo_top      : 0000000008000000
  ram_top      : 0000000008000000
Looking for displays
instantiating rtas at 0x000000000757a000 ... done
boot cpu hw idx 0000000000000000
starting cpu hw idx 0000000000000002... done
starting cpu hw idx 0000000000000004... done
starting cpu hw idx 0000000000000006... done
copying OF device tree ...
Building dt strings...
Building dt structure...
Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
Device tree struct  0x0000000002f4a000 -> 0x0000000002f58000
Calling quiesce ...
returning from prom_init
Using pSeries machine description
Using 1TB segments
Found initrd at 0xc000000002b00000:0xc000000002d46c00
console [udbg0] enabled
Partition configured for 8 cpus.
CPU maps initialized for 2 threads per core
Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
-----------------------------------------------------
ppc64_pft_size                = 0x19
physicalMemorySize            = 0x80000000
htab_hash_mask                = 0x3ffff
-----------------------------------------------------
Initializing cgroup subsys cpuset
Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
00:39:54 EDT 2008
[boot]0012 Setup Arch
EEH: No capable adapters found
PPC64 nvram contains 15360 bytes
Zone PFN ranges:
  DMA      0x00000000 -> 0x00080000
  Normal   0x00080000 -> 0x00080000
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x00044000
    1: 0x00044000 -> 0x00080000
[boot]0015 Setup Done
Built 2 zonelists in Node order, mobility grouping on.  Total pages:
517120
Policy zone: DMA
Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
ABAT:1222922465 
Experimental hierarchical RCU implementation.
RCU-based detection of stalled CPUs is enabled.
Experimental hierarchical RCU init done.
[boot]0020 XICS Init
[boot]0021 XICS Done
PID hash table entries: 4096 (order: 12, 32768 bytes)
clocksource: timebase mult[7d0000] shift[22] registered
Console: colour dummy device 80x25
console handover: boot [udbg0] -> real [hvc0]
Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
freeing bootmem node 0
freeing bootmem node 1
Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
1052k data, 513k bss, 292k init)
SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
Nodes=16
Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
Mount-cache hash table entries: 256
Initializing cgroup subsys ns
Initializing cgroup subsys cpuacct
Processor 1 found.
Processor 2 found.
Processor 3 found.
Processor 4 found.
Processor 5 found.
Processor 6 found.
Processor 7 found.
Brought up 8 CPUs
net_namespace: 1152 bytes
NET: Registered protocol family 16
IBM eBus Device Driver
PCI: Probing PCI hardware
SCSI subsystem initialized
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
NET: Registered protocol family 2
IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
TCP: Hash tables configured (established 262144 bind 65536)
TCP reno registered
NET: Registered protocol family 1
checking if image is initramfs... it is
Freeing initrd memory: 2331k freed
IOMMU table initialized, virtual merging enabled
audit: initializing netlink socket (disabled)
type=2000 audit(1222908206.468:1): initialized
HugeTLB registered 16 MB page size, pre-allocated 0 pages
HugeTLB registered 16 GB page size, pre-allocated 0 pages
HugeTLB registered 64 KB page size, pre-allocated 0 pages
Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
msgmni has been set to 3985
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
io scheduler noop registered
io scheduler anticipatory registered (default)
io scheduler deadline registered
io scheduler cfq registered
Generic RTC Driver v1.07
Serial: 8250/16550 driver4 ports, IRQ sharing disabled
brd: module loaded
loop: module loaded
Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
Copyright (c) 1999-2006 Intel Corporation.
pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
e100: Copyright(c) 1999-2006 Intel Corporation
console [netcon0] enabled
netconsole: network logging started
Uniform Multi-Platform E-IDE driver
ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
ibmvscsi 30000003: SRP_VERSION: 16.a
scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
ibmvscsi 30000003: partner initialization complete
ibmvscsi 30000003: sent SRP login
ibmvscsi 30000003: SRP_LOGIN succeeded
ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
OS 2, max io 262144
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi 0:0:1:0: Direct-Access     IBM      VDASD blkdev     0001 PQ: 0
ANSI: 4
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
scsi scan: INQUIRY result too short (5), using 36
st: Version 20080504, fixed bufsize 32768, s/g segs 256
Driver 'st' needs updating - please use bus_type methods
Driver 'sd' needs updating - please use bus_type methods
sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
sd 0:0:1:0: [sda] Write Protect is off
sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
support DPO or FUA
sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
sd 0:0:1:0: [sda] Write Protect is off
sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
support DPO or FUA
 sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
sd 0:0:1:0: [sda] Attached SCSI disk
Driver 'sr' needs updating - please use bus_type methods
sd 0:0:1:0: Attached scsi generic sg0 type 0
Initializing USB Mass Storage driver...
usbcore: registered new interface driver usb-storage
USB Mass Storage support registered.
mice: PS/2 mouse device common for all mice
md: linear personality registered for level -1
md: raid0 personality registered for level 0
md: raid1 personality registered for level 1
device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
dm-devel@redhat.com
usbcore: registered new interface driver hiddev
usbcore: registered new interface driver usbhid
usbhid: v2.6:USB HID core driver
IPv4 over IPv4 tunneling driver
TCP cubic registered
NET: Registered protocol family 17
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
registered taskstats version 1
Freeing unused kernel memory: 292k freed
Red Hat nash version 5.1.19.6 starting
Mounting proc filesystem
Mounting sysfs filesystem
Creating /dev
Creating initial device nodes
Setting up hotplug.
Creating block device nodes.
Loading ehea.ko module
IBM eHEA ethernet device driver (Release EHEA_0092)
Scanning and configuring dmraid supported devices
Creating root device.
Mounting root filesystem.
kjournald starting.  Commit interval 5 seconds
EXT3-fs: mounted filesystem with ordered data mode.
Setting up other filesystems.
Setting up new root fs
no fstab.sys, mounting internal defaults
Switching to new root and running init.
unmounting old /dev
unmounting old /proc
unmounting old /sys
INIT: version 2.86 booting
                Welcome to Red Hat Enterprise Linux Server
                Press 'I' to enter interactive startup.
RCU detected CPU 5 stall (t=1222922609/2)
Call Trace:
[c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
(unreliable)
[c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
[c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
[c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
[c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
[c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
[c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
[c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
[c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
Setting clock  (localtime): Thu Oct  2 00:43:30 EDT 2008 [  OK  ]
Starting udev: [  OK  ]
Setting hostname tundro2.rchland.ibm.com:  [  OK  ]
No devices found
Setting up Logical Volume Management:   No volume groups found
[  OK  ]
Checking filesystems
Checking all file systems.
[/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6 
/dev/sda6: Superblock last mount time is in the future.  FIXED.
/dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
[  OK  ]
Remounting root filesystem in read-write mode:  [  OK  ]
Mounting local filesystems:  [  OK  ]
Enabling local filesystem quotas:  quotaon: Warning: No quota format
detected in the kernel.
[  OK  ]
Enabling /etc/fstab swaps:  swapon: /dev/sda5: Invalid argument
[FAILED]
INIT: Entering runlevel: 3
Entering non-interactive startup
Starting boot.loadmodules:  [  OK  ]
Starting background readahead: [  OK  ]
Bringing up loopback interface:  [  OK  ]
Bringing up interface eth0:  [  OK  ]
Starting auditd: [  OK  ]
Starting system logger: [  OK  ]
Starting kernel logger: [  OK  ]
Starting irqbalance: [  OK  ]
Starting RPC idmapd: FATAL: Module sunrpc not found.
FATAL: Error running install command for sunrpc
Error: RPC MTAB does not exist.
Starting system message bus: [  OK  ]
Starting Bluetooth services:[  OK  ][  OK  ]Can't open RFCOMM control
socket: Address family not supported by protocol

Mounting other filesystems:  [  OK  ]
Starting PC/SC smart card daemon (pcscd): [  OK  ]
Starting hidd: Can't open HIDP control socket: Address family not
supported by protocol
[FAILED]
Starting autofs:  Loading autofs4: [  OK  ]
Starting automount: [  OK  ]
[  OK  ]
Starting iprinit:  Starting ipr initialization daemon[  OK  ]
[  OK  ]
Starting iprupdate:  Checking ipr microcode levels
Completed ipr microcode updates[  OK  ]
[  OK  ]
Starting iprdump:  Starting ipr dump daemon[  OK  ]
[  OK  ]
Starting sshd: [  OK  ]
Starting cups: [  OK  ]
Starting xinetd: [  OK  ]
Starting vpdupdate: [  OK  ]
Starting console mouse services: [  OK  ]
Starting crond: [  OK  ]
Starting xfs: [  OK  ]
Starting anacron: [  OK  ]
Starting atd: [  OK  ]
Starting yum-updatesd: [  OK  ]
Starting Avahi daemon... [  OK  ]
Starting HAL daemon: [  OK  ]
Starting smartd: [  OK  ]

Red Hat Enterprise Linux Server release 5.2 (Tikanga)
Kernel 2.6.27-rc7-autokern1 on an ppc64

tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
Oct/01/08 21:45:47 --
-- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
(bot:conmon-payload) disconnected

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 14:05   ` Paul E. McKenney
@ 2008-10-02 15:10     ` Mathieu Desnoyers
  2008-10-02 17:43       ` Paul E. McKenney
  2008-10-02 17:39     ` [PATCH,RFC] " Paul E. McKenney
  1 sibling, 1 reply; 17+ messages in thread
From: Mathieu Desnoyers @ 2008-10-02 15:10 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Ingo Molnar, linux-kernel, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht

* Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
> > 
> > * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> > 
> > > Hello!
> > > 
> > > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > > which defaults disabled.  This is a debugging feature, not something 
> > > that non-kernel-hackers would be expected to care about.  This feature 
> > > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > > this functionality from the treercu patch.
> > > 
> > > One current shortcoming: on some systems, stalls are detected during 
> > > early boot, when we normally would not care about them.  My thought is 
> > > to add a call from late initialization to suppress stall detection 
> > > until the system is well along its way to being booted, but thought I 
> > > should check to see if there might already be something for this 
> > > purpose.
> > 
> > could you be a bit more specific, why do those warnings show up and why 
> > dont we care about them? There are things like networking that 
> > occasionally do an rcu_sync() and a stall could mean a bootup hang.
> 
> Hmmm...
> 
> Good point, I was just falling back on my old "we don't care about RCU
> stalls in boot-time code" rule from long ago.  It is entirely possible
> (in fact reasonably likely) that this rule no longer applies to Linux as
> it exists today.  So please see below for the console output.  Not all
> systems report this stall.  I have recently been running only on Power
> systems, will fire off on some x86s.  My kneejerk reaction was that the
> "stall" was really due to the clock-setting operation -- RCU stalls are
> based on get_seconds().
> 
> So maybe I need to change the stall-detection code to use jiffies
> instead.
> 
> Thoughts?
> 

You could possibly use get monotonic time, which should hopefully be a
bit better than its NTP-corrected alternatives.

Also, do you have all the tools you need to poinpoint the precise source
of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
new tracepoint to instrument RCU callback execution at boot time and to
trace it with LTTng is close to trivial. (actually, very early kernel
boot is not so trivial as we would have to create a small kernel module
to activate lttng tracing, but tracing the early phases of userspace
bootup is trivial given we can use the userspace API).

Mathieu

> > > (Currently against 2.6.27-rc8, FYI.)
> > > 
> > > Thoughts?
> > > 
> > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > 
> > i think this is a very good idea in general - often the question comes 
> > up whether a hang seen in the RCU code is indeed caused by RCU or other 
> > factors. Could you perhaps rebase it against tip/core/rcu ? [or 
> > tip/master for convenience]
> 
> Will do!
> 
> 							Thanx, Paul
> 
> ------------------------------------------------------------------------
> Console output: search for "RCU detected CPU 5 stall" near the end.
> ------------------------------------------------------------------------
> 
> Elapsed time since release of system processors: 7742 mins 58 secs
> 
> Config file read, 2048 bytes
> Welcome
> Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
> Enter "help" to get some basic usage information
> boot: autobench
> Please wait, loading kernel...
>    Elf64 kernel loaded...
> Loading ramdisk...
> ramdisk loaded at 02b00000, size: 2331 Kbytes
> OF stdout device is: /vdevice/vty@30000000
> Hypertas detected, assuming LPAR !
>     -\|/-\|/-\|/-\|
> Elapsed time since release of system processors: 7743 mins 29 secs
> 
> Config file read, 2048 bytes
> Welcome
> Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
> Enter "help" to get some basic usage information
> boot: autobench 
> Please wait, loading kernel...
>    Elf64 kernel loaded...
> Loading ramdisk...
> ramdisk loaded at 02b00000, size: 2331 Kbytes
> OF stdout device is: /vdevice/vty@30000000
> Hypertas detected, assuming LPAR !
> command line: ro console=hvc0 autobench_args: root=/dev/sda6
> ABAT:1222922465 
> memory layout at init:
>   alloc_bottom : 0000000002d47000
>   alloc_top    : 0000000008000000
>   alloc_top_hi : 0000000008000000
>   rmo_top      : 0000000008000000
>   ram_top      : 0000000008000000
> Looking for displays
> instantiating rtas at 0x000000000757a000 ... done
> boot cpu hw idx 0000000000000000
> starting cpu hw idx 0000000000000002... done
> starting cpu hw idx 0000000000000004... done
> starting cpu hw idx 0000000000000006... done
> copying OF device tree ...
> Building dt strings...
> Building dt structure...
> Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
> Device tree struct  0x0000000002f4a000 -> 0x0000000002f58000
> Calling quiesce ...
> returning from prom_init
> Using pSeries machine description
> Using 1TB segments
> Found initrd at 0xc000000002b00000:0xc000000002d46c00
> console [udbg0] enabled
> Partition configured for 8 cpus.
> CPU maps initialized for 2 threads per core
> Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
> -----------------------------------------------------
> ppc64_pft_size                = 0x19
> physicalMemorySize            = 0x80000000
> htab_hash_mask                = 0x3ffff
> -----------------------------------------------------
> Initializing cgroup subsys cpuset
> Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
> version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
> 00:39:54 EDT 2008
> [boot]0012 Setup Arch
> EEH: No capable adapters found
> PPC64 nvram contains 15360 bytes
> Zone PFN ranges:
>   DMA      0x00000000 -> 0x00080000
>   Normal   0x00080000 -> 0x00080000
> Movable zone start PFN for each node
> early_node_map[2] active PFN ranges
>     0: 0x00000000 -> 0x00044000
>     1: 0x00044000 -> 0x00080000
> [boot]0015 Setup Done
> Built 2 zonelists in Node order, mobility grouping on.  Total pages:
> 517120
> Policy zone: DMA
> Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
> ABAT:1222922465 
> Experimental hierarchical RCU implementation.
> RCU-based detection of stalled CPUs is enabled.
> Experimental hierarchical RCU init done.
> [boot]0020 XICS Init
> [boot]0021 XICS Done
> PID hash table entries: 4096 (order: 12, 32768 bytes)
> clocksource: timebase mult[7d0000] shift[22] registered
> Console: colour dummy device 80x25
> console handover: boot [udbg0] -> real [hvc0]
> Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
> Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
> freeing bootmem node 0
> freeing bootmem node 1
> Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
> 1052k data, 513k bss, 292k init)
> SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
> Nodes=16
> Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
> Mount-cache hash table entries: 256
> Initializing cgroup subsys ns
> Initializing cgroup subsys cpuacct
> Processor 1 found.
> Processor 2 found.
> Processor 3 found.
> Processor 4 found.
> Processor 5 found.
> Processor 6 found.
> Processor 7 found.
> Brought up 8 CPUs
> net_namespace: 1152 bytes
> NET: Registered protocol family 16
> IBM eBus Device Driver
> PCI: Probing PCI hardware
> SCSI subsystem initialized
> usbcore: registered new interface driver usbfs
> usbcore: registered new interface driver hub
> usbcore: registered new device driver usb
> NET: Registered protocol family 2
> IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
> TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
> TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
> TCP: Hash tables configured (established 262144 bind 65536)
> TCP reno registered
> NET: Registered protocol family 1
> checking if image is initramfs... it is
> Freeing initrd memory: 2331k freed
> IOMMU table initialized, virtual merging enabled
> audit: initializing netlink socket (disabled)
> type=2000 audit(1222908206.468:1): initialized
> HugeTLB registered 16 MB page size, pre-allocated 0 pages
> HugeTLB registered 16 GB page size, pre-allocated 0 pages
> HugeTLB registered 64 KB page size, pre-allocated 0 pages
> Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
> msgmni has been set to 3985
> Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
> io scheduler noop registered
> io scheduler anticipatory registered (default)
> io scheduler deadline registered
> io scheduler cfq registered
> Generic RTC Driver v1.07
> Serial: 8250/16550 driver4 ports, IRQ sharing disabled
> brd: module loaded
> loop: module loaded
> Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
> Copyright (c) 1999-2006 Intel Corporation.
> pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
> e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
> e100: Copyright(c) 1999-2006 Intel Corporation
> console [netcon0] enabled
> netconsole: network logging started
> Uniform Multi-Platform E-IDE driver
> ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
> ibmvscsi 30000003: SRP_VERSION: 16.a
> scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
> ibmvscsi 30000003: partner initialization complete
> ibmvscsi 30000003: sent SRP login
> ibmvscsi 30000003: SRP_LOGIN succeeded
> ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
> OS 2, max io 262144
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi 0:0:1:0: Direct-Access     IBM      VDASD blkdev     0001 PQ: 0
> ANSI: 4
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> scsi scan: INQUIRY result too short (5), using 36
> st: Version 20080504, fixed bufsize 32768, s/g segs 256
> Driver 'st' needs updating - please use bus_type methods
> Driver 'sd' needs updating - please use bus_type methods
> sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
> sd 0:0:1:0: [sda] Write Protect is off
> sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
> support DPO or FUA
> sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
> sd 0:0:1:0: [sda] Write Protect is off
> sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
> support DPO or FUA
>  sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
> sd 0:0:1:0: [sda] Attached SCSI disk
> Driver 'sr' needs updating - please use bus_type methods
> sd 0:0:1:0: Attached scsi generic sg0 type 0
> Initializing USB Mass Storage driver...
> usbcore: registered new interface driver usb-storage
> USB Mass Storage support registered.
> mice: PS/2 mouse device common for all mice
> md: linear personality registered for level -1
> md: raid0 personality registered for level 0
> md: raid1 personality registered for level 1
> device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
> dm-devel@redhat.com
> usbcore: registered new interface driver hiddev
> usbcore: registered new interface driver usbhid
> usbhid: v2.6:USB HID core driver
> IPv4 over IPv4 tunneling driver
> TCP cubic registered
> NET: Registered protocol family 17
> RPC: Registered udp transport module.
> RPC: Registered tcp transport module.
> registered taskstats version 1
> Freeing unused kernel memory: 292k freed
> Red Hat nash version 5.1.19.6 starting
> Mounting proc filesystem
> Mounting sysfs filesystem
> Creating /dev
> Creating initial device nodes
> Setting up hotplug.
> Creating block device nodes.
> Loading ehea.ko module
> IBM eHEA ethernet device driver (Release EHEA_0092)
> Scanning and configuring dmraid supported devices
> Creating root device.
> Mounting root filesystem.
> kjournald starting.  Commit interval 5 seconds
> EXT3-fs: mounted filesystem with ordered data mode.
> Setting up other filesystems.
> Setting up new root fs
> no fstab.sys, mounting internal defaults
> Switching to new root and running init.
> unmounting old /dev
> unmounting old /proc
> unmounting old /sys
> INIT: version 2.86 booting
>                 Welcome to Red Hat Enterprise Linux Server
>                 Press 'I' to enter interactive startup.
> RCU detected CPU 5 stall (t=1222922609/2)
> Call Trace:
> [c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
> (unreliable)
> [c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
> [c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
> [c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
> [c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
> [c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
> [c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
> [c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
> [c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
> Setting clock  (localtime): Thu Oct  2 00:43:30 EDT 2008 [  OK  ]
> Starting udev: [  OK  ]
> Setting hostname tundro2.rchland.ibm.com:  [  OK  ]
> No devices found
> Setting up Logical Volume Management:   No volume groups found
> [  OK  ]
> Checking filesystems
> Checking all file systems.
> [/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6 
> /dev/sda6: Superblock last mount time is in the future.  FIXED.
> /dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
> [  OK  ]
> Remounting root filesystem in read-write mode:  [  OK  ]
> Mounting local filesystems:  [  OK  ]
> Enabling local filesystem quotas:  quotaon: Warning: No quota format
> detected in the kernel.
> [  OK  ]
> Enabling /etc/fstab swaps:  swapon: /dev/sda5: Invalid argument
> [FAILED]
> INIT: Entering runlevel: 3
> Entering non-interactive startup
> Starting boot.loadmodules:  [  OK  ]
> Starting background readahead: [  OK  ]
> Bringing up loopback interface:  [  OK  ]
> Bringing up interface eth0:  [  OK  ]
> Starting auditd: [  OK  ]
> Starting system logger: [  OK  ]
> Starting kernel logger: [  OK  ]
> Starting irqbalance: [  OK  ]
> Starting RPC idmapd: FATAL: Module sunrpc not found.
> FATAL: Error running install command for sunrpc
> Error: RPC MTAB does not exist.
> Starting system message bus: [  OK  ]
> Starting Bluetooth services:[  OK  ][  OK  ]Can't open RFCOMM control
> socket: Address family not supported by protocol
> 
> Mounting other filesystems:  [  OK  ]
> Starting PC/SC smart card daemon (pcscd): [  OK  ]
> Starting hidd: Can't open HIDP control socket: Address family not
> supported by protocol
> [FAILED]
> Starting autofs:  Loading autofs4: [  OK  ]
> Starting automount: [  OK  ]
> [  OK  ]
> Starting iprinit:  Starting ipr initialization daemon[  OK  ]
> [  OK  ]
> Starting iprupdate:  Checking ipr microcode levels
> Completed ipr microcode updates[  OK  ]
> [  OK  ]
> Starting iprdump:  Starting ipr dump daemon[  OK  ]
> [  OK  ]
> Starting sshd: [  OK  ]
> Starting cups: [  OK  ]
> Starting xinetd: [  OK  ]
> Starting vpdupdate: [  OK  ]
> Starting console mouse services: [  OK  ]
> Starting crond: [  OK  ]
> Starting xfs: [  OK  ]
> Starting anacron: [  OK  ]
> Starting atd: [  OK  ]
> Starting yum-updatesd: [  OK  ]
> Starting Avahi daemon... [  OK  ]
> Starting HAL daemon: [  OK  ]
> Starting smartd: [  OK  ]
> 
> Red Hat Enterprise Linux Server release 5.2 (Tikanga)
> Kernel 2.6.27-rc7-autokern1 on an ppc64
> 
> tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
> Oct/01/08 21:45:47 --
> -- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
> (bot:conmon-payload) disconnected

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 14:05   ` Paul E. McKenney
  2008-10-02 15:10     ` Mathieu Desnoyers
@ 2008-10-02 17:39     ` Paul E. McKenney
  2008-10-03 10:12       ` Lai Jiangshan
  1 sibling, 1 reply; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02 17:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, akpm, mathieu.desnoyers, laijs, manfred, dipankar,
	niv, dvhltc, josht

Hello again!

This patch adds stalled-CPU detection to Classic RCU.  This capability 
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
which defaults disabled.  This is a debugging feature, not something 
that non-kernel-hackers would be expected to care about.  This feature 
can detect looping CPUs in !PREEMPT builds and looping CPUs with 
preemption disabled in PREEMPT builds.  This is essentially a port of 
this functionality from the treercu patch.

This version uses jiffies rather than get_seconds(), which eliminates
the spurious boot-time CPU stall warnings seen on some systems with
the previous patch.

This is still against v2.6.27-rc8 -- I will do a version against tip
this evening (Pacific Time) when I get back to the combination of better
bandwidth and AC power.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/rcuclassic.h |    9 ++++
 kernel/rcuclassic.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug          |   13 ++++++
 3 files changed, 110 insertions(+)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 4ab8436..cab055b 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,6 +40,10 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
@@ -52,6 +56,11 @@ struct rcu_ctrlblk {
 	spinlock_t	lock	____cacheline_internodealigned_in_smp;
 	cpumask_t	cpumask; /* CPUs that need to switch in order    */
 				 /* for current batch to proceed.        */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;	 /* Time at which GP started in jiffies. */
+	unsigned long jiffies_stall;
+				 /* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 } ____cacheline_internodealigned_in_smp;
 
 /* Is batch a before batch b ? */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index aad93cd..a299876 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
 }
 #endif
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_start = jiffies;
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rcp->lock, flags);
+	delta = jiffies - rcp->jiffies_stall;
+	if (delta < 2 || rcp->cur != rcp->completed) {
+		spin_unlock_irqrestore(&rcp->lock, flags);
+		return;
+	}
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_possible_cpu(cpu) {
+		if (cpu_isset(cpu, rcp->cpumask))
+			printk(" %d", cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+			smp_processor_id(), jiffies,
+			jiffies - rcp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rcp->lock, flags);
+	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+		rcp->jiffies_stall =
+			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	long delta;
+
+	delta = jiffies - rcp->jiffies_stall;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+		
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rcp);
+
+	} else if (rcp->cur != rcp->completed && delta >= 2) {
+
+		/* They had two seconds to dump stack, so complain. */
+		print_other_cpu_stall(rcp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 */
 		smp_wmb();
 		rcp->cur++;
+		record_gp_stall_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rcp);
+
 	/* This cpu has pending rcu entries and the grace period
 	 * for them has completed.
 	 */
@@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
 						struct rcu_data *rdp)
 {
+#ifdef CONFIG_DEBUG_RCU_STALL
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
 	memset(rdp, 0, sizeof(*rdp));
 	rdp->curtail = &rdp->curlist;
 	rdp->nxttail = &rdp->nxtlist;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b50481..9fee969 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_CPU_STALL_DETECTOR
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 15:10     ` Mathieu Desnoyers
@ 2008-10-02 17:43       ` Paul E. McKenney
  2008-10-02 18:51         ` [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support) Mathieu Desnoyers
  0 siblings, 1 reply; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02 17:43 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, linux-kernel, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht

On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
> * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
> > > 
> > > * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> > > 
> > > > Hello!
> > > > 
> > > > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > > > which defaults disabled.  This is a debugging feature, not something 
> > > > that non-kernel-hackers would be expected to care about.  This feature 
> > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > > > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > > > this functionality from the treercu patch.
> > > > 
> > > > One current shortcoming: on some systems, stalls are detected during 
> > > > early boot, when we normally would not care about them.  My thought is 
> > > > to add a call from late initialization to suppress stall detection 
> > > > until the system is well along its way to being booted, but thought I 
> > > > should check to see if there might already be something for this 
> > > > purpose.
> > > 
> > > could you be a bit more specific, why do those warnings show up and why 
> > > dont we care about them? There are things like networking that 
> > > occasionally do an rcu_sync() and a stall could mean a bootup hang.
> > 
> > Hmmm...
> > 
> > Good point, I was just falling back on my old "we don't care about RCU
> > stalls in boot-time code" rule from long ago.  It is entirely possible
> > (in fact reasonably likely) that this rule no longer applies to Linux as
> > it exists today.  So please see below for the console output.  Not all
> > systems report this stall.  I have recently been running only on Power
> > systems, will fire off on some x86s.  My kneejerk reaction was that the
> > "stall" was really due to the clock-setting operation -- RCU stalls are
> > based on get_seconds().
> > 
> > So maybe I need to change the stall-detection code to use jiffies
> > instead.
> > 
> > Thoughts?
> > 
> 
> You could possibly use get monotonic time, which should hopefully be a
> bit better than its NTP-corrected alternatives.

OK.  I thought jiffies were monotonic (though not perfectly correlated
to the passage of time).  This is a warning timeout, so does not need
better than a few percent accuracy, as long as time never jumps too far.

But what API did you have in mind?

> Also, do you have all the tools you need to poinpoint the precise source
> of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
> new tracepoint to instrument RCU callback execution at boot time and to
> trace it with LTTng is close to trivial. (actually, very early kernel
> boot is not so trivial as we would have to create a small kernel module
> to activate lttng tracing, but tracing the early phases of userspace
> bootup is trivial given we can use the userspace API).

I just dump stack, which historically worked pretty well.

Could you please tell me more about what you think should be added?

						Thanx, Paul

> Mathieu
> 
> > > > (Currently against 2.6.27-rc8, FYI.)
> > > > 
> > > > Thoughts?
> > > > 
> > > > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > > 
> > > i think this is a very good idea in general - often the question comes 
> > > up whether a hang seen in the RCU code is indeed caused by RCU or other 
> > > factors. Could you perhaps rebase it against tip/core/rcu ? [or 
> > > tip/master for convenience]
> > 
> > Will do!
> > 
> > 							Thanx, Paul
> > 
> > ------------------------------------------------------------------------
> > Console output: search for "RCU detected CPU 5 stall" near the end.
> > ------------------------------------------------------------------------
> > 
> > Elapsed time since release of system processors: 7742 mins 58 secs
> > 
> > Config file read, 2048 bytes
> > Welcome
> > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
> > Enter "help" to get some basic usage information
> > boot: autobench
> > Please wait, loading kernel...
> >    Elf64 kernel loaded...
> > Loading ramdisk...
> > ramdisk loaded at 02b00000, size: 2331 Kbytes
> > OF stdout device is: /vdevice/vty@30000000
> > Hypertas detected, assuming LPAR !
> >     -\|/-\|/-\|/-\|
> > Elapsed time since release of system processors: 7743 mins 29 secs
> > 
> > Config file read, 2048 bytes
> > Welcome
> > Welcome to yaboot version 1.3.13-cas (Red Hat 1.3.13.cas-5)
> > Enter "help" to get some basic usage information
> > boot: autobench 
> > Please wait, loading kernel...
> >    Elf64 kernel loaded...
> > Loading ramdisk...
> > ramdisk loaded at 02b00000, size: 2331 Kbytes
> > OF stdout device is: /vdevice/vty@30000000
> > Hypertas detected, assuming LPAR !
> > command line: ro console=hvc0 autobench_args: root=/dev/sda6
> > ABAT:1222922465 
> > memory layout at init:
> >   alloc_bottom : 0000000002d47000
> >   alloc_top    : 0000000008000000
> >   alloc_top_hi : 0000000008000000
> >   rmo_top      : 0000000008000000
> >   ram_top      : 0000000008000000
> > Looking for displays
> > instantiating rtas at 0x000000000757a000 ... done
> > boot cpu hw idx 0000000000000000
> > starting cpu hw idx 0000000000000002... done
> > starting cpu hw idx 0000000000000004... done
> > starting cpu hw idx 0000000000000006... done
> > copying OF device tree ...
> > Building dt strings...
> > Building dt structure...
> > Device tree strings 0x0000000002f48000 -> 0x0000000002f49537
> > Device tree struct  0x0000000002f4a000 -> 0x0000000002f58000
> > Calling quiesce ...
> > returning from prom_init
> > Using pSeries machine description
> > Using 1TB segments
> > Found initrd at 0xc000000002b00000:0xc000000002d46c00
> > console [udbg0] enabled
> > Partition configured for 8 cpus.
> > CPU maps initialized for 2 threads per core
> > Starting Linux PPC64 #1 SMP PREEMPT Thu Oct 2 00:39:54 EDT 2008
> > -----------------------------------------------------
> > ppc64_pft_size                = 0x19
> > physicalMemorySize            = 0x80000000
> > htab_hash_mask                = 0x3ffff
> > -----------------------------------------------------
> > Initializing cgroup subsys cpuset
> > Linux version 2.6.27-rc7-autokern1 (root@tundro2.rchland.ibm.com) (gcc
> > version 4.1.2 20071124 (Red Hat 4.1.2-42)) #1 SMP PREEMPT Thu Oct 2
> > 00:39:54 EDT 2008
> > [boot]0012 Setup Arch
> > EEH: No capable adapters found
> > PPC64 nvram contains 15360 bytes
> > Zone PFN ranges:
> >   DMA      0x00000000 -> 0x00080000
> >   Normal   0x00080000 -> 0x00080000
> > Movable zone start PFN for each node
> > early_node_map[2] active PFN ranges
> >     0: 0x00000000 -> 0x00044000
> >     1: 0x00044000 -> 0x00080000
> > [boot]0015 Setup Done
> > Built 2 zonelists in Node order, mobility grouping on.  Total pages:
> > 517120
> > Policy zone: DMA
> > Kernel command line: ro console=hvc0 autobench_args: root=/dev/sda6
> > ABAT:1222922465 
> > Experimental hierarchical RCU implementation.
> > RCU-based detection of stalled CPUs is enabled.
> > Experimental hierarchical RCU init done.
> > [boot]0020 XICS Init
> > [boot]0021 XICS Done
> > PID hash table entries: 4096 (order: 12, 32768 bytes)
> > clocksource: timebase mult[7d0000] shift[22] registered
> > Console: colour dummy device 80x25
> > console handover: boot [udbg0] -> real [hvc0]
> > Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
> > Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
> > freeing bootmem node 0
> > freeing bootmem node 1
> > Memory: 2038156k/2097152k available (7200k kernel code, 58996k reserved,
> > 1052k data, 513k bss, 292k init)
> > SLUB: Genslabs=13, HWalign=128, Order=0-3, MinObjects=0, CPUs=8,
> > Nodes=16
> > Calibrating delay loop... 915.45 BogoMIPS (lpj=1830912)
> > Mount-cache hash table entries: 256
> > Initializing cgroup subsys ns
> > Initializing cgroup subsys cpuacct
> > Processor 1 found.
> > Processor 2 found.
> > Processor 3 found.
> > Processor 4 found.
> > Processor 5 found.
> > Processor 6 found.
> > Processor 7 found.
> > Brought up 8 CPUs
> > net_namespace: 1152 bytes
> > NET: Registered protocol family 16
> > IBM eBus Device Driver
> > PCI: Probing PCI hardware
> > SCSI subsystem initialized
> > usbcore: registered new interface driver usbfs
> > usbcore: registered new interface driver hub
> > usbcore: registered new device driver usb
> > NET: Registered protocol family 2
> > IP route cache hash table entries: 65536 (order: 7, 524288 bytes)
> > TCP established hash table entries: 262144 (order: 10, 4194304 bytes)
> > TCP bind hash table entries: 65536 (order: 8, 1048576 bytes)
> > TCP: Hash tables configured (established 262144 bind 65536)
> > TCP reno registered
> > NET: Registered protocol family 1
> > checking if image is initramfs... it is
> > Freeing initrd memory: 2331k freed
> > IOMMU table initialized, virtual merging enabled
> > audit: initializing netlink socket (disabled)
> > type=2000 audit(1222908206.468:1): initialized
> > HugeTLB registered 16 MB page size, pre-allocated 0 pages
> > HugeTLB registered 16 GB page size, pre-allocated 0 pages
> > HugeTLB registered 64 KB page size, pre-allocated 0 pages
> > Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
> > msgmni has been set to 3985
> > Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
> > io scheduler noop registered
> > io scheduler anticipatory registered (default)
> > io scheduler deadline registered
> > io scheduler cfq registered
> > Generic RTC Driver v1.07
> > Serial: 8250/16550 driver4 ports, IRQ sharing disabled
> > brd: module loaded
> > loop: module loaded
> > Intel(R) PRO/1000 Network Driver - version 7.3.20-k3-NAPI
> > Copyright (c) 1999-2006 Intel Corporation.
> > pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
> > e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI
> > e100: Copyright(c) 1999-2006 Intel Corporation
> > console [netcon0] enabled
> > netconsole: network logging started
> > Uniform Multi-Platform E-IDE driver
> > ipr: IBM Power RAID SCSI Device Driver version: 2.4.1 (April 24, 2007)
> > ibmvscsi 30000003: SRP_VERSION: 16.a
> > scsi0 : IBM POWER Virtual SCSI Adapter 1.5.8
> > ibmvscsi 30000003: partner initialization complete
> > ibmvscsi 30000003: sent SRP login
> > ibmvscsi 30000003: SRP_LOGIN succeeded
> > ibmvscsi 30000003: host srp version: 16.a, host partition tundro1 (1),
> > OS 2, max io 262144
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi 0:0:1:0: Direct-Access     IBM      VDASD blkdev     0001 PQ: 0
> > ANSI: 4
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > scsi scan: INQUIRY result too short (5), using 36
> > st: Version 20080504, fixed bufsize 32768, s/g segs 256
> > Driver 'st' needs updating - please use bus_type methods
> > Driver 'sd' needs updating - please use bus_type methods
> > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
> > sd 0:0:1:0: [sda] Write Protect is off
> > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
> > support DPO or FUA
> > sd 0:0:1:0: [sda] 73400922 512-byte hardware sectors (37581 MB)
> > sd 0:0:1:0: [sda] Write Protect is off
> > sd 0:0:1:0: [sda] Write cache: disabled, read cache: disabled, doesn't
> > support DPO or FUA
> >  sda: sda1 sda2 sda3 sda4 < sda5 sda6 sda7 >
> > sd 0:0:1:0: [sda] Attached SCSI disk
> > Driver 'sr' needs updating - please use bus_type methods
> > sd 0:0:1:0: Attached scsi generic sg0 type 0
> > Initializing USB Mass Storage driver...
> > usbcore: registered new interface driver usb-storage
> > USB Mass Storage support registered.
> > mice: PS/2 mouse device common for all mice
> > md: linear personality registered for level -1
> > md: raid0 personality registered for level 0
> > md: raid1 personality registered for level 1
> > device-mapper: ioctl: 4.14.0-ioctl (2008-04-23) initialised:
> > dm-devel@redhat.com
> > usbcore: registered new interface driver hiddev
> > usbcore: registered new interface driver usbhid
> > usbhid: v2.6:USB HID core driver
> > IPv4 over IPv4 tunneling driver
> > TCP cubic registered
> > NET: Registered protocol family 17
> > RPC: Registered udp transport module.
> > RPC: Registered tcp transport module.
> > registered taskstats version 1
> > Freeing unused kernel memory: 292k freed
> > Red Hat nash version 5.1.19.6 starting
> > Mounting proc filesystem
> > Mounting sysfs filesystem
> > Creating /dev
> > Creating initial device nodes
> > Setting up hotplug.
> > Creating block device nodes.
> > Loading ehea.ko module
> > IBM eHEA ethernet device driver (Release EHEA_0092)
> > Scanning and configuring dmraid supported devices
> > Creating root device.
> > Mounting root filesystem.
> > kjournald starting.  Commit interval 5 seconds
> > EXT3-fs: mounted filesystem with ordered data mode.
> > Setting up other filesystems.
> > Setting up new root fs
> > no fstab.sys, mounting internal defaults
> > Switching to new root and running init.
> > unmounting old /dev
> > unmounting old /proc
> > unmounting old /sys
> > INIT: version 2.86 booting
> >                 Welcome to Red Hat Enterprise Linux Server
> >                 Press 'I' to enter interactive startup.
> > RCU detected CPU 5 stall (t=1222922609/2)
> > Call Trace:
> > [c000000042d03920] [c00000000000fdec] .show_stack+0x70/0x184
> > (unreliable)
> > [c000000042d039d0] [c0000000000a1de0] .__rcu_pending+0x98/0x2ec
> > [c000000042d03a70] [c0000000000a206c] .rcu_pending+0x38/0x88
> > [c000000042d03af0] [c00000000006363c] .update_process_times+0x48/0x94
> > [c000000042d03b80] [c00000000007bae4] .tick_sched_timer+0xbc/0x118
> > [c000000042d03c20] [c000000000073a88] .__run_hrtimer+0x78/0x118
> > [c000000042d03cc0] [c000000000074b3c] .hrtimer_interrupt+0x128/0x1e4
> > [c000000042d03d90] [c00000000002304c] .timer_interrupt+0xe0/0x15c
> > [c000000042d03e30] [c000000000003700] decrementer_common+0x100/0x180
> > Setting clock  (localtime): Thu Oct  2 00:43:30 EDT 2008 [  OK  ]
> > Starting udev: [  OK  ]
> > Setting hostname tundro2.rchland.ibm.com:  [  OK  ]
> > No devices found
> > Setting up Logical Volume Management:   No volume groups found
> > [  OK  ]
> > Checking filesystems
> > Checking all file systems.
> > [/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a /dev/sda6 
> > /dev/sda6: Superblock last mount time is in the future.  FIXED.
> > /dev/sda6: clean, 134758/1281696 files, 853722/2560336 blocks
> > [  OK  ]
> > Remounting root filesystem in read-write mode:  [  OK  ]
> > Mounting local filesystems:  [  OK  ]
> > Enabling local filesystem quotas:  quotaon: Warning: No quota format
> > detected in the kernel.
> > [  OK  ]
> > Enabling /etc/fstab swaps:  swapon: /dev/sda5: Invalid argument
> > [FAILED]
> > INIT: Entering runlevel: 3
> > Entering non-interactive startup
> > Starting boot.loadmodules:  [  OK  ]
> > Starting background readahead: [  OK  ]
> > Bringing up loopback interface:  [  OK  ]
> > Bringing up interface eth0:  [  OK  ]
> > Starting auditd: [  OK  ]
> > Starting system logger: [  OK  ]
> > Starting kernel logger: [  OK  ]
> > Starting irqbalance: [  OK  ]
> > Starting RPC idmapd: FATAL: Module sunrpc not found.
> > FATAL: Error running install command for sunrpc
> > Error: RPC MTAB does not exist.
> > Starting system message bus: [  OK  ]
> > Starting Bluetooth services:[  OK  ][  OK  ]Can't open RFCOMM control
> > socket: Address family not supported by protocol
> > 
> > Mounting other filesystems:  [  OK  ]
> > Starting PC/SC smart card daemon (pcscd): [  OK  ]
> > Starting hidd: Can't open HIDP control socket: Address family not
> > supported by protocol
> > [FAILED]
> > Starting autofs:  Loading autofs4: [  OK  ]
> > Starting automount: [  OK  ]
> > [  OK  ]
> > Starting iprinit:  Starting ipr initialization daemon[  OK  ]
> > [  OK  ]
> > Starting iprupdate:  Checking ipr microcode levels
> > Completed ipr microcode updates[  OK  ]
> > [  OK  ]
> > Starting iprdump:  Starting ipr dump daemon[  OK  ]
> > [  OK  ]
> > Starting sshd: [  OK  ]
> > Starting cups: [  OK  ]
> > Starting xinetd: [  OK  ]
> > Starting vpdupdate: [  OK  ]
> > Starting console mouse services: [  OK  ]
> > Starting crond: [  OK  ]
> > Starting xfs: [  OK  ]
> > Starting anacron: [  OK  ]
> > Starting atd: [  OK  ]
> > Starting yum-updatesd: [  OK  ]
> > Starting Avahi daemon... [  OK  ]
> > Starting HAL daemon: [  OK  ]
> > Starting smartd: [  OK  ]
> > 
> > Red Hat Enterprise Linux Server release 5.2 (Tikanga)
> > Kernel 2.6.27-rc7-autokern1 on an ppc64
> > 
> > tundro2.rchland.ibm.com login:-- 0:conmux-control -- time-stamp --
> > Oct/01/08 21:45:47 --
> > -- 0:conmux-control -- time-stamp -- Oct/01/08 22:01:03 --
> > (bot:conmon-payload) disconnected
> 
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support)
  2008-10-02 17:43       ` Paul E. McKenney
@ 2008-10-02 18:51         ` Mathieu Desnoyers
  2008-10-02 22:55           ` Paul E. McKenney
  0 siblings, 1 reply; 17+ messages in thread
From: Mathieu Desnoyers @ 2008-10-02 18:51 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Ingo Molnar, linux-kernel, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht, ltt-dev

* Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
> > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> > > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
> > > > 
> > > > * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> > > > 
> > > > > Hello!
> > > > > 
> > > > > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > > > > which defaults disabled.  This is a debugging feature, not something 
> > > > > that non-kernel-hackers would be expected to care about.  This feature 
> > > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > > > > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > > > > this functionality from the treercu patch.
> > > > > 
> > > > > One current shortcoming: on some systems, stalls are detected during 
> > > > > early boot, when we normally would not care about them.  My thought is 
> > > > > to add a call from late initialization to suppress stall detection 
> > > > > until the system is well along its way to being booted, but thought I 
> > > > > should check to see if there might already be something for this 
> > > > > purpose.
> > > > 
> > > > could you be a bit more specific, why do those warnings show up and why 
> > > > dont we care about them? There are things like networking that 
> > > > occasionally do an rcu_sync() and a stall could mean a bootup hang.
> > > 
> > > Hmmm...
> > > 
> > > Good point, I was just falling back on my old "we don't care about RCU
> > > stalls in boot-time code" rule from long ago.  It is entirely possible
> > > (in fact reasonably likely) that this rule no longer applies to Linux as
> > > it exists today.  So please see below for the console output.  Not all
> > > systems report this stall.  I have recently been running only on Power
> > > systems, will fire off on some x86s.  My kneejerk reaction was that the
> > > "stall" was really due to the clock-setting operation -- RCU stalls are
> > > based on get_seconds().
> > > 
> > > So maybe I need to change the stall-detection code to use jiffies
> > > instead.
> > > 
> > > Thoughts?
> > > 
> > 
> > You could possibly use get monotonic time, which should hopefully be a
> > bit better than its NTP-corrected alternatives.
> 
> OK.  I thought jiffies were monotonic (though not perfectly correlated
> to the passage of time).  This is a warning timeout, so does not need
> better than a few percent accuracy, as long as time never jumps too far.
> 

Well, jiffies should also be monotonic, but I don't know their exact
status wrt dyntick systems.

> But what API did you have in mind?
> 

Something like do_posix_clock_monotonic_gettime() in linux/time.h. I
think it should be ok after early boot.

> > Also, do you have all the tools you need to poinpoint the precise source
> > of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
> > new tracepoint to instrument RCU callback execution at boot time and to
> > trace it with LTTng is close to trivial. (actually, very early kernel
> > boot is not so trivial as we would have to create a small kernel module
> > to activate lttng tracing, but tracing the early phases of userspace
> > bootup is trivial given we can use the userspace API).
> 
> I just dump stack, which historically worked pretty well.
> 
> Could you please tell me more about what you think should be added?
> 

I just created two patches which instrument RCU callbacks and call_rcu
in classic and preempt RCU to the -lttng tree to show how simple it is
once the LTTng infrastructure is in place.

Basically, all you need to do is to pull this tree (branch
2.6.27-rc8-lttng-0.31) :

git://git.kernel.org/pub/scm/linux/kernel/git/compudj/linux-2.6-lttng.git

And follow :
http://ltt.polymtl.ca/svn/trunk/lttv/QUICKSTART

Which will basically tell you to enable markers/tracepoints and leave
most ltt-related options to default (all in general setup menu).

wget http://ltt.polymtl.ca/lttng/ltt-control-0.53-02102008.tar.gz
extract, ./configure, make, make install

wget http://ltt.polymtl.ca/packages/lttv-0.10.0-pre15-12082008.tar.gz
extract, ./configure, make, make install
(see QUICKSTART for package dependencies. Will be automatically checked
by configure)

You may have to add a mount point for debugfs and load some kernel
modules (as explained in the QUICKSTART). Don't forget rcu-trace.ko.

To take a trace, once you are booted in the kernel :

ltt-armall
(check that the rcu_* markers are enabled by looking at the "state"
field of cat /proc/ltt | grep rcu)
lttctl -n trace -d -l /mnt/debugfs/ltt -t /tmp/trace
[Reproduce behavior to trace]
lttctl -n trace -R
# Produce a text dump
lttv -m textDump -t /tmp/trace

# Produce a text dump filtering only rcu events :
lttv -m textDump -t /tmp/trace \
  -e "event.name=rcu_classic_callback|event.name=rcu_classic_call_rcu|\
event.name=rcu_classic_call_rcu_bh|event.name=rcu_preempt_callback|\
event.name=rcu_preempt_call_rcu_sched|event.name=rcu_preempt_call_rcu"

Then some scripting could be required to turn the function pointers into
symbols. Keep you kernel vmlinux around to perform symbol resolution.
A quick trick is to use objdump -S --start-address=0x.... vmlinux|less
but I am certain there are much much better ways to do it. ;)

There is also the gui you can use to dig into the trace :
lttv-gui -t /tmp/trace

That should help you pinpoint which are the functions that took so much
time and which is their associated call_rcu caller IP.

If you run a trace with preempt rcu, the textdump should give you an
output like this :

[...]
rcu_preempt_call_rcu: 130.773191675 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 130.773194003 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 130.773195929 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 130.773197399 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 130.773200429 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802c6670, ip = 0xFFFFFFFF8023A45B }
rcu_preempt_call_rcu: 130.773213907 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff8024a980, ip = 0xFFFFFFFF8023937D }
rcu_preempt_call_rcu: 130.773215527 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff80238550, ip = 0xFFFFFFFF80239985 }
rcu_preempt_callback: 130.789025735 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.789026731 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.789050610 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.789051096 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
rcu_preempt_callback: 130.789052083 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
rcu_preempt_callback: 130.789052713 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
rcu_preempt_callback: 130.805051097 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.805052318 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.805052822 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.805053434 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 130.805054226 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
rcu_preempt_callback: 130.805055339 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
rcu_preempt_callback: 130.805056014 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
rcu_preempt_call_rcu: 131.442990122 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 131.443615323 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 131.446861604 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 131.447487425 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_callback: 131.473052317 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 131.473078626 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 131.477001607 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 131.477002435 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_call_rcu: 133.443017217 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 133.443636900 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 133.446887423 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_call_rcu: 133.447509515 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
rcu_preempt_callback: 133.473044696 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 133.473070649 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 133.476994292 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_callback: 133.476994961 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
rcu_preempt_call_rcu: 134.085511140 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 134.085545237 (/tmp/trace/cpu_5), 3981, 3981, bash, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
rcu_preempt_call_rcu: 134.086124869 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
rcu_preempt_call_rcu: 134.086135078 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
rcu_preempt_call_rcu: 134.086143519 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
rcu_preempt_call_rcu: 134.086206323 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
rcu_preempt_call_rcu: 134.086526166 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
[...]

Basically, once you have the LTTng infrastructure in place, all that
needs to be added to RCU to do that is this (which is already in the
-lttng tree), plus a small LTTng "probe" module. If you want, you could
add a tracepoint in your stalled CPU detector and send the data through
LTTng so you have a starting point to dig from in large traces. From
that point, finding the root cause of long RCU-based CPU stalls becomes
trivial.


RCU : tracepoint instrumentation

Instrument RCU classic and RCU preempt callback execution and call_rcu.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
---
 include/trace/rcu.h |   31 +++++++++++++++++++++++++++++++
 kernel/rcuclassic.c |    4 ++++
 kernel/rcupreempt.c |    4 ++++
 3 files changed, 39 insertions(+)

Index: linux-2.6-lttng/include/trace/rcu.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/trace/rcu.h	2008-10-02 14:18:05.000000000 -0400
@@ -0,0 +1,31 @@
+#ifndef _TRACE_RCU_H
+#define _TRACE_RCU_H
+
+#include <linux/tracepoint.h>
+#include <linux/rcupdate.h>
+
+DEFINE_TRACE(rcu_classic_callback,
+	TPPROTO(struct rcu_head *head),
+	TPARGS(head));
+
+DEFINE_TRACE(rcu_classic_call_rcu,
+	TPPROTO(struct rcu_head *head, unsigned long ip),
+	TPARGS(head, ip));
+
+DEFINE_TRACE(rcu_classic_call_rcu_bh,
+	TPPROTO(struct rcu_head *head, unsigned long ip),
+	TPARGS(head, ip));
+
+DEFINE_TRACE(rcu_preempt_callback,
+	TPPROTO(struct rcu_head *head),
+	TPARGS(head));
+
+DEFINE_TRACE(rcu_preempt_call_rcu,
+	TPPROTO(struct rcu_head *head, unsigned long ip),
+	TPARGS(head, ip));
+
+DEFINE_TRACE(rcu_preempt_call_rcu_sched,
+	TPPROTO(struct rcu_head *head, unsigned long ip),
+	TPARGS(head, ip));
+
+#endif
Index: linux-2.6-lttng/kernel/rcuclassic.c
===================================================================
--- linux-2.6-lttng.orig/kernel/rcuclassic.c	2008-10-02 13:44:35.000000000 -0400
+++ linux-2.6-lttng/kernel/rcuclassic.c	2008-10-02 14:16:45.000000000 -0400
@@ -47,6 +47,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <trace/rcu.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -138,6 +139,7 @@ void call_rcu(struct rcu_head *head,
 	head->func = func;
 	head->next = NULL;
 	local_irq_save(flags);
+	trace_rcu_classic_call_rcu(head, _RET_IP_);
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
@@ -174,6 +176,7 @@ void call_rcu_bh(struct rcu_head *head,
 	head->func = func;
 	head->next = NULL;
 	local_irq_save(flags);
+	trace_rcu_classic_call_rcu_bh(head, _RET_IP_);
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
@@ -232,6 +235,7 @@ static void rcu_do_batch(struct rcu_data
 	while (list) {
 		next = list->next;
 		prefetch(next);
+		trace_rcu_classic_callback(list);
 		list->func(list);
 		list = next;
 		if (++count >= rdp->blimit)
Index: linux-2.6-lttng/kernel/rcupreempt.c
===================================================================
--- linux-2.6-lttng.orig/kernel/rcupreempt.c	2008-10-02 13:53:41.000000000 -0400
+++ linux-2.6-lttng/kernel/rcupreempt.c	2008-10-02 14:15:47.000000000 -0400
@@ -57,6 +57,7 @@
 #include <linux/byteorder/swabb.h>
 #include <linux/cpumask.h>
 #include <linux/rcupreempt_trace.h>
+#include <trace/rcu.h>
 
 /*
  * Macro that prevents the compiler from reordering accesses, but does
@@ -1108,6 +1109,7 @@ static void rcu_process_callbacks(struct
 	spin_unlock_irqrestore(&rdp->lock, flags);
 	while (list) {
 		next = list->next;
+		trace_rcu_preempt_callback(list);
 		list->func(list);
 		list = next;
 		RCU_TRACE_ME(rcupreempt_trace_invoke);
@@ -1122,6 +1124,7 @@ void call_rcu(struct rcu_head *head, voi
 	head->func = func;
 	head->next = NULL;
 	local_irq_save(flags);
+	trace_rcu_preempt_call_rcu(head, _RET_IP_);
 	rdp = RCU_DATA_ME();
 	spin_lock(&rdp->lock);
 	__rcu_advance_callbacks(rdp);
@@ -1141,6 +1144,7 @@ void call_rcu_sched(struct rcu_head *hea
 	head->func = func;
 	head->next = NULL;
 	local_irq_save(flags);
+	trace_rcu_preempt_call_rcu_sched(head, _RET_IP_);
 	rdp = RCU_DATA_ME();
 	spin_lock(&rdp->lock);
 	*rdp->nextschedtail = head;

The LTTng probe module looks like this :


LTTng trace RCU probe

RCU callback/call_rcu LTTng probe.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
---
 ltt/probes/Makefile    |    2 -
 ltt/probes/rcu-trace.c |   54 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

Index: linux-2.6-lttng/ltt/probes/rcu-trace.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/ltt/probes/rcu-trace.c	2008-10-02 14:23:17.000000000 -0400
@@ -0,0 +1,54 @@
+/*
+ * ltt/probes/rcu-trace.c
+ *
+ * RCU tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <trace/rcu.h>
+
+#ifdef CONFIG_CLASSIC_RCU
+void probe_rcu_classic_callback(struct rcu_head *head)
+{
+	trace_mark_tp(rcu_classic_callback, rcu_classic_callback,
+		probe_rcu_classic_callback, "func %p", head->func);
+}
+
+void probe_rcu_classic_call_rcu(struct rcu_head *head, unsigned long ip)
+{
+	trace_mark_tp(rcu_classic_call_rcu, rcu_classic_call_rcu,
+		probe_rcu_classic_call_rcu, "func %p ip 0x%lX", head->func, ip);
+}
+
+void probe_rcu_classic_call_rcu_bh(struct rcu_head *head, unsigned long ip)
+{
+	trace_mark_tp(rcu_classic_call_rcu_bh, rcu_classic_call_rcu_bh,
+		probe_rcu_classic_call_rcu_bh, "func %p ip 0x%lX",
+		head->func, ip);
+}
+#endif
+
+#ifdef CONFIG_PREEMPT_RCU
+void probe_rcu_preempt_callback(struct rcu_head *head)
+{
+	trace_mark_tp(rcu_preempt_callback, rcu_preempt_callback,
+		probe_rcu_preempt_callback, "func %p", head->func);
+}
+
+void probe_rcu_preempt_call_rcu(struct rcu_head *head, unsigned long ip)
+{
+	trace_mark_tp(rcu_preempt_call_rcu, rcu_preempt_call_rcu,
+		probe_rcu_preempt_call_rcu, "func %p ip 0x%lX", head->func, ip);
+}
+
+void probe_rcu_preempt_call_rcu_sched(struct rcu_head *head, unsigned long ip)
+{
+	trace_mark_tp(rcu_preempt_call_rcu_sched, rcu_preempt_call_rcu_sched,
+		probe_rcu_preempt_call_rcu_sched, "func %p ip 0x%lX",
+		head->func, ip);
+}
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("RCU Tracepoint Probes");
Index: linux-2.6-lttng/ltt/probes/Makefile
===================================================================
--- linux-2.6-lttng.orig/ltt/probes/Makefile	2008-10-02 14:11:20.000000000 -0400
+++ linux-2.6-lttng/ltt/probes/Makefile	2008-10-02 14:18:16.000000000 -0400
@@ -9,7 +9,7 @@ CFLAGS_REMOVE_lockdep-trace.o = -pg
 endif
 
 obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
-				ipc-trace.o lockdep-trace.o
+				ipc-trace.o lockdep-trace.o rcu-trace.o
 ifeq ($(CONFIG_NET),y)
 CFLAGS_REMOVE_net-trace.o = -pg
 obj-$(CONFIG_TRACEPROBES)	+= net-trace.o


Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support)
  2008-10-02 18:51         ` [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support) Mathieu Desnoyers
@ 2008-10-02 22:55           ` Paul E. McKenney
  2008-10-02 23:06             ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
  0 siblings, 1 reply; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02 22:55 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, linux-kernel, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht, ltt-dev

On Thu, Oct 02, 2008 at 02:51:15PM -0400, Mathieu Desnoyers wrote:
> * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> > On Thu, Oct 02, 2008 at 11:10:46AM -0400, Mathieu Desnoyers wrote:
> > > * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> > > > On Thu, Oct 02, 2008 at 10:07:26AM +0200, Ingo Molnar wrote:
> > > > > 
> > > > > * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> > > > > 
> > > > > > Hello!
> > > > > > 
> > > > > > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > > > > > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > > > > > which defaults disabled.  This is a debugging feature, not something 
> > > > > > that non-kernel-hackers would be expected to care about.  This feature 
> > > > > > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > > > > > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > > > > > this functionality from the treercu patch.
> > > > > > 
> > > > > > One current shortcoming: on some systems, stalls are detected during 
> > > > > > early boot, when we normally would not care about them.  My thought is 
> > > > > > to add a call from late initialization to suppress stall detection 
> > > > > > until the system is well along its way to being booted, but thought I 
> > > > > > should check to see if there might already be something for this 
> > > > > > purpose.
> > > > > 
> > > > > could you be a bit more specific, why do those warnings show up and why 
> > > > > dont we care about them? There are things like networking that 
> > > > > occasionally do an rcu_sync() and a stall could mean a bootup hang.
> > > > 
> > > > Hmmm...
> > > > 
> > > > Good point, I was just falling back on my old "we don't care about RCU
> > > > stalls in boot-time code" rule from long ago.  It is entirely possible
> > > > (in fact reasonably likely) that this rule no longer applies to Linux as
> > > > it exists today.  So please see below for the console output.  Not all
> > > > systems report this stall.  I have recently been running only on Power
> > > > systems, will fire off on some x86s.  My kneejerk reaction was that the
> > > > "stall" was really due to the clock-setting operation -- RCU stalls are
> > > > based on get_seconds().
> > > > 
> > > > So maybe I need to change the stall-detection code to use jiffies
> > > > instead.
> > > > 
> > > > Thoughts?
> > > > 
> > > 
> > > You could possibly use get monotonic time, which should hopefully be a
> > > bit better than its NTP-corrected alternatives.
> > 
> > OK.  I thought jiffies were monotonic (though not perfectly correlated
> > to the passage of time).  This is a warning timeout, so does not need
> > better than a few percent accuracy, as long as time never jumps too far.
> > 
> 
> Well, jiffies should also be monotonic, but I don't know their exact
> status wrt dyntick systems.

There will be at least one non-dyntick CPU if there are RCU callbacks
pending.  If there are no RCU callbacks pending, then RCU won't be
doing anything, hence will not be detecting stalls.

> > But what API did you have in mind?
> > 
> 
> Something like do_posix_clock_monotonic_gettime() in linux/time.h. I
> think it should be ok after early boot.

I like jiffies for this non-super-exacting purpose.  ;-)

> > > Also, do you have all the tools you need to poinpoint the precise source
> > > of RCU stalls ? I haven't looked at RCU trace, but I should say adding a
> > > new tracepoint to instrument RCU callback execution at boot time and to
> > > trace it with LTTng is close to trivial. (actually, very early kernel
> > > boot is not so trivial as we would have to create a small kernel module
> > > to activate lttng tracing, but tracing the early phases of userspace
> > > bootup is trivial given we can use the userspace API).
> > 
> > I just dump stack, which historically worked pretty well.
> > 
> > Could you please tell me more about what you think should be added?
> > 
> 
> I just created two patches which instrument RCU callbacks and call_rcu
> in classic and preempt RCU to the -lttng tree to show how simple it is
> once the LTTng infrastructure is in place.

OK, I think I see the disconnection here...

When RCU detects a stall, that often indicates that some CPU is spinning
in the kernel (for !CONFIG_PREEMPT) or that some CPU is spinning in the
kernel with preemption disabled (for CONFIG_PREEMPT).  The stack trace
will normally show what is spinning.

Tracing the RCU activity in this case will not help -- it is not RCU that
is spinning, but rather that RCU is stalled waiting for some other part
of the kernel that is spinning.  (In a past life, RCU once complained
about a specific CPU that had managed to fail in a way that left the
system running!  Except that RCU grace periods never ended...)

FWIW, what I normally use for debugging RCU itself is counters rather
than log messages.  The reason I avoid log messages is that the race
conditions I often find myself chasing are reasonably narrow, so that
I would have a very large number of log messages to pile through.
Nevertheless, I will keep track of these patches in case I come across
some situation that is more friendly to log-based debugging -- and thank
you for putting them together!!!

							Thanx, Paul

> Basically, all you need to do is to pull this tree (branch
> 2.6.27-rc8-lttng-0.31) :
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/compudj/linux-2.6-lttng.git
> 
> And follow :
> http://ltt.polymtl.ca/svn/trunk/lttv/QUICKSTART
> 
> Which will basically tell you to enable markers/tracepoints and leave
> most ltt-related options to default (all in general setup menu).
> 
> wget http://ltt.polymtl.ca/lttng/ltt-control-0.53-02102008.tar.gz
> extract, ./configure, make, make install
> 
> wget http://ltt.polymtl.ca/packages/lttv-0.10.0-pre15-12082008.tar.gz
> extract, ./configure, make, make install
> (see QUICKSTART for package dependencies. Will be automatically checked
> by configure)
> 
> You may have to add a mount point for debugfs and load some kernel
> modules (as explained in the QUICKSTART). Don't forget rcu-trace.ko.
> 
> To take a trace, once you are booted in the kernel :
> 
> ltt-armall
> (check that the rcu_* markers are enabled by looking at the "state"
> field of cat /proc/ltt | grep rcu)
> lttctl -n trace -d -l /mnt/debugfs/ltt -t /tmp/trace
> [Reproduce behavior to trace]
> lttctl -n trace -R
> # Produce a text dump
> lttv -m textDump -t /tmp/trace
> 
> # Produce a text dump filtering only rcu events :
> lttv -m textDump -t /tmp/trace \
>   -e "event.name=rcu_classic_callback|event.name=rcu_classic_call_rcu|\
> event.name=rcu_classic_call_rcu_bh|event.name=rcu_preempt_callback|\
> event.name=rcu_preempt_call_rcu_sched|event.name=rcu_preempt_call_rcu"
> 
> Then some scripting could be required to turn the function pointers into
> symbols. Keep you kernel vmlinux around to perform symbol resolution.
> A quick trick is to use objdump -S --start-address=0x.... vmlinux|less
> but I am certain there are much much better ways to do it. ;)
> 
> There is also the gui you can use to dig into the trace :
> lttv-gui -t /tmp/trace
> 
> That should help you pinpoint which are the functions that took so much
> time and which is their associated call_rcu caller IP.
> 
> If you run a trace with preempt rcu, the textdump should give you an
> output like this :
> 
> [...]
> rcu_preempt_call_rcu: 130.773191675 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 130.773194003 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 130.773195929 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 130.773197399 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 130.773200429 (/tmp/trace/cpu_5), 3977, 3977, lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802c6670, ip = 0xFFFFFFFF8023A45B }
> rcu_preempt_call_rcu: 130.773213907 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff8024a980, ip = 0xFFFFFFFF8023937D }
> rcu_preempt_call_rcu: 130.773215527 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff80238550, ip = 0xFFFFFFFF80239985 }
> rcu_preempt_callback: 130.789025735 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.789026731 (/tmp/trace/cpu_2), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.789050610 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.789051096 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
> rcu_preempt_callback: 130.789052083 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
> rcu_preempt_callback: 130.789052713 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
> rcu_preempt_callback: 130.805051097 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.805052318 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.805052822 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.805053434 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 130.805054226 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802c6670 }
> rcu_preempt_callback: 130.805055339 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff8024a980 }
> rcu_preempt_callback: 130.805056014 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff80238550 }
> rcu_preempt_call_rcu: 131.442990122 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 131.443615323 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 131.446861604 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 131.447487425 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_callback: 131.473052317 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 131.473078626 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 131.477001607 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 131.477002435 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_call_rcu: 133.443017217 (/tmp/trace/cpu_5), 3681, 3681, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 133.443636900 (/tmp/trace/cpu_6), 3687, 3687, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 133.446887423 (/tmp/trace/cpu_7), 3683, 3683, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_call_rcu: 133.447509515 (/tmp/trace/cpu_7), 3685, 3685, hald-addon-stor, , 3634, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AA8D9 }
> rcu_preempt_callback: 133.473044696 (/tmp/trace/cpu_5), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 133.473070649 (/tmp/trace/cpu_6), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 133.476994292 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_callback: 133.476994961 (/tmp/trace/cpu_7), 0, 0, swapper, , 0, 0x0, SOFTIRQ { func = 0xffffffff802addc0 }
> rcu_preempt_call_rcu: 134.085511140 (/tmp/trace/cpu_5), 3961, 3961, bash, , 3960, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 134.085545237 (/tmp/trace/cpu_5), 3981, 3981, bash, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> rcu_preempt_call_rcu: 134.086124869 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
> rcu_preempt_call_rcu: 134.086135078 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
> rcu_preempt_call_rcu: 134.086143519 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
> rcu_preempt_call_rcu: 134.086206323 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802B8E2D }
> rcu_preempt_call_rcu: 134.086526166 (/tmp/trace/cpu_2), 3981, 3981, ./lttctl, , 3961, 0x0, SYSCALL { func = 0xffffffff802addc0, ip = 0xFFFFFFFF802AE105 }
> [...]
> 
> Basically, once you have the LTTng infrastructure in place, all that
> needs to be added to RCU to do that is this (which is already in the
> -lttng tree), plus a small LTTng "probe" module. If you want, you could
> add a tracepoint in your stalled CPU detector and send the data through
> LTTng so you have a starting point to dig from in large traces. From
> that point, finding the root cause of long RCU-based CPU stalls becomes
> trivial.
> 
> 
> RCU : tracepoint instrumentation
> 
> Instrument RCU classic and RCU preempt callback execution and call_rcu.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> ---
>  include/trace/rcu.h |   31 +++++++++++++++++++++++++++++++
>  kernel/rcuclassic.c |    4 ++++
>  kernel/rcupreempt.c |    4 ++++
>  3 files changed, 39 insertions(+)
> 
> Index: linux-2.6-lttng/include/trace/rcu.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/include/trace/rcu.h	2008-10-02 14:18:05.000000000 -0400
> @@ -0,0 +1,31 @@
> +#ifndef _TRACE_RCU_H
> +#define _TRACE_RCU_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/rcupdate.h>
> +
> +DEFINE_TRACE(rcu_classic_callback,
> +	TPPROTO(struct rcu_head *head),
> +	TPARGS(head));
> +
> +DEFINE_TRACE(rcu_classic_call_rcu,
> +	TPPROTO(struct rcu_head *head, unsigned long ip),
> +	TPARGS(head, ip));
> +
> +DEFINE_TRACE(rcu_classic_call_rcu_bh,
> +	TPPROTO(struct rcu_head *head, unsigned long ip),
> +	TPARGS(head, ip));
> +
> +DEFINE_TRACE(rcu_preempt_callback,
> +	TPPROTO(struct rcu_head *head),
> +	TPARGS(head));
> +
> +DEFINE_TRACE(rcu_preempt_call_rcu,
> +	TPPROTO(struct rcu_head *head, unsigned long ip),
> +	TPARGS(head, ip));
> +
> +DEFINE_TRACE(rcu_preempt_call_rcu_sched,
> +	TPPROTO(struct rcu_head *head, unsigned long ip),
> +	TPARGS(head, ip));
> +
> +#endif
> Index: linux-2.6-lttng/kernel/rcuclassic.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/rcuclassic.c	2008-10-02 13:44:35.000000000 -0400
> +++ linux-2.6-lttng/kernel/rcuclassic.c	2008-10-02 14:16:45.000000000 -0400
> @@ -47,6 +47,7 @@
>  #include <linux/notifier.h>
>  #include <linux/cpu.h>
>  #include <linux/mutex.h>
> +#include <trace/rcu.h>
> 
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  static struct lock_class_key rcu_lock_key;
> @@ -138,6 +139,7 @@ void call_rcu(struct rcu_head *head,
>  	head->func = func;
>  	head->next = NULL;
>  	local_irq_save(flags);
> +	trace_rcu_classic_call_rcu(head, _RET_IP_);
>  	rdp = &__get_cpu_var(rcu_data);
>  	*rdp->nxttail = head;
>  	rdp->nxttail = &head->next;
> @@ -174,6 +176,7 @@ void call_rcu_bh(struct rcu_head *head,
>  	head->func = func;
>  	head->next = NULL;
>  	local_irq_save(flags);
> +	trace_rcu_classic_call_rcu_bh(head, _RET_IP_);
>  	rdp = &__get_cpu_var(rcu_bh_data);
>  	*rdp->nxttail = head;
>  	rdp->nxttail = &head->next;
> @@ -232,6 +235,7 @@ static void rcu_do_batch(struct rcu_data
>  	while (list) {
>  		next = list->next;
>  		prefetch(next);
> +		trace_rcu_classic_callback(list);
>  		list->func(list);
>  		list = next;
>  		if (++count >= rdp->blimit)
> Index: linux-2.6-lttng/kernel/rcupreempt.c
> ===================================================================
> --- linux-2.6-lttng.orig/kernel/rcupreempt.c	2008-10-02 13:53:41.000000000 -0400
> +++ linux-2.6-lttng/kernel/rcupreempt.c	2008-10-02 14:15:47.000000000 -0400
> @@ -57,6 +57,7 @@
>  #include <linux/byteorder/swabb.h>
>  #include <linux/cpumask.h>
>  #include <linux/rcupreempt_trace.h>
> +#include <trace/rcu.h>
> 
>  /*
>   * Macro that prevents the compiler from reordering accesses, but does
> @@ -1108,6 +1109,7 @@ static void rcu_process_callbacks(struct
>  	spin_unlock_irqrestore(&rdp->lock, flags);
>  	while (list) {
>  		next = list->next;
> +		trace_rcu_preempt_callback(list);
>  		list->func(list);
>  		list = next;
>  		RCU_TRACE_ME(rcupreempt_trace_invoke);
> @@ -1122,6 +1124,7 @@ void call_rcu(struct rcu_head *head, voi
>  	head->func = func;
>  	head->next = NULL;
>  	local_irq_save(flags);
> +	trace_rcu_preempt_call_rcu(head, _RET_IP_);
>  	rdp = RCU_DATA_ME();
>  	spin_lock(&rdp->lock);
>  	__rcu_advance_callbacks(rdp);
> @@ -1141,6 +1144,7 @@ void call_rcu_sched(struct rcu_head *hea
>  	head->func = func;
>  	head->next = NULL;
>  	local_irq_save(flags);
> +	trace_rcu_preempt_call_rcu_sched(head, _RET_IP_);
>  	rdp = RCU_DATA_ME();
>  	spin_lock(&rdp->lock);
>  	*rdp->nextschedtail = head;
> 
> The LTTng probe module looks like this :
> 
> 
> LTTng trace RCU probe
> 
> RCU callback/call_rcu LTTng probe.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> ---
>  ltt/probes/Makefile    |    2 -
>  ltt/probes/rcu-trace.c |   54 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 55 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6-lttng/ltt/probes/rcu-trace.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/ltt/probes/rcu-trace.c	2008-10-02 14:23:17.000000000 -0400
> @@ -0,0 +1,54 @@
> +/*
> + * ltt/probes/rcu-trace.c
> + *
> + * RCU tracepoint probes.
> + */
> +
> +#include <linux/module.h>
> +#include <trace/rcu.h>
> +
> +#ifdef CONFIG_CLASSIC_RCU
> +void probe_rcu_classic_callback(struct rcu_head *head)
> +{
> +	trace_mark_tp(rcu_classic_callback, rcu_classic_callback,
> +		probe_rcu_classic_callback, "func %p", head->func);
> +}
> +
> +void probe_rcu_classic_call_rcu(struct rcu_head *head, unsigned long ip)
> +{
> +	trace_mark_tp(rcu_classic_call_rcu, rcu_classic_call_rcu,
> +		probe_rcu_classic_call_rcu, "func %p ip 0x%lX", head->func, ip);
> +}
> +
> +void probe_rcu_classic_call_rcu_bh(struct rcu_head *head, unsigned long ip)
> +{
> +	trace_mark_tp(rcu_classic_call_rcu_bh, rcu_classic_call_rcu_bh,
> +		probe_rcu_classic_call_rcu_bh, "func %p ip 0x%lX",
> +		head->func, ip);
> +}
> +#endif
> +
> +#ifdef CONFIG_PREEMPT_RCU
> +void probe_rcu_preempt_callback(struct rcu_head *head)
> +{
> +	trace_mark_tp(rcu_preempt_callback, rcu_preempt_callback,
> +		probe_rcu_preempt_callback, "func %p", head->func);
> +}
> +
> +void probe_rcu_preempt_call_rcu(struct rcu_head *head, unsigned long ip)
> +{
> +	trace_mark_tp(rcu_preempt_call_rcu, rcu_preempt_call_rcu,
> +		probe_rcu_preempt_call_rcu, "func %p ip 0x%lX", head->func, ip);
> +}
> +
> +void probe_rcu_preempt_call_rcu_sched(struct rcu_head *head, unsigned long ip)
> +{
> +	trace_mark_tp(rcu_preempt_call_rcu_sched, rcu_preempt_call_rcu_sched,
> +		probe_rcu_preempt_call_rcu_sched, "func %p ip 0x%lX",
> +		head->func, ip);
> +}
> +#endif
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Mathieu Desnoyers");
> +MODULE_DESCRIPTION("RCU Tracepoint Probes");
> Index: linux-2.6-lttng/ltt/probes/Makefile
> ===================================================================
> --- linux-2.6-lttng.orig/ltt/probes/Makefile	2008-10-02 14:11:20.000000000 -0400
> +++ linux-2.6-lttng/ltt/probes/Makefile	2008-10-02 14:18:16.000000000 -0400
> @@ -9,7 +9,7 @@ CFLAGS_REMOVE_lockdep-trace.o = -pg
>  endif
> 
>  obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
> -				ipc-trace.o lockdep-trace.o
> +				ipc-trace.o lockdep-trace.o rcu-trace.o
>  ifeq ($(CONFIG_NET),y)
>  CFLAGS_REMOVE_net-trace.o = -pg
>  obj-$(CONFIG_TRACEPROBES)	+= net-trace.o
> 
> 
> Mathieu
> 
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 22:55           ` Paul E. McKenney
@ 2008-10-02 23:06             ` Paul E. McKenney
  2008-10-03  4:41               ` Mathieu Desnoyers
  2008-10-03  8:39               ` Ingo Molnar
  0 siblings, 2 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-02 23:06 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, mathieu.desnoyers, akpm, laijs, manfred, dipankar,
	niv, dvhltc, josht, ltt-dev

Hello!

This patch adds stalled-CPU detection to Classic RCU.  This capability
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
defaults disabled.  This is a debugging feature to detect infinite loops
in kernel code, not something that non-kernel-hackers would be expected
to care about.  This feature can detect looping CPUs in !PREEMPT builds
and looping CPUs with preemption disabled in PREEMPT builds.  This is
essentially a port of this functionality from the treercu patch, replacing
the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).

The changes from the patch in tip/core/rcu include making the config
variable name match that in treercu, changing from seconds to jiffies to
avoid spurious warnings, and printing a boot message when this feature
is enabled.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/rcuclassic.h |   12 ++-
 kernel/rcuclassic.c        |  166 +++++++++++++++++++++++----------------------
 lib/Kconfig.debug          |    2 
 3 files changed, 96 insertions(+), 84 deletions(-)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 29bf528..2d72d20 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,15 +40,21 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
 	long	cur;		/* Current batch number.                      */
 	long	completed;	/* Number of the last completed batch         */
 	long	pending;	/* Number of the last pending batch           */
-#ifdef CONFIG_DEBUG_RCU_STALL
-	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
-#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;	/* Time at which GP started in jiffies. */
+	unsigned long jiffies_stall;
+				/* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 	int	signaled;
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index ed15128..eae2fb6 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
 	}
 }
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_start = jiffies;
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rcp->lock, flags);
+	delta = jiffies - rcp->jiffies_stall;
+	if (delta < 2 || rcp->cur != rcp->completed) {
+		spin_unlock_irqrestore(&rcp->lock, flags);
+		return;
+	}
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_possible_cpu(cpu) {
+		if (cpu_isset(cpu, rcp->cpumask))
+			printk(" %d", cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+			smp_processor_id(), jiffies,
+			jiffies - rcp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rcp->lock, flags);
+	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+		rcp->jiffies_stall =
+			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	long delta;
+
+	delta = jiffies - rcp->jiffies_stall;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+		
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rcp);
+
+	} else if (rcp->cur != rcp->completed && delta >= 2) {
+
+		/* They had two seconds to dump stack, so complain. */
+		print_other_cpu_stall(rcp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   period (if necessary).
  */
 
-#ifdef CONFIG_DEBUG_RCU_STALL
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-	rcp->gp_check = get_seconds() + 3;
-}
-
-static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	int cpu;
-	long delta;
-	unsigned long flags;
-
-	/* Only let one CPU complain about others per time interval. */
-
-	spin_lock_irqsave(&rcp->lock, flags);
-	delta = get_seconds() - rcp->gp_check;
-	if (delta < 2L || cpus_empty(rcp->cpumask)) {
-		spin_unlock(&rcp->lock);
-		return;
-	}
-	rcp->gp_check = get_seconds() + 30;
-	spin_unlock_irqrestore(&rcp->lock, flags);
-
-	/* OK, time to rat on our buddy... */
-
-	printk(KERN_ERR "RCU detected CPU stalls:");
-	for_each_cpu_mask(cpu, rcp->cpumask)
-		printk(" %d", cpu);
-	printk(" (detected by %d, t=%lu/%lu)\n",
-	       smp_processor_id(), get_seconds(), rcp->gp_check);
-}
-
-static void print_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	unsigned long flags;
-
-	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
-			smp_processor_id(), get_seconds(), rcp->gp_check);
-	dump_stack();
-	spin_lock_irqsave(&rcp->lock, flags);
-	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
-		rcp->gp_check = get_seconds() + 30;
-	spin_unlock_irqrestore(&rcp->lock, flags);
-}
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-	long delta;
-
-	delta = get_seconds() - rcp->gp_check;
-	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
-
-		/* We haven't checked in, so go dump stack. */
-
-		print_cpu_stall(rcp);
-
-	} else {
-		if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
-			/* They had two seconds to dump stack, so complain. */
-			print_other_cpu_stall(rcp);
-		}
-	}
-}
-
-#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-}
-
-static inline void
-check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
-
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 	if (rcp->cur != rcp->pending &&
 			rcp->completed == rcp->cur) {
 		rcp->cur++;
-		record_gp_check_time(rcp);
+		record_gp_stall_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* Check for CPU stalls, if enabled. */
-	check_cpu_stall(rcp, rdp);
+	check_cpu_stall(rcp);
 
 	if (rdp->nxtlist) {
 		long completed_snap = ACCESS_ONCE(rcp->completed);
@@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
  */
 void __init __rcu_init(void)
 {
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	/* Register notifier for non-boot CPUs */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4e921a8..e0e0582 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
-config RCU_CPU_STALL
+config RCU_CPU_STALL_DETECTOR
 	bool "Check for stalled CPUs delaying RCU grace periods"
 	depends on CLASSIC_RCU
 	default n

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 23:06             ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
@ 2008-10-03  4:41               ` Mathieu Desnoyers
  2008-10-03  5:02                 ` Paul E. McKenney
  2008-10-03  8:39               ` Ingo Molnar
  1 sibling, 1 reply; 17+ messages in thread
From: Mathieu Desnoyers @ 2008-10-03  4:41 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Ingo Molnar, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht, ltt-dev

* Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> Hello!
> 
> This patch adds stalled-CPU detection to Classic RCU.  This capability
> is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
> defaults disabled.  This is a debugging feature to detect infinite loops
> in kernel code, not something that non-kernel-hackers would be expected
> to care about.  This feature can detect looping CPUs in !PREEMPT builds
> and looping CPUs with preemption disabled in PREEMPT builds.  This is
> essentially a port of this functionality from the treercu patch, replacing
> the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).
> 
> The changes from the patch in tip/core/rcu include making the config
> variable name match that in treercu, changing from seconds to jiffies to
> avoid spurious warnings, and printing a boot message when this feature
> is enabled.
> 

Hi Paul,

Thanks for the previous explanations. Out of curiosity, what can this
patch do that the nmi watchdog can't do ?

Mathieu

> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
> 
>  include/linux/rcuclassic.h |   12 ++-
>  kernel/rcuclassic.c        |  166 +++++++++++++++++++++++----------------------
>  lib/Kconfig.debug          |    2 
>  3 files changed, 96 insertions(+), 84 deletions(-)
> 
> diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> index 29bf528..2d72d20 100644
> --- a/include/linux/rcuclassic.h
> +++ b/include/linux/rcuclassic.h
> @@ -40,15 +40,21 @@
>  #include <linux/cpumask.h>
>  #include <linux/seqlock.h>
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  
>  /* Global control variables for rcupdate callback mechanism. */
>  struct rcu_ctrlblk {
>  	long	cur;		/* Current batch number.                      */
>  	long	completed;	/* Number of the last completed batch         */
>  	long	pending;	/* Number of the last pending batch           */
> -#ifdef CONFIG_DEBUG_RCU_STALL
> -	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
> -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +	unsigned long gp_start;	/* Time at which GP started in jiffies. */
> +	unsigned long jiffies_stall;
> +				/* Time at which to check for CPU stalls. */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  
>  	int	signaled;
>  
> diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> index ed15128..eae2fb6 100644
> --- a/kernel/rcuclassic.c
> +++ b/kernel/rcuclassic.c
> @@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
>  	}
>  }
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +	rcp->gp_start = jiffies;
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> +}
> +
> +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	int cpu;
> +	long delta;
> +	unsigned long flags;
> +
> +	/* Only let one CPU complain about others per time interval. */
> +
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (delta < 2 || rcp->cur != rcp->completed) {
> +		spin_unlock_irqrestore(&rcp->lock, flags);
> +		return;
> +	}
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +
> +	/* OK, time to rat on our buddy... */
> +
> +	printk(KERN_ERR "RCU detected CPU stalls:");
> +	for_each_possible_cpu(cpu) {
> +		if (cpu_isset(cpu, rcp->cpumask))
> +			printk(" %d", cpu);
> +	}
> +	printk(" (detected by %d, t=%ld jiffies)\n",
> +	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
> +}
> +
> +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	unsigned long flags;
> +
> +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> +			smp_processor_id(), jiffies,
> +			jiffies - rcp->gp_start);
> +	dump_stack();
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> +		rcp->jiffies_stall =
> +			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +	set_need_resched();  /* kick ourselves to get things going. */
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	long delta;
> +
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> +		
> +		/* We haven't checked in, so go dump stack. */
> +		print_cpu_stall(rcp);
> +
> +	} else if (rcp->cur != rcp->completed && delta >= 2) {
> +
> +		/* They had two seconds to dump stack, so complain. */
> +		print_other_cpu_stall(rcp);
> +	}
> +}
> +
> +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> +{
> +}
> +
> +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
>  /**
>   * call_rcu - Queue an RCU callback for invocation after a grace period.
>   * @head: structure to be used for queueing the RCU updates.
> @@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
>   *   period (if necessary).
>   */
>  
> -#ifdef CONFIG_DEBUG_RCU_STALL
> -
> -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> -{
> -	rcp->gp_check = get_seconds() + 3;
> -}
> -
> -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> -{
> -	int cpu;
> -	long delta;
> -	unsigned long flags;
> -
> -	/* Only let one CPU complain about others per time interval. */
> -
> -	spin_lock_irqsave(&rcp->lock, flags);
> -	delta = get_seconds() - rcp->gp_check;
> -	if (delta < 2L || cpus_empty(rcp->cpumask)) {
> -		spin_unlock(&rcp->lock);
> -		return;
> -	}
> -	rcp->gp_check = get_seconds() + 30;
> -	spin_unlock_irqrestore(&rcp->lock, flags);
> -
> -	/* OK, time to rat on our buddy... */
> -
> -	printk(KERN_ERR "RCU detected CPU stalls:");
> -	for_each_cpu_mask(cpu, rcp->cpumask)
> -		printk(" %d", cpu);
> -	printk(" (detected by %d, t=%lu/%lu)\n",
> -	       smp_processor_id(), get_seconds(), rcp->gp_check);
> -}
> -
> -static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> -{
> -	unsigned long flags;
> -
> -	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
> -			smp_processor_id(), get_seconds(), rcp->gp_check);
> -	dump_stack();
> -	spin_lock_irqsave(&rcp->lock, flags);
> -	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
> -		rcp->gp_check = get_seconds() + 30;
> -	spin_unlock_irqrestore(&rcp->lock, flags);
> -}
> -
> -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> -{
> -	long delta;
> -
> -	delta = get_seconds() - rcp->gp_check;
> -	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
> -
> -		/* We haven't checked in, so go dump stack. */
> -
> -		print_cpu_stall(rcp);
> -
> -	} else {
> -		if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
> -			/* They had two seconds to dump stack, so complain. */
> -			print_other_cpu_stall(rcp);
> -		}
> -	}
> -}
> -
> -#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
> -
> -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> -{
> -}
> -
> -static inline void
> -check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> -{
> -}
> -
> -#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
> -
>  /*
>   * Register a new batch of callbacks, and start it up if there is currently no
>   * active batch and the batch to be registered has not already occurred.
> @@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
>  	if (rcp->cur != rcp->pending &&
>  			rcp->completed == rcp->cur) {
>  		rcp->cur++;
> -		record_gp_check_time(rcp);
> +		record_gp_stall_check_time(rcp);
>  
>  		/*
>  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> @@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
>  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
>  {
>  	/* Check for CPU stalls, if enabled. */
> -	check_cpu_stall(rcp, rdp);
> +	check_cpu_stall(rcp);
>  
>  	if (rdp->nxtlist) {
>  		long completed_snap = ACCESS_ONCE(rcp->completed);
> @@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
>   */
>  void __init __rcu_init(void)
>  {
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
>  			(void *)(long)smp_processor_id());
>  	/* Register notifier for non-boot CPUs */
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 4e921a8..e0e0582 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
>  	  Say N here if you want the RCU torture tests to start only
>  	  after being manually enabled via /proc.
>  
> -config RCU_CPU_STALL
> +config RCU_CPU_STALL_DETECTOR
>  	bool "Check for stalled CPUs delaying RCU grace periods"
>  	depends on CLASSIC_RCU
>  	default n

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-03  4:41               ` Mathieu Desnoyers
@ 2008-10-03  5:02                 ` Paul E. McKenney
  0 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-03  5:02 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: linux-kernel, Ingo Molnar, akpm, laijs, manfred, dipankar, niv,
	dvhltc, josht, ltt-dev

On Fri, Oct 03, 2008 at 12:41:27AM -0400, Mathieu Desnoyers wrote:
> * Paul E. McKenney (paulmck@linux.vnet.ibm.com) wrote:
> > Hello!
> > 
> > This patch adds stalled-CPU detection to Classic RCU.  This capability
> > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
> > defaults disabled.  This is a debugging feature to detect infinite loops
> > in kernel code, not something that non-kernel-hackers would be expected
> > to care about.  This feature can detect looping CPUs in !PREEMPT builds
> > and looping CPUs with preemption disabled in PREEMPT builds.  This is
> > essentially a port of this functionality from the treercu patch, replacing
> > the stall debug patch that is already in tip/core/rcu (commit 67182ae1c4).
> > 
> > The changes from the patch in tip/core/rcu include making the config
> > variable name match that in treercu, changing from seconds to jiffies to
> > avoid spurious warnings, and printing a boot message when this feature
> > is enabled.
> > 
> 
> Hi Paul,
> 
> Thanks for the previous explanations. Out of curiosity, what can this
> patch do that the nmi watchdog can't do ?

Hello, Mathieu,

Operate on architectures that don't have NMIs.  Possibly allow shorter
timeouts than the NMI watchdog.  Operate on a per-CPU basis (or does
the NMI watchdog do that these days?).

							Thanx, Paul

> Mathieu
> 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > ---
> > 
> >  include/linux/rcuclassic.h |   12 ++-
> >  kernel/rcuclassic.c        |  166 +++++++++++++++++++++++----------------------
> >  lib/Kconfig.debug          |    2 
> >  3 files changed, 96 insertions(+), 84 deletions(-)
> > 
> > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> > index 29bf528..2d72d20 100644
> > --- a/include/linux/rcuclassic.h
> > +++ b/include/linux/rcuclassic.h
> > @@ -40,15 +40,21 @@
> >  #include <linux/cpumask.h>
> >  #include <linux/seqlock.h>
> >  
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> > +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >  
> >  /* Global control variables for rcupdate callback mechanism. */
> >  struct rcu_ctrlblk {
> >  	long	cur;		/* Current batch number.                      */
> >  	long	completed;	/* Number of the last completed batch         */
> >  	long	pending;	/* Number of the last pending batch           */
> > -#ifdef CONFIG_DEBUG_RCU_STALL
> > -	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
> > -#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +	unsigned long gp_start;	/* Time at which GP started in jiffies. */
> > +	unsigned long jiffies_stall;
> > +				/* Time at which to check for CPU stalls. */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >  
> >  	int	signaled;
> >  
> > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> > index ed15128..eae2fb6 100644
> > --- a/kernel/rcuclassic.c
> > +++ b/kernel/rcuclassic.c
> > @@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
> >  	}
> >  }
> >  
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > +	rcp->gp_start = jiffies;
> > +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> > +}
> > +
> > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	int cpu;
> > +	long delta;
> > +	unsigned long flags;
> > +
> > +	/* Only let one CPU complain about others per time interval. */
> > +
> > +	spin_lock_irqsave(&rcp->lock, flags);
> > +	delta = jiffies - rcp->jiffies_stall;
> > +	if (delta < 2 || rcp->cur != rcp->completed) {
> > +		spin_unlock_irqrestore(&rcp->lock, flags);
> > +		return;
> > +	}
> > +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > +	spin_unlock_irqrestore(&rcp->lock, flags);
> > +
> > +	/* OK, time to rat on our buddy... */
> > +
> > +	printk(KERN_ERR "RCU detected CPU stalls:");
> > +	for_each_possible_cpu(cpu) {
> > +		if (cpu_isset(cpu, rcp->cpumask))
> > +			printk(" %d", cpu);
> > +	}
> > +	printk(" (detected by %d, t=%ld jiffies)\n",
> > +	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
> > +}
> > +
> > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	unsigned long flags;
> > +
> > +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> > +			smp_processor_id(), jiffies,
> > +			jiffies - rcp->gp_start);
> > +	dump_stack();
> > +	spin_lock_irqsave(&rcp->lock, flags);
> > +	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> > +		rcp->jiffies_stall =
> > +			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > +	spin_unlock_irqrestore(&rcp->lock, flags);
> > +	set_need_resched();  /* kick ourselves to get things going. */
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	long delta;
> > +
> > +	delta = jiffies - rcp->jiffies_stall;
> > +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> > +		
> > +		/* We haven't checked in, so go dump stack. */
> > +		print_cpu_stall(rcp);
> > +
> > +	} else if (rcp->cur != rcp->completed && delta >= 2) {
> > +
> > +		/* They had two seconds to dump stack, so complain. */
> > +		print_other_cpu_stall(rcp);
> > +	}
> > +}
> > +
> > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > +{
> > +}
> > +
> > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> >  /**
> >   * call_rcu - Queue an RCU callback for invocation after a grace period.
> >   * @head: structure to be used for queueing the RCU updates.
> > @@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
> >   *   period (if necessary).
> >   */
> >  
> > -#ifdef CONFIG_DEBUG_RCU_STALL
> > -
> > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> > -{
> > -	rcp->gp_check = get_seconds() + 3;
> > -}
> > -
> > -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> > -{
> > -	int cpu;
> > -	long delta;
> > -	unsigned long flags;
> > -
> > -	/* Only let one CPU complain about others per time interval. */
> > -
> > -	spin_lock_irqsave(&rcp->lock, flags);
> > -	delta = get_seconds() - rcp->gp_check;
> > -	if (delta < 2L || cpus_empty(rcp->cpumask)) {
> > -		spin_unlock(&rcp->lock);
> > -		return;
> > -	}
> > -	rcp->gp_check = get_seconds() + 30;
> > -	spin_unlock_irqrestore(&rcp->lock, flags);
> > -
> > -	/* OK, time to rat on our buddy... */
> > -
> > -	printk(KERN_ERR "RCU detected CPU stalls:");
> > -	for_each_cpu_mask(cpu, rcp->cpumask)
> > -		printk(" %d", cpu);
> > -	printk(" (detected by %d, t=%lu/%lu)\n",
> > -	       smp_processor_id(), get_seconds(), rcp->gp_check);
> > -}
> > -
> > -static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> > -{
> > -	unsigned long flags;
> > -
> > -	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
> > -			smp_processor_id(), get_seconds(), rcp->gp_check);
> > -	dump_stack();
> > -	spin_lock_irqsave(&rcp->lock, flags);
> > -	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
> > -		rcp->gp_check = get_seconds() + 30;
> > -	spin_unlock_irqrestore(&rcp->lock, flags);
> > -}
> > -
> > -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > -{
> > -	long delta;
> > -
> > -	delta = get_seconds() - rcp->gp_check;
> > -	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
> > -
> > -		/* We haven't checked in, so go dump stack. */
> > -
> > -		print_cpu_stall(rcp);
> > -
> > -	} else {
> > -		if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
> > -			/* They had two seconds to dump stack, so complain. */
> > -			print_other_cpu_stall(rcp);
> > -		}
> > -	}
> > -}
> > -
> > -#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
> > -
> > -static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
> > -{
> > -}
> > -
> > -static inline void
> > -check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > -{
> > -}
> > -
> > -#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
> > -
> >  /*
> >   * Register a new batch of callbacks, and start it up if there is currently no
> >   * active batch and the batch to be registered has not already occurred.
> > @@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
> >  	if (rcp->cur != rcp->pending &&
> >  			rcp->completed == rcp->cur) {
> >  		rcp->cur++;
> > -		record_gp_check_time(rcp);
> > +		record_gp_stall_check_time(rcp);
> >  
> >  		/*
> >  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> > @@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
> >  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> >  {
> >  	/* Check for CPU stalls, if enabled. */
> > -	check_cpu_stall(rcp, rdp);
> > +	check_cpu_stall(rcp);
> >  
> >  	if (rdp->nxtlist) {
> >  		long completed_snap = ACCESS_ONCE(rcp->completed);
> > @@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
> >   */
> >  void __init __rcu_init(void)
> >  {
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >  	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
> >  			(void *)(long)smp_processor_id());
> >  	/* Register notifier for non-boot CPUs */
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 4e921a8..e0e0582 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -616,7 +616,7 @@ config RCU_TORTURE_TEST_RUNNABLE
> >  	  Say N here if you want the RCU torture tests to start only
> >  	  after being manually enabled via /proc.
> >  
> > -config RCU_CPU_STALL
> > +config RCU_CPU_STALL_DETECTOR
> >  	bool "Check for stalled CPUs delaying RCU grace periods"
> >  	depends on CLASSIC_RCU
> >  	default n
> 
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 23:06             ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
  2008-10-03  4:41               ` Mathieu Desnoyers
@ 2008-10-03  8:39               ` Ingo Molnar
  2008-10-03  8:42                 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
  2008-10-03 15:13                 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
  1 sibling, 2 replies; 17+ messages in thread
From: Ingo Molnar @ 2008-10-03  8:39 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mathieu.desnoyers, akpm, laijs, manfred, dipankar,
	niv, dvhltc, josht, ltt-dev


* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> Hello!
> 
> This patch adds stalled-CPU detection to Classic RCU.  This capability 
> is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> which defaults disabled.  This is a debugging feature to detect 
> infinite loops in kernel code, not something that non-kernel-hackers 
> would be expected to care about.  This feature can detect looping CPUs 
> in !PREEMPT builds and looping CPUs with preemption disabled in 
> PREEMPT builds.  This is essentially a port of this functionality from 
> the treercu patch, replacing the stall debug patch that is already in 
> tip/core/rcu (commit 67182ae1c4).
> 
> The changes from the patch in tip/core/rcu include making the config 
> variable name match that in treercu, changing from seconds to jiffies 
> to avoid spurious warnings, and printing a boot message when this 
> feature is enabled.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

applied to tip/core/rcu, thanks Paul!

i tidied up two tiny details noticed by scripts/checkpatch.pl:

> +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */

adding paranthesis around the expression.

> +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> +		

and have removed the two tailing tabs from this line.

	Ingo

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix
  2008-10-03  8:39               ` Ingo Molnar
@ 2008-10-03  8:42                 ` Ingo Molnar
  2008-10-03 15:14                   ` Paul E. McKenney
  2008-10-03 15:13                 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
  1 sibling, 1 reply; 17+ messages in thread
From: Ingo Molnar @ 2008-10-03  8:42 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, mathieu.desnoyers, akpm, laijs, manfred, dipankar,
	niv, dvhltc, josht, ltt-dev


-tip testing found a minor build bug - find the fix below.

	Ingo

-------------------->
>From 2ec2b482b10a1ed3493c224f1893cddd3d33833b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Oct 2008 10:41:00 +0200
Subject: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix

fix the !CONFIG_RCU_CPU_STALL_DETECTOR path:

 kernel/rcuclassic.c: In function '__rcu_pending':
 kernel/rcuclassic.c:609: error: too few arguments to function 'check_cpu_stall'

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/rcuclassic.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 0d07e6e..37f72e5 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -239,7 +239,7 @@ static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
 {
 }
 
-static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
 {
 }
 

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-02 17:39     ` [PATCH,RFC] " Paul E. McKenney
@ 2008-10-03 10:12       ` Lai Jiangshan
  2008-10-03 15:12         ` Paul E. McKenney
  0 siblings, 1 reply; 17+ messages in thread
From: Lai Jiangshan @ 2008-10-03 10:12 UTC (permalink / raw)
  To: paulmck
  Cc: Ingo Molnar, linux-kernel, akpm, mathieu.desnoyers, manfred,
	dipankar, niv, dvhltc, josht

I found a magic number in it.

Paul E. McKenney wrote:
> Hello again!
> 
> This patch adds stalled-CPU detection to Classic RCU.  This capability 
> is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> which defaults disabled.  This is a debugging feature, not something 
> that non-kernel-hackers would be expected to care about.  This feature 
> can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> preemption disabled in PREEMPT builds.  This is essentially a port of 
> this functionality from the treercu patch.
> 
> This version uses jiffies rather than get_seconds(), which eliminates
> the spurious boot-time CPU stall warnings seen on some systems with
> the previous patch.
> 
> This is still against v2.6.27-rc8 -- I will do a version against tip
> this evening (Pacific Time) when I get back to the combination of better
> bandwidth and AC power.
> 
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> ---
> 
>  include/linux/rcuclassic.h |    9 ++++
>  kernel/rcuclassic.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/Kconfig.debug          |   13 ++++++
>  3 files changed, 110 insertions(+)
> 
> diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> index 4ab8436..cab055b 100644
> --- a/include/linux/rcuclassic.h
> +++ b/include/linux/rcuclassic.h
> @@ -40,6 +40,10 @@
>  #include <linux/cpumask.h>
>  #include <linux/seqlock.h>
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  
>  /* Global control variables for rcupdate callback mechanism. */
>  struct rcu_ctrlblk {
> @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
>  	spinlock_t	lock	____cacheline_internodealigned_in_smp;
>  	cpumask_t	cpumask; /* CPUs that need to switch in order    */
>  				 /* for current batch to proceed.        */
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +	unsigned long gp_start;	 /* Time at which GP started in jiffies. */
> +	unsigned long jiffies_stall;
> +				 /* Time at which to check for CPU stalls. */
> +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
>  } ____cacheline_internodealigned_in_smp;
>  
>  /* Is batch a before batch b ? */
> diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> index aad93cd..a299876 100644
> --- a/kernel/rcuclassic.c
> +++ b/kernel/rcuclassic.c
> @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
>  }
>  #endif
>  
> +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +	rcp->gp_start = jiffies;
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> +}
> +
> +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	int cpu;
> +	long delta;
> +	unsigned long flags;
> +
> +	/* Only let one CPU complain about others per time interval. */
> +
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (delta < 2 || rcp->cur != rcp->completed) {

Is it (2 * HZ)?
should it be defined as macro?

> +		spin_unlock_irqrestore(&rcp->lock, flags);
> +		return;
> +	}
> +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +
> +	/* OK, time to rat on our buddy... */
> +
> +	printk(KERN_ERR "RCU detected CPU stalls:");
> +	for_each_possible_cpu(cpu) {
> +		if (cpu_isset(cpu, rcp->cpumask))
> +			printk(" %d", cpu);
> +	}
> +	printk(" (detected by %d, t=%ld jiffies)\n",
> +	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
> +}
> +
> +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	unsigned long flags;
> +
> +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> +			smp_processor_id(), jiffies,
> +			jiffies - rcp->gp_start);
> +	dump_stack();
> +	spin_lock_irqsave(&rcp->lock, flags);
> +	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> +		rcp->jiffies_stall =
> +			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> +	spin_unlock_irqrestore(&rcp->lock, flags);
> +	set_need_resched();  /* kick ourselves to get things going. */
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> +{
> +	long delta;
> +
> +	delta = jiffies - rcp->jiffies_stall;
> +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> +		
> +		/* We haven't checked in, so go dump stack. */
> +		print_cpu_stall(rcp);
> +
> +	} else if (rcp->cur != rcp->completed && delta >= 2) {
> +
> +		/* They had two seconds to dump stack, so complain. */

appear here again! and it's inconsistent with comment.

> +		print_other_cpu_stall(rcp);
> +	}
> +}
> +
> +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
> +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> +{
> +}
> +
> +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> +{
> +}
> +
> +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> +
>  /**
>   * call_rcu - Queue an RCU callback for invocation after a grace period.
>   * @head: structure to be used for queueing the RCU updates.
> @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
>  		 */
>  		smp_wmb();
>  		rcp->cur++;
> +		record_gp_stall_check_time(rcp);
>  
>  		/*
>  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
>  
>  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
>  {
> +	/* Check for CPU stalls, if enabled. */
> +	check_cpu_stall(rcp);
> +
>  	/* This cpu has pending rcu entries and the grace period
>  	 * for them has completed.
>  	 */
> @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
>  static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
>  						struct rcu_data *rdp)
>  {
> +#ifdef CONFIG_DEBUG_RCU_STALL
> +	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
>  	memset(rdp, 0, sizeof(*rdp));
>  	rdp->curtail = &rdp->curlist;
>  	rdp->nxttail = &rdp->nxtlist;
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 0b50481..9fee969 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
>  	  Say N here if you want the RCU torture tests to start only
>  	  after being manually enabled via /proc.
>  
> +config RCU_CPU_STALL_DETECTOR
> +	bool "Check for stalled CPUs delaying RCU grace periods"
> +	depends on CLASSIC_RCU
> +	default n
> +	help
> +	  This option causes RCU to printk information on which
> +	  CPUs are delaying the current grace period, but only when
> +	  the grace period extends for excessive time periods.
> +
> +	  Say Y if you want RCU to perform such checks.
> +
> +	  Say N if you are unsure.
> +
>  config KPROBES_SANITY_TEST
>  	bool "Kprobes sanity tests"
>  	depends on DEBUG_KERNEL
> 
> 
> 



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-03 10:12       ` Lai Jiangshan
@ 2008-10-03 15:12         ` Paul E. McKenney
  0 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-03 15:12 UTC (permalink / raw)
  To: Lai Jiangshan
  Cc: Ingo Molnar, linux-kernel, akpm, mathieu.desnoyers, manfred,
	dipankar, niv, dvhltc, josht

On Fri, Oct 03, 2008 at 06:12:28PM +0800, Lai Jiangshan wrote:
> I found a magic number in it.

Good catch, Jiangshan!!!  See below for responses, and please see the
end of this email for an untested patch.

> Paul E. McKenney wrote:
> > Hello again!
> > 
> > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > which defaults disabled.  This is a debugging feature, not something 
> > that non-kernel-hackers would be expected to care about.  This feature 
> > can detect looping CPUs in !PREEMPT builds and looping CPUs with 
> > preemption disabled in PREEMPT builds.  This is essentially a port of 
> > this functionality from the treercu patch.
> > 
> > This version uses jiffies rather than get_seconds(), which eliminates
> > the spurious boot-time CPU stall warnings seen on some systems with
> > the previous patch.
> > 
> > This is still against v2.6.27-rc8 -- I will do a version against tip
> > this evening (Pacific Time) when I get back to the combination of better
> > bandwidth and AC power.
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > ---
> > 
> >  include/linux/rcuclassic.h |    9 ++++
> >  kernel/rcuclassic.c        |   88 +++++++++++++++++++++++++++++++++++++++++++++
> >  lib/Kconfig.debug          |   13 ++++++
> >  3 files changed, 110 insertions(+)
> > 
> > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> > index 4ab8436..cab055b 100644
> > --- a/include/linux/rcuclassic.h
> > +++ b/include/linux/rcuclassic.h
> > @@ -40,6 +40,10 @@
> >  #include <linux/cpumask.h>
> >  #include <linux/seqlock.h>
> >  
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> > +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >  
> >  /* Global control variables for rcupdate callback mechanism. */
> >  struct rcu_ctrlblk {
> > @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
> >  	spinlock_t	lock	____cacheline_internodealigned_in_smp;
> >  	cpumask_t	cpumask; /* CPUs that need to switch in order    */
> >  				 /* for current batch to proceed.        */
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +	unsigned long gp_start;	 /* Time at which GP started in jiffies. */
> > +	unsigned long jiffies_stall;
> > +				 /* Time at which to check for CPU stalls. */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >  } ____cacheline_internodealigned_in_smp;
> >  
> >  /* Is batch a before batch b ? */
> > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> > index aad93cd..a299876 100644
> > --- a/kernel/rcuclassic.c
> > +++ b/kernel/rcuclassic.c
> > @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
> >  }
> >  #endif
> >  
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > +	rcp->gp_start = jiffies;
> > +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> > +}
> > +
> > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	int cpu;
> > +	long delta;
> > +	unsigned long flags;
> > +
> > +	/* Only let one CPU complain about others per time interval. */
> > +
> > +	spin_lock_irqsave(&rcp->lock, flags);
> > +	delta = jiffies - rcp->jiffies_stall;
> > +	if (delta < 2 || rcp->cur != rcp->completed) {
> 
> Is it (2 * HZ)?
> should it be defined as macro?

It should be "2", though a macro might be good.  The reason for "2" is
that it guarantees that the other CPU had a full period to respond, so
it would be "2" regardless of the time units.

> > +		spin_unlock_irqrestore(&rcp->lock, flags);
> > +		return;
> > +	}
> > +	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > +	spin_unlock_irqrestore(&rcp->lock, flags);
> > +
> > +	/* OK, time to rat on our buddy... */
> > +
> > +	printk(KERN_ERR "RCU detected CPU stalls:");
> > +	for_each_possible_cpu(cpu) {
> > +		if (cpu_isset(cpu, rcp->cpumask))
> > +			printk(" %d", cpu);
> > +	}
> > +	printk(" (detected by %d, t=%ld jiffies)\n",
> > +	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
> > +}
> > +
> > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	unsigned long flags;
> > +
> > +	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> > +			smp_processor_id(), jiffies,
> > +			jiffies - rcp->gp_start);
> > +	dump_stack();
> > +	spin_lock_irqsave(&rcp->lock, flags);
> > +	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> > +		rcp->jiffies_stall =
> > +			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > +	spin_unlock_irqrestore(&rcp->lock, flags);
> > +	set_need_resched();  /* kick ourselves to get things going. */
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > +	long delta;
> > +
> > +	delta = jiffies - rcp->jiffies_stall;
> > +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> > +		
> > +		/* We haven't checked in, so go dump stack. */
> > +		print_cpu_stall(rcp);
> > +
> > +	} else if (rcp->cur != rcp->completed && delta >= 2) {
> > +
> > +		/* They had two seconds to dump stack, so complain. */
> 
> appear here again! and it's inconsistent with comment.

Indeed!  The comment is wrong.

> > +		print_other_cpu_stall(rcp);
> > +	}
> > +}
> > +
> > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > +{
> > +}
> > +
> > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> >  /**
> >   * call_rcu - Queue an RCU callback for invocation after a grace period.
> >   * @head: structure to be used for queueing the RCU updates.
> > @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
> >  		 */
> >  		smp_wmb();
> >  		rcp->cur++;
> > +		record_gp_stall_check_time(rcp);
> >  
> >  		/*
> >  		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> > @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
> >  
> >  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> >  {
> > +	/* Check for CPU stalls, if enabled. */
> > +	check_cpu_stall(rcp);
> > +
> >  	/* This cpu has pending rcu entries and the grace period
> >  	 * for them has completed.
> >  	 */
> > @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
> >  static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
> >  						struct rcu_data *rdp)
> >  {
> > +#ifdef CONFIG_DEBUG_RCU_STALL
> > +	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> > +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
> >  	memset(rdp, 0, sizeof(*rdp));
> >  	rdp->curtail = &rdp->curlist;
> >  	rdp->nxttail = &rdp->nxtlist;
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 0b50481..9fee969 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
> >  	  Say N here if you want the RCU torture tests to start only
> >  	  after being manually enabled via /proc.
> >  
> > +config RCU_CPU_STALL_DETECTOR
> > +	bool "Check for stalled CPUs delaying RCU grace periods"
> > +	depends on CLASSIC_RCU
> > +	default n
> > +	help
> > +	  This option causes RCU to printk information on which
> > +	  CPUs are delaying the current grace period, but only when
> > +	  the grace period extends for excessive time periods.
> > +
> > +	  Say Y if you want RCU to perform such checks.
> > +
> > +	  Say N if you are unsure.
> > +
> >  config KPROBES_SANITY_TEST
> >  	bool "Kprobes sanity tests"
> >  	depends on DEBUG_KERNEL

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/rcuclassic.h |    4 ++++
 kernel/rcuclassic.c        |    6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 8388647..69e1929 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -43,6 +43,10 @@
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 #define RCU_SECONDS_TILL_STALL_CHECK	(3 * HZ)  /* for rcp->jiffies_stall */
 #define RCU_SECONDS_TILL_STALL_RECHECK	(30 * HZ) /* for rcp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY		2	  /* Allow other CPUs time */
+						  /*  to take at least one */
+						  /*  scheduling clock irq */
+						  /*  before ratting on them. */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /* Global control variables for rcupdate callback mechanism. */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 0ce0802..8c2d15d 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -136,7 +136,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
 
 	spin_lock_irqsave(&rcp->lock, flags);
 	delta = jiffies - rcp->seconds_stall;
-	if (delta < 2 || rcp->cur != rcp->completed) {
+	if (delta < RCU_STALL_RAT_DELAY || rcp->cur != rcp->completed) {
 		spin_unlock_irqrestore(&rcp->lock, flags);
 		return;
 	}
@@ -180,9 +180,9 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(rcp);
 
-	} else if (rcp->cur != rcp->completed && delta >= 2) {
+	} else if (rcp->cur != rcp->completed && delta >= RCU_STALL_RAT_DELAY) {
 
-		/* They had two seconds to dump stack, so complain. */
+		/* They had two time units to dump stack, so complain. */
 		print_other_cpu_stall(rcp);
 	}
 }

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU
  2008-10-03  8:39               ` Ingo Molnar
  2008-10-03  8:42                 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
@ 2008-10-03 15:13                 ` Paul E. McKenney
  1 sibling, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-03 15:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, mathieu.desnoyers, akpm, laijs, manfred, dipankar,
	niv, dvhltc, josht, ltt-dev

On Fri, Oct 03, 2008 at 10:39:47AM +0200, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:
> 
> > Hello!
> > 
> > This patch adds stalled-CPU detection to Classic RCU.  This capability 
> > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, 
> > which defaults disabled.  This is a debugging feature to detect 
> > infinite loops in kernel code, not something that non-kernel-hackers 
> > would be expected to care about.  This feature can detect looping CPUs 
> > in !PREEMPT builds and looping CPUs with preemption disabled in 
> > PREEMPT builds.  This is essentially a port of this functionality from 
> > the treercu patch, replacing the stall debug patch that is already in 
> > tip/core/rcu (commit 67182ae1c4).
> > 
> > The changes from the patch in tip/core/rcu include making the config 
> > variable name match that in treercu, changing from seconds to jiffies 
> > to avoid spurious warnings, and printing a boot message when this 
> > feature is enabled.
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> 
> applied to tip/core/rcu, thanks Paul!
> 
> i tidied up two tiny details noticed by scripts/checkpatch.pl:
> 
> > +#define RCU_SECONDS_TILL_STALL_CHECK	3 * HZ	/* for rcp->jiffies_stall */
> > +#define RCU_SECONDS_TILL_STALL_RECHECK	30 * HZ	/* for rcp->jiffies_stall */
> 
> adding paranthesis around the expression.
> 
> > +	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> > +		
> 
> and have removed the two tailing tabs from this line.

<red face>  checkpatch.pl...  I have heard of it!  :-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix
  2008-10-03  8:42                 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
@ 2008-10-03 15:14                   ` Paul E. McKenney
  0 siblings, 0 replies; 17+ messages in thread
From: Paul E. McKenney @ 2008-10-03 15:14 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, mathieu.desnoyers, akpm, laijs, manfred, dipankar,
	niv, dvhltc, josht, ltt-dev

On Fri, Oct 03, 2008 at 10:42:33AM +0200, Ingo Molnar wrote:
> 
> -tip testing found a minor build bug - find the fix below.
> 
> 	Ingo
> 
> -------------------->
> >From 2ec2b482b10a1ed3493c224f1893cddd3d33833b Mon Sep 17 00:00:00 2001
> From: Ingo Molnar <mingo@elte.hu>
> Date: Fri, 3 Oct 2008 10:41:00 +0200
> Subject: [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix
> 
> fix the !CONFIG_RCU_CPU_STALL_DETECTOR path:
> 
>  kernel/rcuclassic.c: In function '__rcu_pending':
>  kernel/rcuclassic.c:609: error: too few arguments to function 'check_cpu_stall'
> 
> Signed-off-by: Ingo Molnar <mingo@elte.hu>
> ---
>  kernel/rcuclassic.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> index 0d07e6e..37f72e5 100644
> --- a/kernel/rcuclassic.c
> +++ b/kernel/rcuclassic.c
> @@ -239,7 +239,7 @@ static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
>  {
>  }
> 
> -static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
>  {
>  }

I should have learned by now to test with all relevant config parameter
combinations...  My apologies for the hassle!!!

							Thanx, Paul

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-10-03 15:14 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-02  0:36 [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02  8:07 ` Ingo Molnar
2008-10-02 14:05   ` Paul E. McKenney
2008-10-02 15:10     ` Mathieu Desnoyers
2008-10-02 17:43       ` Paul E. McKenney
2008-10-02 18:51         ` [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU (LTTng support) Mathieu Desnoyers
2008-10-02 22:55           ` Paul E. McKenney
2008-10-02 23:06             ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-03  4:41               ` Mathieu Desnoyers
2008-10-03  5:02                 ` Paul E. McKenney
2008-10-03  8:39               ` Ingo Molnar
2008-10-03  8:42                 ` [PATCH] rcu: RCU-based detection of stalled CPUs for Classic RCU, fix Ingo Molnar
2008-10-03 15:14                   ` Paul E. McKenney
2008-10-03 15:13                 ` [PATCH tip/master] RCU-based detection of stalled CPUs for Classic RCU Paul E. McKenney
2008-10-02 17:39     ` [PATCH,RFC] " Paul E. McKenney
2008-10-03 10:12       ` Lai Jiangshan
2008-10-03 15:12         ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox