From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
To: Peter Hurley <peter@hurleysoftware.com>
Cc: Byungchul Park <byungchul.park@lge.com>,
akpm@linux-foundation.org, mingo@kernel.org,
linux-kernel@vger.kernel.org, akinobu.mita@gmail.com,
jack@suse.cz, torvalds@linux-foundation.org,
Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>,
Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Subject: Re: [PATCH 3/3] spinlock_debug: panic on recursive lock spin_dump()
Date: Tue, 2 Feb 2016 16:59:40 +0900 [thread overview]
Message-ID: <20160202075940.GA499@swordfish> (raw)
In-Reply-To: <20160201161458.GA609@swordfish>
On (02/02/16 01:14), Sergey Senozhatsky wrote:
> how about splitting ->owner_cpu 4 bytes as:
>
> | |
> 1 byte spin bug recursion | 1 byte spin_dump recursion counter | 2 bytes owner cpu
> | |
after some thinking... no, this will not do the trick. one byte is not
enough for recursion counter -- we can have 8K CPUs on the system and
8K-1 cpus can "suspect a lockup". so, a slightly different approach:
1) split ->owner_cpu 4 bytes in struct raw_spinlock
unsigned short owner_cpu;
unsigned short recursion;
I still can use only ->owner_cpu, but it's much easier when they are
apart. with a single 4 byte variable for recursion and cpu owner we
need to take extra care of higher 2 bytes every time we touch the
->owner_cpu
CPU1 CPU2
spin_dump
->owner_cpu[recursion_bits] += 1 spin_unlock
->owner_cpu = -1
^^^ need to store cpu_id in
lower 2 bytes, avoiding
overwrite of 2 higher bytes, etc.
->owner_cpu[recursion_bits] -= 1
which is fragile and ugly.
2) ->recursion has most significant bit for spin_bug() bit, the
remaining bits are for recursion counter.
spin_bug() does
set SPIN_BUG bit (most significant bit)
spin_dump
clear SPIN_BUG bit
spin_dump() does
read SPIN_BUG bit
inc ->recursion
do_checks
printk...
dec ->recursion
and the do_checks is:
-- "if the SPIN_BUG bit is set AND recursion counter > NR_CPUS"
then we have a spin_bug() recursion on at least one of the CPUs
and we need to panic the system
printk
spin_lock
spin_bug
spin_dump
printk
spin_lock
spin_bug
spin_dump
...
-- "if the SPIN_BUG bit is clear AND recursion counter >= SHRT_MAX/2"
then we have spin_dump() recursion (16K calls.. can be bigger) and
we need to panic the system. if recursion counter < SHRT_MAX/2 - keep
going. "suspected soft lockup" potentially can be resolved (the lock
owner unlocks the lock), so we need to have a big enough limit before
we declare panic().
printk
spin_lock
spin_dump
printk
spin_lock
spin_dump
...
I guess I'll I'll start a new thread with the next submission, to
refresh it.
RFC, any opinions are appreciated.
not yet tested code.
---
include/linux/spinlock_types.h | 4 +++-
kernel/locking/spinlock_debug.c | 40 +++++++++++++++++++++++++++++++++++++---
2 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb..c8f6b56 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -23,7 +23,9 @@ typedef struct raw_spinlock {
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_cpu;
+ unsigned int magic;
+ unsigned short owner_cpu;
+ unsigned short recursion;
void *owner;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a59..f838fe9 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -13,6 +13,8 @@
#include <linux/delay.h>
#include <linux/export.h>
+#define SPIN_BUG_RECURSION (1 << 15)
+
void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
{
@@ -26,7 +28,8 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
lock->magic = SPINLOCK_MAGIC;
lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ lock->owner_cpu = USHRT_MAX;
+ lock->recursion = 0;
}
EXPORT_SYMBOL(__raw_spin_lock_init);
@@ -49,9 +52,31 @@ void __rwlock_init(rwlock_t *lock, const char *name,
EXPORT_SYMBOL(__rwlock_init);
+static void spin_recursion_panic(raw_spinlock_t *lock, const char *msg)
+{
+ panic("lock: %pS %s recursion on CPU#%d, %s/%d\n",
+ lock, msg, raw_smp_processor_id(),
+ current->comm, task_pid_nr(current));
+}
+
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
struct task_struct *owner = NULL;
+ unsigned short dump_counter;
+ bool spin_bug;
+
+ spin_bug = lock->recursion & SPIN_BUG_RECURSION;
+ dump_counter = lock->recursion & SHRT_MAX;
+ smp_rmb();
+
+ smp_wmb();
+ lock->recursion += 1;
+ dump_counter++;
+
+ if (spin_bug && dump_counter > NR_CPUS) /* num_online_cpus() */
+ spin_recursion_panic(lock, "spin_bug()");
+ if (dump_counter >= (SHRT_MAX >> 1))
+ spin_recursion_panic(lock, "spin_dump()");
if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
owner = lock->owner;
@@ -63,8 +88,11 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg)
lock, lock->magic,
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
- lock->owner_cpu);
+ lock->owner_cpu == USHRT_MAX ? -1 : lock->owner_cpu);
dump_stack();
+
+ smp_wmb();
+ lock->recursion -= 1;
}
static void spin_bug(raw_spinlock_t *lock, const char *msg)
@@ -72,7 +100,13 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
if (!debug_locks_off())
return;
+ smp_wmb();
+ lock->recursion |= SPIN_BUG_RECURSION;
+
spin_dump(lock, msg);
+
+ smp_wmb();
+ lock->recursion &= ~SPIN_BUG_RECURSION;
}
#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
@@ -100,7 +134,7 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ lock->owner_cpu = USHRT_MAX;
}
static void __spin_lock_debug(raw_spinlock_t *lock)
--
2.7.0
next prev parent reply other threads:[~2016-02-02 7:58 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-27 12:01 [PATCH v4] lib/spinlock_debug.c: prevent a recursive cycle in the debug code Byungchul Park
2016-01-27 22:49 ` Peter Hurley
2016-01-28 7:15 ` Byungchul Park
2016-01-29 8:19 ` Byungchul Park
2016-01-28 1:42 ` Byungchul Park
2016-01-28 2:37 ` Sergey Senozhatsky
2016-01-28 4:36 ` byungchul.park
2016-01-28 6:05 ` Sergey Senozhatsky
2016-01-28 8:13 ` Byungchul Park
2016-01-28 10:41 ` Sergey Senozhatsky
2016-01-28 10:53 ` Sergey Senozhatsky
2016-01-28 15:42 ` Sergey Senozhatsky
2016-01-28 23:08 ` Peter Hurley
2016-01-28 23:54 ` Byungchul Park
2016-01-29 0:54 ` Sergey Senozhatsky
2016-01-29 3:00 ` Byungchul Park
2016-01-29 4:05 ` Sergey Senozhatsky
2016-01-29 12:15 ` Byungchul Park
2016-01-29 0:27 ` Sergey Senozhatsky
2016-01-29 4:32 ` Peter Hurley
2016-01-29 5:28 ` Sergey Senozhatsky
2016-01-29 5:48 ` Peter Hurley
2016-01-29 6:16 ` Sergey Senozhatsky
2016-01-29 6:37 ` Sergey Senozhatsky
2016-01-31 12:30 ` Sergey Senozhatsky
2016-01-31 12:33 ` [PATCH 1/3] printk: introduce console_reset_on_panic() function Sergey Senozhatsky
2016-01-31 12:33 ` [PATCH 2/3] printk: introduce reset_console_drivers() Sergey Senozhatsky
2016-01-31 12:47 ` kbuild test robot
2016-01-31 12:33 ` [PATCH 3/3] spinlock_debug: panic on recursive lock spin_dump() Sergey Senozhatsky
2016-02-01 16:14 ` Sergey Senozhatsky
2016-02-02 7:59 ` Sergey Senozhatsky [this message]
2016-01-31 12:42 ` [PATCH 1/3] printk: introduce console_reset_on_panic() function kbuild test robot
2016-01-29 6:54 ` [PATCH v4] lib/spinlock_debug.c: prevent a recursive cycle in the debug code Byungchul Park
2016-01-29 7:13 ` Sergey Senozhatsky
2016-01-29 8:13 ` Byungchul Park
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160202075940.GA499@swordfish \
--to=sergey.senozhatsky.work@gmail.com \
--cc=akinobu.mita@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=byungchul.park@lge.com \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peter@hurleysoftware.com \
--cc=sergey.senozhatsky@gmail.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.