From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
To: Peter Hurley <peter@hurleysoftware.com>
Cc: Byungchul Park <byungchul.park@lge.com>,
akpm@linux-foundation.org, mingo@kernel.org,
linux-kernel@vger.kernel.org, akinobu.mita@gmail.com,
jack@suse.cz, torvalds@linux-foundation.org,
Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>,
Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Subject: Re: [PATCH 3/3] spinlock_debug: panic on recursive lock spin_dump()
Date: Tue, 2 Feb 2016 16:59:40 +0900 [thread overview]
Message-ID: <20160202075940.GA499@swordfish> (raw)
In-Reply-To: <20160201161458.GA609@swordfish>
On (02/02/16 01:14), Sergey Senozhatsky wrote:
> how about splitting ->owner_cpu 4 bytes as:
>
> | |
> 1 byte spin bug recursion | 1 byte spin_dump recursion counter | 2 bytes owner cpu
> | |
after some thinking... no, this will not do the trick. one byte is not
enough for recursion counter -- we can have 8K CPUs on the system and
8K-1 cpus can "suspect a lockup". so, a slightly different approach:
1) split ->owner_cpu 4 bytes in struct raw_spinlock
unsigned short owner_cpu;
unsigned short recursion;
I still can use only ->owner_cpu, but it's much easier when they are
apart. with a single 4 byte variable for recursion and cpu owner we
need to take extra care of higher 2 bytes every time we touch the
->owner_cpu
CPU1 CPU2
spin_dump
->owner_cpu[recursion_bits] += 1 spin_unlock
->owner_cpu = -1
^^^ need to store cpu_id in
lower 2 bytes, avoiding
overwrite of 2 higher bytes, etc.
->owner_cpu[recursion_bits] -= 1
which is fragile and ugly.
2) ->recursion has most significant bit for spin_bug() bit, the
remaining bits are for recursion counter.
spin_bug() does
set SPIN_BUG bit (most significant bit)
spin_dump
clear SPIN_BUG bit
spin_dump() does
read SPIN_BUG bit
inc ->recursion
do_checks
printk...
dec ->recursion
and the do_checks is:
-- "if the SPIN_BUG bit is set AND recursion counter > NR_CPUS"
then we have a spin_bug() recursion on at least one of the CPUs
and we need to panic the system
printk
spin_lock
spin_bug
spin_dump
printk
spin_lock
spin_bug
spin_dump
...
-- "if the SPIN_BUG bit is clear AND recursion counter >= SHRT_MAX/2"
then we have spin_dump() recursion (16K calls.. can be bigger) and
we need to panic the system. if recursion counter < SHRT_MAX/2 - keep
going. "suspected soft lockup" potentially can be resolved (the lock
owner unlocks the lock), so we need to have a big enough limit before
we declare panic().
printk
spin_lock
spin_dump
printk
spin_lock
spin_dump
...
I guess I'll I'll start a new thread with the next submission, to
refresh it.
RFC, any opinions are appreciated.
not yet tested code.
---
include/linux/spinlock_types.h | 4 +++-
kernel/locking/spinlock_debug.c | 40 +++++++++++++++++++++++++++++++++++++---
2 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb..c8f6b56 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -23,7 +23,9 @@ typedef struct raw_spinlock {
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_cpu;
+ unsigned int magic;
+ unsigned short owner_cpu;
+ unsigned short recursion;
void *owner;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a59..f838fe9 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -13,6 +13,8 @@
#include <linux/delay.h>
#include <linux/export.h>
+#define SPIN_BUG_RECURSION (1 << 15)
+
void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
{
@@ -26,7 +28,8 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
lock->magic = SPINLOCK_MAGIC;
lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ lock->owner_cpu = USHRT_MAX;
+ lock->recursion = 0;
}
EXPORT_SYMBOL(__raw_spin_lock_init);
@@ -49,9 +52,31 @@ void __rwlock_init(rwlock_t *lock, const char *name,
EXPORT_SYMBOL(__rwlock_init);
+static void spin_recursion_panic(raw_spinlock_t *lock, const char *msg)
+{
+ panic("lock: %pS %s recursion on CPU#%d, %s/%d\n",
+ lock, msg, raw_smp_processor_id(),
+ current->comm, task_pid_nr(current));
+}
+
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
struct task_struct *owner = NULL;
+ unsigned short dump_counter;
+ bool spin_bug;
+
+ spin_bug = lock->recursion & SPIN_BUG_RECURSION;
+ dump_counter = lock->recursion & SHRT_MAX;
+ smp_rmb();
+
+ smp_wmb();
+ lock->recursion += 1;
+ dump_counter++;
+
+ if (spin_bug && dump_counter > NR_CPUS) /* num_online_cpus() */
+ spin_recursion_panic(lock, "spin_bug()");
+ if (dump_counter >= (SHRT_MAX >> 1))
+ spin_recursion_panic(lock, "spin_dump()");
if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
owner = lock->owner;
@@ -63,8 +88,11 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg)
lock, lock->magic,
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
- lock->owner_cpu);
+ lock->owner_cpu == USHRT_MAX ? -1 : lock->owner_cpu);
dump_stack();
+
+ smp_wmb();
+ lock->recursion -= 1;
}
static void spin_bug(raw_spinlock_t *lock, const char *msg)
@@ -72,7 +100,13 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
if (!debug_locks_off())
return;
+ smp_wmb();
+ lock->recursion |= SPIN_BUG_RECURSION;
+
spin_dump(lock, msg);
+
+ smp_wmb();
+ lock->recursion &= ~SPIN_BUG_RECURSION;
}
#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
@@ -100,7 +134,7 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ lock->owner_cpu = USHRT_MAX;
}
static void __spin_lock_debug(raw_spinlock_t *lock)
--
2.7.0
next prev parent reply other threads:[~2016-02-02 7:58 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-01-27 12:01 [PATCH v4] lib/spinlock_debug.c: prevent a recursive cycle in the debug code Byungchul Park
2016-01-27 22:49 ` Peter Hurley
2016-01-28 7:15 ` Byungchul Park
2016-01-29 8:19 ` Byungchul Park
2016-01-28 1:42 ` Byungchul Park
2016-01-28 2:37 ` Sergey Senozhatsky
2016-01-28 4:36 ` byungchul.park
2016-01-28 6:05 ` Sergey Senozhatsky
2016-01-28 8:13 ` Byungchul Park
2016-01-28 10:41 ` Sergey Senozhatsky
2016-01-28 10:53 ` Sergey Senozhatsky
2016-01-28 15:42 ` Sergey Senozhatsky
2016-01-28 23:08 ` Peter Hurley
2016-01-28 23:54 ` Byungchul Park
2016-01-29 0:54 ` Sergey Senozhatsky
2016-01-29 3:00 ` Byungchul Park
2016-01-29 4:05 ` Sergey Senozhatsky
2016-01-29 12:15 ` Byungchul Park
2016-01-29 0:27 ` Sergey Senozhatsky
2016-01-29 4:32 ` Peter Hurley
2016-01-29 5:28 ` Sergey Senozhatsky
2016-01-29 5:48 ` Peter Hurley
2016-01-29 6:16 ` Sergey Senozhatsky
2016-01-29 6:37 ` Sergey Senozhatsky
2016-01-31 12:30 ` Sergey Senozhatsky
2016-01-31 12:33 ` [PATCH 1/3] printk: introduce console_reset_on_panic() function Sergey Senozhatsky
2016-01-31 12:33 ` [PATCH 2/3] printk: introduce reset_console_drivers() Sergey Senozhatsky
2016-01-31 12:47 ` kbuild test robot
2016-01-31 12:33 ` [PATCH 3/3] spinlock_debug: panic on recursive lock spin_dump() Sergey Senozhatsky
2016-02-01 16:14 ` Sergey Senozhatsky
2016-02-02 7:59 ` Sergey Senozhatsky [this message]
2016-01-31 12:42 ` [PATCH 1/3] printk: introduce console_reset_on_panic() function kbuild test robot
2016-01-29 6:54 ` [PATCH v4] lib/spinlock_debug.c: prevent a recursive cycle in the debug code Byungchul Park
2016-01-29 7:13 ` Sergey Senozhatsky
2016-01-29 8:13 ` Byungchul Park
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160202075940.GA499@swordfish \
--to=sergey.senozhatsky.work@gmail.com \
--cc=akinobu.mita@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=byungchul.park@lge.com \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peter@hurleysoftware.com \
--cc=sergey.senozhatsky@gmail.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).