public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Dave Jones <davej@redhat.com>, Al Viro <viro@zeniv.linux.org.uk>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	mingo@kernel.org
Subject: Re: processes hung after sys_renameat, and 'missing' processes
Date: Mon, 04 Jun 2012 12:49:48 +0200	[thread overview]
Message-ID: <1338806988.28282.17.camel@twins> (raw)
In-Reply-To: <1338802157.28282.7.camel@twins>

On Mon, 2012-06-04 at 11:29 +0200, Peter Zijlstra wrote:
> On Sun, 2012-06-03 at 17:16 -0700, Linus Torvalds wrote:
> > On Sun, Jun 3, 2012 at 5:00 PM, Dave Jones <davej@redhat.com> wrote:
> > >
> > > sysrq-d: http://fpaste.org/ow9O/
> > 
> > Ugh. I'm adding PeterZ to the cc, just to see if he can make more sense of it.
> > 
> > Peter, is there no way to make the lock thing print not just the lock
> > class name, but also the pointer to the actual *instance* of the lock
> > held?
> 
> Sorta, we have a pointer to the struct lockdep_map inside whatever lock
> type. But we don't have the lock type so we cannot actually provide the
> pointer to the spinlock_t struct mutex etc..
> 
> > Also, it's a bit unclear to me, but I *think* that most of those users
> > don't actually "hold" the lock - they are waiting for it. Yes/no?
> 
> Yes, this is waiting to acquire, lockdep started out with 2 hooks, one
> before the actual acquire and one on release. Its done before the actual
> acquire so we can warn before we lock up in case of an actual deadlock.
> 
> >  Does
> > the lockdep information have the capability to distinguish between
> > "waiting for" vs "actually successfully owns the lock"?
> 
> Sometimes.. when build with lockstat we have enough hooks to do this. I
> suppose I could make all those hooks available for all of lockdep and
> track the per lock state more accurate in order to improve this
> printout.


Something like the compile tested only patch below should do both
things. It will however make that already long output line longer still.


[19247.688328] 1 lock held by agetty/509:
[19247.689732]  #0:  (&tty->atomic_read_lock){+.+.+.}, at: [<ffffffff813c12f0>] n_tty_read+0x5d0/0x8a0

Will become something like:

19247.688328] 1 lock on stack by agetty/509:
[19247.689732]  #0: blocked: (&tty->atomic_read_lock){+.+.+.}, instance: ffffffffffffffff, at: [<ffffffff813c12f0>] n_tty_read+0x5d0/0x8a0

where instance is clearly a saner number :-)

---
 include/linux/lockdep.h     |   44 ++++++++-------
 include/trace/events/lock.h |    3 -
 kernel/lockdep.c            |  127 ++++++++++++++++++++++++++++---------------
 3 files changed, 106 insertions(+), 68 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 00e4637..7a989a8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -211,6 +211,13 @@ struct lock_chain {
  */
 #define MAX_LOCKDEP_KEYS		((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
 
+enum held_lock_state {
+	hls_unheld = 0,
+	hls_acquiring,
+	hls_contended,
+	hls_acquired,
+};
+
 struct held_lock {
 	/*
 	 * One-way hash of the dependency chain up to this point. We
@@ -254,7 +261,10 @@ struct held_lock {
 	unsigned int read:2;        /* see lock_acquire() comment */
 	unsigned int check:2;       /* see lock_acquire() comment */
 	unsigned int hardirqs_off:1;
-	unsigned int references:11;					/* 32 bits */
+	unsigned int state:2;	    /* see held_lock_state */
+									/* 9 bit hole */
+	unsigned int references:16;
+									/* 16 bit hole */
 };
 
 /*
@@ -363,6 +373,18 @@ extern void lockdep_trace_alloc(gfp_t mask);
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
+extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
+extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
+
+#define LOCK_CONTENDED(_lock, try, lock)			\
+do {								\
+	if (!try(_lock)) {					\
+		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
+		lock(_lock);					\
+	}							\
+	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
+} while (0)
+
 #else /* !LOCKDEP */
 
 static inline void lockdep_off(void)
@@ -414,31 +436,13 @@ struct lock_class_key { };
 
 #define lockdep_recursing(tsk)			(0)
 
-#endif /* !LOCKDEP */
-
-#ifdef CONFIG_LOCK_STAT
-
-extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
-extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
-
-#define LOCK_CONTENDED(_lock, try, lock)			\
-do {								\
-	if (!try(_lock)) {					\
-		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
-		lock(_lock);					\
-	}							\
-	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
-} while (0)
-
-#else /* CONFIG_LOCK_STAT */
-
 #define lock_contended(lockdep_map, ip) do {} while (0)
 #define lock_acquired(lockdep_map, ip) do {} while (0)
 
 #define LOCK_CONTENDED(_lock, try, lock) \
 	lock(_lock)
 
-#endif /* CONFIG_LOCK_STAT */
+#endif /* !LOCKDEP */
 
 #ifdef CONFIG_LOCKDEP
 
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 2821b86..dbcb2f5 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -61,8 +61,6 @@ DEFINE_EVENT(lock, lock_release,
 	TP_ARGS(lock, ip)
 );
 
-#ifdef CONFIG_LOCK_STAT
-
 DEFINE_EVENT(lock, lock_contended,
 
 	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
@@ -78,7 +76,6 @@ DEFINE_EVENT(lock, lock_acquired,
 );
 
 #endif
-#endif
 
 #endif /* _TRACE_LOCK_H */
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ea9ee45..81ae879 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -541,10 +541,23 @@ static void print_lockdep_cache(struct lockdep_map *lock)
 	printk("%s", name);
 }
 
+static void print_lock_state(struct held_lock *hlock)
+{
+	switch (hlock->state) {
+	/* holding an unheld lock is fail!  */
+	case hls_unheld:	printk("FAIL:    "); break;
+
+	case hls_acquiring:	/* fall-through */
+	case hls_contended:	printk("blocked: "); break;
+	case hls_acquired:	printk("held:    "); break;
+	};
+}
+
 static void print_lock(struct held_lock *hlock)
 {
+	print_lock_state(hlock);
 	print_lock_name(hlock_class(hlock));
-	printk(", at: ");
+	printk(", instance: %p, at: ", hlock->instance);
 	print_ip_sym(hlock->acquire_ip);
 }
 
@@ -556,7 +569,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
 		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
 		return;
 	}
-	printk("%d lock%s held by %s/%d:\n",
+	printk("%d lock%s on stack by %s/%d:\n",
 		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
 
 	for (i = 0; i < depth; i++) {
@@ -3093,6 +3106,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->check = check;
 	hlock->hardirqs_off = !!hardirqs_off;
 	hlock->references = references;
+	hlock->state = hls_acquiring;
 #ifdef CONFIG_LOCK_STAT
 	hlock->waittime_stamp = 0;
 	hlock->holdtime_stamp = lockstat_clock();
@@ -3607,7 +3621,6 @@ void lockdep_clear_current_reclaim_state(void)
 	current->lockdep_reclaim_gfp = 0;
 }
 
-#ifdef CONFIG_LOCK_STAT
 static int
 print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 			   unsigned long ip)
@@ -3637,14 +3650,72 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 	return 0;
 }
 
+#ifdef CONFIG_LOCK_STAT
+
+static void lockstat_contended(struct held_lock *hlock)
+{
+	int contention_point, contending_point;
+	struct lock_class_stats *stats;
+
+	hlock->waittime_stamp = lockstat_clock();
+
+	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+	contending_point = lock_point(hlock_class(hlock)->contending_point,
+				      lock->ip);
+
+	stats = get_lock_stats(hlock_class(hlock));
+	if (contention_point < LOCKSTAT_POINTS)
+		stats->contention_point[contention_point]++;
+	if (contending_point < LOCKSTAT_POINTS)
+		stats->contending_point[contending_point]++;
+	if (lock->cpu != smp_processor_id())
+		stats->bounces[bounce_contended + !!hlock->read]++;
+	put_lock_stats(stats);
+}
+
+static void lockstat_acquired(struct held_lock *hlock)
+{
+	struct lockdep_map *lock = hlock->instance;
+	struct lock_class_stats *stats;
+	u64 now, waittime = 0;
+	int cpu;
+
+	cpu = smp_processor_id();
+	if (hlock->waittime_stamp) {
+		now = lockstat_clock();
+		waittime = now - hlock->waittime_stamp;
+		hlock->holdtime_stamp = now;
+	}
+
+	stats = get_lock_stats(hlock_class(hlock));
+	if (waittime) {
+		if (hlock->read)
+			lock_time_inc(&stats->read_waittime, waittime);
+		else
+			lock_time_inc(&stats->write_waittime, waittime);
+	}
+	if (lock->cpu != cpu)
+		stats->bounces[bounce_acquired + !!hlock->read]++;
+	put_lock_stats(stats);
+
+	lock->cpu = cpu;
+	lock->ip = ip;
+}
+
+#else /* CONFIG_LOCK_STAT */
+
+static inline void lockstat_contended(struct held_lock *hlock) { }
+static inline void lockstat_acquired(struct held_lock *hlock) { }
+
+#endif /* CONFIG_LOCK_STAT */
+
 static void
 __lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
-	struct lock_class_stats *stats;
 	unsigned int depth;
-	int i, contention_point, contending_point;
+	int i;
 
 	depth = curr->lockdep_depth;
 	/*
@@ -3673,20 +3744,8 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	if (hlock->instance != lock)
 		return;
 
-	hlock->waittime_stamp = lockstat_clock();
-
-	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
-	contending_point = lock_point(hlock_class(hlock)->contending_point,
-				      lock->ip);
-
-	stats = get_lock_stats(hlock_class(hlock));
-	if (contention_point < LOCKSTAT_POINTS)
-		stats->contention_point[contention_point]++;
-	if (contending_point < LOCKSTAT_POINTS)
-		stats->contending_point[contending_point]++;
-	if (lock->cpu != smp_processor_id())
-		stats->bounces[bounce_contended + !!hlock->read]++;
-	put_lock_stats(stats);
+	hlock->state = hls_contended;
+	lockstat_contended(hlock);
 }
 
 static void
@@ -3694,10 +3753,8 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
-	struct lock_class_stats *stats;
 	unsigned int depth;
-	u64 now, waittime = 0;
-	int i, cpu;
+	int i;
 
 	depth = curr->lockdep_depth;
 	/*
@@ -3726,28 +3783,8 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
 	if (hlock->instance != lock)
 		return;
 
-	cpu = smp_processor_id();
-	if (hlock->waittime_stamp) {
-		now = lockstat_clock();
-		waittime = now - hlock->waittime_stamp;
-		hlock->holdtime_stamp = now;
-	}
-
-	trace_lock_acquired(lock, ip);
-
-	stats = get_lock_stats(hlock_class(hlock));
-	if (waittime) {
-		if (hlock->read)
-			lock_time_inc(&stats->read_waittime, waittime);
-		else
-			lock_time_inc(&stats->write_waittime, waittime);
-	}
-	if (lock->cpu != cpu)
-		stats->bounces[bounce_acquired + !!hlock->read]++;
-	put_lock_stats(stats);
-
-	lock->cpu = cpu;
-	lock->ip = ip;
+	hlock->state = hls_acquired;
+	lockstat_acquired(hlock);
 }
 
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3783,12 +3820,12 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 	raw_local_irq_save(flags);
 	check_flags(flags);
 	current->lockdep_recursion = 1;
+	trace_lock_acquired(lock, ip);
 	__lock_acquired(lock, ip);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(lock_acquired);
-#endif
 
 /*
  * Used by the testsuite, sanitize the validator state


  reply	other threads:[~2012-06-04 10:50 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-03 22:36 processes hung after sys_renameat, and 'missing' processes Dave Jones
2012-06-03 22:51 ` Dave Jones
2012-06-03 23:07 ` Linus Torvalds
2012-06-03 23:17   ` Al Viro
2012-06-03 23:28     ` Al Viro
2012-06-03 23:40       ` Al Viro
2012-06-03 23:59         ` Al Viro
2012-06-04  0:07       ` Dave Jones
2012-06-06 19:42       ` Dave Jones
2012-06-06 22:38         ` Linus Torvalds
2012-06-06 23:00           ` Dave Jones
2012-06-06 23:31             ` Linus Torvalds
2012-06-06 23:54               ` Al Viro
2012-06-07  0:29                 ` Dave Jones
2012-06-07  0:40                   ` Al Viro
2012-06-07  0:42                   ` Linus Torvalds
2012-06-07  1:19                     ` Dave Jones
2012-06-07  1:29                       ` Al Viro
2012-06-07  1:31                         ` Dave Jones
2012-06-07  1:31                         ` Al Viro
2012-06-07  1:42                           ` Dave Jones
2012-06-07  1:45                           ` Linus Torvalds
2012-06-07  1:54                             ` Al Viro
2012-06-07  2:08                               ` Dave Jones
2012-06-07 19:36                         ` Al Viro
2012-06-07 20:43                           ` Sage Weil
2012-06-07 23:12                           ` Eric W. Biederman
2012-06-07 23:39                             ` Al Viro
2012-06-07 23:57                             ` Linus Torvalds
2012-06-08  0:36                               ` Al Viro
2012-06-08  0:42                                 ` Linus Torvalds
2012-06-08  0:59                                 ` Al Viro
2012-06-08  5:25                                   ` Eric W. Biederman
2012-06-08  5:48                                     ` Al Viro
2012-06-08  7:54                                       ` Eric W. Biederman
2012-06-08 20:20                                         ` Al Viro
2012-06-08  2:08                                 ` Eric W. Biederman
2012-06-08  2:37                                   ` Al Viro
2012-06-08  2:18                           ` Al Viro
2012-06-08 16:22                           ` J. Bruce Fields
2012-06-08 17:44                             ` Linus Torvalds
2012-06-11 12:17                               ` J. Bruce Fields
2012-06-07  1:40                       ` Linus Torvalds
2012-06-07  0:35                 ` Linus Torvalds
2012-06-07 10:26               ` Peter Zijlstra
2012-06-07 15:30                 ` Linus Torvalds
2012-06-08  7:31                   ` Peter Zijlstra
2012-06-08 14:38                     ` Dave Jones
2012-06-08 14:51                       ` Peter Zijlstra
2012-06-08 15:01                         ` Dave Jones
2012-06-08 15:11                           ` Peter Zijlstra
2012-06-08 15:21                             ` Dave Jones
2012-06-08 14:46                     ` J. Bruce Fields
2012-06-08 15:08                       ` Peter Zijlstra
2012-06-11 12:17                         ` J. Bruce Fields
2012-06-04  0:00   ` Dave Jones
2012-06-04  0:16     ` Linus Torvalds
2012-06-04  0:20       ` Al Viro
2012-06-04  9:35         ` Peter Zijlstra
2012-06-04  9:29       ` Peter Zijlstra
2012-06-04 10:49         ` Peter Zijlstra [this message]
2012-06-07  0:13           ` Dave Jones
  -- strict thread matches above, loose matches on Subject: below --
2012-06-07  7:07 Miklos Szeredi
2012-06-07 15:44 ` Linus Torvalds
2012-06-11 16:02   ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1338806988.28282.17.camel@twins \
    --to=peterz@infradead.org \
    --cc=davej@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox