All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Dave Jones <davej@redhat.com>, Al Viro <viro@zeniv.linux.org.uk>,
	Linux Kernel <linux-kernel@vger.kernel.org>,
	Miklos Szeredi <mszeredi@suse.cz>, Jan Kara <jack@suse.cz>
Subject: Re: processes hung after sys_renameat, and 'missing' processes
Date: Fri, 8 Jun 2012 10:46:02 -0400	[thread overview]
Message-ID: <20120608144602.GA17251@fieldses.org> (raw)
In-Reply-To: <1339140698.23343.26.camel@twins>

On Fri, Jun 08, 2012 at 09:31:38AM +0200, Peter Zijlstra wrote:
> On Thu, 2012-06-07 at 08:30 -0700, Linus Torvalds wrote:
> > On Thu, Jun 7, 2012 at 3:26 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > > On Wed, 2012-06-06 at 16:31 -0700, Linus Torvalds wrote:
> > >> PeterZ - the fact that we use mutex_lock_nested with
> > >> I_MUTEX_PARENT/I_MUTEX_CHILD/nothing hopefully doesn't screw us up in
> > >> case we get it wrong, right?
> > >
> > > Sadly, if you get that annotation wrong you can annotate an actual
> > > deadlock away.

What's a (contrived as you want) example where that happens?

> > > This the reason you have to be very careful when
> > > annotating stuff.

Or alternatively--what do I need to check before I call
mutex_lock_nested?

--b.

> > 
> > Is that true even if you can see an actual deadlock on the lock *instances*?
> 
> Yep.. :/
> 
> > The point of the locking classes (to me) was always that you could see
> > the deadlock even if it didn't *actually* happen, just *potentially*
> > happened. If the lock classes then mean that that hides an actual
> > deadlock information, that's a big downside.
> 
> Right, so in order to report a deadlock on instances we'd have to do a
> full lock graph walk on every contended lock acquisition. This is quite
> expensive.
> 
> Although I can look into doing that from say the NMI watchdog and/or
> some sysrq key.
> 
> > And yeah, that really means that we absolutely *have* to get the
> > instance data in the lock debug printout, since we can't see deadlocks
> > any other way. 
> 
> Well, typically deadlocks aren't that hard to find once you trigger
> them. The current case of course being the exception to that rule.. 
> 
> > Can you check the compile error that Dave reported for
> > your patch, because right now Dave is running without it, afaik, due
> > to your lock-instance patch not really working in practice.
> 
> Oh, missed that in the thread.. yeah here goes.
> 
> 
> Now also compile tested with CONFIG_LOCK_STAT...
> 
> ---
>  include/linux/lockdep.h     |   44 ++++++++-------
>  include/trace/events/lock.h |    3 -
>  kernel/lockdep.c            |  128 ++++++++++++++++++++++++++++----------------
>  3 files changed, 107 insertions(+), 68 deletions(-)
> 
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -211,6 +211,13 @@ struct lock_chain {
>   */
>  #define MAX_LOCKDEP_KEYS		((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
>  
> +enum held_lock_state {
> +	hls_unheld = 0,
> +	hls_acquiring,
> +	hls_contended,
> +	hls_acquired,
> +};
> +
>  struct held_lock {
>  	/*
>  	 * One-way hash of the dependency chain up to this point. We
> @@ -254,7 +261,10 @@ struct held_lock {
>  	unsigned int read:2;        /* see lock_acquire() comment */
>  	unsigned int check:2;       /* see lock_acquire() comment */
>  	unsigned int hardirqs_off:1;
> -	unsigned int references:11;					/* 32 bits */
> +	unsigned int state:2;	    /* see held_lock_state */
> +									/* 9 bit hole */
> +	unsigned int references:16;
> +									/* 16 bit hole */
>  };
>  
>  /*
> @@ -363,6 +373,18 @@ extern void lockdep_trace_alloc(gfp_t ma
>  
>  #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
>  
> +extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> +
> +#define LOCK_CONTENDED(_lock, try, lock)			\
> +do {								\
> +	if (!try(_lock)) {					\
> +		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
> +		lock(_lock);					\
> +	}							\
> +	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
> +} while (0)
> +
>  #else /* !LOCKDEP */
>  
>  static inline void lockdep_off(void)
> @@ -414,31 +436,13 @@ struct lock_class_key { };
>  
>  #define lockdep_recursing(tsk)			(0)
>  
> -#endif /* !LOCKDEP */
> -
> -#ifdef CONFIG_LOCK_STAT
> -
> -extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
> -extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
> -
> -#define LOCK_CONTENDED(_lock, try, lock)			\
> -do {								\
> -	if (!try(_lock)) {					\
> -		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
> -		lock(_lock);					\
> -	}							\
> -	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
> -} while (0)
> -
> -#else /* CONFIG_LOCK_STAT */
> -
>  #define lock_contended(lockdep_map, ip) do {} while (0)
>  #define lock_acquired(lockdep_map, ip) do {} while (0)
>  
>  #define LOCK_CONTENDED(_lock, try, lock) \
>  	lock(_lock)
>  
> -#endif /* CONFIG_LOCK_STAT */
> +#endif /* !LOCKDEP */
>  
>  #ifdef CONFIG_LOCKDEP
>  
> --- a/include/trace/events/lock.h
> +++ b/include/trace/events/lock.h
> @@ -61,8 +61,6 @@ DEFINE_EVENT(lock, lock_release,
>  	TP_ARGS(lock, ip)
>  );
>  
> -#ifdef CONFIG_LOCK_STAT
> -
>  DEFINE_EVENT(lock, lock_contended,
>  
>  	TP_PROTO(struct lockdep_map *lock, unsigned long ip),
> @@ -78,7 +76,6 @@ DEFINE_EVENT(lock, lock_acquired,
>  );
>  
>  #endif
> -#endif
>  
>  #endif /* _TRACE_LOCK_H */
>  
> --- a/kernel/lockdep.c
> +++ b/kernel/lockdep.c
> @@ -541,10 +541,23 @@ static void print_lockdep_cache(struct l
>  	printk("%s", name);
>  }
>  
> +static void print_lock_state(struct held_lock *hlock)
> +{
> +	switch (hlock->state) {
> +	/* holding an unheld lock is fail!  */
> +	case hls_unheld:	printk("FAIL:    "); break;
> +
> +	case hls_acquiring:	/* fall-through */
> +	case hls_contended:	printk("blocked: "); break;
> +	case hls_acquired:	printk("held:    "); break;
> +	};
> +}
> +
>  static void print_lock(struct held_lock *hlock)
>  {
> +	print_lock_state(hlock);
>  	print_lock_name(hlock_class(hlock));
> -	printk(", at: ");
> +	printk(", instance: %p, at: ", hlock->instance);
>  	print_ip_sym(hlock->acquire_ip);
>  }
>  
> @@ -556,7 +569,7 @@ static void lockdep_print_held_locks(str
>  		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
>  		return;
>  	}
> -	printk("%d lock%s held by %s/%d:\n",
> +	printk("%d lock%s on stack by %s/%d:\n",
>  		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
>  
>  	for (i = 0; i < depth; i++) {
> @@ -3093,6 +3106,7 @@ static int __lock_acquire(struct lockdep
>  	hlock->check = check;
>  	hlock->hardirqs_off = !!hardirqs_off;
>  	hlock->references = references;
> +	hlock->state = hls_acquiring;
>  #ifdef CONFIG_LOCK_STAT
>  	hlock->waittime_stamp = 0;
>  	hlock->holdtime_stamp = lockstat_clock();
> @@ -3607,7 +3621,6 @@ void lockdep_clear_current_reclaim_state
>  	current->lockdep_reclaim_gfp = 0;
>  }
>  
> -#ifdef CONFIG_LOCK_STAT
>  static int
>  print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
>  			   unsigned long ip)
> @@ -3637,14 +3650,73 @@ print_lock_contention_bug(struct task_st
>  	return 0;
>  }
>  
> +#ifdef CONFIG_LOCK_STAT
> +
> +static void lockstat_contended(struct held_lock *hlock, unsigned long ip)
> +{
> +	struct lockdep_map *lock = hlock->instance;
> +	int contention_point, contending_point;
> +	struct lock_class_stats *stats;
> +
> +	hlock->waittime_stamp = lockstat_clock();
> +
> +	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> +	contending_point = lock_point(hlock_class(hlock)->contending_point,
> +				      lock->ip);
> +
> +	stats = get_lock_stats(hlock_class(hlock));
> +	if (contention_point < LOCKSTAT_POINTS)
> +		stats->contention_point[contention_point]++;
> +	if (contending_point < LOCKSTAT_POINTS)
> +		stats->contending_point[contending_point]++;
> +	if (lock->cpu != smp_processor_id())
> +		stats->bounces[bounce_contended + !!hlock->read]++;
> +	put_lock_stats(stats);
> +}
> +
> +static void lockstat_acquired(struct held_lock *hlock, unsigned long ip)
> +{
> +	struct lockdep_map *lock = hlock->instance;
> +	struct lock_class_stats *stats;
> +	u64 now, waittime = 0;
> +	int cpu;
> +
> +	cpu = smp_processor_id();
> +	if (hlock->waittime_stamp) {
> +		now = lockstat_clock();
> +		waittime = now - hlock->waittime_stamp;
> +		hlock->holdtime_stamp = now;
> +	}
> +
> +	stats = get_lock_stats(hlock_class(hlock));
> +	if (waittime) {
> +		if (hlock->read)
> +			lock_time_inc(&stats->read_waittime, waittime);
> +		else
> +			lock_time_inc(&stats->write_waittime, waittime);
> +	}
> +	if (lock->cpu != cpu)
> +		stats->bounces[bounce_acquired + !!hlock->read]++;
> +	put_lock_stats(stats);
> +
> +	lock->cpu = cpu;
> +	lock->ip = ip;
> +}
> +
> +#else /* CONFIG_LOCK_STAT */
> +
> +static inline void lockstat_contended(struct held_lock *hlock, unsigned long ip) { }
> +static inline void lockstat_acquired(struct held_lock *hlock, unsigned long ip) { }
> +
> +#endif /* CONFIG_LOCK_STAT */
> +
>  static void
>  __lock_contended(struct lockdep_map *lock, unsigned long ip)
>  {
>  	struct task_struct *curr = current;
>  	struct held_lock *hlock, *prev_hlock;
> -	struct lock_class_stats *stats;
>  	unsigned int depth;
> -	int i, contention_point, contending_point;
> +	int i;
>  
>  	depth = curr->lockdep_depth;
>  	/*
> @@ -3673,20 +3745,8 @@ __lock_contended(struct lockdep_map *loc
>  	if (hlock->instance != lock)
>  		return;
>  
> -	hlock->waittime_stamp = lockstat_clock();
> -
> -	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
> -	contending_point = lock_point(hlock_class(hlock)->contending_point,
> -				      lock->ip);
> -
> -	stats = get_lock_stats(hlock_class(hlock));
> -	if (contention_point < LOCKSTAT_POINTS)
> -		stats->contention_point[contention_point]++;
> -	if (contending_point < LOCKSTAT_POINTS)
> -		stats->contending_point[contending_point]++;
> -	if (lock->cpu != smp_processor_id())
> -		stats->bounces[bounce_contended + !!hlock->read]++;
> -	put_lock_stats(stats);
> +	hlock->state = hls_contended;
> +	lockstat_contended(hlock, ip);
>  }
>  
>  static void
> @@ -3694,10 +3754,8 @@ __lock_acquired(struct lockdep_map *lock
>  {
>  	struct task_struct *curr = current;
>  	struct held_lock *hlock, *prev_hlock;
> -	struct lock_class_stats *stats;
>  	unsigned int depth;
> -	u64 now, waittime = 0;
> -	int i, cpu;
> +	int i;
>  
>  	depth = curr->lockdep_depth;
>  	/*
> @@ -3726,28 +3784,8 @@ __lock_acquired(struct lockdep_map *lock
>  	if (hlock->instance != lock)
>  		return;
>  
> -	cpu = smp_processor_id();
> -	if (hlock->waittime_stamp) {
> -		now = lockstat_clock();
> -		waittime = now - hlock->waittime_stamp;
> -		hlock->holdtime_stamp = now;
> -	}
> -
> -	trace_lock_acquired(lock, ip);
> -
> -	stats = get_lock_stats(hlock_class(hlock));
> -	if (waittime) {
> -		if (hlock->read)
> -			lock_time_inc(&stats->read_waittime, waittime);
> -		else
> -			lock_time_inc(&stats->write_waittime, waittime);
> -	}
> -	if (lock->cpu != cpu)
> -		stats->bounces[bounce_acquired + !!hlock->read]++;
> -	put_lock_stats(stats);
> -
> -	lock->cpu = cpu;
> -	lock->ip = ip;
> +	hlock->state = hls_acquired;
> +	lockstat_acquired(hlock, ip);
>  }
>  
>  void lock_contended(struct lockdep_map *lock, unsigned long ip)
> @@ -3783,12 +3821,12 @@ void lock_acquired(struct lockdep_map *l
>  	raw_local_irq_save(flags);
>  	check_flags(flags);
>  	current->lockdep_recursion = 1;
> +	trace_lock_acquired(lock, ip);
>  	__lock_acquired(lock, ip);
>  	current->lockdep_recursion = 0;
>  	raw_local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(lock_acquired);
> -#endif
>  
>  /*
>   * Used by the testsuite, sanitize the validator state
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

  parent reply	other threads:[~2012-06-08 14:46 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-03 22:36 processes hung after sys_renameat, and 'missing' processes Dave Jones
2012-06-03 22:51 ` Dave Jones
2012-06-03 23:07 ` Linus Torvalds
2012-06-03 23:17   ` Al Viro
2012-06-03 23:28     ` Al Viro
2012-06-03 23:40       ` Al Viro
2012-06-03 23:59         ` Al Viro
2012-06-04  0:07       ` Dave Jones
2012-06-06 19:42       ` Dave Jones
2012-06-06 22:38         ` Linus Torvalds
2012-06-06 23:00           ` Dave Jones
2012-06-06 23:31             ` Linus Torvalds
2012-06-06 23:54               ` Al Viro
2012-06-07  0:29                 ` Dave Jones
2012-06-07  0:40                   ` Al Viro
2012-06-07  0:42                   ` Linus Torvalds
2012-06-07  1:19                     ` Dave Jones
2012-06-07  1:29                       ` Al Viro
2012-06-07  1:31                         ` Dave Jones
2012-06-07  1:31                         ` Al Viro
2012-06-07  1:42                           ` Dave Jones
2012-06-07  1:45                           ` Linus Torvalds
2012-06-07  1:54                             ` Al Viro
2012-06-07  2:08                               ` Dave Jones
2012-06-07 19:36                         ` Al Viro
2012-06-07 20:43                           ` Sage Weil
2012-06-07 23:12                           ` Eric W. Biederman
2012-06-07 23:39                             ` Al Viro
2012-06-07 23:57                             ` Linus Torvalds
2012-06-08  0:36                               ` Al Viro
2012-06-08  0:42                                 ` Linus Torvalds
2012-06-08  0:59                                 ` Al Viro
2012-06-08  5:25                                   ` Eric W. Biederman
2012-06-08  5:48                                     ` Al Viro
2012-06-08  7:54                                       ` Eric W. Biederman
2012-06-08 20:20                                         ` Al Viro
2012-06-08  2:08                                 ` Eric W. Biederman
2012-06-08  2:37                                   ` Al Viro
2012-06-08  2:18                           ` Al Viro
2012-06-08 16:22                           ` J. Bruce Fields
2012-06-08 17:44                             ` Linus Torvalds
2012-06-11 12:17                               ` J. Bruce Fields
2012-06-07  1:40                       ` Linus Torvalds
2012-06-07  0:35                 ` Linus Torvalds
2012-06-07 10:26               ` Peter Zijlstra
2012-06-07 15:30                 ` Linus Torvalds
2012-06-08  7:31                   ` Peter Zijlstra
2012-06-08 14:38                     ` Dave Jones
2012-06-08 14:51                       ` Peter Zijlstra
2012-06-08 15:01                         ` Dave Jones
2012-06-08 15:11                           ` Peter Zijlstra
2012-06-08 15:21                             ` Dave Jones
2012-06-08 14:46                     ` J. Bruce Fields [this message]
2012-06-08 15:08                       ` Peter Zijlstra
2012-06-11 12:17                         ` J. Bruce Fields
2012-06-04  0:00   ` Dave Jones
2012-06-04  0:16     ` Linus Torvalds
2012-06-04  0:20       ` Al Viro
2012-06-04  9:35         ` Peter Zijlstra
2012-06-04  9:29       ` Peter Zijlstra
2012-06-04 10:49         ` Peter Zijlstra
2012-06-07  0:13           ` Dave Jones
  -- strict thread matches above, loose matches on Subject: below --
2012-06-07  7:07 Miklos Szeredi
2012-06-07 15:44 ` Linus Torvalds
2012-06-11 16:02   ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120608144602.GA17251@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=davej@redhat.com \
    --cc=jack@suse.cz \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mszeredi@suse.cz \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.