public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
To: Jason Baron <jbaron@redhat.com>
Cc: linux-kernel@vger.kernel.org, fche@redhat.com,
	fweisbec@gmail.com, edwintorok@gmail.com, mingo@elte.hu,
	peterz@infradead.org
Subject: Re: tracepoints for kernel/mutex.c
Date: Thu, 16 Oct 2008 18:10:46 -0400	[thread overview]
Message-ID: <20081016221046.GD21877@Krystal> (raw)
In-Reply-To: <20081016210430.GC25738@redhat.com>

* Jason Baron (jbaron@redhat.com) wrote:
> hi,
> 
> Below are 3 tracepoints I've been playing with in kernel/mutex.c using
> a SystemTap script. The idea is to detect and determine the cause of
> lock contention. Currently I get the following output:
> 
> <contended mutex nam> <process name and pid of the contention> <time of
> contention> <pid that woke me up(caused the contention?)>
> 
> for example:
> 
> name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
> 26566
>  
> 
> Using SystemTap I can now modify my script to figure out the inode number, the 
> file and pathname, or the backtrace and get to the bottom of the
> contention.
> 
> I think this approach has a number of advantages. It has low
> overhead in the off case, since its based on tracepoints. It is
> minimally invasive in the code path (3 tracepoints). It also allows me
> to explore data structures and parts of the kernel by simply modifying
> the SystemTap script. I do not need to re-compile the kernel and reboot.
> 
> You can run the script via: $stap mutex.stp
> 
> thanks,
> 
> -Jason
> 
> 
> diff --git a/include/trace/mutex.h b/include/trace/mutex.h
> new file mode 100644
> index 0000000..8d365d6
> --- /dev/null
> +++ b/include/trace/mutex.h
> @@ -0,0 +1,10 @@
> +#ifndef _TRACE_MUTEX_H
> +#define _TRACE_MUTEX_H
> +
> +#include <linux/tracepoint.h>
> +
> +DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
> +DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
> +DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
> +
> +#endif
> diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
> index 1d94160..847a9fb 100644
> --- a/kernel/mutex-debug.c
> +++ b/kernel/mutex-debug.c
> @@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
>  #endif
>  	lock->owner = NULL;
>  	lock->magic = lock;
> +	lock->name = name;
>  }
>  
>  /***
> diff --git a/kernel/mutex.c b/kernel/mutex.c
> index 12c779d..62c2aab 100644
> --- a/kernel/mutex.c
> +++ b/kernel/mutex.c
> @@ -18,6 +18,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/interrupt.h>
>  #include <linux/debug_locks.h>
> +#include <trace/mutex.h>
>  
>  /*
>   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		goto done;
>  
>  	lock_contended(&lock->dep_map, ip);
> +	trace_mutex_lock_contention((int)task->pid, jiffies);
>  
>  	for (;;) {
>  		/*
> @@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		schedule();
>  		spin_lock_mutex(&lock->wait_lock, flags);
>  	}
> -
> +	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
>  done:
>  	lock_acquired(&lock->dep_map);
>  	/* got the lock - rejoice! */
> @@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
>  					   struct mutex_waiter, list);
>  
>  		debug_mutex_wake_waiter(lock, waiter);
> -
> +		trace_mutex_wakeup(waiter->task->pid, current->pid);
>  		wake_up_process(waiter->task);
>  	}
>  
> diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
> index 2ee8a46..0c54d01 100644
> --- a/ltt/probes/Makefile
> +++ b/ltt/probes/Makefile
> @@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
>  endif
>  
>  obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
> -				ipc-trace.o lockdep-trace.o rcu-trace.o
> +				ipc-trace.o lockdep-trace.o rcu-trace.o \
> +				mutex-trace.o
>  
>  ifeq ($(CONFIG_NET),y)
>  ifdef CONFIG_FTRACE
> diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
> new file mode 100644
> index 0000000..3b1ebeb
> --- /dev/null
> +++ b/ltt/probes/mutex-trace.c
> @@ -0,0 +1,43 @@
> +/*
> + * ltt/probes/fs-trace.c
> + *

Perhaps you meant mutex-trace.c ? :)

> + * Mutex tracepoint probes.
> + */
> +
> +#include <linux/module.h>
> +#include <trace/mutex.h>
> +
> +static unsigned long mutex_times[PID_MAX_DEFAULT+1];
> +static int mutex_wakeup[PID_MAX_DEFAULT+1];
> +

What happens if PID_MAX_LIMIT > PID_MAX_DEFAULT ?

What happens when a process ID gets reused (exit/fork reusing a PID) ?

Could you add this information to the task_structs or does it really
have to be separate ?

Mathieu

> +void probe_mutex_lock_contention(int pid, unsigned long time)
> +{
> +	mutex_times[pid] = (unsigned long)time;	
> +
> +	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
> +		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
> +}
> +
> +void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
> +{
> +	unsigned long delay = time - mutex_times[pid];
> +	int reason = mutex_wakeup[pid];
> +
> +	/* FIXME: why is name sometimes null? */
> +	if (!name)
> +		return;
> +
> +	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
> +		probe_mutex_lock_contention_length, 
> +		"name %s pid %d delay %lu cause %d", 
> +		name, pid, delay, reason);
> +}
> +
> +void probe_mutex_wakeup(int target_pid, int sender_pid)
> +{
> +	mutex_wakeup[target_pid] = sender_pid;
> +
> +	trace_mark_tp(mutex_wakeup, mutex_wakeup,
> +		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
> +}
> +
> 
> 
> # mutex.stp
> 
> probe kernel.mark("mutex_lock_contention") 
> { 
> }
> 
> probe kernel.mark("mutex_wakeup") 
> { 
> }
> 
> probe kernel.mark("mutex_lock_contention_length") 
> { 
> 	name = $arg1
> 	pid = $arg2
> 	delay = $arg3
> 	cause = $arg4
> 
> 	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
> 		name, execname(), pid, delay, cause);
> }
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

      parent reply	other threads:[~2008-10-16 22:11 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-16 21:04 tracepoints for kernel/mutex.c Jason Baron
2008-10-16 21:34 ` Peter Zijlstra
2008-10-16 22:14   ` Thomas Gleixner
2008-10-17  5:09   ` KOSAKI Motohiro
2008-10-17  8:15     ` Peter Zijlstra
2008-10-17  8:22       ` KOSAKI Motohiro
2008-10-17 16:36     ` Mathieu Desnoyers
2008-10-17 14:48   ` Jason Baron
2008-10-17 16:43     ` Mathieu Desnoyers
2008-10-17 16:58       ` Frank Ch. Eigler
2008-10-17 16:48     ` Peter Zijlstra
2008-10-17 16:54       ` Mathieu Desnoyers
2008-10-16 22:10 ` Mathieu Desnoyers [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081016221046.GD21877@Krystal \
    --to=compudj@krystal.dyndns.org \
    --cc=edwintorok@gmail.com \
    --cc=fche@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=jbaron@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox