All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <compudj@krystal.dyndns.org>
To: Jason Baron <jbaron@redhat.com>
Cc: linux-kernel@vger.kernel.org, fche@redhat.com,
	fweisbec@gmail.com, edwintorok@gmail.com, mingo@elte.hu,
	peterz@infradead.org
Subject: Re: tracepoints for kernel/mutex.c
Date: Thu, 16 Oct 2008 18:10:46 -0400	[thread overview]
Message-ID: <20081016221046.GD21877@Krystal> (raw)
In-Reply-To: <20081016210430.GC25738@redhat.com>

* Jason Baron (jbaron@redhat.com) wrote:
> hi,
> 
> Below are 3 tracepoints I've been playing with in kernel/mutex.c using
> a SystemTap script. The idea is to detect and determine the cause of
> lock contention. Currently I get the following output:
> 
> <contended mutex nam> <process name and pid of the contention> <time of
> contention> <pid that woke me up(caused the contention?)>
> 
> for example:
> 
> name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
> 26566
>  
> 
> Using SystemTap I can now modify my script to figure out the inode number, the 
> file and pathname, or the backtrace and get to the bottom of the
> contention.
> 
> I think this approach has a number of advantages. It has low
> overhead in the off case, since its based on tracepoints. It is
> minimally invasive in the code path (3 tracepoints). It also allows me
> to explore data structures and parts of the kernel by simply modifying
> the SystemTap script. I do not need to re-compile the kernel and reboot.
> 
> You can run the script via: $stap mutex.stp
> 
> thanks,
> 
> -Jason
> 
> 
> diff --git a/include/trace/mutex.h b/include/trace/mutex.h
> new file mode 100644
> index 0000000..8d365d6
> --- /dev/null
> +++ b/include/trace/mutex.h
> @@ -0,0 +1,10 @@
> +#ifndef _TRACE_MUTEX_H
> +#define _TRACE_MUTEX_H
> +
> +#include <linux/tracepoint.h>
> +
> +DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
> +DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
> +DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
> +
> +#endif
> diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
> index 1d94160..847a9fb 100644
> --- a/kernel/mutex-debug.c
> +++ b/kernel/mutex-debug.c
> @@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
>  #endif
>  	lock->owner = NULL;
>  	lock->magic = lock;
> +	lock->name = name;
>  }
>  
>  /***
> diff --git a/kernel/mutex.c b/kernel/mutex.c
> index 12c779d..62c2aab 100644
> --- a/kernel/mutex.c
> +++ b/kernel/mutex.c
> @@ -18,6 +18,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/interrupt.h>
>  #include <linux/debug_locks.h>
> +#include <trace/mutex.h>
>  
>  /*
>   * In the DEBUG case we are using the "NULL fastpath" for mutexes,
> @@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		goto done;
>  
>  	lock_contended(&lock->dep_map, ip);
> +	trace_mutex_lock_contention((int)task->pid, jiffies);
>  
>  	for (;;) {
>  		/*
> @@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  		schedule();
>  		spin_lock_mutex(&lock->wait_lock, flags);
>  	}
> -
> +	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
>  done:
>  	lock_acquired(&lock->dep_map);
>  	/* got the lock - rejoice! */
> @@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
>  					   struct mutex_waiter, list);
>  
>  		debug_mutex_wake_waiter(lock, waiter);
> -
> +		trace_mutex_wakeup(waiter->task->pid, current->pid);
>  		wake_up_process(waiter->task);
>  	}
>  
> diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
> index 2ee8a46..0c54d01 100644
> --- a/ltt/probes/Makefile
> +++ b/ltt/probes/Makefile
> @@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
>  endif
>  
>  obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
> -				ipc-trace.o lockdep-trace.o rcu-trace.o
> +				ipc-trace.o lockdep-trace.o rcu-trace.o \
> +				mutex-trace.o
>  
>  ifeq ($(CONFIG_NET),y)
>  ifdef CONFIG_FTRACE
> diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
> new file mode 100644
> index 0000000..3b1ebeb
> --- /dev/null
> +++ b/ltt/probes/mutex-trace.c
> @@ -0,0 +1,43 @@
> +/*
> + * ltt/probes/fs-trace.c
> + *

Perhaps you meant mutex-trace.c ? :)

> + * Mutex tracepoint probes.
> + */
> +
> +#include <linux/module.h>
> +#include <trace/mutex.h>
> +
> +static unsigned long mutex_times[PID_MAX_DEFAULT+1];
> +static int mutex_wakeup[PID_MAX_DEFAULT+1];
> +

What happens if PID_MAX_LIMIT > PID_MAX_DEFAULT ?

What happens when a process ID gets reused (exit/fork reusing a PID) ?

Could you add this information to the task_structs or does it really
have to be separate ?

Mathieu

> +void probe_mutex_lock_contention(int pid, unsigned long time)
> +{
> +	mutex_times[pid] = (unsigned long)time;	
> +
> +	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
> +		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
> +}
> +
> +void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
> +{
> +	unsigned long delay = time - mutex_times[pid];
> +	int reason = mutex_wakeup[pid];
> +
> +	/* FIXME: why is name sometimes null? */
> +	if (!name)
> +		return;
> +
> +	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
> +		probe_mutex_lock_contention_length, 
> +		"name %s pid %d delay %lu cause %d", 
> +		name, pid, delay, reason);
> +}
> +
> +void probe_mutex_wakeup(int target_pid, int sender_pid)
> +{
> +	mutex_wakeup[target_pid] = sender_pid;
> +
> +	trace_mark_tp(mutex_wakeup, mutex_wakeup,
> +		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
> +}
> +
> 
> 
> # mutex.stp
> 
> probe kernel.mark("mutex_lock_contention") 
> { 
> }
> 
> probe kernel.mark("mutex_wakeup") 
> { 
> }
> 
> probe kernel.mark("mutex_lock_contention_length") 
> { 
> 	name = $arg1
> 	pid = $arg2
> 	delay = $arg3
> 	cause = $arg4
> 
> 	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
> 		name, execname(), pid, delay, cause);
> }
> 

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

      parent reply	other threads:[~2008-10-16 22:11 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-16 21:04 tracepoints for kernel/mutex.c Jason Baron
2008-10-16 21:34 ` Peter Zijlstra
2008-10-16 22:14   ` Thomas Gleixner
2008-10-17  5:09   ` KOSAKI Motohiro
2008-10-17  8:15     ` Peter Zijlstra
2008-10-17  8:22       ` KOSAKI Motohiro
2008-10-17 16:36     ` Mathieu Desnoyers
2008-10-17 14:48   ` Jason Baron
2008-10-17 16:43     ` Mathieu Desnoyers
2008-10-17 16:58       ` Frank Ch. Eigler
2008-10-17 16:48     ` Peter Zijlstra
2008-10-17 16:54       ` Mathieu Desnoyers
2008-10-16 22:10 ` Mathieu Desnoyers [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081016221046.GD21877@Krystal \
    --to=compudj@krystal.dyndns.org \
    --cc=edwintorok@gmail.com \
    --cc=fche@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=jbaron@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.