public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* tracepoints for kernel/mutex.c
@ 2008-10-16 21:04 Jason Baron
  2008-10-16 21:34 ` Peter Zijlstra
  2008-10-16 22:10 ` Mathieu Desnoyers
  0 siblings, 2 replies; 13+ messages in thread
From: Jason Baron @ 2008-10-16 21:04 UTC (permalink / raw)
  To: linux-kernel; +Cc: compudj, fche, fweisbec, edwintorok, mingo, peterz

hi,

Below are 3 tracepoints I've been playing with in kernel/mutex.c using
a SystemTap script. The idea is to detect and determine the cause of
lock contention. Currently I get the following output:

<contended mutex nam> <process name and pid of the contention> <time of
contention> <pid that woke me up(caused the contention?)>

for example:

name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
26566
 

Using SystemTap I can now modify my script to figure out the inode number, the 
file and pathname, or the backtrace and get to the bottom of the
contention.

I think this approach has a number of advantages. It has low
overhead in the off case, since its based on tracepoints. It is
minimally invasive in the code path (3 tracepoints). It also allows me
to explore data structures and parts of the kernel by simply modifying
the SystemTap script. I do not need to re-compile the kernel and reboot.

You can run the script via: $stap mutex.stp

thanks,

-Jason


diff --git a/include/trace/mutex.h b/include/trace/mutex.h
new file mode 100644
index 0000000..8d365d6
--- /dev/null
+++ b/include/trace/mutex.h
@@ -0,0 +1,10 @@
+#ifndef _TRACE_MUTEX_H
+#define _TRACE_MUTEX_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
+DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
+DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
+
+#endif
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160..847a9fb 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
 #endif
 	lock->owner = NULL;
 	lock->magic = lock;
+	lock->name = name;
 }
 
 /***
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779d..62c2aab 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <trace/mutex.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		goto done;
 
 	lock_contended(&lock->dep_map, ip);
+	trace_mutex_lock_contention((int)task->pid, jiffies);
 
 	for (;;) {
 		/*
@@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		schedule();
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
-
+	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
 done:
 	lock_acquired(&lock->dep_map);
 	/* got the lock - rejoice! */
@@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-
+		trace_mutex_wakeup(waiter->task->pid, current->pid);
 		wake_up_process(waiter->task);
 	}
 
diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
index 2ee8a46..0c54d01 100644
--- a/ltt/probes/Makefile
+++ b/ltt/probes/Makefile
@@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
 endif
 
 obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
-				ipc-trace.o lockdep-trace.o rcu-trace.o
+				ipc-trace.o lockdep-trace.o rcu-trace.o \
+				mutex-trace.o
 
 ifeq ($(CONFIG_NET),y)
 ifdef CONFIG_FTRACE
diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
new file mode 100644
index 0000000..3b1ebeb
--- /dev/null
+++ b/ltt/probes/mutex-trace.c
@@ -0,0 +1,43 @@
+/*
+ * ltt/probes/fs-trace.c
+ *
+ * Mutex tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <trace/mutex.h>
+
+static unsigned long mutex_times[PID_MAX_DEFAULT+1];
+static int mutex_wakeup[PID_MAX_DEFAULT+1];
+
+void probe_mutex_lock_contention(int pid, unsigned long time)
+{
+	mutex_times[pid] = (unsigned long)time;	
+
+	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
+		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
+}
+
+void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
+{
+	unsigned long delay = time - mutex_times[pid];
+	int reason = mutex_wakeup[pid];
+
+	/* FIXME: why is name sometimes null? */
+	if (!name)
+		return;
+
+	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
+		probe_mutex_lock_contention_length, 
+		"name %s pid %d delay %lu cause %d", 
+		name, pid, delay, reason);
+}
+
+void probe_mutex_wakeup(int target_pid, int sender_pid)
+{
+	mutex_wakeup[target_pid] = sender_pid;
+
+	trace_mark_tp(mutex_wakeup, mutex_wakeup,
+		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
+}
+


# mutex.stp

probe kernel.mark("mutex_lock_contention") 
{ 
}

probe kernel.mark("mutex_wakeup") 
{ 
}

probe kernel.mark("mutex_lock_contention_length") 
{ 
	name = $arg1
	pid = $arg2
	delay = $arg3
	cause = $arg4

	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
		name, execname(), pid, delay, cause);
}

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2008-10-17 16:59 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-16 21:04 tracepoints for kernel/mutex.c Jason Baron
2008-10-16 21:34 ` Peter Zijlstra
2008-10-16 22:14   ` Thomas Gleixner
2008-10-17  5:09   ` KOSAKI Motohiro
2008-10-17  8:15     ` Peter Zijlstra
2008-10-17  8:22       ` KOSAKI Motohiro
2008-10-17 16:36     ` Mathieu Desnoyers
2008-10-17 14:48   ` Jason Baron
2008-10-17 16:43     ` Mathieu Desnoyers
2008-10-17 16:58       ` Frank Ch. Eigler
2008-10-17 16:48     ` Peter Zijlstra
2008-10-17 16:54       ` Mathieu Desnoyers
2008-10-16 22:10 ` Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox