public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Baron <jbaron@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: compudj@krystal.dyndns.org, fche@redhat.com, fweisbec@gmail.com,
	edwintorok@gmail.com, mingo@elte.hu, peterz@infradead.org
Subject: tracepoints for kernel/mutex.c
Date: Thu, 16 Oct 2008 17:04:30 -0400	[thread overview]
Message-ID: <20081016210430.GC25738@redhat.com> (raw)

hi,

Below are 3 tracepoints I've been playing with in kernel/mutex.c using
a SystemTap script. The idea is to detect and determine the cause of
lock contention. Currently I get the following output:

<contended mutex nam> <process name and pid of the contention> <time of
contention> <pid that woke me up(caused the contention?)>

for example:

name: &inode->i_mutex, pid name: udevd, pid: 26567, jiffies: 1 cause:
26566
 

Using SystemTap I can now modify my script to figure out the inode number, the 
file and pathname, or the backtrace and get to the bottom of the
contention.

I think this approach has a number of advantages. It has low
overhead in the off case, since its based on tracepoints. It is
minimally invasive in the code path (3 tracepoints). It also allows me
to explore data structures and parts of the kernel by simply modifying
the SystemTap script. I do not need to re-compile the kernel and reboot.

You can run the script via: $stap mutex.stp

thanks,

-Jason


diff --git a/include/trace/mutex.h b/include/trace/mutex.h
new file mode 100644
index 0000000..8d365d6
--- /dev/null
+++ b/include/trace/mutex.h
@@ -0,0 +1,10 @@
+#ifndef _TRACE_MUTEX_H
+#define _TRACE_MUTEX_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(mutex_lock_contention, TPPROTO(int pid, unsigned long time), TPARGS(pid, time));
+DEFINE_TRACE(mutex_lock_contention_length, TPPROTO(char *name, int pid, unsigned long time), TPARGS(name, pid, time));
+DEFINE_TRACE(mutex_wakeup, TPPROTO(int target_pid, int sender_pid), TPARGS(target_pid, sender_pid));
+
+#endif
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160..847a9fb 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -97,6 +97,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
 #endif
 	lock->owner = NULL;
 	lock->magic = lock;
+	lock->name = name;
 }
 
 /***
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779d..62c2aab 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
+#include <trace/mutex.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -147,6 +148,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		goto done;
 
 	lock_contended(&lock->dep_map, ip);
+	trace_mutex_lock_contention((int)task->pid, jiffies);
 
 	for (;;) {
 		/*
@@ -182,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		schedule();
 		spin_lock_mutex(&lock->wait_lock, flags);
 	}
-
+	trace_mutex_lock_contention_length(lock->name, (int)task->pid, jiffies);
 done:
 	lock_acquired(&lock->dep_map);
 	/* got the lock - rejoice! */
@@ -256,7 +258,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-
+		trace_mutex_wakeup(waiter->task->pid, current->pid);
 		wake_up_process(waiter->task);
 	}
 
diff --git a/ltt/probes/Makefile b/ltt/probes/Makefile
index 2ee8a46..0c54d01 100644
--- a/ltt/probes/Makefile
+++ b/ltt/probes/Makefile
@@ -10,7 +10,8 @@ CFLAGS_REMOVE_rcu-trace.o = -pg
 endif
 
 obj-$(CONFIG_LTT_TRACEPROBES)	+= kernel-trace.o mm-trace.o fs-trace.o \
-				ipc-trace.o lockdep-trace.o rcu-trace.o
+				ipc-trace.o lockdep-trace.o rcu-trace.o \
+				mutex-trace.o
 
 ifeq ($(CONFIG_NET),y)
 ifdef CONFIG_FTRACE
diff --git a/ltt/probes/mutex-trace.c b/ltt/probes/mutex-trace.c
new file mode 100644
index 0000000..3b1ebeb
--- /dev/null
+++ b/ltt/probes/mutex-trace.c
@@ -0,0 +1,43 @@
+/*
+ * ltt/probes/fs-trace.c
+ *
+ * Mutex tracepoint probes.
+ */
+
+#include <linux/module.h>
+#include <trace/mutex.h>
+
+static unsigned long mutex_times[PID_MAX_DEFAULT+1];
+static int mutex_wakeup[PID_MAX_DEFAULT+1];
+
+void probe_mutex_lock_contention(int pid, unsigned long time)
+{
+	mutex_times[pid] = (unsigned long)time;	
+
+	trace_mark_tp(mutex_lock_contention, mutex_lock_contention,
+		probe_mutex_lock_contention, "pid %d time %lu", pid, time);
+}
+
+void probe_mutex_lock_contention_length(char *name, int pid, unsigned long time)
+{
+	unsigned long delay = time - mutex_times[pid];
+	int reason = mutex_wakeup[pid];
+
+	/* FIXME: why is name sometimes null? */
+	if (!name)
+		return;
+
+	trace_mark_tp(mutex_lock_contention_length, mutex_lock_contention_length,
+		probe_mutex_lock_contention_length, 
+		"name %s pid %d delay %lu cause %d", 
+		name, pid, delay, reason);
+}
+
+void probe_mutex_wakeup(int target_pid, int sender_pid)
+{
+	mutex_wakeup[target_pid] = sender_pid;
+
+	trace_mark_tp(mutex_wakeup, mutex_wakeup,
+		probe_mutex_wakeup, "pid %d pid %d", target_pid, sender_pid);
+}
+


# mutex.stp

probe kernel.mark("mutex_lock_contention") 
{ 
}

probe kernel.mark("mutex_wakeup") 
{ 
}

probe kernel.mark("mutex_lock_contention_length") 
{ 
	name = $arg1
	pid = $arg2
	delay = $arg3
	cause = $arg4

	printf("name: %s, pid name: %s, pid: %d, jiffies: %d cause: %d\n", 
		name, execname(), pid, delay, cause);
}

             reply	other threads:[~2008-10-16 21:05 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-16 21:04 Jason Baron [this message]
2008-10-16 21:34 ` tracepoints for kernel/mutex.c Peter Zijlstra
2008-10-16 22:14   ` Thomas Gleixner
2008-10-17  5:09   ` KOSAKI Motohiro
2008-10-17  8:15     ` Peter Zijlstra
2008-10-17  8:22       ` KOSAKI Motohiro
2008-10-17 16:36     ` Mathieu Desnoyers
2008-10-17 14:48   ` Jason Baron
2008-10-17 16:43     ` Mathieu Desnoyers
2008-10-17 16:58       ` Frank Ch. Eigler
2008-10-17 16:48     ` Peter Zijlstra
2008-10-17 16:54       ` Mathieu Desnoyers
2008-10-16 22:10 ` Mathieu Desnoyers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081016210430.GC25738@redhat.com \
    --to=jbaron@redhat.com \
    --cc=compudj@krystal.dyndns.org \
    --cc=edwintorok@gmail.com \
    --cc=fche@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox