From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Christoph Hellwig <hch@infradead.org>,
Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
Gregory Haskins <ghaskins@novell.com>,
Arnaldo Carvalho de Melo <acme@ghostprotocols.net>,
Thomas Gleixner <tglx@linutronix.de>,
Tim Bird <tim.bird@am.sony.com>, Sam Ravnborg <sam@ravnborg.org>,
"Frank Ch. Eigler" <fche@redhat.com>,
Jan Kiszka <jan.kiszka@siemens.com>,
John Stultz <johnstul@us.ibm.com>,
Steven Rostedt <srostedt@redhat.com>
Subject: [RFC PATCH 17/23 -v4] Add tracing of context switches
Date: Mon, 21 Jan 2008 10:22:48 -0500 [thread overview]
Message-ID: <20080121152353.599676730@goodmis.org> (raw)
In-Reply-To: 20080121152231.579118762@goodmis.org
[-- Attachment #1: trace-add-cmdline-switch.patch --]
[-- Type: text/plain, Size: 11526 bytes --]
This patch adds context switch tracing, of the format of:
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | caller
\ / ||||| \ | /
swapper-0 1d..3 137us+: 0:140:R --> 2912:120
sshd-2912 1d..3 216us+: 2912:120:S --> 0:140
swapper-0 1d..3 261us+: 0:140:R --> 2912:120
bash-2920 0d..3 267us+: 2920:120:S --> 0:140
sshd-2912 1d..3 330us!: 2912:120:S --> 0:140
swapper-0 1d..3 2389us+: 0:140:R --> 2847:120
yum-upda-2847 1d..3 2411us!: 2847:120:S --> 0:140
swapper-0 0d..3 11089us+: 0:140:R --> 3139:120
gdm-bina-3139 0d..3 11113us!: 3139:120:S --> 0:140
swapper-0 1d..3 102328us+: 0:140:R --> 2847:120
yum-upda-2847 1d..3 102348us!: 2847:120:S --> 0:140
Two files are added to /debugfs/tracing
sched_trace - outputs the above format.
sched_trace_ctrl
0 - turns off context switch tracing.
1 - turns on context switch tracing.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
lib/tracing/Kconfig | 10 +
lib/tracing/Makefile | 1
lib/tracing/trace_sched_switch.c | 205 +++++++++++++++++++++++++++++++++++++++
lib/tracing/tracer.c | 32 ++++++
lib/tracing/tracer.h | 18 +++
5 files changed, 265 insertions(+), 1 deletion(-)
Index: linux-mcount.git/lib/tracing/Kconfig
===================================================================
--- linux-mcount.git.orig/lib/tracing/Kconfig 2008-01-21 09:59:15.000000000 -0500
+++ linux-mcount.git/lib/tracing/Kconfig 2008-01-21 10:03:57.000000000 -0500
@@ -24,3 +24,13 @@ config FUNCTION_TRACER
insert a call to an architecture specific __mcount routine,
that the debugging mechanism using this facility will hook by
providing a set of inline routines.
+
+config CONTEXT_SWITCH_TRACER
+ bool "Trace process context switches"
+ depends on DEBUG_KERNEL
+ default n
+ select TRACING
+ help
+ This tracer hooks into the context switch and records
+ all switching of tasks.
+
Index: linux-mcount.git/lib/tracing/Makefile
===================================================================
--- linux-mcount.git.orig/lib/tracing/Makefile 2008-01-21 09:59:15.000000000 -0500
+++ linux-mcount.git/lib/tracing/Makefile 2008-01-21 10:03:57.000000000 -0500
@@ -1,6 +1,7 @@
obj-$(CONFIG_MCOUNT) += libmcount.o
obj-$(CONFIG_TRACING) += tracer.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_function.o
libmcount-y := mcount.o
Index: linux-mcount.git/lib/tracing/trace_sched_switch.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-mcount.git/lib/tracing/trace_sched_switch.c 2008-01-21 10:03:57.000000000 -0500
@@ -0,0 +1,205 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/marker.h>
+#include <linux/mcount.h>
+
+#include "tracer.h"
+
+static struct tracing_trace sched_switch_trace __read_mostly;
+static DEFINE_PER_CPU(struct tracing_trace_cpu, sched_switch_trace_cpu);
+
+static int trace_enabled __read_mostly;
+static unsigned long trace_nr_entries = (16384UL);
+
+static int __init set_nr_entries(char *str)
+{
+ if (!str)
+ return 0;
+ trace_nr_entries = simple_strtoul(str, &str, 0);
+ return 1;
+}
+__setup("trace_ctx_entries=", set_nr_entries);
+
+static notrace void sched_switch_callback(const struct marker *mdata,
+ void *private_data,
+ const char *format, ...)
+{
+ struct tracing_trace *tr = mdata->private;
+ struct tracing_trace_cpu *data;
+ struct task_struct *prev;
+ struct task_struct *next;
+ unsigned long flags;
+ va_list ap;
+ int cpu;
+
+ if (likely(!trace_enabled))
+ return;
+
+ va_start(ap, format);
+ prev = va_arg(ap, typeof(prev));
+ next = va_arg(ap, typeof(next));
+ va_end(ap);
+
+ raw_local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ atomic_inc(&data->disabled);
+
+ if (likely(atomic_read(&data->disabled) == 1))
+ tracing_sched_switch_trace(tr, data, prev, next, flags);
+
+ atomic_dec(&data->disabled);
+ raw_local_irq_restore(flags);
+}
+
+static notrace void sched_switch_reset(struct tracing_trace *tr)
+{
+ int cpu;
+
+ tr->time_start = now();
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr->data[cpu]);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void sched_switch_trace_ctrl_update(struct tracing_trace *tr,
+ unsigned long val)
+{
+ val = !!val;
+
+ /* When starting a new trace, reset the buffers */
+ if (val)
+ sched_switch_reset(tr);
+
+ if (tr->ctrl ^ val) {
+ if (val)
+ trace_enabled = 1;
+ else
+ trace_enabled = 0;
+ tr->ctrl = val;
+ }
+}
+
+static __init void sched_switch_trace_init_debugfs(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ sched_switch_trace.ctrl_update = sched_switch_trace_ctrl_update;
+
+ entry = debugfs_create_file("sched_trace_ctrl", 0644, d_tracer,
+ &sched_switch_trace, &tracing_ctrl_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'sched_trace_ctrl' entry\n");
+
+ entry = debugfs_create_file("sched_trace", 0444, d_tracer,
+ &sched_switch_trace, &tracing_lt_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs 'sched_trace' entry\n");
+}
+
+#else
+static __init void sched_switch_trace_init_debugfs(void)
+{
+ /*
+ * No way to turn on or off the trace function
+ * without debugfs, so we just turn it on.
+ */
+}
+#endif
+
+static void sched_switch_trace_open(struct tracing_iterator *iter)
+{
+ /* stop the trace while dumping */
+ if (iter->tr->ctrl)
+ trace_enabled = 0;
+}
+
+static void sched_switch_trace_close(struct tracing_iterator *iter)
+{
+ if (iter->tr->ctrl)
+ trace_enabled = 1;
+}
+
+__init static int sched_switch_trace_alloc_buffers(void)
+{
+ const int order = page_order(trace_nr_entries * TRACING_ENTRY_SIZE);
+ const unsigned long size = (1UL << order) << PAGE_SHIFT;
+ struct tracing_entry *array;
+ int ret;
+ int i;
+
+ for_each_possible_cpu(i) {
+ sched_switch_trace.data[i] =
+ &per_cpu(sched_switch_trace_cpu, i);
+ array = (struct tracing_entry *)
+ __get_free_pages(GFP_KERNEL, order);
+ if (array == NULL) {
+ printk(KERN_ERR "sched_switch tracer: failed to"
+ " allocate %ld bytes for trace buffer!\n", size);
+ goto free_buffers;
+ }
+ sched_switch_trace.data[i]->trace = array;
+ }
+
+ /*
+ * Since we allocate by orders of pages, we may be able to
+ * round up a bit.
+ */
+ sched_switch_trace.entries = size / TRACING_ENTRY_SIZE;
+
+ pr_info("sched_switch tracer: %ld bytes allocated for %ld",
+ size, trace_nr_entries);
+ pr_info(" entries of %ld bytes\n", (long)TRACING_ENTRY_SIZE);
+ pr_info(" actual entries %ld\n", sched_switch_trace.entries);
+
+ sched_switch_trace_init_debugfs();
+
+ sched_switch_trace.open = sched_switch_trace_open;
+ sched_switch_trace.close = sched_switch_trace_close;
+
+ ret = marker_probe_register("kernel_sched_schedule",
+ "prev %p next %p",
+ sched_switch_callback,
+ &sched_switch_trace);
+ if (ret) {
+ pr_info("sched trace: Couldn't add marker"
+ " probe to switch_to\n");
+ goto out;
+ }
+
+ ret = marker_arm("kernel_sched_schedule");
+ if (ret) {
+ pr_info("sched trace: Couldn't arm probe switch_to\n");
+ goto out;
+ }
+
+ out:
+ return 0;
+
+ free_buffers:
+ for (i-- ; i >= 0; i--) {
+ struct tracing_trace_cpu *data = sched_switch_trace.data[i];
+
+ if (data && data->trace) {
+ free_pages((unsigned long)data->trace, order);
+ data->trace = NULL;
+ }
+ }
+ return -ENOMEM;
+}
+
+device_initcall(sched_switch_trace_alloc_buffers);
Index: linux-mcount.git/lib/tracing/tracer.c
===================================================================
--- linux-mcount.git.orig/lib/tracing/tracer.c 2008-01-21 10:01:49.000000000 -0500
+++ linux-mcount.git/lib/tracing/tracer.c 2008-01-21 10:03:57.000000000 -0500
@@ -31,6 +31,7 @@ enum trace_type {
__TRACE_FIRST_TYPE = 0,
TRACE_FN,
+ TRACE_CTX,
__TRACE_LAST_TYPE
};
@@ -107,6 +108,24 @@ notrace void tracing_function_trace(stru
entry->fn.parent_ip = parent_ip;
}
+notrace void tracing_sched_switch_trace(struct tracing_trace *tr,
+ struct tracing_trace_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags)
+{
+ struct tracing_entry *entry;
+
+ entry = tracing_get_trace_entry(tr, data);
+ tracing_generic_entry_update(entry, flags);
+ entry->type = TRACE_CTX;
+ entry->ctx.prev_pid = prev->pid;
+ entry->ctx.prev_prio = prev->prio;
+ entry->ctx.prev_state = prev->state;
+ entry->ctx.next_pid = next->pid;
+ entry->ctx.next_prio = next->prio;
+}
+
#ifdef CONFIG_DEBUG_FS
enum trace_iterator {
TRACE_ITER_SYM_ONLY = 1,
@@ -414,6 +433,8 @@ lat_print_timestamp(struct seq_file *m,
seq_puts(m, " : ");
}
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
static void notrace
print_lat_fmt(struct seq_file *m, struct tracing_iterator *iter,
unsigned int trace_idx, int cpu)
@@ -424,6 +445,7 @@ print_lat_fmt(struct seq_file *m, struct
unsigned long rel_usecs;
int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY);
int verbose = !!(trace_flags & TRACE_ITER_VERBOSE);
+ int S;
if (!next_entry)
next_entry = entry;
@@ -450,6 +472,16 @@ print_lat_fmt(struct seq_file *m, struct
seq_print_ip_sym(m, entry->fn.parent_ip, sym_only);
seq_puts(m, ")\n");
break;
+ case TRACE_CTX:
+ S = entry->ctx.prev_state < sizeof(state_to_char) ?
+ state_to_char[entry->ctx.prev_state] : 'X';
+ seq_printf(m, " %d:%d:%c --> %d:%d\n",
+ entry->ctx.prev_pid,
+ entry->ctx.prev_prio,
+ S,
+ entry->ctx.next_pid,
+ entry->ctx.next_prio);
+ break;
}
}
Index: linux-mcount.git/lib/tracing/tracer.h
===================================================================
--- linux-mcount.git.orig/lib/tracing/tracer.h 2008-01-21 09:59:15.000000000 -0500
+++ linux-mcount.git/lib/tracing/tracer.h 2008-01-21 10:03:57.000000000 -0500
@@ -10,6 +10,14 @@ struct tracing_function {
unsigned long parent_ip;
};
+struct tracing_sched_switch {
+ unsigned int prev_pid;
+ unsigned char prev_prio;
+ unsigned char prev_state;
+ unsigned int next_pid;
+ unsigned char next_prio;
+};
+
struct tracing_entry {
char type;
char cpu; /* who will want to trace more than 256 CPUS? */
@@ -18,7 +26,10 @@ struct tracing_entry {
int pid;
cycle_t t;
char comm[TASK_COMM_LEN];
- struct tracing_function fn;
+ union {
+ struct tracing_function fn;
+ struct tracing_sched_switch ctx;
+ };
};
struct tracing_trace_cpu {
@@ -75,6 +86,11 @@ void tracing_function_trace(struct traci
unsigned long ip,
unsigned long parent_ip,
unsigned long flags);
+void tracing_sched_switch_trace(struct tracing_trace *tr,
+ struct tracing_trace_cpu *data,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long flags);
extern struct file_operations tracing_fops;
extern struct file_operations tracing_lt_fops;
--
next prev parent reply other threads:[~2008-01-21 15:30 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-21 15:22 [RFC PATCH 00/23 -v4] mcount and latency tracing utility -v4 Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 01/23 -v4] Add basic support for gcc profiler instrumentation Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 02/23 -v4] Annotate core code that should not be traced Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 03/23 -v4] x86_64: notrace annotations Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 04/23 -v4] add notrace annotations to vsyscall Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 05/23 -v4] add notrace annotations for NMI routines Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 06/23 -v4] handle accurate time keeping over long delays Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 07/23 -v4] ppc clock accumulate fix Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 08/23 -v4] Fixup merge between xtime_cache and timkkeeping starvation fix Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 09/23 -v4] time keeping add cycle_raw for actual incrementation Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 10/23 -v4] initialize the clock source to jiffies clock Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 11/23 -v4] add get_monotonic_cycles Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 12/23 -v4] Use RCU algorithm for monotonic cycles Steven Rostedt
2008-01-22 0:20 ` Nick Piggin
2008-01-22 0:49 ` Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 13/23 -v4] add notrace annotations to timing events Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 14/23 -v4] mcount based trace in the form of a header file library Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 15/23 -v4] Add context switch marker to sched.c Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 16/23 -v4] Make the task State char-string visible to all Steven Rostedt
2008-01-21 15:22 ` Steven Rostedt [this message]
2008-01-21 15:22 ` [RFC PATCH 18/23 -v4] Generic command line storage Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 19/23 -v4] trace generic call to schedule switch Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 20/23 -v4] Add marker in try_to_wake_up Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 21/23 -v4] mcount tracer for wakeup latency timings Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 22/23 -v4] Trace irq disabled critical timings Steven Rostedt
2008-01-21 15:22 ` [RFC PATCH 23/23 -v4] trace preempt off " Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080121152353.599676730@goodmis.org \
--to=rostedt@goodmis.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@ghostprotocols.net \
--cc=akpm@linux-foundation.org \
--cc=fche@redhat.com \
--cc=ghaskins@novell.com \
--cc=hch@infradead.org \
--cc=jan.kiszka@siemens.com \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@polymtl.ca \
--cc=mingo@elte.hu \
--cc=sam@ravnborg.org \
--cc=srostedt@redhat.com \
--cc=tglx@linutronix.de \
--cc=tim.bird@am.sony.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.