public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Christoph Hellwig <hch@infradead.org>,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	Gregory Haskins <ghaskins@novell.com>,
	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	Tim Bird <tim.bird@am.sony.com>, Sam Ravnborg <sam@ravnborg.org>,
	"Frank Ch. Eigler" <fche@redhat.com>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Steven Rostedt <srostedt@redhat.com>
Subject: [RFC PATCH 27/30 v3] Add tracing of context switches
Date: Tue, 15 Jan 2008 15:49:34 -0500	[thread overview]
Message-ID: <20080115205026.402021623@goodmis.org> (raw)
In-Reply-To: 20080115204907.838227723@goodmis.org

[-- Attachment #1: trace-add-cmdline-switch.patch --]
[-- Type: text/plain, Size: 11224 bytes --]

This patch adds context switch tracing, of the format of:

                 _------=> CPU#            
                / _-----=> irqs-off        
               | / _----=> need-resched    
               || / _---=> hardirq/softirq 
               ||| / _--=> preempt-depth   
               |||| /                      
               |||||     delay             
   cmd     pid ||||| time  |   caller      
      \   /    |||||   \   |   /           
  <idle>-0     1d..3  361us!:  0:140:R --> 5028
    bash-5036  0d..3  552us+:  5036:120:D --> 0
  <idle>-0     0d..3  606us!:  0:140:R --> 5036
    sshd-5028  1d..3  943us!:  5028:120:S --> 0
  <idle>-0     1d..3 1316us+:  0:140:R --> 5028
    bash-5036  0d..3 1388us!:  5036:120:S --> 0
    sshd-5028  1d..3 1772us!:  5028:120:S --> 0

Two files are added to /debugfs/tracing

  sched_trace - outputs the above format.

  sched_trace_ctrl
     0 - turns off context switch tracing.
     1 - turns on context switch tracing.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
 lib/tracing/Kconfig              |   10 +
 lib/tracing/Makefile             |    1 
 lib/tracing/trace_sched_switch.c |  205 +++++++++++++++++++++++++++++++++++++++
 lib/tracing/tracer.c             |   31 +++++
 lib/tracing/tracer.h             |   18 +++
 5 files changed, 264 insertions(+), 1 deletion(-)

Index: linux-compile.git/lib/tracing/Kconfig
===================================================================
--- linux-compile.git.orig/lib/tracing/Kconfig	2008-01-15 11:13:54.000000000 -0500
+++ linux-compile.git/lib/tracing/Kconfig	2008-01-15 11:15:00.000000000 -0500
@@ -42,3 +42,13 @@ config CRITICAL_IRQSOFF_TIMING
 
 	      echo 0 > /debug/tracing/preempt_max_latency
 
+config CONTEXT_SWITCH_TRACER
+	bool "Trace process context switches"
+	depends on DEBUG_KERNEL
+	default n
+	select TRACING
+	select MARKERS
+	help
+	  This tracer hooks into the context switch and records
+	  all switching of tasks.
+
Index: linux-compile.git/lib/tracing/Makefile
===================================================================
--- linux-compile.git.orig/lib/tracing/Makefile	2008-01-15 11:12:31.000000000 -0500
+++ linux-compile.git/lib/tracing/Makefile	2008-01-15 11:14:44.000000000 -0500
@@ -1,6 +1,7 @@
 obj-$(CONFIG_MCOUNT) += libmcount.o
 
 obj-$(CONFIG_TRACING) += tracer.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_function.o
 obj-$(CONFIG_CRITICAL_IRQSOFF_TIMING) += trace_irqsoff.o
 
Index: linux-compile.git/lib/tracing/trace_sched_switch.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-compile.git/lib/tracing/trace_sched_switch.c	2008-01-15 11:14:44.000000000 -0500
@@ -0,0 +1,205 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/marker.h>
+#include <linux/mcount.h>
+
+#include "tracer.h"
+
+static struct tracing_trace sched_switch_trace __read_mostly;
+static DEFINE_PER_CPU(struct tracing_trace_cpu, sched_switch_trace_cpu);
+
+static int trace_enabled __read_mostly;
+
+static notrace void sched_switch_callback(const struct marker *mdata,
+					  void *private_data,
+					  const char *format, ...)
+{
+	struct tracing_trace *tr = mdata->private;
+	struct tracing_trace_cpu *data;
+	struct task_struct *prev = current;
+	int prev_pid, next_pid;
+	unsigned long flags;
+	va_list ap;
+	int cpu;
+
+	if (!trace_enabled)
+		return;
+
+	va_start(ap, format);
+
+	/* just use prev pointer and grab next. */
+	prev_pid = va_arg(ap, typeof(prev_pid));
+
+	/*
+	 * Unfortunately we are limited to just the
+	 * next_pid, and the marker doesn't give us
+	 * next itself.
+	 */
+	next_pid = va_arg(ap, typeof(next_pid));
+
+	/* Ignore prev_state, since we get that from prev itself */
+	va_end(ap);
+
+	raw_local_irq_save(flags);
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+	atomic_inc(&data->disabled);
+
+	if (likely(atomic_read(&data->disabled) == 1))
+		tracing_sched_switch_trace(tr, data, prev, next_pid, flags);
+
+	atomic_dec(&data->disabled);
+	raw_local_irq_restore(flags);
+}
+
+static notrace void sched_switch_reset(struct tracing_trace *tr)
+{
+	int cpu;
+
+	tr->time_start = now();
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr->data[cpu]);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void sched_switch_trace_ctrl_update(struct tracing_trace *tr,
+					   unsigned long val)
+{
+	val = !!val;
+
+	/* When starting a new trace, reset the buffers */
+	if (val)
+		sched_switch_reset(tr);
+
+	if (tr->ctrl ^ val) {
+		if (val)
+			trace_enabled = 1;
+		else
+			trace_enabled = 0;
+		tr->ctrl = val;
+	}
+}
+
+static __init void sched_switch_trace_init_debugfs(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	sched_switch_trace.ctrl_update = sched_switch_trace_ctrl_update;
+
+	entry = debugfs_create_file("sched_trace_ctrl", 0644, d_tracer,
+				    &sched_switch_trace, &tracing_ctrl_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'sched_trace_ctrl' entry\n");
+
+	entry = debugfs_create_file("sched_trace", 0444, d_tracer,
+				    &sched_switch_trace, &tracing_lt_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'sched_trace' entry\n");
+}
+
+#else
+static __init void sched_switch_trace_init_debugfs(void)
+{
+	/*
+	 * No way to turn on or off the trace function
+	 * without debugfs, so we just turn it on.
+	 */
+}
+#endif
+
+static void sched_switch_trace_open(struct tracing_iterator *iter)
+{
+	/* stop the trace while dumping */
+	if (iter->tr->ctrl)
+		trace_enabled = 0;
+}
+
+static void sched_switch_trace_close(struct tracing_iterator *iter)
+{
+	if (iter->tr->ctrl)
+		trace_enabled = 1;
+}
+
+__init static int sched_switch_trace_alloc_buffers(void)
+{
+	const int order = page_order(TRACING_NR_ENTRIES * TRACING_ENTRY_SIZE);
+	const unsigned long size = (1UL << order) << PAGE_SHIFT;
+	struct tracing_entry *array;
+	int ret;
+	int i;
+
+	for_each_possible_cpu(i) {
+		sched_switch_trace.data[i] =
+			&per_cpu(sched_switch_trace_cpu, i);
+		array = (struct tracing_entry *)
+			  __get_free_pages(GFP_KERNEL, order);
+		if (array == NULL) {
+			printk(KERN_ERR "sched_switch tracer: failed to"
+			       " allocate %ld bytes for trace buffer!\n", size);
+			goto free_buffers;
+		}
+		sched_switch_trace.data[i]->trace = array;
+	}
+
+	/*
+	 * Since we allocate by orders of pages, we may be able to
+	 * round up a bit.
+	 */
+	sched_switch_trace.entries = size / TRACING_ENTRY_SIZE;
+
+	pr_info("sched_switch tracer: %ld bytes allocated for %ld",
+		size, TRACING_NR_ENTRIES);
+	pr_info(" entries of %ld bytes\n", (long)TRACING_ENTRY_SIZE);
+	pr_info("   actual entries %ld\n", sched_switch_trace.entries);
+
+	sched_switch_trace_init_debugfs();
+
+	sched_switch_trace.open = sched_switch_trace_open;
+	sched_switch_trace.close = sched_switch_trace_close;
+
+	ret = marker_probe_register("kernel_sched_schedule",
+				    "prev_pid %d next_pid %d prev_state %ld",
+				    sched_switch_callback,
+				    &sched_switch_trace);
+	if (ret) {
+		pr_info("sched trace: Couldn't add marker"
+			" probe to switch_to\n");
+		goto out;
+	}
+
+	ret = marker_arm("kernel_sched_schedule");
+	if (ret) {
+		pr_info("sched trace: Couldn't arm probe switch_to\n");
+		goto out;
+	}
+
+ out:
+	return 0;
+
+ free_buffers:
+	for (i-- ; i >= 0; i--) {
+		struct tracing_trace_cpu *data = sched_switch_trace.data[i];
+
+		if (data && data->trace) {
+			free_pages((unsigned long)data->trace, order);
+			data->trace = NULL;
+		}
+	}
+	return -ENOMEM;
+}
+
+device_initcall(sched_switch_trace_alloc_buffers);
Index: linux-compile.git/lib/tracing/tracer.c
===================================================================
--- linux-compile.git.orig/lib/tracing/tracer.c	2008-01-15 11:12:31.000000000 -0500
+++ linux-compile.git/lib/tracing/tracer.c	2008-01-15 11:14:44.000000000 -0500
@@ -31,6 +31,7 @@ enum trace_type
 	__TRACE_FIRST_TYPE = 0,
 
 	TRACE_FN,
+	TRACE_CTX,
 
 	__TRACE_LAST_TYPE
 };
@@ -107,6 +108,24 @@ notrace void tracing_function_trace(stru
 	entry->fn.parent_ip = parent_ip;
 }
 
+notrace void tracing_sched_switch_trace(struct tracing_trace *tr,
+					struct tracing_trace_cpu *data,
+					struct task_struct *prev,
+					int next_pid,
+					unsigned long flags)
+{
+	struct tracing_entry *entry;
+
+	entry = tracing_get_trace_entry(tr, data);
+	tracing_generic_entry_update(entry, flags);
+	entry->type		= TRACE_CTX;
+	entry->ctx.prev_pid	= prev->pid;
+	entry->ctx.prev_prio	= prev->prio;
+	entry->ctx.prev_state	= prev->state;
+	entry->ctx.next_pid	= next_pid;
+	/* would like to save next prio, but we cant :-( */
+}
+
 #ifdef CONFIG_DEBUG_FS
 enum trace_iterator {
 	TRACE_ITER_SYM_ONLY	= 1,
@@ -414,6 +433,8 @@ lat_print_timestamp(struct seq_file *m, 
 		seq_puts(m, " : ");
 }
 
+static const char state_to_char[] = "RSDTtZX";
+
 static void notrace
 print_lat_fmt(struct seq_file *m, struct tracing_iterator *iter,
 	      unsigned int trace_idx, int cpu)
@@ -424,6 +445,7 @@ print_lat_fmt(struct seq_file *m, struct
 	unsigned long rel_usecs;
 	int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY);
 	int verbose = !!(trace_flags & TRACE_ITER_VERBOSE);
+	int S;
 
 	if (!next_entry)
 		next_entry = entry;
@@ -450,6 +472,15 @@ print_lat_fmt(struct seq_file *m, struct
 		seq_print_ip_sym(m, entry->fn.parent_ip, sym_only);
 		seq_puts(m, ")\n");
 		break;
+	case TRACE_CTX:
+		S = entry->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[entry->ctx.prev_state] : 'X';
+		seq_printf(m, " %d:%d:%c --> %d\n",
+			   entry->ctx.prev_pid,
+			   entry->ctx.prev_prio,
+			   S,
+			   entry->ctx.next_pid);
+		break;
 	}
 }
 
Index: linux-compile.git/lib/tracing/tracer.h
===================================================================
--- linux-compile.git.orig/lib/tracing/tracer.h	2008-01-15 11:12:31.000000000 -0500
+++ linux-compile.git/lib/tracing/tracer.h	2008-01-15 11:14:44.000000000 -0500
@@ -10,6 +10,14 @@ struct tracing_function {
 	unsigned long parent_ip;
 };
 
+struct tracing_sched_switch {
+	unsigned int prev_pid;
+	unsigned char prev_prio;
+	unsigned char prev_state;
+	unsigned int next_pid;
+	unsigned char next_prio;
+};
+
 struct tracing_entry {
 	char type;
 	char cpu;  /* who will want to trace more than 256 CPUS? */
@@ -18,7 +26,10 @@ struct tracing_entry {
 	int pid;
 	cycle_t t;
 	char comm[TASK_COMM_LEN];
-	struct tracing_function fn;
+	union {
+		struct tracing_function fn;
+		struct tracing_sched_switch ctx;
+	};
 };
 
 struct tracing_trace_cpu {
@@ -76,6 +87,11 @@ void tracing_function_trace(struct traci
 			    unsigned long ip,
 			    unsigned long parent_ip,
 			    unsigned long flags);
+void tracing_sched_switch_trace(struct tracing_trace *tr,
+				struct tracing_trace_cpu *data,
+				struct task_struct *prev,
+				int next_pid,
+				unsigned long flags);
 
 extern struct file_operations tracing_fops;
 extern struct file_operations tracing_lt_fops;

-- 

  parent reply	other threads:[~2008-01-15 20:55 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-15 20:49 [RFC PATCH 00/30 v3] mcount and latency tracing utility -v3 Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 01/30 v3] Add basic support for gcc profiler instrumentation Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 02/30 v3] Annotate core code that should not be traced Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 03/30 v3] x86_64: notrace annotations Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 04/30 v3] add notrace annotations to vsyscall Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 05/30 v3] add notrace annotations for NMI routines Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 06/30 v3] mcount based trace in the form of a header file library Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 07/30 v3] tracer add debugfs interface Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 08/30 v3] mcount tracer output file Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 09/30 v3] mcount tracer show task comm and pid Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 10/30 v3] Add a symbol only trace output Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 11/30 v3] Reset the tracer when started Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 12/30 v3] separate out the percpu date into a percpu struct Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 13/30 v3] handle accurate time keeping over long delays Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 14/30 v3] ppc clock accumulate fix Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 15/30 v3] Fixup merge between xtime_cache and timkkeeping starvation fix Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 16/30 v3] time keeping add cycle_raw for actual incrementation Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 17/30 v3] initialize the clock source to jiffies clock Steven Rostedt
2008-01-15 21:14   ` Mathieu Desnoyers
2008-01-15 21:27     ` Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 18/30 v3] add get_monotonic_cycles Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 19/30 v3] add notrace annotations to timing events Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 20/30 v3] Add timestamps to tracer Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 21/30 v3] Sort trace by timestamp Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 22/30 v3] speed up the output of the tracer Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 23/30 v3] Add latency_trace format tor tracer Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 24/30 v3] Split out specific tracing functions Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 25/30 v3] Trace irq disabled critical timings Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 26/30 v3] Add context switch marker to sched.c Steven Rostedt
2008-01-15 20:49 ` Steven Rostedt [this message]
2008-01-15 20:49 ` [RFC PATCH 28/30 v3] Generic command line storage Steven Rostedt
2008-01-15 21:30   ` Mathieu Desnoyers
2008-01-15 22:15     ` Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 29/30 v3] make varaible size buffers for traces Steven Rostedt
2008-01-15 20:49 ` [RFC PATCH 30/30 v3] trace preempt off critical timings Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080115205026.402021623@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@ghostprotocols.net \
    --cc=akpm@linux-foundation.org \
    --cc=fche@redhat.com \
    --cc=ghaskins@novell.com \
    --cc=hch@infradead.org \
    --cc=jan.kiszka@siemens.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mingo@elte.hu \
    --cc=sam@ravnborg.org \
    --cc=srostedt@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tim.bird@am.sony.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox