utrace-based ftrace "process" engine, v2

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Frank Ch. Eigler" <fche@redhat.com>
To: Roland McGrath <roland@redhat.com>
Cc: utrace-devel@redhat.com, linux-kernel@vger.kernel.org,
	systemtap@sources.redhat.com
Subject: utrace-based ftrace "process" engine, v2
Date: Sun, 22 Feb 2009 17:22:27 -0500	[thread overview]
Message-ID: <20090222222227.GB31207@redhat.com> (raw)
In-Reply-To: <20090209072218.214FBFC35D@magilla.sf.frob.com>

Hi -

This is v2 of the prototype utrace-ftrace interface.  This code is
based on Roland McGrath's utrace API, which provides programmatic
hooks to the in-tree tracehook layer.  This new patch interfaces many
of those events to ftrace, as configured by a small number of debugfs
controls.  Here's the /debugfs/tracing/process_trace_README:

process event tracer mini-HOWTO

1. Select process hierarchy to monitor.  Other processes will be
   completely unaffected.  Leave at 0 for system-wide tracing.
#  echo NNN > process_follow_pid

2. Determine which process event traces are potentially desired.
   syscall and signal tracing slow down monitored processes.
#  echo 0 > process_trace_{syscalls,signals,lifecycle}

3. Add any final uid- or taskcomm-based filtering.  Non-matching
   processes will skip trace messages, but will still be slowed.
#  echo NNN > process_trace_uid_filter # -1: unrestricted 
#  echo ls > process_trace_taskcomm_filter # empty: unrestricted

4. Start tracing.
#  echo process > current_tracer

5. Examine trace.
#  cat trace

6. Stop tracing.
#  echo nop > current_tracer


Signed-off-By: Frank Ch. Eigler <fche@redhat.com>
---

 include/linux/processtrace.h |   41 +++
 kernel/trace/Kconfig         |    9 +
 kernel/trace/Makefile        |    1 +
 kernel/trace/trace.h         |   30 ++-
 kernel/trace/trace_process.c |  591 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 661 insertions(+), 11 deletions(-)

diff --git a/include/linux/processtrace.h b/include/linux/processtrace.h
new file mode 100644
index 0000000..f2b7d94
--- /dev/null
+++ b/include/linux/processtrace.h
@@ -0,0 +1,41 @@
+#ifndef PROCESSTRACE_H
+#define PROCESSTRACE_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct process_trace_entry {
+	unsigned char opcode;	/* one of _UTRACE_EVENT_* */
+	char comm[TASK_COMM_LEN]; /* XXX: should be in/via trace_entry */
+	union {
+		struct {
+			pid_t child;
+			unsigned long flags;
+		} trace_clone;
+		struct {
+			long code;
+		} trace_exit;
+		struct {
+		} trace_exec;
+		struct {
+			int si_signo;
+			int si_errno;
+			int si_code;
+		} trace_signal;
+		struct {
+			long callno;
+			unsigned long args[6];
+		} trace_syscall_entry;
+		struct {
+			long rc;
+			long error;
+		} trace_syscall_exit;
+	};
+};
+
+/* in kernel/trace/trace_process.c */
+
+extern void enable_process_trace(void);
+extern void disable_process_trace(void);
+
+#endif /* PROCESSTRACE_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..3ff727e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -149,6 +149,15 @@ config CONTEXT_SWITCH_TRACER
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config PROCESS_TRACER
+	bool "Trace process events via utrace"
+	depends on DEBUG_KERNEL
+	select TRACING
+	select UTRACE
+	help
+	  This tracer provides trace records from process events
+	  accessible to utrace: lifecycle, system calls, and signals.
+
 config BOOT_TRACER
 	bool "Trace boot initcalls"
 	depends on DEBUG_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a9..a774db2 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_PROCESS_TRACER) += trace_process.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381..b4ebccb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,6 +7,7 @@
 #include <linux/clocksource.h>
 #include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
+#include <linux/processtrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 
@@ -30,6 +31,7 @@ enum trace_type {
 	TRACE_USER_STACK,
 	TRACE_HW_BRANCHES,
 	TRACE_POWER,
+	TRACE_PROCESS,
 
 	__TRACE_LAST_TYPE
 };
@@ -38,7 +40,7 @@ enum trace_type {
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
  *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ *     bash-15816 [01]	 235.197585: idle_cpu <- irq_enter
  */
 struct trace_entry {
 	unsigned char		type;
@@ -153,7 +155,7 @@ struct trace_boot_ret {
 #define TRACE_FILE_SIZE 20
 struct trace_branch {
 	struct trace_entry	ent;
-	unsigned	        line;
+	unsigned		line;
 	char			func[TRACE_FUNC_SIZE+1];
 	char			file[TRACE_FILE_SIZE+1];
 	char			correct;
@@ -170,11 +172,16 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+struct trace_process {
+	struct trace_entry		ent;
+	struct process_trace_entry	event;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
  *  IRQS_OFF		- interrupts were disabled
- *  IRQS_NOSUPPORT 	- arch does not support irqs_disabled_flags
+ *  IRQS_NOSUPPORT	- arch does not support irqs_disabled_flags
  *  NEED_RESCED		- reschedule is requested
  *  HARDIRQ		- inside an interrupt handler
  *  SOFTIRQ		- inside a softirq handler
@@ -279,7 +286,8 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
- 		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct trace_process, TRACE_PROCESS); \
 		__ftrace_bad_type();					\
 	} while (0)
 
@@ -297,8 +305,8 @@ enum print_line_t {
  * flags value in struct tracer_flags.
  */
 struct tracer_opt {
-	const char 	*name; /* Will appear on the trace_options file */
-	u32 		bit; /* Mask assigned in val field in tracer_flags */
+	const char	*name; /* Will appear on the trace_options file */
+	u32		bit; /* Mask assigned in val field in tracer_flags */
 };
 
 /*
@@ -307,7 +315,7 @@ struct tracer_opt {
  */
 struct tracer_flags {
 	u32			val;
-	struct tracer_opt 	*opts;
+	struct tracer_opt	*opts;
 };
 
 /* Makes more easy to define a tracer opt */
@@ -339,7 +347,7 @@ struct tracer {
 	int			(*set_flag)(u32 old_flags, u32 bit, int set);
 	struct tracer		*next;
 	int			print_max;
-	struct tracer_flags 	*flags;
+	struct tracer_flags	*flags;
 };
 
 struct trace_seq {
@@ -561,7 +569,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
  * positions into trace_flags that controls the output.
  *
  * NOTE: These bits must match the trace_options array in
- *       trace.c.
+ *	 trace.c.
  */
 enum trace_iterator_flags {
 	TRACE_ITER_PRINT_PARENT		= 0x01,
@@ -578,8 +586,8 @@ enum trace_iterator_flags {
 	TRACE_ITER_PREEMPTONLY		= 0x800,
 	TRACE_ITER_BRANCH		= 0x1000,
 	TRACE_ITER_ANNOTATE		= 0x2000,
-	TRACE_ITER_USERSTACKTRACE       = 0x4000,
-	TRACE_ITER_SYM_USEROBJ          = 0x8000,
+	TRACE_ITER_USERSTACKTRACE	= 0x4000,
+	TRACE_ITER_SYM_USEROBJ		= 0x8000,
 	TRACE_ITER_PRINTK_MSGONLY	= 0x10000
 };
 
diff --git a/kernel/trace/trace_process.c b/kernel/trace/trace_process.c
new file mode 100644
index 0000000..038ff36
--- /dev/null
+++ b/kernel/trace/trace_process.c
@@ -0,0 +1,591 @@
+/*
+ * utrace-based process event tracing
+ * Copyright (C) 2009 Red Hat Inc.
+ * By Frank Ch. Eigler <fche@redhat.com>
+ *
+ * Based on mmio ftrace engine by Pekka Paalanen
+ * and utrace-syscall-tracing prototype by Ananth Mavinakayanahalli
+ */
+
+/* #define DEBUG 1 */
+
+#include <linux/kernel.h>
+#include <linux/utrace.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <asm/syscall.h>
+
+#include "trace.h"
+
+/* A process must match these filters in order to be traced. */
+static char trace_taskcomm_filter[TASK_COMM_LEN]; /* \0: unrestricted */
+static u32 trace_taskuid_filter = -1; /* -1: unrestricted */
+static u32 trace_lifecycle_p = 1;
+static u32 trace_syscalls_p = 1;
+static u32 trace_signals_p = 1;
+
+/* A process must be a direct child of given pid in order to be
+   followed. */
+static u32 process_follow_pid; /* 0: unrestricted/systemwide */
+
+/* XXX: lock the above? */
+
+
+/* trace data collection */
+
+static struct trace_array *process_trace_array;
+
+static void process_reset_data(struct trace_array *tr)
+{
+	pr_debug("in %s\n", __func__);
+	tracing_reset_online_cpus(tr);
+}
+
+static int process_trace_init(struct trace_array *tr)
+{
+	pr_debug("in %s\n", __func__);
+	process_trace_array = tr;
+	process_reset_data(tr);
+	enable_process_trace();
+	return 0;
+}
+
+static void process_trace_reset(struct trace_array *tr)
+{
+	pr_debug("in %s\n", __func__);
+	disable_process_trace();
+	process_reset_data(tr);
+	process_trace_array = NULL;
+}
+
+static void process_trace_start(struct trace_array *tr)
+{
+	pr_debug("in %s\n", __func__);
+	process_reset_data(tr);
+}
+
+static void __trace_processtrace(struct trace_array *tr,
+				struct trace_array_cpu *data,
+				struct process_trace_entry *ent)
+{
+	struct ring_buffer_event *event;
+	struct trace_process *entry;
+	unsigned long irq_flags;
+
+	event	= ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					   &irq_flags);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+	entry->ent.cpu			= raw_smp_processor_id();
+	entry->ent.type			= TRACE_PROCESS;
+	strlcpy(ent->comm, current->comm, TASK_COMM_LEN);
+	entry->event			= *ent;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+
+void process_trace(struct process_trace_entry *ent)
+{
+	struct trace_array *tr = process_trace_array;
+	struct trace_array_cpu *data;
+
+	preempt_disable();
+	data = tr->data[smp_processor_id()];
+	__trace_processtrace(tr, data, ent);
+	preempt_enable();
+}
+
+
+/* trace data rendering */
+
+static void process_pipe_open(struct trace_iterator *iter)
+{
+	struct trace_seq *s = &iter->seq;
+	pr_debug("in %s\n", __func__);
+	trace_seq_printf(s, "VERSION 200901\n");
+}
+
+static void process_close(struct trace_iterator *iter)
+{
+	iter->private = NULL;
+}
+
+static ssize_t process_read(struct trace_iterator *iter, struct file *filp,
+				char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	ssize_t ret;
+	struct trace_seq *s = &iter->seq;
+	ret = trace_seq_to_user(s, ubuf, cnt);
+	return (ret == -EBUSY) ? 0 : ret;
+}
+
+static enum print_line_t process_print(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_process *field;
+	struct trace_seq *s	= &iter->seq;
+	unsigned long long t	= ns2usecs(iter->ts);
+	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned secs		= (unsigned long)t;
+	int ret = 1;
+
+	trace_assign_type(field, entry);
+
+	/* XXX: If print_lat_fmt() were not static, we wouldn't have
+	   to duplicate this. */
+	trace_seq_printf(s, "%16s %5d %3d %9lu.%06ld ",
+			 field->event.comm,
+			 entry->pid, entry->cpu,
+			 secs,
+			 usec_rem);
+
+	switch (field->event.opcode) {
+	case _UTRACE_EVENT_CLONE:
+		ret = trace_seq_printf(s, "fork %d flags 0x%lx\n",
+				       field->event.trace_clone.child,
+				       field->event.trace_clone.flags);
+		break;
+	case _UTRACE_EVENT_EXEC:
+		ret = trace_seq_printf(s, "exec\n");
+		break;
+	case _UTRACE_EVENT_EXIT:
+		ret = trace_seq_printf(s, "exit %ld\n",
+				       field->event.trace_exit.code);
+		break;
+	case _UTRACE_EVENT_SIGNAL:
+		ret = trace_seq_printf(s, "signal %d errno %d code 0x%x\n",
+				       field->event.trace_signal.si_signo,
+				       field->event.trace_signal.si_errno,
+				       field->event.trace_signal.si_code);
+		break;
+	case _UTRACE_EVENT_SYSCALL_ENTRY:
+		ret = trace_seq_printf(s, "syscall %ld [0x%lx 0x%lx 0x%lx 0x%lx 0x%lx]\n",
+				       field->event.trace_syscall_entry.callno,
+				       field->event.trace_syscall_entry.args[0],
+				       field->event.trace_syscall_entry.args[1],
+				       field->event.trace_syscall_entry.args[2],
+				       field->event.trace_syscall_entry.args[3],
+				       field->event.trace_syscall_entry.args[4],
+				       field->event.trace_syscall_entry.args[5]);
+		break;
+	case _UTRACE_EVENT_SYSCALL_EXIT:
+		ret = trace_seq_printf(s, "syscall rc %ld error %ld\n",
+				       field->event.trace_syscall_exit.rc,
+				       field->event.trace_syscall_exit.error);
+		break;
+	default:
+		ret = trace_seq_printf(s, "process code %d?\n",
+				       field->event.opcode);
+		break;
+	}
+	if (ret)
+		return TRACE_TYPE_HANDLED;
+	return TRACE_TYPE_HANDLED;
+}
+
+
+static enum print_line_t process_print_line(struct trace_iterator *iter)
+{
+	switch (iter->ent->type) {
+	case TRACE_PROCESS:
+		return process_print(iter);
+	default:
+		return TRACE_TYPE_HANDLED; /* ignore unknown entries */
+	}
+}
+
+static struct tracer process_tracer = {
+	.name		= "process",
+	.init		= process_trace_init,
+	.reset		= process_trace_reset,
+	.start		= process_trace_start,
+	.pipe_open	= process_pipe_open,
+	.close		= process_close,
+	.read		= process_read,
+	.print_line	= process_print_line,
+};
+
+
+
+/* utrace backend */
+
+/* Should tracing apply to given task?	Compare against filter
+   values. */
+static int trace_test(struct task_struct *tsk)
+{
+	if (trace_taskcomm_filter[0]
+	    && strncmp(trace_taskcomm_filter, tsk->comm, TASK_COMM_LEN))
+		return 0;
+
+	if (trace_taskuid_filter != (u32)-1
+	    && trace_taskuid_filter != task_uid(tsk))
+		return 0;
+
+	return 1;
+}
+
+
+static const struct utrace_engine_ops process_trace_ops;
+
+static void process_trace_tryattach(struct task_struct *tsk)
+{
+	struct utrace_attached_engine *engine;
+
+	pr_debug("in %s\n", __func__);
+	engine = utrace_attach_task(tsk,
+				    UTRACE_ATTACH_CREATE |
+				    UTRACE_ATTACH_EXCLUSIVE,
+				    &process_trace_ops, NULL);
+	if (IS_ERR(engine) || (engine == NULL)) {
+		pr_warning("utrace_attach_task %d (rc %p)\n",
+			   tsk->pid, engine);
+	} else {
+		int rc;
+
+		/* We always hook cost-free events. */
+		unsigned long events =
+			UTRACE_EVENT(CLONE) |
+			UTRACE_EVENT(EXEC) |
+			UTRACE_EVENT(EXIT);
+
+		/* Penalizing events are individually controlled, so that
+		   utrace doesn't even take the monitored threads off their
+		   fast paths, nor bother call our callbacks. */
+		if (trace_syscalls_p)
+			events |= UTRACE_EVENT_SYSCALL;
+		if (trace_signals_p)
+			events |= UTRACE_EVENT_SIGNAL_ALL;
+
+		rc = utrace_set_events(tsk, engine, events);
+		if (rc == -EINPROGRESS)
+			rc = utrace_barrier(tsk, engine);
+		if (rc)
+			pr_warning("utrace_set_events/barrier rc %d\n", rc);
+
+		utrace_engine_put(engine);
+		pr_debug("attached in %s to %s(%d)\n", __func__,
+			 tsk->comm, tsk->pid);
+	}
+}
+
+
+u32 process_trace_report_clone(enum utrace_resume_action action,
+			       struct utrace_attached_engine *engine,
+			       struct task_struct *parent,
+			       unsigned long clone_flags,
+			       struct task_struct *child)
+{
+	if (trace_lifecycle_p && trace_test(parent)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_CLONE;
+		ent.trace_clone.child = child->pid;
+		ent.trace_clone.flags = clone_flags;
+		process_trace(&ent);
+	}
+
+	process_trace_tryattach(child);
+
+	return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_syscall_entry(u32 action,
+				       struct utrace_attached_engine *engine,
+				       struct task_struct *task,
+				       struct pt_regs *regs)
+{
+	if (trace_syscalls_p && trace_test(task)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_SYSCALL_ENTRY;
+		ent.trace_syscall_entry.callno = syscall_get_nr(task, regs);
+		syscall_get_arguments(task, regs, 0, 6,
+				      ent.trace_syscall_entry.args);
+		process_trace(&ent);
+	}
+
+	return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_syscall_exit(enum utrace_resume_action action,
+				   struct utrace_attached_engine *engine,
+				   struct task_struct *task,
+				   struct pt_regs *regs)
+{
+	if (trace_syscalls_p && trace_test(task)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_SYSCALL_EXIT;
+		ent.trace_syscall_exit.rc = syscall_get_return_value(task, regs);
+		ent.trace_syscall_exit.error = syscall_get_error(task, regs);
+		process_trace(&ent);
+	}
+
+	return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_exec(enum utrace_resume_action action,
+			      struct utrace_attached_engine *engine,
+			      struct task_struct *task,
+			      const struct linux_binfmt *fmt,
+			      const struct linux_binprm *bprm,
+			      struct pt_regs *regs)
+{
+	if (trace_lifecycle_p && trace_test(task)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_EXEC;
+		process_trace(&ent);
+	}
+
+	/* We're already attached; no need for a new tryattach. */
+
+	return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_signal(u32 action,
+				struct utrace_attached_engine *engine,
+				struct task_struct *task,
+				struct pt_regs *regs,
+				siginfo_t *info,
+				const struct k_sigaction *orig_ka,
+				struct k_sigaction *return_ka)
+{
+	if (trace_signals_p && trace_test(task)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_SIGNAL;
+		ent.trace_signal.si_signo = info->si_signo;
+		ent.trace_signal.si_errno = info->si_errno;
+		ent.trace_signal.si_code = info->si_code;
+		process_trace(&ent);
+	}
+
+	/* We're already attached, so no need for a new tryattach. */
+
+	return UTRACE_RESUME | utrace_signal_action(action);
+}
+
+
+u32 process_trace_report_exit(enum utrace_resume_action action,
+			      struct utrace_attached_engine *engine,
+			      struct task_struct *task,
+			      long orig_code, long *code)
+{
+	if (trace_lifecycle_p && trace_test(task)) {
+		struct process_trace_entry ent;
+		ent.opcode = _UTRACE_EVENT_EXIT;
+		ent.trace_exit.code = orig_code;
+		process_trace(&ent);
+	}
+
+	/* There is no need to explicitly attach or detach here. */
+
+	return UTRACE_RESUME;
+}
+
+
+void enable_process_trace()
+{
+	struct task_struct *grp, *tsk;
+
+	pr_debug("in %s\n", __func__);
+	rcu_read_lock();
+	do_each_thread(grp, tsk) {
+		/* Skip over kernel threads. */
+		if (tsk->flags & PF_KTHREAD)
+			continue;
+
+		if (process_follow_pid) {
+			if (tsk->tgid == process_follow_pid ||
+			    tsk->parent->tgid == process_follow_pid)
+				process_trace_tryattach(tsk);
+		} else {
+			process_trace_tryattach(tsk);
+		}
+	} while_each_thread(grp, tsk);
+	rcu_read_unlock();
+}
+
+void disable_process_trace()
+{
+	struct utrace_attached_engine *engine;
+	struct task_struct *grp, *tsk;
+	int rc;
+
+	pr_debug("in %s\n", __func__);
+	rcu_read_lock();
+	do_each_thread(grp, tsk) {
+		/* Find matching engine, if any.  Returns -ENOENT for
+		   unattached threads. */
+		engine = utrace_attach_task(tsk, UTRACE_ATTACH_MATCH_OPS,
+					    &process_trace_ops, 0);
+		if (IS_ERR(engine)) {
+			if (PTR_ERR(engine) != -ENOENT)
+				pr_warning("utrace_attach_task %d (rc %ld)\n",
+					   tsk->pid, -PTR_ERR(engine));
+		} else if (engine == NULL) {
+			pr_warning("utrace_attach_task %d (null engine)\n",
+				   tsk->pid);
+		} else {
+			/* Found one of our own engines.  Detach.  */
+			rc = utrace_control(tsk, engine, UTRACE_DETACH);
+			switch (rc) {
+			case 0:		    /* success */
+				break;
+			case -ESRCH:	    /* REAP callback already begun */
+			case -EALREADY:	    /* DEATH callback already begun */
+				break;
+			default:
+				rc = -rc;
+				pr_warning("utrace_detach %d (rc %d)\n",
+					   tsk->pid, rc);
+				break;
+			}
+			utrace_engine_put(engine);
+			pr_debug("detached in %s from %s(%d)\n", __func__,
+				 tsk->comm, tsk->pid);
+		}
+	} while_each_thread(grp, tsk);
+	rcu_read_unlock();
+}
+
+
+static const struct utrace_engine_ops process_trace_ops = {
+	.report_clone = process_trace_report_clone,
+	.report_exec = process_trace_report_exec,
+	.report_exit = process_trace_report_exit,
+	.report_signal = process_trace_report_signal,
+	.report_syscall_entry = process_trace_report_syscall_entry,
+	.report_syscall_exit = process_trace_report_syscall_exit,
+};
+
+
+
+/* control interfaces */
+
+
+static ssize_t
+trace_taskcomm_filter_read(struct file *filp, char __user *ubuf,
+			   size_t cnt, loff_t *ppos)
+{
+	return simple_read_from_buffer(ubuf, cnt, ppos,
+				       trace_taskcomm_filter, TASK_COMM_LEN);
+}
+
+
+static ssize_t
+trace_taskcomm_filter_write(struct file *filp, const char __user *ubuf,
+			    size_t cnt, loff_t *fpos)
+{
+	char *end;
+
+	if (cnt > TASK_COMM_LEN)
+		cnt = TASK_COMM_LEN;
+
+	if (copy_from_user(trace_taskcomm_filter, ubuf, cnt))
+		return -EFAULT;
+
+	/* Cut from the first nil or newline. */
+	trace_taskcomm_filter[cnt] = '\0';
+	end = strchr(trace_taskcomm_filter, '\n');
+	if (end)
+		*end = '\0';
+
+	*fpos += cnt;
+	return cnt;
+}
+
+
+static const struct file_operations trace_taskcomm_filter_fops = {
+	.open		= tracing_open_generic,
+	.read		= trace_taskcomm_filter_read,
+	.write		= trace_taskcomm_filter_write,
+};
+
+
+
+static char README_text[] =
+	"process event tracer mini-HOWTO\n"
+	"\n"
+	"1. Select process hierarchy to monitor.  Other processes will be\n"
+	"   completely unaffected.  Leave at 0 for system-wide tracing.\n"
+	"#  echo NNN > process_follow_pid\n"
+	"\n"
+	"2. Determine which process event traces are potentially desired.\n"
+	"   syscall and signal tracing slow down monitored processes.\n"
+	"#  echo 0 > process_trace_{syscalls,signals,lifecycle}\n"
+	"\n"
+	"3. Add any final uid- or taskcomm-based filtering.  Non-matching\n"
+	"   processes will skip trace messages, but will still be slowed.\n"
+	"#  echo NNN > process_trace_uid_filter # -1: unrestricted \n"
+	"#  echo ls > process_trace_taskcomm_filter # empty: unrestricted\n"
+	"\n"
+	"4. Start tracing.\n"
+	"#  echo process > current_tracer\n"
+	"\n"
+	"5. Examine trace.\n"
+	"#  cat trace\n"
+	"\n"
+	"6. Stop tracing.\n"
+	"#  echo nop > current_tracer\n"
+	;
+
+static struct debugfs_blob_wrapper README_blob = {
+	.data = README_text,
+	.size = sizeof(README_text),
+};
+
+
+static __init int init_process_trace(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_blob("process_trace_README", 0444, d_tracer,
+				    &README_blob);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_README' entry\n");
+
+	/* Control for scoping process following. */
+	entry = debugfs_create_u32("process_follow_pid", 0644, d_tracer,
+				   &process_follow_pid);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_follow_pid' entry\n");
+
+	/* Process-level filters */
+	entry = debugfs_create_file("process_trace_taskcomm_filter", 0644,
+				    d_tracer, NULL, &trace_taskcomm_filter_fops);
+	/* XXX: it'd be nice to have a read/write debugfs_create_blob. */
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_taskcomm_filter' entry\n");
+
+	entry = debugfs_create_u32("process_trace_uid_filter", 0644, d_tracer,
+				   &trace_taskuid_filter);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_uid_filter' entry\n");
+
+	/* Event-level filters. */
+	entry = debugfs_create_u32("process_trace_lifecycle", 0644, d_tracer,
+				   &trace_lifecycle_p);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_lifecycle' entry\n");
+
+	entry = debugfs_create_u32("process_trace_syscalls", 0644, d_tracer,
+				   &trace_syscalls_p);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_syscalls' entry\n");
+
+	entry = debugfs_create_u32("process_trace_signals", 0644, d_tracer,
+				   &trace_signals_p);
+	if (!entry)
+		pr_warning("Could not create debugfs 'process_trace_signals' entry\n");
+
+	return register_tracer(&process_tracer);
+}
+
+device_initcall(init_process_trace);

     prev parent reply	other threads:[~2009-02-22 22:22 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-27 19:54 proof-of-concept, utrace->ftrace engine Frank Ch. Eigler
2009-02-09  7:22 ` Roland McGrath
2009-02-22 22:22   ` Frank Ch. Eigler [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f2b7d94 dfblob:e2a4ff6 dfblob:3ff727e dfblob:349d5a9
dfblob:a774db2 dfblob:4d3d381 dfblob:b4ebccb dfblob:038ff36 )
 OR (
bs:"utrace-based ftrace "
bs:"process"
bs:" engine, v2" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090222222227.GB31207@redhat.com \
    --to=fche@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=roland@redhat.com \
    --cc=systemtap@sources.redhat.com \
    --cc=utrace-devel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.