All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Subject: [for-next][PATCH 3/5] tracing: Implement event pid filtering
Date: Thu, 29 Oct 2015 03:07:57 -0400	[thread overview]
Message-ID: <20151029071032.182254920@goodmis.org> (raw)
In-Reply-To: 20151029070754.584668951@goodmis.org

[-- Attachment #1: 0003-tracing-Implement-event-pid-filtering.patch --]
[-- Type: text/plain, Size: 9613 bytes --]

From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>

Add the necessary hooks to use the pids loaded in set_event_pid to filter
all the events enabled in the tracing instance that match the pids listed.

Two probes are added to both sched_switch and sched_wakeup tracepoints to be
called before other probes are called and after the other probes are called.
The first is used to set the necessary flags to let the probes know to test
if they should be traced or not.

The sched_switch pre probe will set the "ignore_pid" flag if neither the
previous or next task has a matching pid.

The sched_switch probe will set the "ignore_pid" flag if the next task
does not match the matching pid.

The pre probe allows for probes tracing sched_switch to be traced if
necessary.

The sched_wakeup pre probe will set the "ignore_pid" flag if neither the
current task nor the wakee task has a matching pid.

The sched_wakeup post probe will set the "ignore_pid" flag if the current
task does not have a matching pid.

Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h |   7 ++
 kernel/trace/trace.h         |   2 +
 kernel/trace/trace_events.c  | 148 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index f85693bbcdc3..429fdfc3baf5 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -328,6 +328,7 @@ enum {
 	EVENT_FILE_FL_SOFT_DISABLED_BIT,
 	EVENT_FILE_FL_TRIGGER_MODE_BIT,
 	EVENT_FILE_FL_TRIGGER_COND_BIT,
+	EVENT_FILE_FL_PID_FILTER_BIT,
 };
 
 /*
@@ -341,6 +342,7 @@ enum {
  *                   tracepoint may be enabled)
  *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
  *  TRIGGER_COND  - When set, one or more triggers has an associated filter
+ *  PID_FILTER    - When set, the event is filtered based on pid
  */
 enum {
 	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -351,6 +353,7 @@ enum {
 	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
 	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
 	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
+	EVENT_FILE_FL_PID_FILTER	= (1 << EVENT_FILE_FL_PID_FILTER_BIT),
 };
 
 struct trace_event_file {
@@ -429,6 +432,8 @@ extern enum event_trigger_type event_triggers_call(struct trace_event_file *file
 extern void event_triggers_post_call(struct trace_event_file *file,
 				     enum event_trigger_type tt);
 
+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
+
 /**
  * trace_trigger_soft_disabled - do triggers and test if soft disabled
  * @file: The file pointer of the event to test
@@ -448,6 +453,8 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 			event_triggers_call(file, NULL);
 		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
 			return true;
+		if (eflags & EVENT_FILE_FL_PID_FILTER)
+			return trace_event_ignore_this_pid(file);
 	}
 	return false;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 250481043bb5..89ffdaf3e371 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -156,6 +156,8 @@ struct trace_array_cpu {
 	pid_t			pid;
 	kuid_t			uid;
 	char			comm[TASK_COMM_LEN];
+
+	bool			ignore_pid;
 };
 
 struct tracer;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 2ad7014707ee..ab07058e27c1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -22,6 +22,8 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 
+#include <trace/events/sched.h>
+
 #include <asm/setup.h>
 
 #include "trace_output.h"
@@ -212,12 +214,32 @@ int trace_event_raw_init(struct trace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
+bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
+{
+	struct trace_array *tr = trace_file->tr;
+	struct trace_array_cpu *data;
+	struct trace_pid_list *pid_list;
+
+	pid_list = rcu_dereference_sched(tr->filtered_pids);
+	if (!pid_list)
+		return false;
+
+	data = this_cpu_ptr(tr->trace_buffer.data);
+
+	return data->ignore_pid;
+}
+EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
+
 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
 				 struct trace_event_file *trace_file,
 				 unsigned long len)
 {
 	struct trace_event_call *event_call = trace_file->event_call;
 
+	if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
+	    trace_event_ignore_this_pid(trace_file))
+		return NULL;
+
 	local_save_flags(fbuffer->flags);
 	fbuffer->pc = preempt_count();
 	fbuffer->trace_file = trace_file;
@@ -459,15 +481,114 @@ static int cmp_pid(const void *key, const void *elt)
 	return 1;
 }
 
+static bool
+check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+	pid_t search_pid;
+	pid_t *pid;
+
+	/*
+	 * Return false, because if filtered_pids does not exist,
+	 * all pids are good to trace.
+	 */
+	if (!filtered_pids)
+		return false;
+
+	search_pid = task->pid;
+
+	pid = bsearch(&search_pid, filtered_pids->pids,
+		      filtered_pids->nr_pids, sizeof(pid_t),
+		      cmp_pid);
+	if (!pid)
+		return true;
+
+	return false;
+}
+
+static void
+event_filter_pid_sched_switch_probe_pre(void *data,
+		    struct task_struct *prev, struct task_struct *next)
+{
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
+
+	pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+	this_cpu_write(tr->trace_buffer.data->ignore_pid,
+		       check_ignore_pid(pid_list, prev) &&
+		       check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_switch_probe_post(void *data,
+		    struct task_struct *prev, struct task_struct *next)
+{
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
+
+	pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+	this_cpu_write(tr->trace_buffer.data->ignore_pid,
+		       check_ignore_pid(pid_list, next));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
+{
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
+
+	/* Nothing to do if we are already tracing */
+	if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
+		return;
+
+	pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+	this_cpu_write(tr->trace_buffer.data->ignore_pid,
+		       check_ignore_pid(pid_list, task));
+}
+
+static void
+event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
+{
+	struct trace_array *tr = data;
+	struct trace_pid_list *pid_list;
+
+	/* Nothing to do if we are not tracing */
+	if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
+		return;
+
+	pid_list = rcu_dereference_sched(tr->filtered_pids);
+
+	/* Set tracing if current is enabled */
+	this_cpu_write(tr->trace_buffer.data->ignore_pid,
+		       check_ignore_pid(pid_list, current));
+}
+
 static void __ftrace_clear_event_pids(struct trace_array *tr)
 {
 	struct trace_pid_list *pid_list;
+	struct trace_event_file *file;
+	int cpu;
 
 	pid_list = rcu_dereference_protected(tr->filtered_pids,
 					     lockdep_is_held(&event_mutex));
 	if (!pid_list)
 		return;
 
+	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
+	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
+
+	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
+	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
+
+	list_for_each_entry(file, &tr->events, list) {
+		clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+	}
+
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
+
 	rcu_assign_pointer(tr->filtered_pids, NULL);
 
 	/* Wait till all users are no longer using pid filtering */
@@ -1429,13 +1550,14 @@ static int max_pids(struct trace_pid_list *pid_list)
 }
 
 static ssize_t
-ftrace_event_pid_write(struct file *file, const char __user *ubuf,
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
 {
-	struct seq_file *m = file->private_data;
+	struct seq_file *m = filp->private_data;
 	struct trace_array *tr = m->private;
 	struct trace_pid_list *filtered_pids = NULL;
 	struct trace_pid_list *pid_list = NULL;
+	struct trace_event_file *file;
 	struct trace_parser parser;
 	unsigned long val;
 	loff_t this_pos;
@@ -1564,15 +1686,35 @@ ftrace_event_pid_write(struct file *file, const char __user *ubuf,
 
 	rcu_assign_pointer(tr->filtered_pids, pid_list);
 
-	mutex_unlock(&event_mutex);
+	list_for_each_entry(file, &tr->events, list) {
+		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+	}
 
 	if (filtered_pids) {
 		synchronize_sched();
 
 		free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
 		kfree(filtered_pids);
+	} else {
+		/*
+		 * Register a probe that is called before all other probes
+		 * to set ignore_pid if next or prev do not match.
+		 * Register a probe this is called after all other probes
+		 * to only keep ignore_pid set if next pid matches.
+		 */
+		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+						 tr, INT_MAX);
+		register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+						 tr, 0);
+
+		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+						 tr, INT_MAX);
+		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+						 tr, 0);
 	}
 
+	mutex_unlock(&event_mutex);
+
 	ret = read;
 	*ppos += read;
 
-- 
2.6.1



  parent reply	other threads:[~2015-10-29  7:11 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-29  7:07 [for-next][PATCH 0/5] tracing: Addition of set_event_pid Steven Rostedt
2015-10-29  7:07 ` [for-next][PATCH 1/5] tracepoint: Give priority to probes of tracepoints Steven Rostedt
2015-10-29 17:52   ` Mathieu Desnoyers
2015-10-30 20:25     ` Steven Rostedt
2015-10-30 20:49       ` Mathieu Desnoyers
2015-10-29  7:07 ` [for-next][PATCH 2/5] tracing: Add set_event_pid directory for future use Steven Rostedt
2015-11-19 23:24   ` Paul E. McKenney
2015-11-20  4:17     ` Steven Rostedt
2015-12-01 18:12       ` Paul E. McKenney
2015-10-29  7:07 ` Steven Rostedt [this message]
2015-10-29  7:07 ` [for-next][PATCH 4/5] tracing: Check all tasks on each CPU when filtering pids Steven Rostedt
2015-10-30  6:16   ` Jiaxing Wang
2015-10-30 20:28     ` Steven Rostedt
2015-10-29  7:07 ` [for-next][PATCH 5/5] tracing: Fix sparse RCU warning Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151029071032.182254920@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.