From: Li Zefan <lizf@cn.fujitsu.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: Tom Zanussi <tzanussi@gmail.com>,
Steven Rostedt <rostedt@goodmis.org>,
Frederic Weisbecker <fweisbec@gmail.com>,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 7/7] tracing/filters: make filter preds RCU safe
Date: Sat, 11 Apr 2009 15:55:46 +0800 [thread overview]
Message-ID: <49E04D02.2080505@cn.fujitsu.com> (raw)
In-Reply-To: <49E04C22.4040608@cn.fujitsu.com>
I noticed ftrace_event_call->preds and ->preds[*] are not protected
by any lock, and I can manage to trigger kernel oops.
This patch adds filter_mutex to protect concurrent access to
event_subsystem->preds/preds[i] or ftrace_event_call->preds/preds[i],
and make the fast-path filter_match_preds() RCU safe.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
kernel/trace/trace.h | 6 +-
kernel/trace/trace_events.c | 4 +-
kernel/trace/trace_events_filter.c | 104 +++++++++++++++++++++++++---------
kernel/trace/trace_events_stage_3.h | 10 +++-
4 files changed, 91 insertions(+), 33 deletions(-)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e685ac2..ae05fbb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -847,13 +847,15 @@ struct filter_pred {
int trace_define_field(struct ftrace_event_call *call, char *type,
char *name, int offset, int size);
extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds,
+extern void filter_print_preds(struct ftrace_event_call *call,
struct trace_seq *s);
extern int filter_parse(char **pbuf, struct filter_pred *pred);
extern int filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct filter_pred **preds, void *rec);
+extern void filter_print_subsystem_preds(struct event_subsystem *system,
+ struct trace_seq *s);
extern void filter_free_subsystem_preds(struct event_subsystem *system);
extern int filter_add_subsystem_pred(struct event_subsystem *system,
struct filter_pred *pred);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 576f4fa..ca1a2b0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -481,7 +481,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
- filter_print_preds(call->preds, s);
+ filter_print_preds(call, s);
r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
@@ -549,7 +549,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
- filter_print_preds(system->preds, s);
+ filter_print_subsystem_preds(system, s);
r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
kfree(s);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f16a921..8e32dd8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,10 +22,14 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
#include "trace.h"
#include "trace_output.h"
+static DEFINE_MUTEX(filter_mutex);
+
static int filter_pred_64(struct filter_pred *pred, void *event)
{
u64 *addr = (u64 *)(event + pred->offset);
@@ -82,15 +86,19 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
return match;
}
-/* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+/*
+ * return 1 if event matches, 0 otherwise (discard)
+ *
+ * should be protected by rcu_read_lock()
+ * */
+int filter_match_preds(struct filter_pred **preds, void *rec)
{
int i, matched, and_failed = 0;
struct filter_pred *pred;
for (i = 0; i < MAX_FILTER_PRED; i++) {
- if (call->preds[i]) {
- pred = call->preds[i];
+ pred = rcu_dereference(preds[i]);
+ if (pred) {
if (and_failed && !pred->or)
continue;
matched = pred->fn(pred, rec);
@@ -109,7 +117,8 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec)
return 1;
}
-void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
+static void __filter_print_preds(struct filter_pred **preds,
+ struct trace_seq *s)
{
char *field_name;
struct filter_pred *pred;
@@ -137,6 +146,21 @@ void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
}
}
+void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s)
+{
+ mutex_lock(&filter_mutex);
+ __filter_print_preds(call->preds, s);
+ mutex_unlock(&filter_mutex);
+}
+
+void filter_print_subsystem_preds(struct event_subsystem *system,
+ struct trace_seq *s)
+{
+ mutex_lock(&filter_mutex);
+ __filter_print_preds(system->preds, s);
+ mutex_unlock(&filter_mutex);
+}
+
static struct ftrace_event_field *
find_event_field(struct ftrace_event_call *call, char *name)
{
@@ -160,29 +184,37 @@ void filter_free_pred(struct filter_pred *pred)
kfree(pred);
}
-void filter_free_preds(struct ftrace_event_call *call)
+static void __filter_free_preds(struct filter_pred **preds)
{
int i;
- if (call->preds) {
- for (i = 0; i < MAX_FILTER_PRED; i++)
- filter_free_pred(call->preds[i]);
- kfree(call->preds);
- call->preds = NULL;
- }
+ if (!preds)
+ return;
+
+ for (i = 0; i < MAX_FILTER_PRED; i++)
+ filter_free_pred(preds[i]);
+ kfree(preds);
+}
+
+void filter_free_preds(struct ftrace_event_call *call)
+{
+ struct filter_pred **preds = call->preds;
+
+ mutex_lock(&filter_mutex);
+ rcu_assign_pointer(call->preds, NULL);
+ mutex_unlock(&filter_mutex);
+
+ synchronize_rcu();
+ __filter_free_preds(preds);
}
void filter_free_subsystem_preds(struct event_subsystem *system)
{
struct ftrace_event_call *call = __start_ftrace_events;
- int i;
- if (system->preds) {
- for (i = 0; i < MAX_FILTER_PRED; i++)
- filter_free_pred(system->preds[i]);
- kfree(system->preds);
- system->preds = NULL;
- }
+ mutex_lock(&filter_mutex);
+ __filter_free_preds(system->preds);
+ mutex_unlock(&filter_mutex);
events_for_each(call) {
if (!call->name || !call->regfunc)
@@ -197,25 +229,35 @@ static int __filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred)
{
int i;
+ int ret = 0;
+ struct filter_pred **preds;
- if (call->preds && !pred->compound)
+ if (!pred->compound)
filter_free_preds(call);
+ mutex_lock(&filter_mutex);
+
if (!call->preds) {
- call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
- GFP_KERNEL);
- if (!call->preds)
- return -ENOMEM;
+ preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+ GFP_KERNEL);
+ if (!preds) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+ rcu_assign_pointer(call->preds, preds);
}
for (i = 0; i < MAX_FILTER_PRED; i++) {
if (!call->preds[i]) {
- call->preds[i] = pred;
- return 0;
+ rcu_assign_pointer(call->preds[i], pred);
+ goto unlock;
}
}
+ ret = -ENOSPC;
- return -ENOSPC;
+unlock:
+ mutex_unlock(&filter_mutex);
+ return ret;
}
static int is_string_field(const char *type)
@@ -304,11 +346,15 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
if (system->preds && !pred->compound)
filter_free_subsystem_preds(system);
+ mutex_lock(&filter_mutex);
+
if (!system->preds) {
system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
GFP_KERNEL);
- if (!system->preds)
+ if (!system->preds) {
+ mutex_unlock(&filter_mutex);
return -ENOMEM;
+ }
}
for (i = 0; i < MAX_FILTER_PRED; i++) {
@@ -318,6 +364,8 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
}
}
+ mutex_unlock(&filter_mutex);
+
if (i == MAX_FILTER_PRED)
return -ENOSPC;
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index 9d2fa78..973941d 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -207,8 +207,10 @@ static void ftrace_raw_event_##call(proto) \
struct ftrace_event_call *call = &event_##call; \
struct ring_buffer_event *event; \
struct ftrace_raw_##call *entry; \
+ struct filter_pred **preds; \
unsigned long irq_flags; \
int pc; \
+ int matched = 1; \
\
local_save_flags(irq_flags); \
pc = preempt_count(); \
@@ -222,7 +224,13 @@ static void ftrace_raw_event_##call(proto) \
\
assign; \
\
- if (call->preds && !filter_match_preds(call, entry)) \
+ rcu_read_lock(); \
+ preds = rcu_dereference(call->preds); \
+ if (preds) \
+ matched = filter_match_preds(preds, entry); \
+ rcu_read_unlock(); \
+ \
+ if (!matched) \
ring_buffer_event_discard(event); \
\
trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
--
1.5.4.rc3
next prev parent reply other threads:[~2009-04-11 7:55 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-11 7:52 [PATCH 0/7] tracing: bug fixes for tracing/filters Li Zefan
2009-04-11 7:52 ` [PATCH 1/7] tracing/filters: NUL-terminate user input filter Li Zefan
2009-04-11 7:52 ` [PATCH 2/7] tracing/filters: fix NULL pointer dereference Li Zefan
2009-04-12 10:06 ` [tip:tracing/urgent] " Li Zefan
2009-04-11 7:52 ` [PATCH 3/7] tracing/filters: allow user input integer to be oct or hex Li Zefan
2009-04-12 10:06 ` [tip:tracing/urgent] " Li Zefan
2009-04-11 7:53 ` [PATCH 4/7] tracing/filters: allow user to specify a filter val to be string Li Zefan
2009-04-11 14:35 ` Frederic Weisbecker
2009-04-12 10:04 ` Ingo Molnar
2009-04-13 1:37 ` Li Zefan
2009-04-13 3:45 ` Ingo Molnar
2009-04-11 7:55 ` [PATCH 5/7] tracing/filters: disallow newline as delimeter Li Zefan
2009-04-11 7:55 ` [PATCH 6/7] tracing/filters: return proper error code when writing filter file Li Zefan
2009-04-12 10:07 ` [tip:tracing/urgent] " Li Zefan
2009-04-11 7:55 ` Li Zefan [this message]
2009-04-11 9:30 ` [PATCH 0/7] tracing: bug fixes for tracing/filters Tom Zanussi
2009-04-11 10:08 ` Li Zefan
2009-04-11 14:48 ` Frederic Weisbecker
2009-04-11 17:36 ` Paul E. McKenney
2009-04-11 17:58 ` Tom Zanussi
2009-04-12 10:02 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=49E04D02.2080505@cn.fujitsu.com \
--to=lizf@cn.fujitsu.com \
--cc=fweisbec@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=rostedt@goodmis.org \
--cc=tzanussi@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox