From: Chen Jun <chenjun102@huawei.com>
To: <rostedt@goodmis.org>, <mhiramat@kernel.org>,
<mathieu.desnoyers@efficios.com>, <linux-kernel@vger.kernel.org>,
<linux-trace-kernel@vger.kernel.org>
Cc: <chenjun102@huawei.com>
Subject: [PATCH] tracing: Add "within" filter for call-stack-based event filtering
Date: Mon, 8 Jun 2026 22:55:56 +0800 [thread overview]
Message-ID: <20260608145556.94931-1-chenjun102@huawei.com> (raw)
Low-level kernel functions are called from many different paths.
When debugging, it is often useful to filter trace events to only
those occurring within a specific call chain.
Add a "within" filter predicate that tests whether a given function
appears in the current call stack at event time. The function name
is resolved to its address range via kallsyms during filter setup;
at runtime, stack_trace_save() captures the call stack and compares
each return address against the stored range.
Example:
echo 'within == "vfs_read"' > events/sched/sched_switch/filter
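Only the "==" and "!=" operators are supported. The "within" field is
only defined when CONFIG_STACKTRACE is enabled, and at most 16 stack
entries are examined per event, so a function beyond that depth in the
call chain is not matched.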
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
Documentation/trace/events.rst | 12 +++++++++
include/linux/trace_events.h | 1 +
kernel/trace/trace.h | 3 ++-
kernel/trace/trace_events.c | 3 +++
kernel/trace/trace_events_filter.c | 41 ++++++++++++++++++++++++++++--
5 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 18d112963dec..6e3877d376a9 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -243,6 +243,18 @@ the function "security_prepare_creds" and less than the end of that function.
The ".function" postfix can only be attached to values of size long, and can only
be compared with "==" or "!=".
+The special field "within" can be used to filter events based on whether
+a specific function appears in the current call stack::
+
+ within == "function_name"
+ within != "function_name"
+
+For example, to only trace events where "vfs_read" is in the call stack::
+
+ # echo 'within == "vfs_read"' > events/sched/sched_switch/filter
+
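+The "within" field supports only the "==" and "!=" operators. It is only
+available when the kernel is built with CONFIG_STACKTRACE, and at most 16
+entries of the call stack are examined for each event.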
+
Cpumask fields or scalar fields that encode a CPU number can be filtered using
a user-provided cpumask in cpulist format. The format is as follows::
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 40a43a4c7caf..9ed22c210add 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -851,6 +851,7 @@ enum {
FILTER_COMM,
FILTER_CPU,
FILTER_STACKTRACE,
+ FILTER_WITHIN,
};
extern int trace_event_raw_init(struct trace_event_call *call);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 80fe152af1dd..a383da42badf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1825,7 +1825,8 @@ static inline bool is_string_field(struct ftrace_event_field *field)
field->filter_type == FILTER_RDYN_STRING ||
field->filter_type == FILTER_STATIC_STRING ||
field->filter_type == FILTER_PTR_STRING ||
- field->filter_type == FILTER_COMM;
+ field->filter_type == FILTER_COMM ||
+ field->filter_type == FILTER_WITHIN;
}
static inline bool is_function_field(struct ftrace_event_field *field)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c46e623e7e0d..b7d681e55b0c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -199,6 +199,9 @@ static int trace_define_generic_fields(void)
__generic_field(char *, comm, FILTER_COMM);
__generic_field(char *, stacktrace, FILTER_STACKTRACE);
__generic_field(char *, STACKTRACE, FILTER_STACKTRACE);
+#ifdef CONFIG_STACKTRACE
+ __generic_field(char *, within, FILTER_WITHIN);
+#endif
return ret;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 609325f57942..34e1a7f0b3cd 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -72,6 +72,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_CPUMASK,
FILTER_PRED_FN_CPUMASK_CPU,
FILTER_PRED_FN_FUNCTION,
+ FILTER_PRED_FN_WITHIN,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
};
@@ -1009,6 +1010,22 @@ static int filter_pred_function(struct filter_pred *pred, void *event)
return pred->op == OP_EQ ? ret : !ret;
}
+/*
+ * filter_pred_within - test whether a function is on the current call stack
+ * @pred:  predicate; [pred->val, pred->val2) is the address range of the
+ *         function to look for (filled in by parse_pred() from kallsyms)
+ * @event: event record, not used by this predicate
+ *
+ * Saves up to ARRAY_SIZE(entries) (16) entries of the current stack and
+ * checks each one against the stored address range.
+ *
+ * Returns !pred->not when an entry falls inside the range and pred->not
+ * otherwise.  NOTE(review): pred->not is presumably set by the parser for
+ * the "!=" operator - confirm against parse_pred().
+ *
+ * Without CONFIG_STACKTRACE no stack is captured and the result is always
+ * pred->not.
+ */
+static int filter_pred_within(struct filter_pred *pred, void *event)
+{
+#ifdef CONFIG_STACKTRACE
+	unsigned long entries[16];
+	unsigned int nr_entries;
+	unsigned int i;	/* same type as nr_entries: no signed/unsigned compare */
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
+	for (i = 0; i < nr_entries; i++) {
+		/* Half-open range: val2 is one past the end of the function. */
+		if (pred->val <= entries[i] && entries[i] < pred->val2)
+			return !pred->not;
+	}
+#endif
+	return pred->not;
+}
+
/*
* regex_match_foo - Basic regex callbacks
*
@@ -1617,6 +1634,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_cpumask_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
+ case FILTER_PRED_FN_WITHIN:
+ return filter_pred_within(pred, event);
case FILTER_PRED_TEST_VISITED:
return test_pred_visited_fn(pred, event);
default:
@@ -2002,10 +2021,28 @@ static int parse_pred(const char *str, void *data,
} else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRLOC;
- } else if (field->filter_type == FILTER_RDYN_STRING)
+ } else if (field->filter_type == FILTER_RDYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRRELLOC;
- else {
+ } else if (field->filter_type == FILTER_WITHIN) {
+ unsigned long func;
+
+ if (op == OP_GLOB)
+ goto err_free;
+ pred->fn_num = FILTER_PRED_FN_WITHIN;
+ func = kallsyms_lookup_name(pred->regex->pattern);
+ if (!func) {
+ parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
+ goto err_free;
+ }
+ /* Now find the function start and end address */
+ if (!kallsyms_lookup_size_offset(func, &size, &offset)) {
+ parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
+ goto err_free;
+ }
+ pred->val = func - offset;
+ pred->val2 = pred->val + size;
+ } else {
if (!ustring_per_cpu) {
/* Once allocated, keep it around for good */
ustring_per_cpu = alloc_percpu(struct ustring_buffer);
--
2.43.0
reply other threads:[~2026-06-08 15:02 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260608145556.94931-1-chenjun102@huawei.com \
--to=chenjun102@huawei.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox