* [PATCH v3 1/2] tracing: Allow tracer to add more than 32 options
2025-09-29 22:34 [PATCH v3 0/2] tracing: Add an option to show symbols in _text+offset for function profiler Masami Hiramatsu (Google)
@ 2025-09-29 22:34 ` Masami Hiramatsu (Google)
2025-10-15 21:20 ` Steven Rostedt
2025-09-29 22:35 ` [PATCH v3 2/2] tracing: Add an option to show symbols in _text+offset for function profiler Masami Hiramatsu (Google)
1 sibling, 1 reply; 7+ messages in thread
From: Masami Hiramatsu (Google) @ 2025-09-29 22:34 UTC (permalink / raw)
To: Steven Rostedt
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, linux-kernel,
linux-trace-kernel
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Since enum trace_iterator_flags is 32bit, the max number of the
option flags is limited to 32 and it is fully used now. To add
a new option, we need to expand it.
This replaces trace_iterator_flags with just a list of `static
const u64` so that we can add another 32 new options. Now all
option masks are u64 instead of u32 or unsigned int.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v3:
- Make TRACE_ITER_* to global.
---
kernel/trace/trace.c | 24 +++++++----
kernel/trace/trace.h | 81 +++++++++++++++++++------------------
kernel/trace/trace_irqsoff.c | 2 -
kernel/trace/trace_sched_wakeup.c | 2 -
4 files changed, 58 insertions(+), 51 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4283ed4e8f59..652b7dd34c25 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -86,6 +86,11 @@ void __init disable_tracing_selftest(const char *reason)
#define tracing_selftest_disabled 0
#endif
+/* Define TRACE_ITER_* flags. */
+#undef C
+#define C(a, b) const u64 TRACE_ITER_##a = (1ULL << TRACE_ITER_##a##_BIT);
+TRACE_FLAGS
+
/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
@@ -1736,7 +1741,7 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
* of strings in the order that the evals (enum) were defined.
*/
#undef C
-#define C(a, b) b
+#define C(a, b) b,
/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
@@ -5144,7 +5149,7 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
trace_opts = tr->current_trace->flags->opts;
for (i = 0; trace_options[i]; i++) {
- if (tr->trace_flags & (1 << i))
+ if (tr->trace_flags & (1ULL << i))
seq_printf(m, "%s\n", trace_options[i]);
else
seq_printf(m, "no%s\n", trace_options[i]);
@@ -5197,7 +5202,7 @@ static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
}
/* Some tracers require overwrite to stay enabled */
-int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
+int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
return -1;
@@ -5205,7 +5210,7 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
return 0;
}
-int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
+int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
if ((mask == TRACE_ITER_RECORD_TGID) ||
(mask == TRACE_ITER_RECORD_CMD) ||
@@ -5307,7 +5312,7 @@ int trace_set_options(struct trace_array *tr, char *option)
if (ret < 0)
ret = set_tracer_option(tr, cmp, neg);
else
- ret = set_tracer_flag(tr, 1 << ret, !neg);
+ ret = set_tracer_flag(tr, 1ULL << ret, !neg);
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -9119,7 +9124,7 @@ trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
get_tr_index(tr_index, &tr, &index);
- if (tr->trace_flags & (1 << index))
+ if (tr->trace_flags & (1ULL << index))
buf = "1\n";
else
buf = "0\n";
@@ -9148,7 +9153,7 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- ret = set_tracer_flag(tr, 1 << index, val);
+ ret = set_tracer_flag(tr, 1ULL << index, val);
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -9312,8 +9317,9 @@ static void create_trace_options_dir(struct trace_array *tr)
for (i = 0; trace_options[i]; i++) {
if (top_level ||
- !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
+ !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
create_trace_option_core_file(tr, trace_options[i], i);
+ }
}
}
@@ -9997,7 +10003,7 @@ static int __remove_instance(struct trace_array *tr)
/* Disable all the flags that were enabled coming in */
for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
if ((1 << i) & ZEROED_TRACE_FLAGS)
- set_tracer_flag(tr, 1 << i, 0);
+ set_tracer_flag(tr, 1ULL << i, 0);
}
if (printk_trace == tr)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1dbf1d3cf2f1..41c613ea0b4d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -216,7 +216,7 @@ struct array_buffer {
int cpu;
};
-#define TRACE_FLAGS_MAX_SIZE 32
+#define TRACE_FLAGS_MAX_SIZE 64
struct trace_options {
struct tracer *tracer;
@@ -390,7 +390,7 @@ struct trace_array {
int buffer_percent;
unsigned int n_err_log_entries;
struct tracer *current_trace;
- unsigned int trace_flags;
+ u64 trace_flags;
unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE];
unsigned int flags;
raw_spinlock_t start_lock;
@@ -631,7 +631,7 @@ struct tracer {
u32 old_flags, u32 bit, int set);
/* Return 0 if OK with change, else return non-zero */
int (*flag_changed)(struct trace_array *tr,
- u32 mask, int set);
+ u64 mask, int set);
struct tracer *next;
struct tracer_flags *flags;
int enabled;
@@ -1323,22 +1323,22 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
*/
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# define FGRAPH_FLAGS \
- C(DISPLAY_GRAPH, "display-graph"),
+ C(DISPLAY_GRAPH, "display-graph")
#else
# define FGRAPH_FLAGS
#endif
#ifdef CONFIG_BRANCH_TRACER
# define BRANCH_FLAGS \
- C(BRANCH, "branch"),
+ C(BRANCH, "branch")
#else
# define BRANCH_FLAGS
#endif
#ifdef CONFIG_FUNCTION_TRACER
# define FUNCTION_FLAGS \
- C(FUNCTION, "function-trace"), \
- C(FUNC_FORK, "function-fork"),
+ C(FUNCTION, "function-trace") \
+ C(FUNC_FORK, "function-fork")
# define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION
#else
# define FUNCTION_FLAGS
@@ -1348,7 +1348,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
#ifdef CONFIG_STACKTRACE
# define STACK_FLAGS \
- C(STACKTRACE, "stacktrace"),
+ C(STACKTRACE, "stacktrace")
#else
# define STACK_FLAGS
#endif
@@ -1361,33 +1361,33 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
* trace.c (this macro guarantees it).
*/
#define TRACE_FLAGS \
- C(PRINT_PARENT, "print-parent"), \
- C(SYM_OFFSET, "sym-offset"), \
- C(SYM_ADDR, "sym-addr"), \
- C(VERBOSE, "verbose"), \
- C(RAW, "raw"), \
- C(HEX, "hex"), \
- C(BIN, "bin"), \
- C(BLOCK, "block"), \
- C(FIELDS, "fields"), \
- C(PRINTK, "trace_printk"), \
- C(ANNOTATE, "annotate"), \
- C(USERSTACKTRACE, "userstacktrace"), \
- C(SYM_USEROBJ, "sym-userobj"), \
- C(PRINTK_MSGONLY, "printk-msg-only"), \
- C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
- C(LATENCY_FMT, "latency-format"), \
- C(RECORD_CMD, "record-cmd"), \
- C(RECORD_TGID, "record-tgid"), \
- C(OVERWRITE, "overwrite"), \
- C(STOP_ON_FREE, "disable_on_free"), \
- C(IRQ_INFO, "irq-info"), \
- C(MARKERS, "markers"), \
- C(EVENT_FORK, "event-fork"), \
- C(TRACE_PRINTK, "trace_printk_dest"), \
- C(COPY_MARKER, "copy_trace_marker"),\
- C(PAUSE_ON_TRACE, "pause-on-trace"), \
- C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \
+ C(PRINT_PARENT, "print-parent") \
+ C(SYM_OFFSET, "sym-offset") \
+ C(SYM_ADDR, "sym-addr") \
+ C(VERBOSE, "verbose") \
+ C(RAW, "raw") \
+ C(HEX, "hex") \
+ C(BIN, "bin") \
+ C(BLOCK, "block") \
+ C(FIELDS, "fields") \
+ C(PRINTK, "trace_printk") \
+ C(ANNOTATE, "annotate") \
+ C(USERSTACKTRACE, "userstacktrace") \
+ C(SYM_USEROBJ, "sym-userobj") \
+ C(PRINTK_MSGONLY, "printk-msg-only") \
+ C(CONTEXT_INFO, "context-info") /* Print pid/cpu/time */ \
+ C(LATENCY_FMT, "latency-format") \
+ C(RECORD_CMD, "record-cmd") \
+ C(RECORD_TGID, "record-tgid") \
+ C(OVERWRITE, "overwrite") \
+ C(STOP_ON_FREE, "disable_on_free") \
+ C(IRQ_INFO, "irq-info") \
+ C(MARKERS, "markers") \
+ C(EVENT_FORK, "event-fork") \
+ C(TRACE_PRINTK, "trace_printk_dest") \
+ C(COPY_MARKER, "copy_trace_marker")\
+ C(PAUSE_ON_TRACE, "pause-on-trace") \
+ C(HASH_PTR, "hash-ptr") /* Print hashed pointer */ \
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \
@@ -1398,7 +1398,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
* that will define the bits for the flag masks.
*/
#undef C
-#define C(a, b) TRACE_ITER_##a##_BIT
+#define C(a, b) TRACE_ITER_##a##_BIT,
enum trace_iterator_bits {
TRACE_FLAGS
@@ -1411,9 +1411,10 @@ enum trace_iterator_bits {
* use the bits as defined above.
*/
#undef C
-#define C(a, b) TRACE_ITER_##a = (1 << TRACE_ITER_##a##_BIT)
+#define C(a, b) extern const u64 TRACE_ITER_##a;
-enum trace_iterator_flags { TRACE_FLAGS };
+TRACE_FLAGS
+#undef C
/*
* TRACE_ITER_SYM_MASK masks the options in trace_flags that
@@ -2058,8 +2059,8 @@ extern const char *__stop___tracepoint_str[];
void trace_printk_control(bool enabled);
void trace_printk_start_comm(void);
-int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
-int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
+int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set);
+int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled);
/* Used from boot time tracer */
extern int trace_set_options(struct trace_array *tr, char *option);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5496758b6c76..1a65f9f1075c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -539,7 +539,7 @@ static inline int irqsoff_function_set(struct trace_array *tr, u32 mask, int set
}
#endif /* CONFIG_FUNCTION_TRACER */
-static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
+static int irqsoff_flag_changed(struct trace_array *tr, u64 mask, int set)
{
struct tracer *tracer = tr->current_trace;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index bf1cb80742ae..45865b4f753f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -328,7 +328,7 @@ __trace_function(struct trace_array *tr,
trace_function(tr, ip, parent_ip, trace_ctx, NULL);
}
-static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
+static int wakeup_flag_changed(struct trace_array *tr, u64 mask, int set)
{
struct tracer *tracer = tr->current_trace;
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH v3 2/2] tracing: Add an option to show symbols in _text+offset for function profiler
2025-09-29 22:34 [PATCH v3 0/2] tracing: Add an option to show symbols in _text+offset for function profiler Masami Hiramatsu (Google)
2025-09-29 22:34 ` [PATCH v3 1/2] tracing: Allow tracer to add more than 32 options Masami Hiramatsu (Google)
@ 2025-09-29 22:35 ` Masami Hiramatsu (Google)
1 sibling, 0 replies; 7+ messages in thread
From: Masami Hiramatsu (Google) @ 2025-09-29 22:35 UTC (permalink / raw)
To: Steven Rostedt
Cc: Masami Hiramatsu, Mark Rutland, Mathieu Desnoyers, linux-kernel,
linux-trace-kernel
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Function profiler shows the hit count of each function using its symbol
name. However, there are some same-name local symbols, which we can not
distinguish.
To solve this issue, this introduces an option to show the symbols
in "_text+OFFSET" format. This can avoid exposing the random shift of
KASLR. The functions in modules are shown as "MODNAME+OFFSET" where the
offset is from ".text".
E.g. for the kernel text symbols, specify vmlinux and the output to
addr2line, you can find the actual function and source info;
$ addr2line -fie vmlinux _text+3078208
__balance_callbacks
kernel/sched/core.c:5064
for modules, specify the module file and .text+OFFSET;
$ addr2line -fie samples/trace_events/trace-events-sample.ko .text+8224
do_simple_thread_func
samples/trace_events/trace-events-sample.c:23
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v2:
- Define a dummy TRACE_ITER_PROF_TEXT_OFFSET if CONFIG_FUNCTION_PROFILER=n.
---
kernel/trace/ftrace.c | 26 +++++++++++++++++++++++++-
kernel/trace/trace.c | 5 +++--
kernel/trace/trace.h | 11 ++++++++++-
3 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 00b76d450a89..d4802bb93793 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -534,7 +534,9 @@ static int function_stat_headers(struct seq_file *m)
static int function_stat_show(struct seq_file *m, void *v)
{
+ struct trace_array *tr = trace_get_global_array();
struct ftrace_profile *rec = v;
+ const char *refsymbol = NULL;
char str[KSYM_SYMBOL_LEN];
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static struct trace_seq s;
@@ -554,7 +556,29 @@ static int function_stat_show(struct seq_file *m, void *v)
return 0;
#endif
- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ if (tr->trace_flags & TRACE_ITER_PROF_TEXT_OFFSET) {
+ long offset;
+
+ if (core_kernel_text(rec->ip)) {
+ refsymbol = "_text";
+ offset = rec->ip - (unsigned long)_text;
+ } else {
+ struct module *mod;
+
+ guard(rcu)();
+ mod = __module_text_address(rec->ip);
+ if (mod) {
+ refsymbol = mod->name;
+ /* Calculate offset from module's text entry address. */
+ offset = rec->ip - (unsigned long)mod->mem[MOD_TEXT].base;
+ }
+ }
+ if (refsymbol)
+ snprintf(str, sizeof(str), " %s%+ld", refsymbol, offset);
+ }
+ if (!refsymbol)
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+
seq_printf(m, " %-30.30s %10lu", str, rec->counter);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 652b7dd34c25..3c39b7c68742 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -527,7 +527,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
- TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
+ TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD | \
+ TRACE_ITER_PROF_TEXT_OFFSET)
/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
@@ -11111,7 +11112,7 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
-__init struct trace_array *trace_get_global_array(void)
+struct trace_array *trace_get_global_array(void)
{
return &global_trace;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 41c613ea0b4d..b64f322d8c52 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1353,6 +1353,14 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
# define STACK_FLAGS
#endif
+#ifdef CONFIG_FUNCTION_PROFILER
+# define PROFILER_FLAGS \
+ C(PROF_TEXT_OFFSET, "prof-text-offset")
+#else
+# define PROFILER_FLAGS
+# define TRACE_ITER_PROF_TEXT_OFFSET 0UL
+#endif
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
@@ -1391,7 +1399,8 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \
- BRANCH_FLAGS
+ BRANCH_FLAGS \
+ PROFILER_FLAGS
/*
* By defining C, we can make TRACE_FLAGS a list of bit names
^ permalink raw reply related [flat|nested] 7+ messages in thread