* [PATCH v5 1/4] tracing: add __print_sym() to replace __print_symbolic()
2024-06-14 8:19 [PATCH v5 0/4] tracing: improve symbolic printing Johannes Berg
@ 2024-06-14 8:19 ` Johannes Berg
2024-06-14 8:19 ` [PATCH v5 2/4] tracing/timer: use __print_sym() Johannes Berg
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Johannes Berg @ 2024-06-14 8:19 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-trace-kernel, netdev, linux-wireless, Johannes Berg
From: Johannes Berg <johannes.berg@intel.com>
The way __print_symbolic() works is limited and inefficient
in multiple ways:
- you can only use it with a static list of symbols, but
e.g. the SKB dropreasons are now a dynamic list
- it builds the list in memory _three_ times, so it takes
a lot of memory:
- The print_fmt contains the list (since it's passed to
the macro there). This actually contains the names
_twice_, which is fixed up at runtime.
- TRACE_DEFINE_ENUM() puts a 24-byte struct trace_eval_map
for every entry, plus the string pointed to by it, which
cannot be deduplicated with the strings in the print_fmt
- The in-kernel symbolic printing creates yet another list
of struct trace_print_flags for trace_print_symbols_seq()
- it also requires runtime fixup during init, which is a lot
of string parsing due to the print_fmt fixup
Introduce __print_sym() to - over time - replace the old one.
We can easily extend this also to __print_flags later, but I
cared only about the SKB dropreasons for now, which has only
__print_symbolic().
This new __print_sym() requires only a single list of items,
created by TRACE_DEFINE_SYM_LIST(), or can even use another
already existing list by using TRACE_DEFINE_SYM_FNS() with
lookup and show methods.
Then, instead of doing an init-time fixup, just do this at the
time when userspace reads the print_fmt. This way, dynamically
updated lists are possible.
For userspace, nothing actually changes, because the print_fmt
is shown exactly the same way the old __print_symbolic() was.
This adds about 4k .text in my test builds, but that'll be
more than paid for by the actual conversions.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
v2:
- fix RCU
- use ':' as separator to simplify the code, that's
still not valid in a C identifier
v3:
- add missing #undef lines (reported by Simon Horman)
- clarify name is not NUL-terminated and fix logic
for the comparison for that
v4:
- fix non-modular builds and handle TRACE_EVENT_FL_DYNAMIC
v5:
- fix build warning in non-modular build
---
include/asm-generic/vmlinux.lds.h | 3 +-
include/linux/module.h | 2 +
include/linux/trace_events.h | 7 ++
include/linux/tracepoint.h | 20 +++++
include/trace/stages/init.h | 55 ++++++++++++
include/trace/stages/stage2_data_offsets.h | 6 ++
include/trace/stages/stage3_trace_output.h | 9 ++
include/trace/stages/stage7_class_define.h | 3 +
kernel/module/main.c | 3 +
kernel/trace/trace_events.c | 99 +++++++++++++++++++++-
kernel/trace/trace_output.c | 45 ++++++++++
11 files changed, 249 insertions(+), 3 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..8275a06bcaee 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -258,7 +258,8 @@
#define FTRACE_EVENTS() \
. = ALIGN(8); \
BOUNDED_SECTION(_ftrace_events) \
- BOUNDED_SECTION_BY(_ftrace_eval_map, _ftrace_eval_maps)
+ BOUNDED_SECTION_BY(_ftrace_eval_map, _ftrace_eval_maps) \
+ BOUNDED_SECTION(_ftrace_sym_defs)
#else
#define FTRACE_EVENTS()
#endif
diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..7256762d59e1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -524,6 +524,8 @@ struct module {
unsigned int num_trace_events;
struct trace_eval_map **trace_evals;
unsigned int num_trace_evals;
+ struct trace_sym_def **trace_sym_defs;
+ unsigned int num_trace_sym_defs;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
unsigned int num_ftrace_callsites;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 9df3e2973626..2743280c9a46 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -27,6 +27,13 @@ const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array);
+const char *trace_print_sym_seq(struct trace_seq *p, unsigned long long val,
+ const char *(*lookup)(unsigned long long val));
+const char *trace_sym_lookup(const struct trace_sym_entry *list,
+ size_t len, unsigned long long value);
+void trace_sym_show(struct seq_file *m,
+ const struct trace_sym_entry *list, size_t len);
+
#if BITS_PER_LONG == 32
const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
unsigned long long flags,
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..cc3b387953d1 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,24 @@ struct trace_eval_map {
unsigned long eval_value;
};
+struct trace_sym_def {
+ const char *system;
+ const char *symbol_id;
+ /* may return NULL, called under rcu_read_lock() */
+ const char * (*lookup)(unsigned long long);
+ /*
+ * Must print the list: ', { val, "name"}, ...'
+ * with no trailing comma, but with the leading ', '
+ * to simplify things:
+ */
+ void (*show)(struct seq_file *);
+};
+
+struct trace_sym_entry {
+ unsigned long long value;
+ const char *name;
+};
+
#define TRACEPOINT_DEFAULT_PRIO 10
extern struct srcu_struct tracepoint_srcu;
@@ -109,6 +127,8 @@ extern void syscall_unregfunc(void);
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
+#define TRACE_DEFINE_SYM_FNS(...)
+#define TRACE_DEFINE_SYM_LIST(...)
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
diff --git a/include/trace/stages/init.h b/include/trace/stages/init.h
index 000bcfc8dd2e..ef63ab64250d 100644
--- a/include/trace/stages/init.h
+++ b/include/trace/stages/init.h
@@ -23,6 +23,61 @@ TRACE_MAKE_SYSTEM_STR();
__section("_ftrace_eval_map") \
*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
+/*
+ * Define a symbol for __print_sym by giving lookup and
+ * show functions. See &struct trace_sym_def.
+ */
+#undef TRACE_DEFINE_SYM_FNS
+#define TRACE_DEFINE_SYM_FNS(_symbol_id, _lookup, _show) \
+ _TRACE_DEFINE_SYM_FNS(TRACE_SYSTEM, _symbol_id, _lookup, _show)
+#define _TRACE_DEFINE_SYM_FNS(_system, _symbol_id, _lookup, _show) \
+ __TRACE_DEFINE_SYM_FNS(_system, _symbol_id, _lookup, _show)
+#define __TRACE_DEFINE_SYM_FNS(_system, _symbol_id, _lookup, _show) \
+ ___TRACE_DEFINE_SYM_FNS(_system ## _ ## _symbol_id, _symbol_id, \
+ _lookup, _show)
+#define ___TRACE_DEFINE_SYM_FNS(_name, _symbol_id, _lookup, _show) \
+ static struct trace_sym_def \
+ __trace_sym_def_ ## _name = { \
+ .system = TRACE_SYSTEM_STRING, \
+ .symbol_id = #_symbol_id, \
+ .lookup = _lookup, \
+ .show = _show, \
+ }; \
+ static struct trace_sym_def \
+ __section("_ftrace_sym_defs") \
+ *__trace_sym_def_p_ ## _name = &__trace_sym_def_ ## _name
+
+/*
+ * Define a symbol for __print_sym by giving lookup and
+ * show functions. See &struct trace_sym_def.
+ */
+#undef TRACE_DEFINE_SYM_LIST
+#define TRACE_DEFINE_SYM_LIST(_symbol_id, ...) \
+ _TRACE_DEFINE_SYM_LIST(TRACE_SYSTEM, _symbol_id, __VA_ARGS__)
+#define _TRACE_DEFINE_SYM_LIST(_system, _symbol_id, ...) \
+ __TRACE_DEFINE_SYM_LIST(_system, _symbol_id, __VA_ARGS__)
+#define __TRACE_DEFINE_SYM_LIST(_system, _symbol_id, ...) \
+ ___TRACE_DEFINE_SYM_LIST(_system ## _ ## _symbol_id, _symbol_id,\
+ __VA_ARGS__)
+#define ___TRACE_DEFINE_SYM_LIST(_name, _symbol_id, ...) \
+ static struct trace_sym_entry \
+ __trace_sym_list_ ## _name[] = { __VA_ARGS__ }; \
+ static const char * \
+ __trace_sym_lookup_ ## _name(unsigned long long value) \
+ { \
+ return trace_sym_lookup(__trace_sym_list_ ## _name, \
+ ARRAY_SIZE(__trace_sym_list_ ## _name), value); \
+ } \
+ static void \
+ __trace_sym_show_ ## _name(struct seq_file *m) \
+ { \
+ trace_sym_show(m, __trace_sym_list_ ## _name, \
+ ARRAY_SIZE(__trace_sym_list_ ## _name)); \
+ } \
+ ___TRACE_DEFINE_SYM_FNS(_name, _symbol_id, \
+ __trace_sym_lookup_ ## _name, \
+ __trace_sym_show_ ## _name)
+
#undef TRACE_DEFINE_SIZEOF
#define TRACE_DEFINE_SIZEOF(a) \
static struct trace_eval_map __used __initdata \
diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h
index 8b0cff06d346..5afd9de7deb3 100644
--- a/include/trace/stages/stage2_data_offsets.h
+++ b/include/trace/stages/stage2_data_offsets.h
@@ -5,6 +5,12 @@
#undef TRACE_DEFINE_ENUM
#define TRACE_DEFINE_ENUM(a)
+#undef TRACE_DEFINE_SYM_FNS
+#define TRACE_DEFINE_SYM_FNS(_symbol_id, _lookup, _show)
+
+#undef TRACE_DEFINE_SYM_LIST
+#define TRACE_DEFINE_SYM_LIST(_symbol_id, ...)
+
#undef TRACE_DEFINE_SIZEOF
#define TRACE_DEFINE_SIZEOF(a)
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index c1fb1355d309..d2c6458b62dc 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -79,6 +79,15 @@
trace_print_symbols_seq(p, value, symbols); \
})
+#undef __print_sym
+#define __print_sym(value, symbol_id) \
+ ___print_sym(TRACE_SYSTEM, value, symbol_id)
+#define ___print_sym(sys, value, symbol_id) \
+ ____print_sym(sys, value, symbol_id)
+#define ____print_sym(sys, value, symbol_id) \
+ trace_print_sym_seq(p, value, \
+ __trace_sym_def_p_ ## sys ## _ ## symbol_id->lookup)
+
#undef __print_flags_u64
#undef __print_symbolic_u64
#if BITS_PER_LONG == 32
diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h
index bcb960d16fc0..cf552916935f 100644
--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -25,6 +25,9 @@
#undef __print_hex_dump
#undef __get_buf
+#undef __print_sym
+#define __print_sym(value, symbol_id) __print_sym(value:symbol_id)
+
/*
* The below is not executed in the kernel. It is only what is
* displayed in the print format for userspace to parse.
diff --git a/kernel/module/main.c b/kernel/module/main.c
index d18a94b973e1..5b1700b56a06 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2179,6 +2179,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
mod->trace_evals = section_objs(info, "_ftrace_eval_map",
sizeof(*mod->trace_evals),
&mod->num_trace_evals);
+ mod->trace_sym_defs = section_objs(info, "_ftrace_sym_defs",
+ sizeof(*mod->trace_sym_defs),
+ &mod->num_trace_sym_defs);
#endif
#ifdef CONFIG_TRACING
mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6ef29eba90ce..0859e879ff09 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1570,6 +1570,102 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
return node;
}
+extern struct trace_sym_def *__start_ftrace_sym_defs[];
+extern struct trace_sym_def *__stop_ftrace_sym_defs[];
+
+/* note: @name is not NUL-terminated */
+static void show_sym_list(struct seq_file *m, struct trace_event_call *call,
+ const char *name, unsigned int name_len)
+{
+ struct trace_sym_def **sym_defs;
+ unsigned int n_sym_defs, i;
+
+ if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) {
+ sym_defs = __start_ftrace_sym_defs;
+ n_sym_defs = __stop_ftrace_sym_defs - __start_ftrace_sym_defs;
+ } else {
+#ifdef CONFIG_MODULES
+ struct module *mod = call->module;
+
+ sym_defs = mod->trace_sym_defs;
+ n_sym_defs = mod->num_trace_sym_defs;
+#else
+ return;
+#endif /* CONFIG_MODULES */
+ }
+
+ for (i = 0; i < n_sym_defs; i++) {
+ unsigned int sym_len;
+
+ if (!sym_defs[i])
+ continue;
+ if (sym_defs[i]->system != call->class->system)
+ continue;
+ sym_len = strlen(sym_defs[i]->symbol_id);
+ if (name_len != sym_len)
+ continue;
+ if (strncmp(sym_defs[i]->symbol_id, name, sym_len))
+ continue;
+ if (sym_defs[i]->show)
+ sym_defs[i]->show(m);
+ break;
+ }
+}
+
+static void show_print_fmt(struct seq_file *m, struct trace_event_call *call)
+{
+ char *ptr = call->print_fmt;
+ bool in_print_sym = false;
+ int quote = 0;
+
+ seq_puts(m, "\nprint fmt: ");
+ while (*ptr) {
+ if (*ptr == '\\') {
+ seq_putc(m, *ptr);
+ ptr++;
+ /* paranoid */
+ if (!*ptr)
+ break;
+ goto next;
+ }
+ if (*ptr == '"') {
+ quote ^= 1;
+ goto next;
+ }
+ if (quote)
+ goto next;
+
+ if (in_print_sym && *ptr != ':')
+ goto next;
+
+ if (in_print_sym && *ptr == ':') {
+ const char *name;
+
+ ptr++;
+ name = ptr;
+ /* skip the name */
+ while (*ptr && *ptr != ')')
+ ptr++;
+ /* and show the actual list inline now */
+ show_sym_list(m, call, name, ptr - name);
+ in_print_sym = false;
+ continue;
+ }
+
+ if (strncmp(ptr, "__print_sym(", 12) == 0) {
+ ptr += 12;
+ seq_puts(m, "__print_symbolic(");
+ in_print_sym = true;
+ continue;
+ }
+next:
+ seq_putc(m, *ptr);
+ ptr++;
+ }
+
+ seq_putc(m, '\n');
+}
+
static int f_show(struct seq_file *m, void *v)
{
struct trace_event_call *call = event_file_data(m->private);
@@ -1588,8 +1684,7 @@ static int f_show(struct seq_file *m, void *v)
return 0;
case FORMAT_PRINTFMT:
- seq_printf(m, "\nprint fmt: %s\n",
- call->print_fmt);
+ show_print_fmt(m, call);
return 0;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d8b302d01083..8a3aa661ea46 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -124,6 +124,51 @@ trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
}
EXPORT_SYMBOL(trace_print_symbols_seq);
+const char *trace_sym_lookup(const struct trace_sym_entry *list,
+ size_t len, unsigned long long value)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (list[i].value == value)
+ return list[i].name;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(trace_sym_lookup);
+
+void trace_sym_show(struct seq_file *m,
+ const struct trace_sym_entry *list, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ seq_printf(m, ", { %lld, \"%s\" }",
+ list[i].value, list[i].name);
+}
+EXPORT_SYMBOL(trace_sym_show);
+
+const char *
+trace_print_sym_seq(struct trace_seq *p, unsigned long long val,
+ const char *(*lookup)(unsigned long long val))
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ const char *name;
+
+ rcu_read_lock();
+ name = lookup(val);
+ if (name)
+ trace_seq_puts(p, name);
+ else
+ trace_seq_printf(p, "0x%llx", val);
+ rcu_read_unlock();
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL(trace_print_sym_seq);
+
#if BITS_PER_LONG == 32
const char *
trace_print_flags_seq_u64(struct trace_seq *p, const char *delim,
--
2.45.2
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v5 2/4] tracing/timer: use __print_sym()
2024-06-14 8:19 [PATCH v5 0/4] tracing: improve symbolic printing Johannes Berg
2024-06-14 8:19 ` [PATCH v5 1/4] tracing: add __print_sym() to replace __print_symbolic() Johannes Berg
@ 2024-06-14 8:19 ` Johannes Berg
2024-06-14 8:19 ` [PATCH v5 3/4] net: dropreason: use new __print_sym() in tracing Johannes Berg
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Johannes Berg @ 2024-06-14 8:19 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-trace-kernel, netdev, linux-wireless, Johannes Berg
From: Johannes Berg <johannes.berg@intel.com>
Use the new __print_sym() in the timer tracing, just to show
how to convert something. This adds ~80 bytes of .text for a
saving of ~1.5K of data in my builds.
Note the format changes from
print fmt: "success=%d dependency=%s", REC->success, __print_symbolic(REC->dependency, { 0, "NONE" }, { (1 << 0), "POSIX_TIMER" }, { (1 << 1), "PERF_EVENTS" }, { (1 << 2), "SCHED" }, { (1 << 3), "CLOCK_UNSTABLE" }, { (1 << 4), "RCU" }, { (1 << 5), "RCU_EXP" })
to
print fmt: "success=%d dependency=%s", REC->success, __print_symbolic(REC->dependency, { 0, "NONE" }, { 1, "POSIX_TIMER" }, { 2, "PERF_EVENTS" }, { 4, "SCHED" }, { 8, "CLOCK_UNSTABLE" }, { 16, "RCU" }, { 32, "RCU_EXP" })
since the values are now just printed in the show function as
pure decimal values.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
include/trace/events/timer.h | 22 +++++++---------------
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1ef58a04fc57..d483abffed78 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -402,26 +402,18 @@ TRACE_EVENT(itimer_expire,
#undef tick_dep_mask_name
#undef tick_dep_name_end
-/* The MASK will convert to their bits and they need to be processed too */
-#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
- TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
- TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-/* NONE only has a mask defined for it */
-#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-
-TICK_DEP_NAMES
-
-#undef tick_dep_name
-#undef tick_dep_mask_name
-#undef tick_dep_name_end
-
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+TRACE_DEFINE_SYM_LIST(tick_dep_names, TICK_DEP_NAMES);
+
+#undef tick_dep_name
+#undef tick_dep_mask_name
+#undef tick_dep_name_end
+
#define show_tick_dep_name(val) \
- __print_symbolic(val, TICK_DEP_NAMES)
+ __print_sym(val, tick_dep_names)
TRACE_EVENT(tick_stop,
--
2.45.2
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v5 3/4] net: dropreason: use new __print_sym() in tracing
2024-06-14 8:19 [PATCH v5 0/4] tracing: improve symbolic printing Johannes Berg
2024-06-14 8:19 ` [PATCH v5 1/4] tracing: add __print_sym() to replace __print_symbolic() Johannes Berg
2024-06-14 8:19 ` [PATCH v5 2/4] tracing/timer: use __print_sym() Johannes Berg
@ 2024-06-14 8:19 ` Johannes Berg
2024-06-18 8:26 ` Paolo Abeni
2024-06-14 8:19 ` [PATCH v5 4/4] net: drop_monitor: use drop_reason_lookup() Johannes Berg
2024-08-19 22:23 ` [PATCH v5 0/4] tracing: improve symbolic printing Steven Rostedt
4 siblings, 1 reply; 8+ messages in thread
From: Johannes Berg @ 2024-06-14 8:19 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-trace-kernel, netdev, linux-wireless, Johannes Berg
From: Johannes Berg <johannes.berg@intel.com>
The __print_symbolic() could only ever print the core
drop reasons, since that's the way the infrastructure
works. Now that we have __print_sym() with all the
advantages mentioned in that commit, convert to that
and get all the drop reasons from all subsystems. As
we already have a list of them, that's really easy.
This is a little bit of .text (~100 bytes in my build)
and saves a lot of .data (~17k).
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
include/net/dropreason.h | 5 +++++
include/trace/events/skb.h | 16 +++-----------
net/core/skbuff.c | 43 ++++++++++++++++++++++++++++++++++++++
3 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 56cb7be92244..c157070b5303 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -42,6 +42,11 @@ struct drop_reason_list {
extern const struct drop_reason_list __rcu *
drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM];
+#ifdef CONFIG_TRACEPOINTS
+const char *drop_reason_lookup(unsigned long long value);
+void drop_reason_show(struct seq_file *m);
+#endif
+
void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
const struct drop_reason_list *list);
void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys);
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 07e0715628ec..8a1a63f9e796 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -8,15 +8,9 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tracepoint.h>
+#include <net/dropreason.h>
-#undef FN
-#define FN(reason) TRACE_DEFINE_ENUM(SKB_DROP_REASON_##reason);
-DEFINE_DROP_REASON(FN, FN)
-
-#undef FN
-#undef FNe
-#define FN(reason) { SKB_DROP_REASON_##reason, #reason },
-#define FNe(reason) { SKB_DROP_REASON_##reason, #reason }
+TRACE_DEFINE_SYM_FNS(drop_reason, drop_reason_lookup, drop_reason_show);
/*
* Tracepoint for free an sk_buff:
@@ -44,13 +38,9 @@ TRACE_EVENT(kfree_skb,
TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s",
__entry->skbaddr, __entry->protocol, __entry->location,
- __print_symbolic(__entry->reason,
- DEFINE_DROP_REASON(FN, FNe)))
+ __print_sym(__entry->reason, drop_reason ))
);
-#undef FN
-#undef FNe
-
TRACE_EVENT(consume_skb,
TP_PROTO(struct sk_buff *skb, void *location),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 466999a7515e..cd1ea6c3e0f8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,49 @@ drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = {
};
EXPORT_SYMBOL(drop_reasons_by_subsys);
+#ifdef CONFIG_TRACEPOINTS
+const char *drop_reason_lookup(unsigned long long value)
+{
+ unsigned long long subsys_id = value >> SKB_DROP_REASON_SUBSYS_SHIFT;
+ u32 reason = value & ~SKB_DROP_REASON_SUBSYS_MASK;
+ const struct drop_reason_list *subsys;
+
+ if (subsys_id >= SKB_DROP_REASON_SUBSYS_NUM)
+ return NULL;
+
+ subsys = rcu_dereference(drop_reasons_by_subsys[subsys_id]);
+ if (!subsys)
+ return NULL;
+ if (reason >= subsys->n_reasons)
+ return NULL;
+ return subsys->reasons[reason];
+}
+
+void drop_reason_show(struct seq_file *m)
+{
+ u32 subsys_id;
+
+ rcu_read_lock();
+ for (subsys_id = 0; subsys_id < SKB_DROP_REASON_SUBSYS_NUM; subsys_id++) {
+ const struct drop_reason_list *subsys;
+ u32 i;
+
+ subsys = rcu_dereference(drop_reasons_by_subsys[subsys_id]);
+ if (!subsys)
+ continue;
+
+ for (i = 0; i < subsys->n_reasons; i++) {
+ if (!subsys->reasons[i])
+ continue;
+ seq_printf(m, ", { %u, \"%s\" }",
+ (subsys_id << SKB_DROP_REASON_SUBSYS_SHIFT) | i,
+ subsys->reasons[i]);
+ }
+ }
+ rcu_read_unlock();
+}
+#endif
+
/**
* drop_reasons_register_subsys - register another drop reason subsystem
* @subsys: the subsystem to register, must not be the core
--
2.45.2
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v5 0/4] tracing: improve symbolic printing
2024-06-14 8:19 [PATCH v5 0/4] tracing: improve symbolic printing Johannes Berg
` (3 preceding siblings ...)
2024-06-14 8:19 ` [PATCH v5 4/4] net: drop_monitor: use drop_reason_lookup() Johannes Berg
@ 2024-08-19 22:23 ` Steven Rostedt
2024-08-26 18:24 ` Johannes Berg
4 siblings, 1 reply; 8+ messages in thread
From: Steven Rostedt @ 2024-08-19 22:23 UTC (permalink / raw)
To: Johannes Berg; +Cc: linux-kernel, linux-trace-kernel, netdev, linux-wireless
Hi Johannes,
I finally got around to testing your patches.
I did the following:
# cat /sys/kernel/tracing/events/*/*/format
and hit this:
BUG: unable to handle page fault for address: ffffffff8e6333d0
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 183c40067 P4D 183c40067 PUD 183c41063 PMD 1003ef063 PTE 800ffffe7b9cc062
Oops: Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 7 UID: 0 PID: 893 Comm: cat Not tainted 6.11.0-rc4-test-00004-g4ce2836f008b #56 68afcee1248519f8b3b088836c40746e4a6b69d3
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:f_show (kernel/trace/trace_events.c:1601 kernel/trace/trace_events.c:1651 kernel/trace/trace_events.c:1689)
Code: 33 63 8e 48 2d d0 33 63 8e 48 c1 f8 03 85 c0 74 67 89 c0 4c 89 c3 49 8d 04 c0 48 89 04 24 eb 0a 48 83 c3 08 48 39 1c 24 74 4e <4c> 8b 3b 4d 85 ff 74 ee 49 8b 45 10 48 8b 00 49 39 07 75 e2 49 8b
All code
========
0: 33 63 8e xor -0x72(%rbx),%esp
3: 48 2d d0 33 63 8e sub $0xffffffff8e6333d0,%rax
9: 48 c1 f8 03 sar $0x3,%rax
d: 85 c0 test %eax,%eax
f: 74 67 je 0x78
11: 89 c0 mov %eax,%eax
13: 4c 89 c3 mov %r8,%rbx
16: 49 8d 04 c0 lea (%r8,%rax,8),%rax
1a: 48 89 04 24 mov %rax,(%rsp)
1e: eb 0a jmp 0x2a
20: 48 83 c3 08 add $0x8,%rbx
24: 48 39 1c 24 cmp %rbx,(%rsp)
28: 74 4e je 0x78
2a:* 4c 8b 3b mov (%rbx),%r15 <-- trapping instruction
2d: 4d 85 ff test %r15,%r15
30: 74 ee je 0x20
32: 49 8b 45 10 mov 0x10(%r13),%rax
36: 48 8b 00 mov (%rax),%rax
39: 49 39 07 cmp %rax,(%r15)
3c: 75 e2 jne 0x20
3e: 49 rex.WB
3f: 8b .byte 0x8b
Code starting with the faulting instruction
===========================================
0: 4c 8b 3b mov (%rbx),%r15
3: 4d 85 ff test %r15,%r15
6: 74 ee je 0xfffffffffffffff6
8: 49 8b 45 10 mov 0x10(%r13),%rax
c: 48 8b 00 mov (%rax),%rax
f: 49 39 07 cmp %rax,(%r15)
12: 75 e2 jne 0xfffffffffffffff6
14: 49 rex.WB
15: 8b .byte 0x8b
RSP: 0018:ffffb43981457ca8 EFLAGS: 00010202
RAX: ffffffff8e6333e0 RBX: ffffffff8e6333d0 RCX: 00000000000002cd
RDX: ffff942b4b0bd000 RSI: 000000000000006e RDI: ffff942b42f9cc30
RBP: ffff942b42f9cc30 R08: ffffffff8e6333d0 R09: 63696c6f626d7973
R10: 735f746e6972705f R11: 2863696c6f626d79 R12: 0000000000000000
R13: ffffffff8de44880 R14: ffffffff8de44516 R15: ffffffff8de44515
FS: 00007f556c562740(0000) GS:ffff942cbdfc0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffff8e6333d0 CR3: 000000010a1d0006 CR4: 0000000000170ef0
Call Trace:
<TASK>
? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434)
? page_fault_oops (arch/x86/mm/fault.c:715)
? search_module_extables (kernel/module/main.c:3280)
? search_bpf_extables (kernel/bpf/core.c:799)
? exc_page_fault (arch/x86/mm/fault.c:1198 arch/x86/mm/fault.c:1479 arch/x86/mm/fault.c:1539)
? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:623)
? f_show (kernel/trace/trace_events.c:1601 kernel/trace/trace_events.c:1651 kernel/trace/trace_events.c:1689)
? f_show (kernel/trace/trace_events.c:1623 kernel/trace/trace_events.c:1689)
seq_read_iter (fs/seq_file.c:273)
seq_read (fs/seq_file.c:163)
vfs_read (fs/read_write.c:474)
? __handle_mm_fault (mm/memory.c:3945 mm/memory.c:5521 mm/memory.c:5664)
ksys_read (fs/read_write.c:619)
do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1))
? handle_mm_fault (mm/memory.c:5744 (discriminator 1) mm/memory.c:5840 (discriminator 1))
? exc_page_fault (arch/x86/mm/fault.c:1342 arch/x86/mm/fault.c:1481 arch/x86/mm/fault.c:1539)
? irqentry_exit_to_user_mode (arch/x86/include/asm/entry-common.h:57 (discriminator 1) include/linux/entry-common.h:330 (discriminator 1) kernel/entry/common.c:231 (discriminator 1))
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0033:0x7f556c65ca5d
Code: 31 c0 e9 c6 fe ff ff 50 48 8d 3d a6 60 0a 00 e8 a9 08 02 00 66 0f 1f 84 00 00 00 00 00 80 3d 81 3b 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 48 83 ec
All code
========
0: 31 c0 xor %eax,%eax
2: e9 c6 fe ff ff jmp 0xfffffffffffffecd
7: 50 push %rax
8: 48 8d 3d a6 60 0a 00 lea 0xa60a6(%rip),%rdi # 0xa60b5
f: e8 a9 08 02 00 call 0x208bd
14: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
1b: 00 00
1d: 80 3d 81 3b 0e 00 00 cmpb $0x0,0xe3b81(%rip) # 0xe3ba5
24: 74 17 je 0x3d
26: 31 c0 xor %eax,%eax
28: 0f 05 syscall
2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
30: 77 5b ja 0x8d
32: c3 ret
33: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
3a: 00 00 00
3d: 48 rex.W
3e: 83 .byte 0x83
3f: ec in (%dx),%al
Code starting with the faulting instruction
===========================================
0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
6: 77 5b ja 0x63
8: c3 ret
9: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
10: 00 00 00
13: 48 rex.W
14: 83 .byte 0x83
15: ec in (%dx),%al
RSP: 002b:00007ffc88ecb878 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f556c65ca5d
RDX: 0000000000020000 RSI: 00007f556c541000 RDI: 0000000000000003
RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000
R10: 0000000000000022 R11: 0000000000000246 R12: 00007f556c541000
R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000000000
</TASK>
Modules linked in: snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hwdep snd_hda_core
CR2: ffffffff8e6333d0
---[ end trace 0000000000000000 ]---
-- Steve
^ permalink raw reply [flat|nested] 8+ messages in thread