From: Cao Ruichuang <create0818@163.com>
To: petr.pavlu@suse.com
Cc: linux-trace-kernel@vger.kernel.org, linux-kernel@vger.kernel.org,
mhiramat@kernel.org, Cao Ruichuang <create0818@163.com>
Subject: [PATCH] tracing: separate module tracepoint strings from trace_printk formats
Date: Mon, 13 Apr 2026 20:33:59 +0800 [thread overview]
Message-ID: <20260413123359.32517-1-create0818@163.com> (raw)
In-Reply-To: <41e81533-0fd6-49f5-b7c1-b4e172affd2a@suse.com>
The previous module tracepoint_string() fix took the smallest
implementation path and reused the existing module trace_printk format
storage.
That was enough to make module __tracepoint_str entries show up in
printk_formats and be accepted by trace_is_tracepoint_string(), but it
also made those copied mappings persist after module unload. That does
not match the expected module lifetime semantics.
Handle module tracepoint_string() mappings separately instead of mixing
them into the module trace_printk format list. Keep copying the strings
into tracing-managed storage while the module is loaded, but track them
on their own list and drop them again on MODULE_STATE_GOING.
Keep module trace_printk format handling unchanged.
This split is intentional: module trace_printk formats and module
tracepoint_string() mappings do not have the same lifetime requirements.
Keeping them in one shared structure would either preserve
tracepoint_string() mappings too long again, or require mixed
ownership/refcount rules in a trace_printk-oriented structure.
The separate module tracepoint_string() list intentionally keeps one
copied mapping per module entry instead of trying to share copies across
modules by string contents. printk_formats is address-based, and sharing
those copies would add another layer of shared ownership/refcounting
without changing the lifetime rule this fix is trying to restore.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217196
Signed-off-by: Cao Ruichuang <create0818@163.com>
---
include/linux/tracepoint.h | 9 +-
kernel/trace/trace_printk.c | 250 ++++++++++++++++++++++--------------
2 files changed, 157 insertions(+), 102 deletions(-)
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index f14da542402..aec598a4017 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -479,11 +479,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
*
* For built-in code, the tracing system uses the original string address.
* For modules, the tracing code saves tracepoint strings into
- * tracing-managed storage when the module loads, so their mappings remain
- * available through printk_formats and trace string checks even after the
- * module's own memory goes away. As long as the string does not change
- * during the life of the module, it is fine to use tracepoint_string()
- * within a module.
+ * tracing-managed storage while the module is loaded, and drops those
+ * mappings again when the module unloads. As long as the string does not
+ * change during the life of the module, it is fine to use
+ * tracepoint_string() within a module.
*/
#define tracepoint_string(str) \
({ \
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9f67ce42ef6..0420ffcff93 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -21,24 +21,24 @@
#ifdef CONFIG_MODULES
-/*
- * modules trace_printk() formats and tracepoint_string() strings are
- * autosaved in struct trace_bprintk_fmt, which are queued on
- * trace_bprintk_fmt_list.
- */
+/* module trace_printk() formats are autosaved on trace_bprintk_fmt_list. */
static LIST_HEAD(trace_bprintk_fmt_list);
+/* module tracepoint_string() copies live on tracepoint_string_list. */
+static LIST_HEAD(tracepoint_string_list);
-/* serialize accesses to trace_bprintk_fmt_list */
+/* serialize accesses to module format and tracepoint-string lists */
static DEFINE_MUTEX(btrace_mutex);
struct trace_bprintk_fmt {
struct list_head list;
const char *fmt;
- unsigned int type;
};
-#define TRACE_BPRINTK_TYPE BIT(0)
-#define TRACE_TRACEPOINT_TYPE BIT(1)
+struct tracepoint_string_entry {
+ struct list_head list;
+ struct module *mod;
+ const char *str;
+};
static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
{
@@ -54,24 +54,21 @@ static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
return NULL;
}
-static void hold_module_trace_format(const char **start, const char **end,
- unsigned int type)
+static void hold_module_trace_bprintk_format(const char **start, const char **end)
{
const char **iter;
char *fmt;
/* allocate the trace_printk per cpu buffers */
- if ((type & TRACE_BPRINTK_TYPE) && start != end)
+ if (start != end)
trace_printk_init_buffers();
mutex_lock(&btrace_mutex);
for (iter = start; iter < end; iter++) {
struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
if (tb_fmt) {
- if (!IS_ERR(tb_fmt)) {
- tb_fmt->type |= type;
+ if (!IS_ERR(tb_fmt))
*iter = tb_fmt->fmt;
- }
continue;
}
@@ -83,7 +80,6 @@ static void hold_module_trace_format(const char **start, const char **end,
list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
strcpy(fmt, *iter);
tb_fmt->fmt = fmt;
- tb_fmt->type = type;
} else
kfree(tb_fmt);
}
@@ -93,89 +89,156 @@ static void hold_module_trace_format(const char **start, const char **end,
mutex_unlock(&btrace_mutex);
}
+static void hold_module_tracepoint_strings(struct module *mod,
+ const char **start,
+ const char **end)
+{
+ const char **iter;
+
+ mutex_lock(&btrace_mutex);
+ for (iter = start; iter < end; iter++) {
+ struct tracepoint_string_entry *tp_entry;
+ char *str;
+
+ tp_entry = kmalloc_obj(*tp_entry);
+ if (!tp_entry)
+ continue;
+
+ str = kstrdup(*iter, GFP_KERNEL);
+ if (!str) {
+ kfree(tp_entry);
+ continue;
+ }
+
+ tp_entry->mod = mod;
+ tp_entry->str = str;
+ list_add_tail(&tp_entry->list, &tracepoint_string_list);
+ *iter = tp_entry->str;
+ }
+ mutex_unlock(&btrace_mutex);
+}
+
+static void release_module_tracepoint_strings(struct module *mod)
+{
+ struct tracepoint_string_entry *tp_entry, *n;
+
+ mutex_lock(&btrace_mutex);
+ list_for_each_entry_safe(tp_entry, n, &tracepoint_string_list, list) {
+ if (tp_entry->mod != mod)
+ continue;
+ list_del(&tp_entry->list);
+ kfree(tp_entry->str);
+ kfree(tp_entry);
+ }
+ mutex_unlock(&btrace_mutex);
+}
+
static int module_trace_format_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct module *mod = data;
- if (val != MODULE_STATE_COMING)
- return NOTIFY_OK;
+ switch (val) {
+ case MODULE_STATE_COMING:
+ if (mod->num_trace_bprintk_fmt) {
+ const char **start = mod->trace_bprintk_fmt_start;
+ const char **end = start + mod->num_trace_bprintk_fmt;
- if (mod->num_trace_bprintk_fmt) {
- const char **start = mod->trace_bprintk_fmt_start;
- const char **end = start + mod->num_trace_bprintk_fmt;
+ hold_module_trace_bprintk_format(start, end);
+ }
+
+ if (mod->num_tracepoint_strings) {
+ const char **start = mod->tracepoint_strings_start;
+ const char **end = start + mod->num_tracepoint_strings;
- hold_module_trace_format(start, end, TRACE_BPRINTK_TYPE);
+ hold_module_tracepoint_strings(mod, start, end);
+ }
+ break;
+ case MODULE_STATE_GOING:
+ release_module_tracepoint_strings(mod);
+ break;
}
- if (mod->num_tracepoint_strings) {
- const char **start = mod->tracepoint_strings_start;
- const char **end = start + mod->num_tracepoint_strings;
+ return NOTIFY_OK;
+}
- hold_module_trace_format(start, end, TRACE_TRACEPOINT_TYPE);
+static const char **find_first_mod_entry(void)
+{
+ struct trace_bprintk_fmt *tb_fmt;
+ struct tracepoint_string_entry *tp_entry;
+
+ if (!list_empty(&trace_bprintk_fmt_list)) {
+ tb_fmt = list_first_entry(&trace_bprintk_fmt_list,
+ typeof(*tb_fmt), list);
+ return &tb_fmt->fmt;
}
- return NOTIFY_OK;
+ if (!list_empty(&tracepoint_string_list)) {
+ tp_entry = list_first_entry(&tracepoint_string_list,
+ typeof(*tp_entry), list);
+ return &tp_entry->str;
+ }
+
+ return NULL;
}
-/*
- * The debugfs/tracing/printk_formats file maps the addresses with
- * the ASCII formats that are used in the bprintk events in the
- * buffer. For userspace tools to be able to decode the events from
- * the buffer, they need to be able to map the address with the format.
- *
- * The addresses of the bprintk formats are in their own section
- * __trace_printk_fmt. But for modules we copy them into a link list.
- * The code to print the formats and their addresses passes around the
- * address of the fmt string. If the fmt address passed into the seq
- * functions is within the kernel core __trace_printk_fmt section, then
- * it simply uses the next pointer in the list.
- *
- * When the fmt pointer is outside the kernel core __trace_printk_fmt
- * section, then we need to read the link list pointers. The trick is
- * we pass the address of the string to the seq function just like
- * we do for the kernel core formats. To get back the structure that
- * holds the format, we simply use container_of() and then go to the
- * next format in the list.
- */
-static const char **
-find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
+static struct trace_bprintk_fmt *lookup_mod_format_ptr(const char **fmt_ptr)
{
- struct trace_bprintk_fmt *mod_fmt;
+ struct trace_bprintk_fmt *tb_fmt;
- if (list_empty(&trace_bprintk_fmt_list))
- return NULL;
+ list_for_each_entry(tb_fmt, &trace_bprintk_fmt_list, list) {
+ if (fmt_ptr == &tb_fmt->fmt)
+ return tb_fmt;
+ }
- /*
- * v will point to the address of the fmt record from t_next
- * v will be NULL from t_start.
- * If this is the first pointer or called from start
- * then we need to walk the list.
- */
- if (!v || start_index == *pos) {
- struct trace_bprintk_fmt *p;
-
- /* search the module list */
- list_for_each_entry(p, &trace_bprintk_fmt_list, list) {
- if (start_index == *pos)
- return &p->fmt;
- start_index++;
+ return NULL;
+}
+
+static struct tracepoint_string_entry *lookup_mod_tracepoint_ptr(const char **str_ptr)
+{
+ struct tracepoint_string_entry *tp_entry;
+
+ list_for_each_entry(tp_entry, &tracepoint_string_list, list) {
+ if (str_ptr == &tp_entry->str)
+ return tp_entry;
+ }
+
+ return NULL;
+}
+
+static const char **find_next_mod_entry(int start_index, void *v, loff_t *pos)
+{
+ struct trace_bprintk_fmt *tb_fmt;
+ struct tracepoint_string_entry *tp_entry;
+
+ if (!v || start_index == *pos)
+ return find_first_mod_entry();
+
+ tb_fmt = lookup_mod_format_ptr(v);
+ if (tb_fmt) {
+ if (tb_fmt->list.next != &trace_bprintk_fmt_list) {
+ tb_fmt = list_next_entry(tb_fmt, list);
+ return &tb_fmt->fmt;
}
- /* pos > index */
+
+ if (!list_empty(&tracepoint_string_list)) {
+ tp_entry = list_first_entry(&tracepoint_string_list,
+ typeof(*tp_entry), list);
+ return &tp_entry->str;
+ }
+
return NULL;
}
- /*
- * v points to the address of the fmt field in the mod list
- * structure that holds the module print format.
- */
- mod_fmt = container_of(v, typeof(*mod_fmt), fmt);
- if (mod_fmt->list.next == &trace_bprintk_fmt_list)
+ tp_entry = lookup_mod_tracepoint_ptr(v);
+ if (!tp_entry)
return NULL;
- mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list);
+ if (tp_entry->list.next == &tracepoint_string_list)
+ return NULL;
- return &mod_fmt->fmt;
+ tp_entry = list_next_entry(tp_entry, list);
+ return &tp_entry->str;
}
static void format_mod_start(void)
@@ -195,8 +258,8 @@ module_trace_format_notify(struct notifier_block *self,
{
return NOTIFY_OK;
}
-static inline const char **
-find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
+static inline const char **find_next_mod_entry(int start_index, void *v,
+ loff_t *pos)
{
return NULL;
}
@@ -274,7 +337,7 @@ bool trace_is_tracepoint_string(const char *str)
{
const char **ptr = __start___tracepoint_str;
#ifdef CONFIG_MODULES
- struct trace_bprintk_fmt *tb_fmt;
+ struct tracepoint_string_entry *tp_entry;
#endif
for (ptr = __start___tracepoint_str; ptr < __stop___tracepoint_str; ptr++) {
@@ -284,8 +347,8 @@ bool trace_is_tracepoint_string(const char *str)
#ifdef CONFIG_MODULES
mutex_lock(&btrace_mutex);
- list_for_each_entry(tb_fmt, &trace_bprintk_fmt_list, list) {
- if ((tb_fmt->type & TRACE_TRACEPOINT_TYPE) && str == tb_fmt->fmt) {
+ list_for_each_entry(tp_entry, &tracepoint_string_list, list) {
+ if (str == tp_entry->str) {
mutex_unlock(&btrace_mutex);
return true;
}
@@ -297,9 +360,8 @@ bool trace_is_tracepoint_string(const char *str)
static const char **find_next(void *v, loff_t *pos)
{
- const char **fmt = v;
int start_index;
- int last_index;
+ int next_index;
start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
@@ -307,25 +369,19 @@ static const char **find_next(void *v, loff_t *pos)
return __start___trace_bprintk_fmt + *pos;
/*
- * The __tracepoint_str section is treated the same as the
- * __trace_printk_fmt section. The difference is that the
- * __trace_printk_fmt section should only be used by trace_printk()
- * in a debugging environment, as if anything exists in that section
- * the trace_prink() helper buffers are allocated, which would just
- * waste space in a production environment.
- *
- * The __tracepoint_str sections on the other hand are used by
- * tracepoints which need to map pointers to their strings to
- * the ASCII text for userspace.
+ * Built-in __tracepoint_str entries are exported directly from the
+ * core section. Module tracepoint_string() mappings are kept on a
+ * separate tracing-managed list below, because their lifetime is tied
+ * to module load/unload and differs from module trace_printk() formats.
*/
- last_index = start_index;
+ next_index = start_index;
start_index = __stop___tracepoint_str - __start___tracepoint_str;
- if (*pos < last_index + start_index)
- return __start___tracepoint_str + (*pos - last_index);
+ if (*pos < next_index + start_index)
+ return __start___tracepoint_str + (*pos - next_index);
- start_index += last_index;
- return find_next_mod_format(start_index, v, fmt, pos);
+ start_index += next_index;
+ return find_next_mod_entry(start_index, v, pos);
}
static void *
--
2.39.5 (Apple Git-154)
next prev parent reply other threads:[~2026-04-13 12:35 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-06 17:09 [PATCH] tracing: preserve module tracepoint strings Cao Ruichuang
2026-04-08 20:32 ` Steven Rostedt
2026-04-09 12:37 ` Petr Pavlu
2026-04-10 5:18 ` [PATCH v2] " Cao Ruichuang
2026-04-13 9:40 ` Petr Pavlu
2026-04-13 12:33 ` Cao Ruichuang [this message]
2026-04-14 11:37 ` [PATCH] tracing: separate module tracepoint strings from trace_printk formats Petr Pavlu
2026-04-16 8:03 ` Cao Ruichuang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260413123359.32517-1-create0818@163.com \
--to=create0818@163.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=petr.pavlu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox