From: Steven Rostedt <rostedt@goodmis.org>
To: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>,
linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
bpf@vger.kernel.org, x86@kernel.org,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@kernel.org>, Jiri Olsa <jolsa@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
"H. Peter Anvin" <hpa@zytor.com>,
Andrii Nakryiko <andrii@kernel.org>
Subject: Re: [PATCH v9 00/13] unwind_user: x86: Deferred unwinding infrastructure
Date: Wed, 21 May 2025 12:50:48 -0400 [thread overview]
Message-ID: <20250521125048.4d572d08@gandalf.local.home> (raw)
In-Reply-To: <20250520195549.17f6c2c7@gandalf.local.home>
On Tue, 20 May 2025 19:55:49 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:
> There's a proposal to move trace_sched_process_exit() to before exit_mm().
> If that happens, we could make that tracepoint a "faultable" tracepoint and
> then the unwind infrastructure could attach to it and do the unwinding from
> that tracepoint.
The below patch does work. It's just a PoC and would need to be broken up
and also cleaned up.
I created a TRACE_EVENT_FAULTABLE() that is basically just a
TRACE_EVENT_SYSCALL(), and used that for the sched_process_exit tracepoint.
I then had the unwinder attach to that tracepoint when the first unwind
callback is registered.
I had to change the check in the trace from testing PF_EXITING to just
current->mm is NULL.
But this does work for the exiting of a task:
-- Steve
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a351763e6965..eb98bb61126e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -617,6 +617,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \
print, reg, unreg) \
DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_FAULTABLE(name, proto, args, struct, assign, print) \
+ DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FLAGS(event, flag)
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index ed52d0506c69..b228424744fd 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -50,6 +50,10 @@
#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, print, reg, unreg) \
DEFINE_TRACE_SYSCALL(name, reg, unreg, PARAMS(proto), PARAMS(args))
+#undef TRACE_EVENT_FAULTABLE
+#define TRACE_EVENT_FAULTABLE(name, proto, args, struct, assign, print) \
+ DEFINE_TRACE_SYSCALL(name, NULL, NULL, PARAMS(proto), PARAMS(args))
+
#undef TRACE_EVENT_NOP
#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)
@@ -125,6 +129,7 @@
#undef TRACE_EVENT_FN
#undef TRACE_EVENT_FN_COND
#undef TRACE_EVENT_SYSCALL
+#undef TRACE_EVENT_FAULTABLE
#undef TRACE_EVENT_CONDITION
#undef TRACE_EVENT_NOP
#undef DEFINE_EVENT_NOP
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 3bec9fb73a36..c6d7894970e3 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -326,13 +326,13 @@ DEFINE_EVENT(sched_process_template, sched_process_free,
TP_ARGS(p));
/*
- * Tracepoint for a task exiting.
+ * Tracepoint for a task exiting (allows faulting)
* Note, it's a superset of sched_process_template and should be kept
* compatible as much as possible. sched_process_exits has an extra
* `group_dead` argument, so sched_process_template can't be used,
* unfortunately, just like sched_migrate_task above.
*/
-TRACE_EVENT(sched_process_exit,
+TRACE_EVENT_FAULTABLE(sched_process_exit,
TP_PROTO(struct task_struct *p, bool group_dead),
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 4f22136fd465..0ed57e7906d1 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -55,6 +55,16 @@
PARAMS(print)); \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+#undef TRACE_EVENT_FAULTABLE
+#define TRACE_EVENT_FAULTABLE(name, proto, args, tstruct, assign, print) \
+ DECLARE_EVENT_SYSCALL_CLASS(name, \
+ PARAMS(proto), \
+ PARAMS(args), \
+ PARAMS(tstruct), \
+ PARAMS(assign), \
+ PARAMS(print)); \
+ DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
+
#include "stages/stage1_struct_define.h"
#undef DECLARE_EVENT_CLASS
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index 63d0237bad3e..7aad471f2887 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -11,6 +11,8 @@
#include <linux/slab.h>
#include <linux/mm.h>
+#include <trace/events/sched.h>
+
#define UNWIND_MAX_ENTRIES 512
/* Guards adding to or removing from the list of callbacks */
@@ -77,7 +79,7 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
/* Should always be called from faultable context */
might_fault();
- if (current->flags & PF_EXITING)
+ if (!current->mm)
return -EINVAL;
if (!info->cache) {
@@ -107,14 +109,14 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
return 0;
}
-static void unwind_deferred_task_work(struct callback_head *head)
+static void process_unwind_deferred(void)
{
- struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
+ struct task_struct *task = current;
+ struct unwind_task_info *info = &task->unwind_info;
struct unwind_stacktrace trace;
struct unwind_work *work;
unsigned long bits;
u64 timestamp;
- struct task_struct *task = current;
int idx;
if (WARN_ON_ONCE(!unwind_pending(task)))
@@ -152,6 +155,21 @@ static void unwind_deferred_task_work(struct callback_head *head)
srcu_read_unlock(&unwind_srcu, idx);
}
+static void unwind_deferred_task_work(struct callback_head *head)
+{
+ process_unwind_deferred();
+}
+
+static void unwind_deferred_callback(void *data, struct task_struct *p, bool group_dead)
+{
+ if (!unwind_pending(p))
+ return;
+
+ process_unwind_deferred();
+
+ task_work_cancel(p, &p->unwind_info.work);
+}
+
static int unwind_deferred_request_nmi(struct unwind_work *work, u64 *timestamp)
{
	struct unwind_task_info *info = &current->unwind_info;
@@ -329,6 +347,10 @@ void unwind_deferred_cancel(struct unwind_work *work)
for_each_process_thread(g, t) {
clear_bit(bit, &t->unwind_mask);
}
+
+ /* Is this the last registered unwinding? */
+ if (!unwind_mask)
+ unregister_trace_sched_process_exit(unwind_deferred_callback, NULL);
}
int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
@@ -341,6 +363,15 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
if (unwind_mask == ~(UNWIND_PENDING))
return -EBUSY;
+ /* Is this the first registered unwinding? */
+ if (!unwind_mask) {
+ int ret;
+
+ ret = register_trace_sched_process_exit(unwind_deferred_callback, NULL);
+ if (ret < 0)
+ return ret;
+ }
+
work->bit = ffz(unwind_mask);
unwind_mask |= 1UL << work->bit;
prev parent reply other threads:[~2025-05-21 16:50 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-13 22:34 [PATCH v9 00/13] unwind_user: x86: Deferred unwinding infrastructure Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 01/13] unwind_user: Add user space unwinding API Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 02/13] unwind_user: Add frame pointer support Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 03/13] unwind_user/x86: Enable frame pointer unwinding on x86 Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 04/13] perf/x86: Rename and move get_segment_base() and make it global Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 05/13] unwind_user: Add compat mode frame pointer support Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 06/13] unwind_user/x86: Enable compat mode frame pointer unwinding on x86 Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 07/13] unwind_user/deferred: Add unwind_deferred_trace() Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 08/13] unwind_user/deferred: Add unwind cache Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 09/13] unwind_user/deferred: Add deferred unwinding interface Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 10/13] unwind_user/deferred: Make unwind deferral requests NMI-safe Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 11/13] unwind deferred: Use bitmask to determine which callbacks to call Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 12/13] unwind deferred: Use SRCU unwind_deferred_task_work() Steven Rostedt
2025-05-13 22:34 ` [PATCH v9 13/13] unwind: Clear unwind_mask on exit back to user space Steven Rostedt
2025-05-14 17:27 ` [PATCH v9 00/13] unwind_user: x86: Deferred unwinding infrastructure Steven Rostedt
2025-05-16 23:39 ` Namhyung Kim
2025-05-19 15:33 ` Steven Rostedt
2025-05-20 9:35 ` Ingo Molnar
2025-05-20 15:57 ` Steven Rostedt
2025-05-20 16:29 ` Steven Rostedt
2025-05-20 23:26 ` Masami Hiramatsu
2025-05-20 23:55 ` Steven Rostedt
2025-05-21 16:50 ` Steven Rostedt [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250521125048.4d572d08@gandalf.local.home \
--to=rostedt@goodmis.org \
--cc=andrii@kernel.org \
--cc=bp@alien8.de \
--cc=bpf@vger.kernel.org \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=jolsa@kernel.org \
--cc=jpoimboe@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).