From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
bpf@vger.kernel.org, x86@kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@kernel.org>, Jiri Olsa <jolsa@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Andrii Nakryiko <andrii@kernel.org>,
Indu Bhagat <indu.bhagat@oracle.com>,
"Jose E. Marchesi" <jemarch@gnu.org>,
Beau Belgrave <beaub@linux.microsoft.com>,
Jens Remus <jremus@linux.ibm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH v10 07/14] unwind_user/deferred: Make unwind deferral requests NMI-safe
Date: Tue, 10 Jun 2025 20:54:28 -0400 [thread overview]
Message-ID: <20250611010428.938845449@goodmis.org> (raw)
In-Reply-To: 20250611005421.144238328@goodmis.org
From: Josh Poimboeuf <jpoimboe@kernel.org>
Make unwind_deferred_request() NMI-safe so tracers in NMI context can
call it and safely request a user space stacktrace when the task exits
the kernel to user space.
A "nmi_timestamp" field is added to unwind_task_info. NMIs write to it
instead of info->timestamp so that they do not race with the setting of
info->timestamp.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Changes since v9: https://lore.kernel.org/linux-trace-kernel/20250513223552.636076711@goodmis.org/
- Check for ret < 0 instead of just ret != 0 from return code of
task_work_add(). Don't want to just assume it's less than zero as it
needs to return a negative on error.
include/linux/unwind_deferred_types.h | 1 +
kernel/unwind/deferred.c | 91 ++++++++++++++++++++++++---
2 files changed, 84 insertions(+), 8 deletions(-)
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index 5df264cf81ad..ae27a02234b8 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -11,6 +11,7 @@ struct unwind_task_info {
struct unwind_cache *cache;
struct callback_head work;
u64 timestamp;
+ u64 nmi_timestamp;
int pending;
};
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index b76c704ddc6d..88c867c32c01 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -25,8 +25,27 @@ static u64 get_timestamp(struct unwind_task_info *info)
{
lockdep_assert_irqs_disabled();
- if (!info->timestamp)
- info->timestamp = local_clock();
+ /*
+ * Note, the timestamp is generated on the first request.
+ * If it exists here, then the timestamp is earlier than
+ * this request and it means that this request will be
+ * valid for the stacktrace.
+ */
+ if (!info->timestamp) {
+ WRITE_ONCE(info->timestamp, local_clock());
+ barrier();
+ /*
+ * If an NMI came in and set a timestamp, it means that
+ * it happened before this timestamp was set (otherwise
+ * the NMI would have used this one). Use the NMI timestamp
+ * instead.
+ */
+ if (unlikely(info->nmi_timestamp)) {
+ WRITE_ONCE(info->timestamp, info->nmi_timestamp);
+ barrier();
+ WRITE_ONCE(info->nmi_timestamp, 0);
+ }
+ }
return info->timestamp;
}
@@ -103,6 +122,13 @@ static void unwind_deferred_task_work(struct callback_head *head)
unwind_deferred_trace(&trace);
+ /* Check if the timestamp was only set by NMI */
+ if (info->nmi_timestamp) {
+ WRITE_ONCE(info->timestamp, info->nmi_timestamp);
+ barrier();
+ WRITE_ONCE(info->nmi_timestamp, 0);
+ }
+
timestamp = info->timestamp;
guard(mutex)(&callback_mutex);
@@ -111,6 +137,48 @@ static void unwind_deferred_task_work(struct callback_head *head)
}
}
+static int unwind_deferred_request_nmi(struct unwind_work *work, u64 *timestamp)
+{
+ struct unwind_task_info *info = &current->unwind_info;
+ bool inited_timestamp = false;
+ int ret;
+
+ /* Always use the nmi_timestamp first */
+ *timestamp = info->nmi_timestamp ? : info->timestamp;
+
+ if (!*timestamp) {
+ /*
+ * This is the first unwind request since the most recent entry
+ * from user space. Initialize the task timestamp.
+ *
+ * Don't write to info->timestamp directly, otherwise it may race
+ * with an interruption of get_timestamp().
+ */
+ info->nmi_timestamp = local_clock();
+ *timestamp = info->nmi_timestamp;
+ inited_timestamp = true;
+ }
+
+ if (info->pending)
+ return 1;
+
+ ret = task_work_add(current, &info->work, TWA_NMI_CURRENT);
+ if (ret < 0) {
+ /*
+ * If this set nmi_timestamp and is not using it,
+ * there's no guarantee that it will be used.
+ * Set it back to zero.
+ */
+ if (inited_timestamp)
+ info->nmi_timestamp = 0;
+ return ret;
+ }
+
+ info->pending = 1;
+
+ return 0;
+}
+
/**
* unwind_deferred_request - Request a user stacktrace on task exit
* @work: Unwind descriptor requesting the trace
@@ -139,31 +207,38 @@ static void unwind_deferred_task_work(struct callback_head *head)
int unwind_deferred_request(struct unwind_work *work, u64 *timestamp)
{
struct unwind_task_info *info = &current->unwind_info;
+ int pending;
int ret;
*timestamp = 0;
- if (WARN_ON_ONCE(in_nmi()))
- return -EINVAL;
-
if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
!user_mode(task_pt_regs(current)))
return -EINVAL;
+ if (in_nmi())
+ return unwind_deferred_request_nmi(work, timestamp);
+
guard(irqsave)();
*timestamp = get_timestamp(info);
/* callback already pending? */
- if (info->pending)
+ pending = READ_ONCE(info->pending);
+ if (pending)
+ return 1;
+
+ /* Claim the work unless an NMI just now swooped in to do so. */
+ if (!try_cmpxchg(&info->pending, &pending, 1))
return 1;
/* The work has been claimed, now schedule it. */
ret = task_work_add(current, &info->work, TWA_RESUME);
- if (WARN_ON_ONCE(ret))
+ if (WARN_ON_ONCE(ret)) {
+ WRITE_ONCE(info->pending, 0);
return ret;
+ }
- info->pending = 1;
return 0;
}
--
2.47.2
next prev parent reply other threads:[~2025-06-11 1:03 UTC|newest]
Thread overview: 79+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-11 0:54 [PATCH v10 00/14] unwind_user: x86: Deferred unwinding infrastructure Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 01/14] unwind_user: Add user space unwinding API Steven Rostedt
2025-06-18 13:49 ` Peter Zijlstra
2025-06-11 0:54 ` [PATCH v10 02/14] unwind_user: Add frame pointer support Steven Rostedt
2025-06-18 13:52 ` Peter Zijlstra
2025-06-18 15:09 ` Steven Rostedt
2025-06-23 16:31 ` Indu Bhagat
2025-06-24 20:30 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 03/14] unwind_user: Add compat mode " Steven Rostedt
2025-06-18 13:46 ` Peter Zijlstra
2025-06-18 15:10 ` Steven Rostedt
2025-06-18 17:52 ` Linus Torvalds
2025-06-18 18:37 ` Steven Rostedt
2025-06-18 13:47 ` Peter Zijlstra
2025-06-18 15:18 ` Steven Rostedt
2025-06-19 7:51 ` Peter Zijlstra
2025-06-19 8:39 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 04/14] unwind_user/deferred: Add unwind_deferred_trace() Steven Rostedt
2025-06-18 13:54 ` Peter Zijlstra
2025-06-18 13:59 ` Peter Zijlstra
2025-06-18 15:20 ` Steven Rostedt
2025-06-18 16:26 ` Steven Rostedt
2025-06-18 14:01 ` Peter Zijlstra
2025-06-18 15:23 ` Steven Rostedt
2025-06-18 14:02 ` Peter Zijlstra
2025-06-18 15:29 ` Steven Rostedt
2025-06-19 7:54 ` Peter Zijlstra
2025-06-19 8:44 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 05/14] unwind_user/deferred: Add unwind cache Steven Rostedt
2025-06-18 14:13 ` Peter Zijlstra
2025-06-18 15:33 ` Steven Rostedt
2025-06-19 7:56 ` Peter Zijlstra
2025-06-19 8:47 ` Steven Rostedt
2025-06-19 9:04 ` Peter Zijlstra
2025-06-19 9:12 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 06/14] unwind_user/deferred: Add deferred unwinding interface Steven Rostedt
2025-06-18 14:20 ` Peter Zijlstra
2025-06-18 15:37 ` Steven Rostedt
2025-06-18 15:38 ` Steven Rostedt
2025-06-19 8:01 ` Peter Zijlstra
2025-06-19 8:49 ` Steven Rostedt
2025-06-18 18:46 ` Peter Zijlstra
2025-06-18 19:09 ` Steven Rostedt
2025-06-18 19:36 ` Steven Rostedt
2025-06-19 7:50 ` Peter Zijlstra
2025-06-19 8:56 ` Steven Rostedt
2025-06-19 9:11 ` Peter Zijlstra
2025-06-24 14:03 ` Steven Rostedt
2025-06-24 22:36 ` Steven Rostedt
2025-06-11 0:54 ` Steven Rostedt [this message]
2025-06-19 8:34 ` [PATCH v10 07/14] unwind_user/deferred: Make unwind deferral requests NMI-safe Peter Zijlstra
2025-06-19 8:37 ` Steven Rostedt
2025-06-19 8:44 ` Peter Zijlstra
2025-06-19 8:48 ` Peter Zijlstra
2025-06-19 9:10 ` Steven Rostedt
2025-06-19 9:24 ` Peter Zijlstra
2025-06-19 8:57 ` Peter Zijlstra
2025-06-19 9:07 ` Steven Rostedt
2025-06-19 9:32 ` Peter Zijlstra
2025-06-19 9:34 ` Peter Zijlstra
2025-06-19 9:42 ` Steven Rostedt
2025-06-19 9:45 ` Peter Zijlstra
2025-06-19 10:19 ` Steven Rostedt
2025-06-19 10:39 ` Peter Zijlstra
2025-06-19 13:04 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 08/14] unwind deferred: Use bitmask to determine which callbacks to call Steven Rostedt
2025-06-20 8:15 ` Peter Zijlstra
2025-06-24 14:55 ` Steven Rostedt
2025-06-24 15:00 ` Peter Zijlstra
2025-06-24 16:36 ` Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 09/14] unwind deferred: Use SRCU unwind_deferred_task_work() Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 10/14] unwind: Clear unwind_mask on exit back to user space Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 11/14] unwind: Finish up unwind when a task exits Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 12/14] unwind_user/x86: Enable frame pointer unwinding on x86 Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 13/14] perf/x86: Rename and move get_segment_base() and make it global Steven Rostedt
2025-06-11 0:54 ` [PATCH v10 14/14] unwind_user/x86: Enable compat mode frame pointer unwinding on x86 Steven Rostedt
2025-06-12 21:44 ` [PATCH v10 00/14] unwind_user: x86: Deferred unwinding infrastructure Andrii Nakryiko
2025-06-12 22:02 ` Josh Poimboeuf
2025-06-12 23:30 ` Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250611010428.938845449@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=beaub@linux.microsoft.com \
--cc=bpf@vger.kernel.org \
--cc=indu.bhagat@oracle.com \
--cc=jemarch@gnu.org \
--cc=jolsa@kernel.org \
--cc=jpoimboe@kernel.org \
--cc=jremus@linux.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).