From: Steven Rostedt <rostedt@kernel.org>
To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
bpf@vger.kernel.org, x86@kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@kernel.org>, Jiri Olsa <jolsa@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Andrii Nakryiko <andrii@kernel.org>,
Indu Bhagat <indu.bhagat@oracle.com>,
"Jose E. Marchesi" <jemarch@gnu.org>,
Beau Belgrave <beaub@linux.microsoft.com>,
Jens Remus <jremus@linux.ibm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Jens Axboe <axboe@kernel.dk>, Florian Weimer <fweimer@redhat.com>,
Sam James <sam@gentoo.org>
Subject: [PATCH v13 07/14] unwind_user/deferred: Make unwind deferral requests NMI-safe
Date: Mon, 07 Jul 2025 21:22:46 -0400 [thread overview]
Message-ID: <20250708012358.831631671@kernel.org> (raw)
In-Reply-To: 20250708012239.268642741@kernel.org
From: Steven Rostedt <rostedt@goodmis.org>
Make unwind_deferred_request() NMI-safe so tracers in NMI context can
call it and safely request a user space stacktrace when the task exits.
Note, this is only allowed for architectures that implement a safe
cmpxchg. If an architecture that does not support an NMI-safe cmpxchg
requests a deferred stack trace from NMI context, it will get -EINVAL.
Those architectures would need another method (perhaps an irqwork) to
request a deferred user space stack trace. That can be dealt with later
if one of these architectures requires this feature.
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
Changes since v12: https://lore.kernel.org/20250701005451.737614486@goodmis.org
- Now that the timestamp has been replaced by a cookie that uses only a 32
bit cmpxchg(), this code just checks if the architecture has a safe
cmpxchg that can be used in NMI and doesn't do the 64 bit check.
Only the pending value is converted to local_t.
include/linux/unwind_deferred_types.h | 4 +-
kernel/unwind/deferred.c | 56 ++++++++++++++++++++++-----
2 files changed, 49 insertions(+), 11 deletions(-)
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
index 79b4f8cece53..cd95ed1c8610 100644
--- a/include/linux/unwind_deferred_types.h
+++ b/include/linux/unwind_deferred_types.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+#include <asm/local.h>
+
struct unwind_cache {
unsigned int nr_entries;
unsigned long entries[];
@@ -20,7 +22,7 @@ struct unwind_task_info {
struct unwind_cache *cache;
struct callback_head work;
union unwind_task_id id;
- int pending;
+ local_t pending;
};
#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index b1faaa55e5d5..2417e4ebbc82 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -12,6 +12,31 @@
#include <linux/slab.h>
#include <linux/mm.h>
+/*
+ * For requesting a deferred user space stack trace from NMI context
+ * the architecture must support a safe cmpxchg in NMI context.
+ * Architectures that do not have that cannot ask for a deferred
+ * user space stack trace from an NMI context. If one does, then it
+ * will get -EINVAL.
+ */
+#if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)
+# define CAN_USE_IN_NMI 1
+static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
+{
+ u32 old = 0;
+
+ return try_cmpxchg(&info->id.cnt, &old, cnt);
+}
+#else
+# define CAN_USE_IN_NMI 0
+/* When NMIs are not allowed, this always succeeds */
+static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
+{
+ info->id.cnt = cnt;
+ return true;
+}
+#endif
+
/* Make the cache fit in a 4K page */
#define UNWIND_MAX_ENTRIES \
((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
@@ -43,7 +68,6 @@ static u64 get_cookie(struct unwind_task_info *info)
{
u32 cpu_cnt;
u32 cnt;
- u32 old = 0;
if (info->id.cpu)
return info->id.id;
@@ -52,7 +76,7 @@ static u64 get_cookie(struct unwind_task_info *info)
cpu_cnt += 2;
cnt = cpu_cnt | 1; /* Always make non zero */
- if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
+ if (try_assign_cnt(info, cnt)) {
/* Update the per cpu counter */
__this_cpu_write(unwind_ctx_ctr, cpu_cnt);
}
@@ -119,11 +143,11 @@ static void unwind_deferred_task_work(struct callback_head *head)
struct unwind_work *work;
u64 cookie;
- if (WARN_ON_ONCE(!info->pending))
+ if (WARN_ON_ONCE(!local_read(&info->pending)))
return;
/* Allow work to come in again */
- WRITE_ONCE(info->pending, 0);
+ local_set(&info->pending, 0);
/*
* From here on out, the callback must always be called, even if it's
@@ -170,31 +194,43 @@ static void unwind_deferred_task_work(struct callback_head *head)
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
struct unwind_task_info *info = &current->unwind_info;
+ long pending;
int ret;
*cookie = 0;
- if (WARN_ON_ONCE(in_nmi()))
- return -EINVAL;
-
if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
!user_mode(task_pt_regs(current)))
return -EINVAL;
+ /* NMI requires having safe cmpxchg operations */
+ if (!CAN_USE_IN_NMI && in_nmi())
+ return -EINVAL;
+
guard(irqsave)();
*cookie = get_cookie(info);
/* callback already pending? */
- if (info->pending)
+ pending = local_read(&info->pending);
+ if (pending)
return 1;
+ if (CAN_USE_IN_NMI) {
+ /* Claim the work unless an NMI just now swooped in to do so. */
+ if (!local_try_cmpxchg(&info->pending, &pending, 1))
+ return 1;
+ } else {
+ local_set(&info->pending, 1);
+ }
+
/* The work has been claimed, now schedule it. */
ret = task_work_add(current, &info->work, TWA_RESUME);
- if (WARN_ON_ONCE(ret))
+ if (WARN_ON_ONCE(ret)) {
+ local_set(&info->pending, 0);
return ret;
+ }
- info->pending = 1;
return 0;
}
--
2.47.2
next prev parent reply other threads:[~2025-07-08 1:23 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-08 1:22 [PATCH v13 00/14] unwind_user: x86: Deferred unwinding infrastructure Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 01/14] unwind_user: Add user space unwinding API Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 02/14] unwind_user: Add frame pointer support Steven Rostedt
2025-07-09 10:01 ` Jens Remus
2025-07-10 12:28 ` Jens Remus
2025-07-10 15:21 ` Steven Rostedt
2025-07-10 15:41 ` Jens Remus
2025-07-10 17:08 ` Steven Rostedt
2025-07-14 12:52 ` Jens Remus
2025-07-08 1:22 ` [PATCH v13 03/14] unwind_user: Add compat mode " Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 04/14] unwind_user/deferred: Add unwind_user_faultable() Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 05/14] unwind_user/deferred: Add unwind cache Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 06/14] unwind_user/deferred: Add deferred unwinding interface Steven Rostedt
2025-07-08 1:22 ` Steven Rostedt [this message]
2025-07-14 13:29 ` [PATCH v13 07/14] unwind_user/deferred: Make unwind deferral requests NMI-safe Peter Zijlstra
2025-07-14 14:19 ` Steven Rostedt
2025-07-14 15:05 ` Peter Zijlstra
2025-07-14 15:11 ` Steven Rostedt
2025-07-15 9:09 ` Peter Zijlstra
2025-07-15 12:35 ` Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 08/14] unwind deferred: Use bitmask to determine which callbacks to call Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 09/14] unwind deferred: Use SRCU unwind_deferred_task_work() Steven Rostedt
2025-07-14 13:56 ` Peter Zijlstra
2025-07-14 14:21 ` Steven Rostedt
2025-07-14 15:03 ` Peter Zijlstra
2025-07-08 1:22 ` [PATCH v13 10/14] unwind: Clear unwind_mask on exit back to user space Steven Rostedt
2025-07-15 10:29 ` Peter Zijlstra
2025-07-15 12:49 ` Steven Rostedt
2025-07-15 18:06 ` Steven Rostedt
2025-07-15 18:10 ` Steven Rostedt
2025-07-15 18:26 ` Steven Rostedt
2025-07-15 19:04 ` Peter Zijlstra
2025-07-15 19:01 ` Peter Zijlstra
2025-07-15 17:20 ` Steven Rostedt
2025-07-15 19:07 ` Peter Zijlstra
2025-07-15 22:01 ` Steven Rostedt
2025-07-16 18:26 ` Steven Rostedt
2025-07-16 18:33 ` Steven Rostedt
2025-07-16 19:25 ` Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 11/14] unwind: Add USED bit to only have one conditional on way " Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 12/14] unwind: Finish up unwind when a task exits Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 13/14] unwind_user/x86: Enable frame pointer unwinding on x86 Steven Rostedt
2025-07-11 8:43 ` David Laight
2025-07-11 16:11 ` Steven Rostedt
2025-07-08 1:22 ` [PATCH v13 14/14] unwind_user/x86: Enable compat mode " Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250708012358.831631671@kernel.org \
--to=rostedt@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=axboe@kernel.dk \
--cc=beaub@linux.microsoft.com \
--cc=bpf@vger.kernel.org \
--cc=fweimer@redhat.com \
--cc=indu.bhagat@oracle.com \
--cc=jemarch@gnu.org \
--cc=jolsa@kernel.org \
--cc=jpoimboe@kernel.org \
--cc=jremus@linux.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=sam@gentoo.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).