public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Jens Remus <jremus@linux.ibm.com>
Cc: Steven Rostedt <rostedt@kernel.org>,
	linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
	bpf@vger.kernel.org, x86@kernel.org,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@kernel.org>,
	Ingo Molnar <mingo@kernel.org>, Jiri Olsa <jolsa@kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Andrii Nakryiko <andrii@kernel.org>,
	Indu Bhagat <indu.bhagat@oracle.com>,
	"Jose E. Marchesi" <jemarch@gnu.org>,
	Beau Belgrave <beaub@linux.microsoft.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Florian Weimer <fweimer@redhat.com>, Sam James <sam@gentoo.org>,
	Kees Cook <kees@kernel.org>,
	Carlos O'Donell <codonell@redhat.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>
Subject: Re: [PATCH v16 0/4] perf: Support the deferred unwinding infrastructure
Date: Fri, 24 Oct 2025 16:51:56 +0200	[thread overview]
Message-ID: <20251024145156.GM4068168@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20251024140815.GE3245006@noisy.programming.kicks-ass.net>

On Fri, Oct 24, 2025 at 04:08:15PM +0200, Peter Zijlstra wrote:

> Yeah, I suppose that should work. Let me rework things accordingly.

---
Subject: unwind_user/x86: Teach FP unwind about start of function
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri Oct 24 12:31:10 CEST 2025

When userspace is interrupted at the start of a function, before we
get a chance to complete the frame, unwind will miss one caller.

X86 has a uprobe specific fixup for this, add bits to the generic
unwinder to support this.

Suggested-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/core.c             |   40 -------------------------------------
 arch/x86/include/asm/unwind_user.h |   12 +++++++++++
 arch/x86/include/asm/uprobes.h     |    9 ++++++++
 arch/x86/kernel/uprobes.c          |   32 +++++++++++++++++++++++++++++
 include/linux/unwind_user_types.h  |    1 
 kernel/unwind/user.c               |   35 ++++++++++++++++++++++++--------
 6 files changed, 80 insertions(+), 49 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,46 +2845,6 @@ static unsigned long get_segment_base(un
 	return get_desc_base(desc);
 }
 
-#ifdef CONFIG_UPROBES
-/*
- * Heuristic-based check if uprobe is installed at the function entry.
- *
- * Under assumption of user code being compiled with frame pointers,
- * `push %rbp/%ebp` is a good indicator that we indeed are.
- *
- * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
- * If we get this wrong, captured stack trace might have one extra bogus
- * entry, but the rest of stack trace will still be meaningful.
- */
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
-	struct arch_uprobe *auprobe;
-
-	if (!current->utask)
-		return false;
-
-	auprobe = current->utask->auprobe;
-	if (!auprobe)
-		return false;
-
-	/* push %rbp/%ebp */
-	if (auprobe->insn[0] == 0x55)
-		return true;
-
-	/* endbr64 (64-bit only) */
-	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
-		return true;
-
-	return false;
-}
-
-#else
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
-	return false;
-}
-#endif /* CONFIG_UPROBES */
-
 #ifdef CONFIG_IA32_EMULATION
 
 #include <linux/compat.h>
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_UNWIND_USER_H
 
 #include <asm/ptrace.h>
+#include <asm/uprobes.h>
 
 #define ARCH_INIT_USER_FP_FRAME(ws)			\
 	.cfa_off	=  2*(ws),			\
@@ -10,6 +11,12 @@
 	.fp_off		= -2*(ws),			\
 	.use_fp		= true,
 
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws)		\
+	.cfa_off	=  1*(ws),			\
+	.ra_off		= -1*(ws),			\
+	.fp_off		= 0,				\
+	.use_fp		= false,
+
 static inline int unwind_user_word_size(struct pt_regs *regs)
 {
 	/* We can't unwind VM86 stacks */
@@ -22,4 +29,9 @@ static inline int unwind_user_word_size(
 	return sizeof(long);
 }
 
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+	return is_uprobe_at_func_entry(regs);
+}
+
 #endif /* _ASM_X86_UNWIND_USER_H */
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -62,4 +62,13 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_UPROBES */
+
 #endif	/* _ASM_UPROBES_H */
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1791,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct retu
 	else
 		return regs->sp <= ret->stack;
 }
+
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we indeed are.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	struct arch_uprobe *auprobe;
+
+	if (!current->utask)
+		return false;
+
+	auprobe = current->utask->auprobe;
+	if (!auprobe)
+		return false;
+
+	/* push %rbp/%ebp */
+	if (auprobe->insn[0] == 0x55)
+		return true;
+
+	/* endbr64 (64-bit only) */
+	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
+		return true;
+
+	return false;
+}
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -39,6 +39,7 @@ struct unwind_user_state {
 	unsigned int				ws;
 	enum unwind_user_type			current_type;
 	unsigned int				available_types;
+	bool					topmost;
 	bool					done;
 };
 
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -26,14 +26,12 @@ get_user_word(unsigned long *word, unsig
 	return get_user(*word, addr);
 }
 
-static int unwind_user_next_fp(struct unwind_user_state *state)
+static int unwind_user_next_common(struct unwind_user_state *state,
+				   const struct unwind_user_frame *frame)
 {
-	const struct unwind_user_frame frame = {
-		ARCH_INIT_USER_FP_FRAME(state->ws)
-	};
 	unsigned long cfa, fp, ra;
 
-	if (frame.use_fp) {
+	if (frame->use_fp) {
 		if (state->fp < state->sp)
 			return -EINVAL;
 		cfa = state->fp;
@@ -42,7 +40,7 @@ static int unwind_user_next_fp(struct un
 	}
 
 	/* Get the Canonical Frame Address (CFA) */
-	cfa += frame.cfa_off;
+	cfa += frame->cfa_off;
 
 	/* stack going in wrong direction? */
 	if (cfa <= state->sp)
@@ -53,19 +51,37 @@ static int unwind_user_next_fp(struct un
 		return -EINVAL;
 
 	/* Find the Return Address (RA) */
-	if (get_user_word(&ra, cfa, frame.ra_off, state->ws))
+	if (get_user_word(&ra, cfa, frame->ra_off, state->ws))
 		return -EINVAL;
 
-	if (frame.fp_off && get_user_word(&fp, cfa, frame.fp_off, state->ws))
+	if (frame->fp_off && get_user_word(&fp, cfa, frame->fp_off, state->ws))
 		return -EINVAL;
 
 	state->ip = ra;
 	state->sp = cfa;
-	if (frame.fp_off)
+	if (frame->fp_off)
 		state->fp = fp;
+	state->topmost = false;
 	return 0;
 }
 
+static int unwind_user_next_fp(struct unwind_user_state *state)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	const struct unwind_user_frame fp_frame = {
+		ARCH_INIT_USER_FP_FRAME(state->ws)
+	};
+	const struct unwind_user_frame fp_entry_frame = {
+		ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
+	};
+
+	if (state->topmost && unwind_user_at_function_start(regs))
+		return unwind_user_next_common(state, &fp_entry_frame);
+
+	return unwind_user_next_common(state, &fp_frame);
+}
+
 static int unwind_user_next(struct unwind_user_state *state)
 {
 	unsigned long iter_mask = state->available_types;
@@ -118,6 +134,7 @@ static int unwind_user_start(struct unwi
 		state->done = true;
 		return -EINVAL;
 	}
+	state->topmost = true;
 
 	return 0;
 }

  reply	other threads:[~2025-10-24 14:52 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-07 21:40 [PATCH v16 0/4] perf: Support the deferred unwinding infrastructure Steven Rostedt
2025-10-07 21:40 ` [PATCH v16 1/4] unwind: Add interface to allow tracing a single task Steven Rostedt
2025-10-07 21:40 ` [PATCH v16 2/4] perf: Support deferred user callchains Steven Rostedt
2025-10-07 21:40 ` [PATCH v16 3/4] perf: Have the deferred request record the user context cookie Steven Rostedt
2025-10-07 21:40 ` [PATCH v16 4/4] perf: Support deferred user callchains for per CPU events Steven Rostedt
2025-10-23 15:00 ` [PATCH v16 0/4] perf: Support the deferred unwinding infrastructure Peter Zijlstra
2025-10-23 16:40   ` Steven Rostedt
2025-10-24  8:26     ` Peter Zijlstra
2025-10-24 12:58       ` Peter Zijlstra
2025-10-26  0:58         ` Namhyung Kim
2025-10-24  9:29   ` Peter Zijlstra
2025-10-24 10:41     ` Peter Zijlstra
2025-10-24 13:58       ` Jens Remus
2025-10-24 14:08         ` Peter Zijlstra
2025-10-24 14:51           ` Peter Zijlstra [this message]
2025-10-24 14:54             ` Peter Zijlstra
2025-10-24 14:57               ` Peter Zijlstra
2025-10-24 15:10                 ` Peter Zijlstra
2025-10-24 15:09             ` Jens Remus
2025-10-24 15:11               ` Peter Zijlstra
2025-10-29  9:36             ` [tip: perf/core] unwind_user/x86: Teach FP unwind about start of function tip-bot2 for Peter Zijlstra
2025-11-04 11:22             ` [PATCH v16 0/4] perf: Support the deferred unwinding infrastructure Florian Weimer
2025-11-05 13:08               ` Peter Zijlstra
2025-10-29  9:36   ` [tip: perf/core] perf: Support deferred user unwind tip-bot2 for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251024145156.GM4068168@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=beaub@linux.microsoft.com \
    --cc=bpf@vger.kernel.org \
    --cc=codonell@redhat.com \
    --cc=fweimer@redhat.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=indu.bhagat@oracle.com \
    --cc=jemarch@gnu.org \
    --cc=jolsa@kernel.org \
    --cc=jpoimboe@kernel.org \
    --cc=jremus@linux.ibm.com \
    --cc=kees@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=rostedt@kernel.org \
    --cc=sam@gentoo.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox