* [PATCH 1/2] arm64: stacktrace: Check kretprobe_find_ret_addr() return value
From: Mark Rutland @ 2025-05-21 11:09 UTC
To: linux-arm-kernel
Cc: andrea.porta, catalin.marinas, jpoimboe, leitao, linux-toolchains,
live-patching, mark.rutland, mbenes, pmladek, song, will

If kretprobe_find_ret_addr() fails to find the original return address,
it returns 0. Check for this case so that a reliable stacktrace fails
explicitly instead of silently ignoring the error (a sketch of the
resulting function follows the patch).

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Andrea della Porta <andrea.porta@suse.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Song Liu <song@kernel.org>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/stacktrace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1d9d51d7627fd..f6494c0942144 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -152,6 +152,8 @@ kunwind_recover_return_address(struct kunwind_state *state)
 		orig_pc = kretprobe_find_ret_addr(state->task,
 						  (void *)state->common.fp,
 						  &state->kr_cur);
+		if (!orig_pc)
+			return -EINVAL;
 		state->common.pc = orig_pc;
 		state->flags.kretprobe = 1;
 	}
--
2.30.2
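
A sketch of how kunwind_recover_return_address() reads with this check
applied: the kretprobe leg is taken from the hunk above, while the
function shape around it (and the CONFIG_FUNCTION_GRAPH_TRACER leg,
omitted here) is reconstructed from arch/arm64/kernel/stacktrace.c and
may differ in detail from the posted tree.

static __always_inline int
kunwind_recover_return_address(struct kunwind_state *state)
{
#ifdef CONFIG_KRETPROBES
	if (is_kretprobe_trampoline(state->common.pc)) {
		unsigned long orig_pc;

		/*
		 * kretprobe_find_ret_addr() returns 0 when it cannot find
		 * the original return address. Propagate that as -EINVAL
		 * rather than reporting the trampoline address as if it
		 * were a real caller.
		 */
		orig_pc = kretprobe_find_ret_addr(state->task,
						  (void *)state->common.fp,
						  &state->kr_cur);
		if (!orig_pc)
			return -EINVAL;
		state->common.pc = orig_pc;
		state->flags.kretprobe = 1;
	}
#endif /* CONFIG_KRETPROBES */

	return 0;
}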
* [PATCH 2/2] arm64: stacktrace: Implement arch_stack_walk_reliable()
From: Mark Rutland @ 2025-05-21 11:10 UTC
To: linux-arm-kernel
Cc: andrea.porta, catalin.marinas, jpoimboe, leitao, linux-toolchains,
live-patching, mark.rutland, mbenes, pmladek, song, will
From: Song Liu <song@kernel.org>
Add arch_stack_walk_reliable(), which will be used during kernel live
patching to detect when threads have completed executing old versions of
functions.

Note that arch_stack_walk_reliable() only needs to guarantee that it
returns an error code when it cannot provide a reliable stacktrace; it
is not required to produce a reliable stacktrace in every scenario,
only to report when it cannot.

At present we can only reliably unwind up to an exception boundary. In
future we should be able to improve this with additional data from the
compiler (e.g. sframe). A sketch of how a consumer drives this follows
the patch.

Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250320171559.3423224-2-song@kernel.org
[ Mark: Simplify logic, clarify commit message ]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Andrea della Porta <andrea.porta@suse.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Song Liu <song@kernel.org>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig             |  2 +-
 arch/arm64/kernel/stacktrace.c | 53 +++++++++++++++++++++++++++-------
 2 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a182295e6f08b..7a3463bafb274 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -279,6 +279,7 @@ config ARM64
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select USER_STACKTRACE_SUPPORT
 	select VDSO_GETRANDOM
+	select HAVE_RELIABLE_STACKTRACE
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
@@ -2512,4 +2513,3 @@ endmenu # "CPU Power Management"
 source "drivers/acpi/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
-
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f6494c0942144..acf682afbd478 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -279,21 +279,24 @@ kunwind_next(struct kunwind_state *state)
 
 typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
 
-static __always_inline void
+static __always_inline int
 do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
 	   void *cookie)
 {
-	if (kunwind_recover_return_address(state))
-		return;
+	int ret;
 
-	while (1) {
-		int ret;
+	ret = kunwind_recover_return_address(state);
+	if (ret)
+		return ret;
 
+	while (1) {
 		if (!consume_state(state, cookie))
-			break;
+			return -EINVAL;
 
 		ret = kunwind_next(state);
+		if (ret == -ENOENT)
+			return 0;
 		if (ret < 0)
-			break;
+			return ret;
 	}
 }
@@ -326,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
 			: stackinfo_get_unknown();		\
 	})
 
-static __always_inline void
+static __always_inline int
 kunwind_stack_walk(kunwind_consume_fn consume_state,
 		   void *cookie, struct task_struct *task,
 		   struct pt_regs *regs)
@@ -354,7 +357,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
 
 	if (regs) {
 		if (task != current)
-			return;
+			return -EINVAL;
 		kunwind_init_from_regs(&state, regs);
 	} else if (task == current) {
 		kunwind_init_from_caller(&state);
@@ -362,7 +365,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
 		kunwind_init_from_task(&state, task);
 	}
 
-	do_kunwind(&state, consume_state, cookie);
+	return do_kunwind(&state, consume_state, cookie);
 }
 
 struct kunwind_consume_entry_data {
@@ -389,6 +392,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
 	kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
 }
 
+static __always_inline bool
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+	/*
+	 * At an exception boundary we can reliably consume the saved PC. We do
+	 * not know whether the LR was live when the exception was taken, and
+	 * so we cannot perform the next unwind step reliably.
+	 *
+	 * All that matters is whether the *entire* unwind is reliable, so give
+	 * up as soon as we hit an exception boundary.
+	 */
+	if (state->source == KUNWIND_SOURCE_REGS_PC)
+		return false;
+
+	return arch_kunwind_consume_entry(state, cookie);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+					      void *cookie,
+					      struct task_struct *task)
+{
+	struct kunwind_consume_entry_data data = {
+		.consume_entry = consume_entry,
+		.cookie = cookie,
+	};
+
+	return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
+				  task, NULL);
+}
+
 struct bpf_unwind_consume_entry_data {
 	bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
 	void *cookie;
--
2.30.2
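
To illustrate the intended consumer: a livepatch-style check can reach
arch_stack_walk_reliable() through the generic
stack_trace_save_tsk_reliable() wrapper. The sketch below is modeled on
klp_check_stack() in kernel/livepatch/transition.c; check_task_stack()
and MAX_STACK_ENTRIES are illustrative names, not part of this series.

#include <linux/sched.h>
#include <linux/stacktrace.h>

#define MAX_STACK_ENTRIES	100	/* illustrative bound */

/*
 * Callers are assumed to be serialized (livepatch serializes under
 * klp_mutex); the buffer is static to keep it off the kernel stack.
 */
static int check_task_stack(struct task_struct *task)
{
	static unsigned long entries[MAX_STACK_ENTRIES];
	int nr_entries;

	/*
	 * Returns the number of entries saved, or a negative error when
	 * the unwind cannot be guaranteed reliable (e.g. it crossed an
	 * exception boundary). On error the caller must not trust the
	 * trace and should retry later.
	 */
	nr_entries = stack_trace_save_tsk_reliable(task, entries,
						   MAX_STACK_ENTRIES);
	if (nr_entries < 0)
		return nr_entries;

	/* ... scan entries[0..nr_entries) for functions being patched ... */
	return 0;
}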