* [PATCH v5 1/8] sframe: Allow kernelspace sframe sections
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 2/8] arm64, unwind: build kernel with sframe V3 info Dylan Hatch
` (8 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Generalize the sframe lookup code to support kernelspace sections. This
is done by defining an UNWIND_SFRAME_LOOKUP option that can be activated
separately from HAVE_UNWIND_USER_SFRAME, as there will be other clients
of this library than just userspace unwind.
Sframe section location is now tracked in a separate sec_type field to
determine whether user-access functions are necessary to read the sframe
data. Relevant type declarations are moved and renamed to reflect the
non-user sframe support.
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
MAINTAINERS | 2 +-
arch/Kconfig | 4 +
.../{unwind_user_sframe.h => unwind_sframe.h} | 6 +-
arch/x86/include/asm/unwind_user.h | 12 +-
include/linux/sframe.h | 48 ++--
include/linux/unwind_types.h | 46 +++
include/linux/unwind_user_types.h | 41 ---
kernel/unwind/Makefile | 2 +-
kernel/unwind/sframe.c | 270 ++++++++++++------
kernel/unwind/user.c | 41 +--
10 files changed, 293 insertions(+), 179 deletions(-)
rename arch/x86/include/asm/{unwind_user_sframe.h => unwind_sframe.h} (50%)
create mode 100644 include/linux/unwind_types.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c46465ee7a9..cfc7dec88da4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27557,7 +27557,7 @@ F: Documentation/driver-api/uio-howto.rst
F: drivers/uio/
F: include/linux/uio_driver.h
-USERSPACE STACK UNWINDING
+STACK UNWINDING
M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index f1ed8bc0806d..d7caf2e245ce 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -486,6 +486,9 @@ config AS_SFRAME3
def_bool $(as-instr,.cfi_startproc\n.cfi_endproc,-Wa$(comma)--gsframe-3)
select AS_SFRAME
+config UNWIND_SFRAME_LOOKUP
+ bool
+
config UNWIND_USER
bool
@@ -496,6 +499,7 @@ config HAVE_UNWIND_USER_FP
config HAVE_UNWIND_USER_SFRAME
bool
select UNWIND_USER
+ select UNWIND_SFRAME_LOOKUP
config SFRAME_VALIDATION
bool "Enable .sframe section debugging"
diff --git a/arch/x86/include/asm/unwind_user_sframe.h b/arch/x86/include/asm/unwind_sframe.h
similarity index 50%
rename from arch/x86/include/asm/unwind_user_sframe.h
rename to arch/x86/include/asm/unwind_sframe.h
index d828ae1a4aac..44d42e6ffde4 100644
--- a/arch/x86/include/asm/unwind_user_sframe.h
+++ b/arch/x86/include/asm/unwind_sframe.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_UNWIND_USER_SFRAME_H
-#define _ASM_X86_UNWIND_USER_SFRAME_H
+#ifndef _ASM_X86_UNWIND_SFRAME_H
+#define _ASM_X86_UNWIND_SFRAME_H
#ifdef CONFIG_X86_64
@@ -9,4 +9,4 @@
#endif
-#endif /* _ASM_X86_UNWIND_USER_SFRAME_H */
+#endif /* _ASM_X86_UNWIND_SFRAME_H */
diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h
index ae46906c3b39..8fdab3581b86 100644
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -55,30 +55,30 @@ static inline int unwind_user_get_reg(unsigned long *val, unsigned int regnum)
#define ARCH_INIT_USER_FP_FRAME(ws) \
.cfa = { \
- .rule = UNWIND_USER_CFA_RULE_FP_OFFSET,\
+ .rule = UNWIND_CFA_RULE_FP_OFFSET,\
.offset = 2*(ws), \
}, \
.ra = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -1*(ws), \
}, \
.fp = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -2*(ws), \
}, \
.outermost = false,
#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \
.cfa = { \
- .rule = UNWIND_USER_CFA_RULE_SP_OFFSET,\
+ .rule = UNWIND_CFA_RULE_SP_OFFSET,\
.offset = 1*(ws), \
}, \
.ra = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -1*(ws), \
}, \
.fp = { \
- .rule = UNWIND_USER_RULE_RETAIN,\
+ .rule = UNWIND_RULE_RETAIN,\
}, \
.outermost = false,
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index b79c5ec09229..0cb2924367bc 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -3,37 +3,46 @@
#define _LINUX_SFRAME_H
#include <linux/mm_types.h>
+#include <linux/unwind_types.h>
#include <linux/srcu.h>
-#include <linux/unwind_user_types.h>
-#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+#ifdef CONFIG_UNWIND_SFRAME_LOOKUP
+
+enum sframe_sec_type {
+ SFRAME_KERNEL,
+ SFRAME_USER,
+};
struct sframe_section {
- struct rcu_head rcu;
+ struct rcu_head rcu;
#ifdef CONFIG_DYNAMIC_DEBUG
- const char *filename;
+ const char *filename;
#endif
- unsigned long sframe_start;
- unsigned long sframe_end;
- unsigned long text_start;
- unsigned long text_end;
-
- unsigned long fdes_start;
- unsigned long fres_start;
- unsigned long fres_end;
- unsigned int num_fdes;
-
- signed char ra_off;
- signed char fp_off;
+ enum sframe_sec_type sec_type;
+ unsigned long sframe_start;
+ unsigned long sframe_end;
+ unsigned long text_start;
+ unsigned long text_end;
+
+ unsigned long fdes_start;
+ unsigned long fres_start;
+ unsigned long fres_end;
+ unsigned int num_fdes;
+
+ signed char ra_off;
+ signed char fp_off;
};
+#endif /* CONFIG_UNWIND_SFRAME_LOOKUP */
+
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
#define INIT_MM_SFRAME .sframe_mt = MTREE_INIT(sframe_mt, 0),
extern void sframe_free_mm(struct mm_struct *mm);
extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
unsigned long text_start, unsigned long text_end);
extern int sframe_remove_section(unsigned long sframe_addr);
-extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame);
static inline bool current_has_sframe(void)
{
@@ -42,6 +51,8 @@ static inline bool current_has_sframe(void)
return mm && !mtree_empty(&mm->sframe_mt);
}
+extern int sframe_find_user(unsigned long ip, struct unwind_frame *frame);
+
#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
#define INIT_MM_SFRAME
@@ -52,9 +63,10 @@ static inline int sframe_add_section(unsigned long sframe_start, unsigned long s
return -ENOSYS;
}
static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; }
-static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; }
static inline bool current_has_sframe(void) { return false; }
+static inline int sframe_find_user(unsigned long ip, struct unwind_frame *frame) { return -ENOSYS; }
+
#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
#endif /* _LINUX_SFRAME_H */
diff --git a/include/linux/unwind_types.h b/include/linux/unwind_types.h
new file mode 100644
index 000000000000..08bcb0aa04aa
--- /dev/null
+++ b/include/linux/unwind_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_TYPES_H
+#define _LINUX_UNWIND_TYPES_H
+
+#define UNWIND_RULE_DEREF BIT(31)
+
+enum unwind_cfa_rule {
+ UNWIND_CFA_RULE_SP_OFFSET, /* CFA = SP + offset */
+ UNWIND_CFA_RULE_FP_OFFSET, /* CFA = FP + offset */
+ UNWIND_CFA_RULE_REG_OFFSET, /* CFA = reg + offset */
+ /* DEREF variants */
+ UNWIND_CFA_RULE_REG_OFFSET_DEREF = /* CFA = *(reg + offset) */
+ UNWIND_CFA_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_cfa_rule_data {
+ enum unwind_cfa_rule rule;
+ s32 offset;
+ unsigned int regnum;
+};
+
+enum unwind_rule {
+ UNWIND_RULE_RETAIN, /* entity = entity */
+ UNWIND_RULE_CFA_OFFSET, /* entity = CFA + offset */
+ UNWIND_RULE_REG_OFFSET, /* entity = register + offset */
+ /* DEREF variants */
+ UNWIND_RULE_CFA_OFFSET_DEREF = /* entity = *(CFA + offset) */
+ UNWIND_RULE_CFA_OFFSET | UNWIND_RULE_DEREF,
+ UNWIND_RULE_REG_OFFSET_DEREF = /* entity = *(register + offset) */
+ UNWIND_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_rule_data {
+ enum unwind_rule rule;
+ s32 offset;
+ unsigned int regnum;
+};
+
+struct unwind_frame {
+ struct unwind_cfa_rule_data cfa;
+ struct unwind_rule_data ra;
+ struct unwind_rule_data fp;
+ bool outermost;
+};
+
+#endif /* _LINUX_UNWIND_TYPES_H */
diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h
index 059e5c76f2f3..646e5fb774db 100644
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -27,47 +27,6 @@ struct unwind_stacktrace {
unsigned long *entries;
};
-#define UNWIND_USER_RULE_DEREF BIT(31)
-
-enum unwind_user_cfa_rule {
- UNWIND_USER_CFA_RULE_SP_OFFSET, /* CFA = SP + offset */
- UNWIND_USER_CFA_RULE_FP_OFFSET, /* CFA = FP + offset */
- UNWIND_USER_CFA_RULE_REG_OFFSET, /* CFA = reg + offset */
- /* DEREF variants */
- UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF = /* CFA = *(reg + offset) */
- UNWIND_USER_CFA_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_cfa_rule_data {
- enum unwind_user_cfa_rule rule;
- s32 offset;
- unsigned int regnum;
-};
-
-enum unwind_user_rule {
- UNWIND_USER_RULE_RETAIN, /* entity = entity */
- UNWIND_USER_RULE_CFA_OFFSET, /* entity = CFA + offset */
- UNWIND_USER_RULE_REG_OFFSET, /* entity = register + offset */
- /* DEREF variants */
- UNWIND_USER_RULE_CFA_OFFSET_DEREF = /* entity = *(CFA + offset) */
- UNWIND_USER_RULE_CFA_OFFSET | UNWIND_USER_RULE_DEREF,
- UNWIND_USER_RULE_REG_OFFSET_DEREF = /* entity = *(register + offset) */
- UNWIND_USER_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_rule_data {
- enum unwind_user_rule rule;
- s32 offset;
- unsigned int regnum;
-};
-
-struct unwind_user_frame {
- struct unwind_user_cfa_rule_data cfa;
- struct unwind_user_rule_data ra;
- struct unwind_user_rule_data fp;
- bool outermost;
-};
-
struct unwind_user_state {
unsigned long ip;
unsigned long sp;
diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile
index 146038165865..c5f9f8124564 100644
--- a/kernel/unwind/Makefile
+++ b/kernel/unwind/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_UNWIND_USER) += user.o deferred.o
- obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME) += sframe.o
+ obj-$(CONFIG_UNWIND_SFRAME_LOOKUP) += sframe.o
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index f24997e84e05..89dd8c5a6a10 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -12,8 +12,8 @@
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/sframe.h>
-#include <asm/unwind_user_sframe.h>
-#include <linux/unwind_user_types.h>
+#include <linux/unwind_types.h>
+#include <asm/unwind_sframe.h>
#include "sframe.h"
#include "sframe_debug.h"
@@ -44,8 +44,6 @@ struct sframe_fre_internal {
unsigned char dw_size;
};
-DEFINE_STATIC_SRCU(sframe_srcu);
-
static __always_inline unsigned char fre_type_to_size(unsigned char fre_type)
{
if (fre_type > 2)
@@ -60,6 +58,77 @@ static __always_inline unsigned char dataword_size_enum_to_size(unsigned char da
return 1 << dataword_size;
}
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+DEFINE_STATIC_SRCU(sframe_srcu);
+
+#define UNSAFE_USER_COPY(to, from, size, label) \
+ unsafe_copy_from_user(to, (void __user *)from, size, label)
+
+#define UNSAFE_USER_GET(to, from, type, label) \
+ unsafe_get_user(to, (type __user *)from, label)
+
+#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#define UNSAFE_USER_COPY(to, from, size, label) do { \
+ (void)to; (void)from; (void)size; \
+ goto label; \
+} while (0)
+
+#define UNSAFE_USER_GET(to, from, type, label) do { \
+ (void)to; (void)from; \
+ goto label; \
+} while (0)
+
+#endif /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+#define KERNEL_COPY(to, from, size, label) memcpy(to, (void *)from, size)
+#define KERNEL_GET(to, from, type, label) ({ (to) = *(type *)(from); })
+
+#else /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+#define KERNEL_COPY(to, from, size, label) do { \
+ (void)(to); (void)(from); (void)size; \
+ goto label; \
+} while (0)
+
+#define KERNEL_GET(to, from, type, label) do { \
+ (void)(to); (void)(from); \
+ goto label; \
+} while (0)
+
+#endif /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+#define DATA_COPY(sec, to, from, size, label) \
+({ \
+ switch (sec->sec_type) { \
+ case SFRAME_KERNEL: \
+ KERNEL_COPY(to, from, size, label); \
+ break; \
+ case SFRAME_USER: \
+ UNSAFE_USER_COPY(to, from, size, label); \
+ break; \
+ default: \
+ goto label; \
+ } \
+})
+
+#define DATA_GET(sec, to, from, type, label) \
+({ \
+ switch (sec->sec_type) { \
+ case SFRAME_KERNEL: \
+ KERNEL_GET(to, from, type, label); \
+ break; \
+ case SFRAME_USER: \
+ UNSAFE_USER_GET(to, from, type, label); \
+ break; \
+ default: \
+ goto label; \
+ } \
+})
+
static __always_inline int __read_fde(struct sframe_section *sec,
unsigned int fde_num,
struct sframe_fde_internal *fde)
@@ -69,8 +138,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
struct sframe_fda_v3 _fda;
fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde_v3));
- unsafe_copy_from_user(&_fde, (void __user *)fde_addr,
- sizeof(struct sframe_fde_v3), Efault);
+ DATA_COPY(sec, &_fde, fde_addr,
+ sizeof(struct sframe_fde_v3), Efault);
func_addr = fde_addr + _fde.func_start_off;
if (func_addr < sec->text_start || func_addr > sec->text_end)
@@ -79,8 +148,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
fda_addr = sec->fres_start + _fde.fres_off;
if (fda_addr + sizeof(struct sframe_fda_v3) > sec->fres_end)
return -EINVAL;
- unsafe_copy_from_user(&_fda, (void __user *)fda_addr,
- sizeof(struct sframe_fda_v3), Efault);
+ DATA_COPY(sec, &_fda, fda_addr,
+ sizeof(struct sframe_fda_v3), Efault);
fde->func_addr = func_addr;
fde->func_size = _fde.func_size;
@@ -102,21 +171,21 @@ static __always_inline int __find_fde(struct sframe_section *sec,
struct sframe_fde_internal *fde)
{
unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX;
- struct sframe_fde_v3 __user *first, *low, *high, *found = NULL;
+ struct sframe_fde_v3 *first, *low, *high, *found = NULL;
int ret;
- first = (void __user *)sec->fdes_start;
+ first = (void *)sec->fdes_start;
low = first;
high = first + sec->num_fdes - 1;
while (low <= high) {
- struct sframe_fde_v3 __user *mid;
+ struct sframe_fde_v3 *mid;
s64 func_off;
unsigned long func_addr;
mid = low + ((high - low) / 2);
- unsafe_get_user(func_off, (s64 __user *)mid, Efault);
+ DATA_GET(sec, func_off, mid, s64, Efault);
func_addr = (unsigned long)mid + func_off;
if (ip >= func_addr) {
@@ -154,47 +223,47 @@ static __always_inline int __find_fde(struct sframe_section *sec,
return -EFAULT;
}
-#define ____UNSAFE_GET_USER_INC(to, from, type, label) \
+#define ____GET_INC(sec, to, from, type, label) \
({ \
type __to; \
- unsafe_get_user(__to, (type __user *)from, label); \
+ DATA_GET(sec, __to, from, type, label); \
from += sizeof(__to); \
to = __to; \
})
-#define __UNSAFE_GET_USER_INC(to, from, size, label, u_or_s) \
+#define __GET_INC(sec, to, from, size, label, u_or_s) \
({ \
switch (size) { \
case 1: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##8, label); \
+ ____GET_INC(sec, to, from, u_or_s##8, label); \
break; \
case 2: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##16, label); \
+ ____GET_INC(sec, to, from, u_or_s##16, label); \
break; \
case 4: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label); \
+ ____GET_INC(sec, to, from, u_or_s##32, label); \
break; \
default: \
return -EFAULT; \
} \
})
-#define UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label) \
- __UNSAFE_GET_USER_INC(to, from, size, label, u)
+#define GET_UNSIGNED_INC(sec, to, from, size, label) \
+ __GET_INC(sec, to, from, size, label, u)
-#define UNSAFE_GET_USER_SIGNED_INC(to, from, size, label) \
- __UNSAFE_GET_USER_INC(to, from, size, label, s)
+#define GET_SIGNED_INC(sec, to, from, size, label) \
+ __GET_INC(sec, to, from, size, label, s)
-#define UNSAFE_GET_USER_INC(to, from, size, label) \
- _Generic(to, \
- u8 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u16 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u32 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u64 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- s8 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s16 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s32 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s64 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label))
+#define GET_INC(sec, to, from, size, label) \
+ _Generic(to, \
+ u8 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u16 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u32 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u64 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ s8 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s16 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s32 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s64 : GET_SIGNED_INC(sec, to, from, size, label))
static __always_inline int
__read_regular_fre_datawords(struct sframe_section *sec,
@@ -207,19 +276,19 @@ __read_regular_fre_datawords(struct sframe_section *sec,
s32 cfa_off, ra_off, fp_off;
unsigned int cfa_regnum;
- UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_off, cur, dataword_size, Efault);
dataword_count--;
ra_off = sec->ra_off;
if (!ra_off && dataword_count) {
dataword_count--;
- UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+ GET_INC(sec, ra_off, cur, dataword_size, Efault);
}
fp_off = sec->fp_off;
if (!fp_off && dataword_count) {
dataword_count--;
- UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+ GET_INC(sec, fp_off, cur, dataword_size, Efault);
}
if (dataword_count)
@@ -255,17 +324,17 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
if (dataword_count < 2)
return -EFAULT;
- UNSAFE_GET_USER_INC(cfa_ctl, cur, dataword_size, Efault);
- UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_off, cur, dataword_size, Efault);
dataword_count -= 2;
ra_off = sec->ra_off;
ra_ctl = ra_off ? 2 : 0; /* regnum=0, deref_p=(ra_off != 0), reg_p=0 */
if (dataword_count >= 2) {
- UNSAFE_GET_USER_INC(ra_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, ra_ctl, cur, dataword_size, Efault);
dataword_count--;
if (ra_ctl) {
- UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+ GET_INC(sec, ra_off, cur, dataword_size, Efault);
dataword_count--;
} else {
/* Padding RA location info */
@@ -276,10 +345,10 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
fp_off = sec->fp_off;
fp_ctl = fp_off ? 2 : 0; /* regnum=0, deref_p=(fp_off != 0), reg_p=0 */
if (dataword_count >= 2) {
- UNSAFE_GET_USER_INC(fp_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, fp_ctl, cur, dataword_size, Efault);
dataword_count--;
if (fp_ctl) {
- UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+ GET_INC(sec, fp_off, cur, dataword_size, Efault);
dataword_count--;
} else {
/* Padding FP location info */
@@ -353,11 +422,11 @@ static __always_inline int __read_fre(struct sframe_section *sec,
if (fre_addr + addr_size + 1 > sec->fres_end)
return -EFAULT;
- UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
+ GET_INC(sec, ip_off, cur, addr_size, Efault);
if (fde_pctype == SFRAME_FDE_PCTYPE_INC && ip_off > fde->func_size)
return -EFAULT;
- UNSAFE_GET_USER_INC(info, cur, 1, Efault);
+ GET_INC(sec, info, cur, 1, Efault);
dataword_count = SFRAME_V3_FRE_DATAWORD_COUNT(info);
dataword_size = dataword_size_enum_to_size(SFRAME_V3_FRE_DATAWORD_SIZE(info));
if (!dataword_size)
@@ -380,7 +449,7 @@ static __always_inline int __read_fre(struct sframe_section *sec,
}
static __always_inline int
-sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
+sframe_init_cfa_rule_data(struct unwind_cfa_rule_data *cfa_rule_data,
u32 ctlword, s32 offset)
{
bool deref_p = SFRAME_V3_FLEX_FDE_CTLWORD_DEREF_P(ctlword);
@@ -391,13 +460,13 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
switch (regnum) {
case SFRAME_REG_SP:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_SP_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_SP_OFFSET;
break;
case SFRAME_REG_FP:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_FP_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_FP_OFFSET;
break;
default:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_REG_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_REG_OFFSET;
cfa_rule_data->regnum = regnum;
}
} else {
@@ -405,7 +474,7 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
}
if (deref_p)
- cfa_rule_data->rule |= UNWIND_USER_RULE_DEREF;
+ cfa_rule_data->rule |= UNWIND_RULE_DEREF;
cfa_rule_data->offset = offset;
@@ -413,27 +482,27 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
}
static __always_inline void
-sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
+sframe_init_rule_data(struct unwind_rule_data *rule_data,
u32 ctlword, s32 offset)
{
bool deref_p = SFRAME_V3_FLEX_FDE_CTLWORD_DEREF_P(ctlword);
bool reg_p = SFRAME_V3_FLEX_FDE_CTLWORD_REG_P(ctlword);
if (!ctlword && !offset) {
- rule_data->rule = UNWIND_USER_RULE_RETAIN;
+ rule_data->rule = UNWIND_RULE_RETAIN;
return;
}
if (reg_p) {
unsigned int regnum = SFRAME_V3_FLEX_FDE_CTLWORD_REGNUM(ctlword);
- rule_data->rule = UNWIND_USER_RULE_REG_OFFSET;
+ rule_data->rule = UNWIND_RULE_REG_OFFSET;
rule_data->regnum = regnum;
} else {
- rule_data->rule = UNWIND_USER_RULE_CFA_OFFSET;
+ rule_data->rule = UNWIND_RULE_CFA_OFFSET;
}
if (deref_p)
- rule_data->rule |= UNWIND_USER_RULE_DEREF;
+ rule_data->rule |= UNWIND_RULE_DEREF;
rule_data->offset = offset;
}
@@ -441,7 +510,7 @@ sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
static __always_inline int __find_fre(struct sframe_section *sec,
struct sframe_fde_internal *fde,
unsigned long ip,
- struct unwind_user_frame *frame)
+ struct unwind_frame *frame)
{
unsigned char fde_pctype = SFRAME_V3_FDE_PCTYPE(fde->info);
struct sframe_fre_internal *fre, *prev_fre = NULL;
@@ -501,40 +570,18 @@ static __always_inline int __find_fre(struct sframe_section *sec,
return 0;
}
-int sframe_find(unsigned long ip, struct unwind_user_frame *frame)
+static __always_inline int __sframe_find(struct sframe_section *sec,
+ unsigned long ip,
+ struct unwind_frame *frame)
{
- struct mm_struct *mm = current->mm;
- struct sframe_section *sec;
struct sframe_fde_internal fde;
int ret;
- if (!mm)
- return -EINVAL;
-
- guard(srcu)(&sframe_srcu);
-
- sec = mtree_load(&mm->sframe_mt, ip);
- if (!sec)
- return -EINVAL;
-
- if (!user_read_access_begin((void __user *)sec->sframe_start,
- sec->sframe_end - sec->sframe_start))
- return -EFAULT;
-
ret = __find_fde(sec, ip, &fde);
if (ret)
- goto end;
-
- ret = __find_fre(sec, &fde, ip, frame);
-end:
- user_read_access_end();
-
- if (ret == -EFAULT) {
- dbg_sec("removing bad .sframe section\n");
- WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
- }
+ return ret;
- return ret;
+ return __find_fre(sec, &fde, ip, frame);
}
#ifdef CONFIG_SFRAME_VALIDATION
@@ -657,20 +704,23 @@ static int sframe_validate_section(struct sframe_section *sec) { return 0; }
#endif /* !CONFIG_SFRAME_VALIDATION */
-static void free_section(struct sframe_section *sec)
-{
- dbg_free(sec);
- kfree(sec);
-}
-
static int sframe_read_header(struct sframe_section *sec)
{
unsigned long header_end, fdes_start, fdes_end, fres_start, fres_end;
struct sframe_header shdr;
unsigned int num_fdes;
- if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
- dbg_sec("header usercopy failed\n");
+ switch (sec->sec_type) {
+ case SFRAME_USER:
+ if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
+ dbg_sec("header usercopy failed\n");
+ return -EFAULT;
+ }
+ break;
+ case SFRAME_KERNEL:
+ shdr = *(struct sframe_header *)sec->sframe_start;
+ break;
+ default:
return -EFAULT;
}
@@ -717,6 +767,45 @@ static int sframe_read_header(struct sframe_section *sec)
return 0;
}
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+int sframe_find_user(unsigned long ip, struct unwind_frame *frame)
+{
+ struct mm_struct *mm = current->mm;
+ struct sframe_section *sec;
+ int ret;
+
+ if (!mm)
+ return -EINVAL;
+
+ guard(srcu)(&sframe_srcu);
+
+ sec = mtree_load(&mm->sframe_mt, ip);
+ if (!sec)
+ return -EINVAL;
+
+ if (!user_read_access_begin((void __user *)sec->sframe_start,
+ sec->sframe_end - sec->sframe_start))
+ return -EFAULT;
+
+ ret = __sframe_find(sec, ip, frame);
+
+ user_read_access_end();
+
+ if (ret == -EFAULT) {
+ dbg_sec("removing bad .sframe section\n");
+ WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
+ }
+
+ return ret;
+}
+
+static void free_section(struct sframe_section *sec)
+{
+ dbg_free(sec);
+ kfree(sec);
+}
+
int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
unsigned long text_start, unsigned long text_end)
{
@@ -753,6 +842,7 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
if (!sec)
return -ENOMEM;
+ sec->sec_type = SFRAME_USER;
sec->sframe_start = sframe_start;
sec->sframe_end = sframe_end;
sec->text_start = text_start;
@@ -838,3 +928,5 @@ void sframe_free_mm(struct mm_struct *mm)
mtree_destroy(&mm->sframe_mt);
}
+
+#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index eb7d9489f671..9e57dd79559a 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -8,6 +8,7 @@
#include <linux/unwind_user.h>
#include <linux/uaccess.h>
#include <linux/sframe.h>
+#include <linux/unwind_types.h>
#define for_each_user_frame(state) \
for (unwind_user_start(state); !(state)->done; unwind_user_next(state))
@@ -28,7 +29,7 @@ get_user_word(unsigned long *word, unsigned long base, int off, unsigned int ws)
}
static int unwind_user_next_common(struct unwind_user_state *state,
- const struct unwind_user_frame *frame)
+ const struct unwind_frame *frame)
{
unsigned long cfa, fp, ra;
@@ -40,16 +41,16 @@ static int unwind_user_next_common(struct unwind_user_state *state,
/* Get the Canonical Frame Address (CFA) */
switch (frame->cfa.rule) {
- case UNWIND_USER_CFA_RULE_SP_OFFSET:
+ case UNWIND_CFA_RULE_SP_OFFSET:
cfa = state->sp;
break;
- case UNWIND_USER_CFA_RULE_FP_OFFSET:
+ case UNWIND_CFA_RULE_FP_OFFSET:
if (state->fp < state->sp)
return -EINVAL;
cfa = state->fp;
break;
- case UNWIND_USER_CFA_RULE_REG_OFFSET:
- case UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF:
+ case UNWIND_CFA_RULE_REG_OFFSET:
+ case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&cfa, frame->cfa.regnum))
return -EINVAL;
break;
@@ -58,7 +59,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
return -EINVAL;
}
cfa += frame->cfa.offset;
- if (frame->cfa.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->cfa.rule & UNWIND_RULE_DEREF &&
get_user_word(&cfa, cfa, 0, state->ws))
return -EINVAL;
@@ -76,16 +77,16 @@ static int unwind_user_next_common(struct unwind_user_state *state,
/* Get the Return Address (RA) */
switch (frame->ra.rule) {
- case UNWIND_USER_RULE_RETAIN:
+ case UNWIND_RULE_RETAIN:
if (!state->topmost || unwind_user_get_ra_reg(&ra))
return -EINVAL;
break;
/* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
- case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
ra = cfa + frame->ra.offset;
break;
- case UNWIND_USER_RULE_REG_OFFSET:
- case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&ra, frame->ra.regnum))
return -EINVAL;
ra += frame->ra.offset;
@@ -94,21 +95,21 @@ static int unwind_user_next_common(struct unwind_user_state *state,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (frame->ra.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->ra.rule & UNWIND_RULE_DEREF &&
get_user_word(&ra, ra, 0, state->ws))
return -EINVAL;
/* Get the Frame Pointer (FP) */
switch (frame->fp.rule) {
- case UNWIND_USER_RULE_RETAIN:
+ case UNWIND_RULE_RETAIN:
fp = state->fp;
break;
/* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
- case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
fp = cfa + frame->fp.offset;
break;
- case UNWIND_USER_RULE_REG_OFFSET:
- case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&fp, frame->fp.regnum))
return -EINVAL;
fp += frame->fp.offset;
@@ -117,7 +118,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (frame->fp.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->fp.rule & UNWIND_RULE_DEREF &&
get_user_word(&fp, fp, 0, state->ws))
return -EINVAL;
@@ -133,13 +134,13 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
struct pt_regs *regs = task_pt_regs(current);
if (state->topmost && unwind_user_at_function_start(regs)) {
- const struct unwind_user_frame fp_entry_frame = {
+ const struct unwind_frame fp_entry_frame = {
ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
};
return unwind_user_next_common(state, &fp_entry_frame);
}
- const struct unwind_user_frame fp_frame = {
+ const struct unwind_frame fp_frame = {
ARCH_INIT_USER_FP_FRAME(state->ws)
};
return unwind_user_next_common(state, &fp_frame);
@@ -147,10 +148,10 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
static int unwind_user_next_sframe(struct unwind_user_state *state)
{
- struct unwind_user_frame frame;
+ struct unwind_frame frame;
/* sframe expects the frame to be local storage */
- if (sframe_find(state->ip, &frame))
+ if (sframe_find_user(state->ip, &frame))
return -ENOENT;
return unwind_user_next_common(state, &frame);
}
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 2/8] arm64, unwind: build kernel with sframe V3 info
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 1/8] sframe: Allow kernelspace sframe sections Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 3/8] arm64: entry: add unwind info for various kernel entries Dylan Hatch
` (7 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Build with -Wa,--gsframe-3 flags to generate a .sframe section. This
will be used for in-kernel reliable stacktrace in cases where the frame
pointer alone is insufficient.
Currently, the sframe format only supports arm64, x86_64 and s390x
architectures.
Signed-off-by: Weinan Liu <wnliu@google.com>
Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
MAINTAINERS | 1 +
Makefile | 8 ++++++++
arch/Kconfig | 21 +++++++++++++++++++++
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/unwind_sframe.h | 8 ++++++++
arch/arm64/kernel/vdso/Makefile | 2 +-
include/asm-generic/sections.h | 4 ++++
include/asm-generic/vmlinux.lds.h | 15 +++++++++++++++
8 files changed, 59 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/unwind_sframe.h
diff --git a/MAINTAINERS b/MAINTAINERS
index cfc7dec88da4..a7d75f9cb5f4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27561,6 +27561,7 @@ STACK UNWINDING
M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
S: Maintained
+F: arch/*/include/asm/unwind_sframe.h
F: include/linux/sframe.h
F: include/linux/unwind*.h
F: kernel/unwind/
diff --git a/Makefile b/Makefile
index 2b15f0b4a0cb..6c94a5257679 100644
--- a/Makefile
+++ b/Makefile
@@ -1110,6 +1110,14 @@ endif
# Ensure compilers do not transform certain loops into calls to wcslen()
KBUILD_CFLAGS += -fno-builtin-wcslen
+# build with sframe table
+ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+CC_FLAGS_SFRAME := -Wa,--gsframe-3
+KBUILD_CFLAGS += $(CC_FLAGS_SFRAME)
+KBUILD_AFLAGS += $(CC_FLAGS_SFRAME)
+export CC_FLAGS_SFRAME
+endif
+
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += -fmacro-prefix-map=$(srcroot)/=
diff --git a/arch/Kconfig b/arch/Kconfig
index d7caf2e245ce..8d27b3249e7a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -520,6 +520,27 @@ config SFRAME_VALIDATION
If unsure, say N.
+config ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
+ bool
+ help
+ An architecture can select this if it enables the SFrame (Simple
+ Frame) unwinder for unwinding kernel stack traces. It uses an unwind
+ table that is directly generated by the toolchain based on DWARF CFI
+ information.
+
+config HAVE_UNWIND_KERNEL_SFRAME
+ bool "Sframe unwinder"
+ depends on AS_SFRAME3
+ depends on 64BIT
+ depends on ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
+ select UNWIND_SFRAME_LOOKUP
+ help
+ This option enables the SFrame (Simple Frame) unwinder for unwinding
+ kernel stack traces. It uses an unwind table that is directly
+ generated by the toolchain based on DWARF CFI information. In
+ practice, this can provide more reliable stacktrace results than
+ unwinding with frame pointers alone.
+
config HAVE_PERF_REGS
bool
help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..f7ae8eaaadc4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -112,6 +112,7 @@ config ARM64
select ARCH_SUPPORTS_SCHED_SMT
select ARCH_SUPPORTS_SCHED_CLUSTER
select ARCH_SUPPORTS_SCHED_MC
+ select ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
diff --git a/arch/arm64/include/asm/unwind_sframe.h b/arch/arm64/include/asm/unwind_sframe.h
new file mode 100644
index 000000000000..876412881196
--- /dev/null
+++ b/arch/arm64/include/asm/unwind_sframe.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_UNWIND_SFRAME_H
+#define _ASM_ARM64_UNWIND_SFRAME_H
+
+#define SFRAME_REG_SP 31
+#define SFRAME_REG_FP 29
+
+#endif /* _ASM_ARM64_UNWIND_SFRAME_H */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 7dec05dd33b7..c60ef921956f 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -38,7 +38,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
$(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
$(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(CC_FLAGS_SFRAME) \
-Wmissing-prototypes -Wmissing-declarations
CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 0755bc39b0d8..336d27011a58 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -31,6 +31,7 @@
* __irqentry_text_start, __irqentry_text_end
* __softirqentry_text_start, __softirqentry_text_end
* __start_opd, __end_opd
+ * __start_sframe, __end_sframe
*/
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
@@ -53,6 +54,9 @@ extern char __ctors_start[], __ctors_end[];
/* Start and end of .opd section - used for function descriptors. */
extern char __start_opd[], __end_opd[];
+/* Start and end of .sframe section - used for stack unwinding. */
+extern char __start_sframe[], __end_sframe[];
+
/* Start and end of instrumentation protected text section */
extern char __noinstr_text_start[], __noinstr_text_end[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 1e1580febe4b..090da633db92 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -491,6 +491,8 @@
*(.rodata1) \
} \
\
+ SFRAME \
+ \
/* PCI quirks */ \
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
BOUNDED_SECTION_PRE_LABEL(.pci_fixup_early, _pci_fixups_early, __start, __end) \
@@ -911,6 +913,19 @@
#define TRACEDATA
#endif
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+#define SFRAME \
+ /* sframe */ \
+ .sframe : AT(ADDR(.sframe) - LOAD_OFFSET) { \
+ __start_sframe = .; \
+ KEEP(*(.sframe)) \
+ KEEP(*(.init.sframe)) \
+ __end_sframe = .; \
+ }
+#else
+#define SFRAME
+#endif
+
#ifdef CONFIG_PRINTK_INDEX
#define PRINTK_INDEX \
.printk_index : AT(ADDR(.printk_index) - LOAD_OFFSET) { \
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH v5 3/8] arm64: entry: add unwind info for various kernel entries
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 1/8] sframe: Allow kernelspace sframe sections Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 2/8] arm64, unwind: build kernel with sframe V3 info Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-29 15:26 ` Mark Rutland
2026-04-28 18:36 ` [PATCH v5 4/8] sframe: Provide PC lookup for vmlinux .sframe section Dylan Hatch
` (6 subsequent siblings)
9 siblings, 1 reply; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
From: Weinan Liu <wnliu@google.com>
DWARF CFI (Call Frame Information) specifies how to recover the return
address and callee-saved registers at each PC in a given function.
Compilers are able to generate the CFI annotations when they compile
the code to assembly language. For handcrafted assembly, we need to
annotate them by hand.
Annotate minimal CFI to enable stacktracing using SFrame for kernel
exception entries through el1*_64_*() paths and irq entries through
call_on_irq_stack().
Signed-off-by: Weinan Liu <wnliu@google.com>
Suggested-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
arch/arm64/kernel/entry.S | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f8018b5c1f9a..dc55b0b19cfa 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,12 @@
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
+/*
+ * Do not generate .eh_frame. Only generate .debug_frame and optionally
+ * .sframe (via assembler option --gsframe[-N]).
+ */
+ .cfi_sections .debug_frame
+
.macro clear_gp_regs
.irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
mov x\n, xzr
@@ -575,7 +581,16 @@ SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
.if \el == 0
b ret_to_user
.else
+ /*
+ * Minimal DWARF CFI for unwinding across the call above.
+ * Enable unwinding for el1*_64_*() path only.
+ */
+ .cfi_startproc
+ .cfi_def_cfa_offset PT_REGS_SIZE
+ .cfi_offset 29, S_FP - PT_REGS_SIZE
+ .cfi_offset 30, S_LR - PT_REGS_SIZE
b ret_to_kernel
+ .cfi_endproc
.endif
SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
.endm
@@ -872,6 +887,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ .cfi_startproc
save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
@@ -882,6 +898,9 @@ SYM_FUNC_START(call_on_irq_stack)
/* Create a frame record to save our LR and SP (implicit in FP) */
stp x29, x30, [sp, #-16]!
mov x29, sp
+ .cfi_def_cfa 29, 16
+ .cfi_offset 29, -16
+ .cfi_offset 30, -8
ldr_this_cpu x16, irq_stack_ptr, x17
@@ -897,9 +916,13 @@ SYM_FUNC_START(call_on_irq_stack)
*/
mov sp, x29
ldp x29, x30, [sp], #16
+ .cfi_restore 29
+ .cfi_restore 30
+ .cfi_def_cfa 31, 0
scs_load_current
restore_irq x9
ret
+ .cfi_endproc
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v5 3/8] arm64: entry: add unwind info for various kernel entries
2026-04-28 18:36 ` [PATCH v5 3/8] arm64: entry: add unwind info for various kernel entries Dylan Hatch
@ 2026-04-29 15:26 ` Mark Rutland
0 siblings, 0 replies; 17+ messages in thread
From: Mark Rutland @ 2026-04-29 15:26 UTC (permalink / raw)
To: Dylan Hatch
Cc: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Hi Dylan,
On Tue, Apr 28, 2026 at 06:36:38PM +0000, Dylan Hatch wrote:
> From: Weinan Liu <wnliu@google.com>
>
> DWARF CFI (Call Frame Information) specifies how to recover the return
> address and callee-saved registers at each PC in a given function.
> Compilers are able to generate the CFI annotations when they compile
> the code to assembly language. For handcrafted assembly, we need to
> annotate them by hand.
>
> Annotate minimal CFI to enable stacktracing using SFrame for kernel
> exception entries through el1*_64_*() paths
I thought we were only consuming SFrame when unwinding an exception
boundary?
We shouldn't be taking exceptions _from_ the entry assembly functions
unless something has gone horribly wrong, and so I don't see why we'd
need CFI entries for the entry assembly functions.
Am I missing some reason we need CFI entries for the entry assembly
functions? I strongly suspect it is not necessary to add these, and I'd
prefer to omit them.
> and irq entries through call_on_irq_stack()
Needing some sort of unwind annotations for call_on_irq_stack() makes
sense to me, but don't we need something for other assembly functions
too?
We can interrupt things like memset(); I assume we'll treat those as
unreliable until annotated?
Mark.
> Signed-off-by: Weinan Liu <wnliu@google.com>
> Suggested-by: Jens Remus <jremus@linux.ibm.com>
> Reviewed-by: Jens Remus <jremus@linux.ibm.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
> arch/arm64/kernel/entry.S | 23 +++++++++++++++++++++++
> 1 file changed, 23 insertions(+)
>
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index f8018b5c1f9a..dc55b0b19cfa 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -30,6 +30,12 @@
> #include <asm/asm-uaccess.h>
> #include <asm/unistd.h>
>
> +/*
> + * Do not generate .eh_frame. Only generate .debug_frame and optionally
> + * .sframe (via assembler option --gsframe[-N]).
> + */
> + .cfi_sections .debug_frame
> +
> .macro clear_gp_regs
> .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
> mov x\n, xzr
> @@ -575,7 +581,16 @@ SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
> .if \el == 0
> b ret_to_user
> .else
> + /*
> + * Minimal DWARF CFI for unwinding across the call above.
> + * Enable unwinding for el1*_64_*() path only.
> + */
> + .cfi_startproc
> + .cfi_def_cfa_offset PT_REGS_SIZE
> + .cfi_offset 29, S_FP - PT_REGS_SIZE
> + .cfi_offset 30, S_LR - PT_REGS_SIZE
> b ret_to_kernel
> + .cfi_endproc
> .endif
> SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
> .endm
> @@ -872,6 +887,7 @@ NOKPROBE(ret_from_fork)
> * Calls func(regs) using this CPU's irq stack and shadow irq stack.
> */
> SYM_FUNC_START(call_on_irq_stack)
> + .cfi_startproc
> save_and_disable_daif x9
> #ifdef CONFIG_SHADOW_CALL_STACK
> get_current_task x16
> @@ -882,6 +898,9 @@ SYM_FUNC_START(call_on_irq_stack)
> /* Create a frame record to save our LR and SP (implicit in FP) */
> stp x29, x30, [sp, #-16]!
> mov x29, sp
> + .cfi_def_cfa 29, 16
> + .cfi_offset 29, -16
> + .cfi_offset 30, -8
>
> ldr_this_cpu x16, irq_stack_ptr, x17
>
> @@ -897,9 +916,13 @@ SYM_FUNC_START(call_on_irq_stack)
> */
> mov sp, x29
> ldp x29, x30, [sp], #16
> + .cfi_restore 29
> + .cfi_restore 30
> + .cfi_def_cfa 31, 0
> scs_load_current
> restore_irq x9
> ret
> + .cfi_endproc
> SYM_FUNC_END(call_on_irq_stack)
> NOKPROBE(call_on_irq_stack)
>
> --
> 2.54.0.545.g6539524ca2-goog
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 4/8] sframe: Provide PC lookup for vmlinux .sframe section
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (2 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 3/8] arm64: entry: add unwind info for various kernel entries Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-28 18:36 ` [PATCH v5 5/8] sframe: Allow unsorted FDEs Dylan Hatch
` (5 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
With HAVE_UNWIND_KERNEL_SFRAME, read in the .sframe section at boot. This provides
unwind data as an alternative/supplement to frame pointer-based
unwinding.
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
arch/arm64/kernel/setup.c | 2 ++
include/linux/sframe.h | 14 ++++++++++++++
kernel/unwind/sframe.c | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 52 insertions(+)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 23c05dc7a8f2..4a633bc7aefb 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -32,6 +32,7 @@
#include <linux/sched/task.h>
#include <linux/scs.h>
#include <linux/mm.h>
+#include <linux/sframe.h>
#include <asm/acpi.h>
#include <asm/fixmap.h>
@@ -375,6 +376,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
"This indicates a broken bootloader or old kernel\n",
boot_args[1], boot_args[2], boot_args[3]);
}
+ init_sframe_table();
}
static inline bool cpu_can_disable(unsigned int cpu)
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 0cb2924367bc..5b7341b61a7c 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -69,4 +69,18 @@ static inline int sframe_find_user(unsigned long ip, struct unwind_frame *frame)
#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+void __init init_sframe_table(void);
+void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+ void *text, size_t text_size);
+
+extern int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame);
+
+#else
+
+static inline void __init init_sframe_table(void) {}
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
#endif /* _LINUX_SFRAME_H */
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 89dd8c5a6a10..430bff9533ee 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -14,10 +14,20 @@
#include <linux/sframe.h>
#include <linux/unwind_types.h>
#include <asm/unwind_sframe.h>
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+#include <linux/kallsyms.h>
+#endif
#include "sframe.h"
#include "sframe_debug.h"
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+static bool sframe_init __ro_after_init;
+static struct sframe_section kernel_sfsec __ro_after_init;
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
struct sframe_fde_internal {
unsigned long func_addr;
u32 func_size;
@@ -930,3 +940,29 @@ void sframe_free_mm(struct mm_struct *mm)
}
#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame)
+{
+ if (!frame || !sframe_init)
+ return -EINVAL;
+
+ return __sframe_find(&kernel_sfsec, ip, frame);
+}
+
+void __init init_sframe_table(void)
+{
+ kernel_sfsec.sec_type = SFRAME_KERNEL;
+ kernel_sfsec.sframe_start = (unsigned long)__start_sframe;
+ kernel_sfsec.sframe_end = (unsigned long)__end_sframe;
+ kernel_sfsec.text_start = (unsigned long)_stext;
+ kernel_sfsec.text_end = (unsigned long)_etext;
+
+ if (WARN_ON(sframe_read_header(&kernel_sfsec)))
+ return;
+
+ sframe_init = true;
+}
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH v5 5/8] sframe: Allow unsorted FDEs
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (3 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 4/8] sframe: Provide PC lookup for vmlinux .sframe section Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-30 10:04 ` Jens Remus
2026-04-28 18:36 ` [PATCH v5 6/8] arm64/module, sframe: Add sframe support for modules Dylan Hatch
` (4 subsequent siblings)
9 siblings, 1 reply; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
The .sframe in kernel modules is built without SFRAME_F_FDE_SORTED set.
In order to allow sframe PC lookup in modules, add a code path to handle
unsorted FDE tables by doing a simple linear search.
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
include/linux/sframe.h | 1 +
kernel/unwind/sframe.c | 45 +++++++++++++++++++++++++++++++++++++-----
2 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 5b7341b61a7c..8ae31ed36226 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -28,6 +28,7 @@ struct sframe_section {
unsigned long fres_start;
unsigned long fres_end;
unsigned int num_fdes;
+ bool fdes_sorted;
signed char ra_off;
signed char fp_off;
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 430bff9533ee..dcf4deb378dc 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -176,9 +176,35 @@ static __always_inline int __read_fde(struct sframe_section *sec,
return -EFAULT;
}
-static __always_inline int __find_fde(struct sframe_section *sec,
- unsigned long ip,
- struct sframe_fde_internal *fde)
+static __always_inline int __find_fde_unsorted(struct sframe_section *sec,
+ unsigned long ip,
+ struct sframe_fde_internal *fde)
+{
+ struct sframe_fde_v3 *cur, *start, *end;
+
+ start = (struct sframe_fde_v3 *)sec->fdes_start;
+ end = start + sec->num_fdes;
+
+ for (cur = start; cur < end; cur++) {
+ s64 func_off;
+ u32 func_size;
+ unsigned long func_addr;
+
+ DATA_GET(sec, func_off, &cur->func_start_off, s64, Efault);
+ DATA_GET(sec, func_size, &cur->func_size, u32, Efault);
+ func_addr = (unsigned long)cur + func_off;
+
+ if (ip >= func_addr && ip < func_addr + func_size)
+ return __read_fde(sec, cur - start, fde);
+ }
+ return -EINVAL;
+Efault:
+ return -EFAULT;
+}
+
+static __always_inline int __find_fde_sorted(struct sframe_section *sec,
+ unsigned long ip,
+ struct sframe_fde_internal *fde)
{
unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX;
struct sframe_fde_v3 *first, *low, *high, *found = NULL;
@@ -233,6 +259,15 @@ static __always_inline int __find_fde(struct sframe_section *sec,
return -EFAULT;
}
+static __always_inline int __find_fde(struct sframe_section *sec,
+ unsigned long ip,
+ struct sframe_fde_internal *fde)
+{
+ if (sec->fdes_sorted)
+ return __find_fde_sorted(sec, ip, fde);
+ return __find_fde_unsorted(sec, ip, fde);
+}
+
#define ____GET_INC(sec, to, from, type, label) \
({ \
type __to; \
@@ -657,7 +692,7 @@ static int sframe_validate_section(struct sframe_section *sec)
return ret;
ip = fde.func_addr;
- if (ip <= prev_ip) {
+ if (sec->fdes_sorted && ip <= prev_ip) {
dbg_sec("fde %u not sorted\n", i);
return -EFAULT;
}
@@ -736,7 +771,6 @@ static int sframe_read_header(struct sframe_section *sec)
if (shdr.preamble.magic != SFRAME_MAGIC ||
shdr.preamble.version != SFRAME_VERSION_3 ||
- !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) ||
!(shdr.preamble.flags & SFRAME_F_FDE_FUNC_START_PCREL) ||
shdr.auxhdr_len) {
dbg_sec("bad/unsupported sframe header\n");
@@ -766,6 +800,7 @@ static int sframe_read_header(struct sframe_section *sec)
return -EINVAL;
}
+ sec->fdes_sorted = shdr.preamble.flags & SFRAME_F_FDE_SORTED;
sec->num_fdes = num_fdes;
sec->fdes_start = fdes_start;
sec->fres_start = fres_start;
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v5 5/8] sframe: Allow unsorted FDEs
2026-04-28 18:36 ` [PATCH v5 5/8] sframe: Allow unsorted FDEs Dylan Hatch
@ 2026-04-30 10:04 ` Jens Remus
0 siblings, 0 replies; 17+ messages in thread
From: Jens Remus @ 2026-04-30 10:04 UTC (permalink / raw)
To: Dylan Hatch, Roman Gushchin, Weinan Liu, Will Deacon,
Josh Poimboeuf, Indu Bhagat, Peter Zijlstra, Steven Rostedt,
Catalin Marinas, Jiri Kosina
Cc: Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
joe.lawrence, linux-toolchains, linux-kernel, live-patching,
linux-arm-kernel, Randy Dunlap, Heiko Carstens
On 4/28/2026 8:36 PM, Dylan Hatch wrote:
> The .sframe in kernel modules is built without SFRAME_F_FDE_SORTED set.
> In order to allow sframe PC lookup in modules, add a code path to handle
> unsorted FDE tables by doing a simple linear search.
>
> Reviewed-by: Jens Remus <jremus@linux.ibm.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
Indu suggested that it would be preferable if a module's .sframe FDE
index table could be sorted during loading of the module to enable
binary search instead of having to resort to linear search. I propose
to drop everything from this patch except for the following, squash
it into the following patch that adds sframe support for modules, and
extend that to sort the .sframe FDE index table. See my separate
feedback to that patch.
> diff --git a/include/linux/sframe.h b/include/linux/sframe.h
> @@ -28,6 +28,7 @@ struct sframe_section {
> unsigned long fres_start;
> unsigned long fres_end;
> unsigned int num_fdes;
> + bool fdes_sorted;
>
> signed char ra_off;
> signed char fp_off;
> diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
> @@ -736,7 +771,6 @@ static int sframe_read_header(struct sframe_section *sec)
>
> if (shdr.preamble.magic != SFRAME_MAGIC ||
> shdr.preamble.version != SFRAME_VERSION_3 ||
> - !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) ||
> !(shdr.preamble.flags & SFRAME_F_FDE_FUNC_START_PCREL) ||
> shdr.auxhdr_len) {
> dbg_sec("bad/unsupported sframe header\n");
> @@ -766,6 +800,7 @@ static int sframe_read_header(struct sframe_section *sec)
> return -EINVAL;
> }
>
> + sec->fdes_sorted = shdr.preamble.flags & SFRAME_F_FDE_SORTED;
> sec->num_fdes = num_fdes;
> sec->fdes_start = fdes_start;
> sec->fres_start = fres_start;
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 6/8] arm64/module, sframe: Add sframe support for modules
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (4 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 5/8] sframe: Allow unsorted FDEs Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-30 10:04 ` Jens Remus
2026-04-28 18:36 ` [PATCH v5 7/8] sframe: Introduce in-kernel SFRAME_VALIDATION Dylan Hatch
` (3 subsequent siblings)
9 siblings, 1 reply; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Add sframe table to mod_arch_specific and support sframe PC lookups when
an .sframe section can be found on incoming modules.
Signed-off-by: Weinan Liu <wnliu@google.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
arch/arm64/include/asm/module.h | 6 +++++
arch/arm64/kernel/module.c | 8 +++++++
include/linux/sframe.h | 2 ++
kernel/unwind/sframe.c | 40 +++++++++++++++++++++++++++++++--
4 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index fb9b88eebeb1..07f309c51eee 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -6,6 +6,7 @@
#define __ASM_MODULE_H
#include <asm-generic/module.h>
+#include <linux/sframe.h>
struct mod_plt_sec {
int plt_shndx;
@@ -17,6 +18,11 @@ struct mod_arch_specific {
struct mod_plt_sec core;
struct mod_plt_sec init;
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ struct sframe_section sframe_sec;
+ bool sframe_init;
+#endif
+
/* for CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampolines;
struct plt_entry *init_ftrace_trampolines;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 24adb581af0e..427f187e9531 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -18,6 +18,7 @@
#include <linux/moduleloader.h>
#include <linux/random.h>
#include <linux/scs.h>
+#include <linux/sframe.h>
#include <asm/alternative.h>
#include <asm/insn.h>
@@ -515,5 +516,12 @@ int module_finalize(const Elf_Ehdr *hdr,
}
}
+ s = find_section(hdr, sechdrs, ".sframe");
+ if (s) {
+ struct module_memory *t = &me->mem[MOD_TEXT];
+
+ sframe_module_init(me, (void *)s->sh_addr, s->sh_size,
+ t->base, t->size);
+ }
return module_init_ftrace_plt(hdr, sechdrs, me);
}
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 8ae31ed36226..27f5a66190af 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -81,6 +81,8 @@ extern int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame);
#else
static inline void __init init_sframe_table(void) {}
+static inline void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+ void *text, size_t text_size) {}
#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index dcf4deb378dc..70001c8e586d 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -980,10 +980,27 @@ void sframe_free_mm(struct mm_struct *mm)
int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame)
{
- if (!frame || !sframe_init)
+ struct sframe_section *sec;
+
+ if (!frame)
return -EINVAL;
- return __sframe_find(&kernel_sfsec, ip, frame);
+ if (is_ksym_addr(ip)) {
+ if (!sframe_init)
+ return -EINVAL;
+
+ sec = &kernel_sfsec;
+ } else {
+ struct module *mod;
+
+ mod = __module_address(ip);
+ if (!mod || !mod->arch.sframe_init)
+ return -EINVAL;
+
+ sec = &mod->arch.sframe_sec;
+ }
+
+ return __sframe_find(sec, ip, frame);
}
void __init init_sframe_table(void)
@@ -1000,4 +1017,23 @@ void __init init_sframe_table(void)
sframe_init = true;
}
+void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+ void *text, size_t text_size)
+{
+ struct sframe_section sec;
+
+ memset(&sec, 0, sizeof(sec));
+ sec.sec_type = SFRAME_KERNEL;
+ sec.sframe_start = (unsigned long)sframe;
+ sec.sframe_end = (unsigned long)sframe + sframe_size;
+ sec.text_start = (unsigned long)text;
+ sec.text_end = (unsigned long)text + text_size;
+
+ if (WARN_ON(sframe_read_header(&sec)))
+ return;
+
+ mod->arch.sframe_sec = sec;
+ mod->arch.sframe_init = true;
+}
+
#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v5 6/8] arm64/module, sframe: Add sframe support for modules
2026-04-28 18:36 ` [PATCH v5 6/8] arm64/module, sframe: Add sframe support for modules Dylan Hatch
@ 2026-04-30 10:04 ` Jens Remus
0 siblings, 0 replies; 17+ messages in thread
From: Jens Remus @ 2026-04-30 10:04 UTC (permalink / raw)
To: Dylan Hatch, Roman Gushchin, Weinan Liu, Will Deacon,
Josh Poimboeuf, Indu Bhagat, Peter Zijlstra, Steven Rostedt,
Catalin Marinas, Jiri Kosina
Cc: Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
joe.lawrence, linux-toolchains, linux-kernel, live-patching,
linux-arm-kernel, Randy Dunlap, Heiko Carstens
On 4/28/2026 8:36 PM, Dylan Hatch wrote:
> Add sframe table to mod_arch_specific and support sframe PC lookups when
> an .sframe section can be found on incoming modules.
One small fix and a proposal to sort the module's SFrame FDE index.
> diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
A subsequent patch adds a call to sframe_validate_section(), which would
operate on the temporary struct sframe_section instance and thus fail
to use container_of() to access the struct module instance. To resolve
this, change as follows:
> +void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
> + void *text, size_t text_size)
> +{
> + struct sframe_section sec;
struct sframe_section *sec = &mod->arch.sframe_sec;
It is fine to initialize the module's struct sframe_section instance as
use of the information is guarded by mod->arch.sframe_init, which is
only set if the instance has been fully initialized.
> +
> + memset(&sec, 0, sizeof(sec));
Can be dropped if struct module instance got zero-initialized.
> + sec.sec_type = SFRAME_KERNEL;
> + sec.sframe_start = (unsigned long)sframe;
> + sec.sframe_end = (unsigned long)sframe + sframe_size;
> + sec.text_start = (unsigned long)text;
> + sec.text_end = (unsigned long)text + text_size;
Adjust all lines above to pointer access.
> +
> + if (WARN_ON(sframe_read_header(&sec)))
Ditto.
> + return;
> +
> + mod->arch.sframe_sec = sec;
Drop.
> + mod->arch.sframe_init = true;
> +}
Indu suggested that it would be preferable if a module's .sframe FDE
index table could be sorted during loading of the module to enable
binary search instead of having to resort to linear search. I propose
to change this patch as follows to sort the module .sframe FDE index
table in sframe_module_init(). Note that the patch assumes above
changes have been implemented. The sorting is very similar to sorting
of ORC tables in arch/x86/kernel/unwind_orc.c in unwind_module_init().
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/sframe.h>
+#include <linux/sort.h>
#include <linux/unwind_types.h>
#include <asm/unwind_sframe.h>
#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
@@ -1038,6 +1039,50 @@ void __init init_sframe_table(void)
sframe_init = true;
}
+static int sframe_sort_cmp_fde(const void *a, const void *b)
+{
+ const struct sframe_fde_v3 *fde_a = a, *fde_b = b;
+ unsigned long func_start_a, func_start_b;
+
+ func_start_a = (unsigned long)fde_a + fde_a->func_start_off;
+ func_start_b = (unsigned long)fde_b + fde_b->func_start_off;
+
+ return cmp_int(func_start_a, func_start_b);
+}
+
+static void sframe_sort_swap_fde(void *a, void *b, int size)
+{
+ struct sframe_fde_v3 *fde_a = a, *fde_b = b;
+ struct sframe_fde_v3 temp;
+ long delta;
+
+ /* Swap potentially unaligned FDE */
+ memcpy(&temp, fde_a, sizeof(struct sframe_fde_v3));
+ memcpy(fde_a, fde_b, sizeof(struct sframe_fde_v3));
+ memcpy(fde_b, &temp, sizeof(struct sframe_fde_v3));
+
+ /* Adjust FDE function start offset from FDE */
+ delta = (long)((unsigned long)fde_b - (unsigned long)fde_a);
+ fde_a->func_start_off += delta;
+ fde_b->func_start_off -= delta;
+}
+
+static int sframe_sort_fdes(struct sframe_section *sec)
+{
+ void *fdes = (void *)sec->fdes_start;
+ size_t num_fdes = sec->num_fdes;
+
+ if (sec->sec_type != SFRAME_KERNEL)
+ return -EINVAL;
+ if (sec->fdes_sorted)
+ return 0;
+
+ sort(fdes, num_fdes, sizeof(struct sframe_fde_v3),
+ sframe_sort_cmp_fde, sframe_sort_swap_fde);
+ sec->fdes_sorted = true;
+ return 0;
+}
+
void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
void *text, size_t text_size)
{
@@ -1053,6 +1098,8 @@ void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
if (WARN_ON(sframe_read_header(sec)))
return;
+ if (WARN_ON(sframe_sort_fdes(sec)))
+ return;
if (WARN_ON(sframe_validate_section(sec)))
return;
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 7/8] sframe: Introduce in-kernel SFRAME_VALIDATION
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (5 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 6/8] arm64/module, sframe: Add sframe support for modules Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-04-30 10:04 ` Jens Remus
2026-04-28 18:36 ` [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames Dylan Hatch
` (2 subsequent siblings)
9 siblings, 1 reply; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Generalize the __safe* helpers to support a non-user-access code path.
This requires arch-specific function address validation. This is because
arm64 vmlinux keeps the .exit.text (normally discarded) and .rodata.text
sections, both of which lie outside the bounds of the normal .text.
.rodata.text contains code that is never executed by the kernel mapping,
but for which the toolchain nonetheless generates sframe data, and needs
to be considered valid for a PC lookup.
Additionally .init.text lies outside .text for all arches and must be
accounted for as well.
Suggested-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
arch/Kconfig | 2 +-
arch/arm64/include/asm/sections.h | 1 +
arch/arm64/include/asm/unwind_sframe.h | 47 ++++++++++++++++++++++++++
arch/arm64/kernel/vmlinux.lds.S | 2 ++
include/linux/sframe.h | 2 ++
kernel/unwind/sframe.c | 25 ++++++++++++--
6 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 8d27b3249e7a..a528f5b23647 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -503,7 +503,7 @@ config HAVE_UNWIND_USER_SFRAME
config SFRAME_VALIDATION
bool "Enable .sframe section debugging"
- depends on HAVE_UNWIND_USER_SFRAME
+ depends on UNWIND_SFRAME_LOOKUP
depends on DYNAMIC_DEBUG
help
When adding an .sframe section for a task, validate the entire
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 51b0d594239e..5edb4304f661 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __mmuoff_data_start[], __mmuoff_data_end[];
extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+extern char _srodatatext[], _erodatatext[];
static inline size_t entry_tramp_text_size(void)
{
diff --git a/arch/arm64/include/asm/unwind_sframe.h b/arch/arm64/include/asm/unwind_sframe.h
index 876412881196..66ebe5f38bd0 100644
--- a/arch/arm64/include/asm/unwind_sframe.h
+++ b/arch/arm64/include/asm/unwind_sframe.h
@@ -2,7 +2,54 @@
#ifndef _ASM_ARM64_UNWIND_SFRAME_H
#define _ASM_ARM64_UNWIND_SFRAME_H
+#include <linux/module.h>
+#include <linux/sframe.h>
+#include <asm/sections.h>
+
#define SFRAME_REG_SP 31
#define SFRAME_REG_FP 29
+static inline bool sframe_func_start_addr_valid(struct sframe_section *sec,
+ unsigned long func_addr)
+{
+ /* Common case for unwinding */
+ if (sec->text_start <= func_addr && func_addr < sec->text_end)
+ return true;
+
+ if (sec->sec_type != SFRAME_KERNEL)
+ return false;
+
+ /*
+ * Account for vmlinux and module code outside the normal .text section.
+ * The toolchain still generates sframe data for these functions, so
+ * sframe lookups on them should be allowed.
+ */
+ if (sec == &kernel_sfsec) {
+ if (is_kernel_inittext(func_addr))
+ return true;
+
+ /* .exit.text is retained in vmlinux on arm64. */
+ if (func_addr >= (unsigned long)__exittext_begin &&
+ func_addr < (unsigned long)__exittext_end)
+ return true;
+
+
+ /*
+ * .rodata.text is never executed from the kernel mapping, but
+ * still has sframe data
+ */
+ if (func_addr >= (unsigned long)_srodatatext &&
+ func_addr < (unsigned long)_erodatatext)
+ return true;
+ } else {
+ struct module *mod = container_of(sec, struct module,
+ arch.sframe_sec);
+ if (within_module_mem_type(func_addr, mod, MOD_INIT_TEXT))
+ return true;
+ }
+
+ return false;
+}
+#define sframe_func_start_addr_valid sframe_func_start_addr_valid
+
#endif /* _ASM_ARM64_UNWIND_SFRAME_H */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 2964aad0362e..8c2dae6e7a86 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -213,12 +213,14 @@ SECTIONS
/* code sections that are never executed via the kernel mapping */
.rodata.text : {
+ _srodatatext = .;
TRAMP_TEXT
HIBERNATE_TEXT
KEXEC_TEXT
IDMAP_TEXT
. = ALIGN(PAGE_SIZE);
}
+ _erodatatext = .;
idmap_pg_dir = .;
. += PAGE_SIZE;
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 27f5a66190af..ac3aa9db7d91 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -34,6 +34,8 @@ struct sframe_section {
signed char fp_off;
};
+extern struct sframe_section kernel_sfsec __ro_after_init;
+
#endif /* CONFIG_UNWIND_SFRAME_LOOKUP */
#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 70001c8e586d..99c2a39c51ce 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -21,10 +21,18 @@
#include "sframe.h"
#include "sframe_debug.h"
+#ifndef sframe_func_start_addr_valid
+static inline bool sframe_func_start_addr_valid(struct sframe_section *sec,
+ unsigned long func_addr)
+{
+ return (sec->text_start <= func_addr && func_addr < sec->text_end);
+}
+#endif
+
#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
static bool sframe_init __ro_after_init;
-static struct sframe_section kernel_sfsec __ro_after_init;
+struct sframe_section kernel_sfsec __ro_after_init;
#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
@@ -152,7 +160,7 @@ static __always_inline int __read_fde(struct sframe_section *sec,
sizeof(struct sframe_fde_v3), Efault);
func_addr = fde_addr + _fde.func_start_off;
- if (func_addr < sec->text_start || func_addr > sec->text_end)
+ if (!sframe_func_start_addr_valid(sec, func_addr))
return -EINVAL;
fda_addr = sec->fres_start + _fde.fres_off;
@@ -636,6 +644,9 @@ static int safe_read_fde(struct sframe_section *sec,
{
int ret;
+ if (sec->sec_type == SFRAME_KERNEL)
+ return __read_fde(sec, fde_num, fde);
+
if (!user_read_access_begin((void __user *)sec->sframe_start,
sec->sframe_end - sec->sframe_start))
return -EFAULT;
@@ -651,6 +662,9 @@ static int safe_read_fre(struct sframe_section *sec,
{
int ret;
+ if (sec->sec_type == SFRAME_KERNEL)
+ return __read_fre(sec, fde, fre_addr, fre);
+
if (!user_read_access_begin((void __user *)sec->sframe_start,
sec->sframe_end - sec->sframe_start))
return -EFAULT;
@@ -665,6 +679,9 @@ static int safe_read_fre_datawords(struct sframe_section *sec,
{
int ret;
+ if (sec->sec_type == SFRAME_KERNEL)
+ return __read_fre_datawords(sec, fde, fre);
+
if (!user_read_access_begin((void __user *)sec->sframe_start,
sec->sframe_end - sec->sframe_start))
return -EFAULT;
@@ -1013,6 +1030,8 @@ void __init init_sframe_table(void)
if (WARN_ON(sframe_read_header(&kernel_sfsec)))
return;
+ if (WARN_ON(sframe_validate_section(&kernel_sfsec)))
+ return;
sframe_init = true;
}
@@ -1031,6 +1050,8 @@ void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
if (WARN_ON(sframe_read_header(&sec)))
return;
+ if (WARN_ON(sframe_validate_section(&sec)))
+ return;
mod->arch.sframe_sec = sec;
mod->arch.sframe_init = true;
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v5 7/8] sframe: Introduce in-kernel SFRAME_VALIDATION
2026-04-28 18:36 ` [PATCH v5 7/8] sframe: Introduce in-kernel SFRAME_VALIDATION Dylan Hatch
@ 2026-04-30 10:04 ` Jens Remus
0 siblings, 0 replies; 17+ messages in thread
From: Jens Remus @ 2026-04-30 10:04 UTC (permalink / raw)
To: Dylan Hatch, Roman Gushchin, Weinan Liu, Will Deacon,
Josh Poimboeuf, Indu Bhagat, Peter Zijlstra, Steven Rostedt,
Catalin Marinas, Jiri Kosina
Cc: Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
joe.lawrence, linux-toolchains, linux-kernel, live-patching,
linux-arm-kernel, Randy Dunlap, Heiko Carstens
On 4/28/2026 8:36 PM, Dylan Hatch wrote:
> Generalize the __safe* helpers to support a non-user-access code path.
>
> This requires arch-specific function address validation. This is because
> arm64 vmlinux keeps .exit.text (normally discarded), and .rodata.text
> sections both of which lie outside the bounds of the normal .text.
> .rodata.text contains code that is never executed by the kernel mapping,
> but for which the toolchain nonetheless generates sframe data, and needs
> to be considered valid for a PC lookup.
>
> Additionally .init.text lies outside .text for all arches and must be
> accounted for as well.
> diff --git a/arch/arm64/include/asm/unwind_sframe.h b/arch/arm64/include/asm/unwind_sframe.h
> @@ -2,7 +2,54 @@
> #ifndef _ASM_ARM64_UNWIND_SFRAME_H
> #define _ASM_ARM64_UNWIND_SFRAME_H
>
> +#include <linux/module.h>
> +#include <linux/sframe.h>
> +#include <asm/sections.h>
> +
> #define SFRAME_REG_SP 31
> #define SFRAME_REG_FP 29
>
> +static inline bool sframe_func_start_addr_valid(struct sframe_section *sec,
> + unsigned long func_addr)
> +{
> + /* Common case for unwinding */
> + if (sec->text_start <= func_addr && func_addr < sec->text_end)
> + return true;
> +
> + if (sec->sec_type != SFRAME_KERNEL)
> + return false;
> +
> + /*
> + * Account for vmlinux and module code outside the normal .text section.
> + * The toolchain still generates sframe data for these functions, so
> + * sframe lookups on them should be allowed.
> + */
> + if (sec == &kernel_sfsec) {
> + if (is_kernel_inittext(func_addr))
> + return true;
> +
> + /* .exit.text is retained in vmlinux on arm64. */
> + if (func_addr >= (unsigned long)__exittext_begin &&
> + func_addr < (unsigned long)__exittext_end)
> + return true;
> +
> +
Nit: Superfluous empty line (2 instead of 1).
> + /*
> + * .rodata.text is never executed from the kernel mapping, but
> + * still has sframe data
> + */
> + if (func_addr >= (unsigned long)_srodatatext &&
> + func_addr < (unsigned long)_erodatatext)
> + return true;
> + } else {
> + struct module *mod = container_of(sec, struct module,
> + arch.sframe_sec);
This currently does not work properly when sframe_validate_section() is
called from sframe_module_init(), which operates on a temporary struct
sframe_section section, that is not (yet) the one in struct module. See
my feedback to the respective patch for how to resolve.
> + if (within_module_mem_type(func_addr, mod, MOD_INIT_TEXT))
> + return true;
> + }
> +
> + return false;
> +}
> +#define sframe_func_start_addr_valid sframe_func_start_addr_valid
> +
> #endif /* _ASM_ARM64_UNWIND_SFRAME_H */
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (6 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 7/8] sframe: Introduce in-kernel SFRAME_VALIDATION Dylan Hatch
@ 2026-04-28 18:36 ` Dylan Hatch
2026-05-01 16:46 ` Mark Rutland
2026-04-29 17:18 ` [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Mark Rutland
2026-04-30 10:11 ` Jens Remus
9 siblings, 1 reply; 17+ messages in thread
From: Dylan Hatch @ 2026-04-28 18:36 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Add unwind_next_frame_sframe() function to unwind by sframe info if
present. Use this method at exception boundaries, falling back to
frame-pointer unwind only on failure. In such failure cases, the
stacktrace is considered unreliable.
During normal unwind, prefer frame pointer unwind (for better
performance) with sframe as a backup.
This change restores the LR behavior originally introduced in commit
c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries"),
but later removed in commit 32ed1205682e ("arm64: stacktrace: Skip
reporting LR at exception boundaries").
This can be done because the sframe data can be used to determine
whether the LR is current for the PC value recovered from pt_regs at the
exception boundary.
Signed-off-by: Weinan Liu <wnliu@google.com>
Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
arch/arm64/include/asm/stacktrace/common.h | 6 +
arch/arm64/kernel/stacktrace.c | 246 +++++++++++++++++++--
2 files changed, 232 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index 821a8fdd31af..4df68181e1b5 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -21,6 +21,8 @@ struct stack_info {
*
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
+ * @sp: The sp value at the call site of the current function.
+ * @unreliable: Stacktrace is unreliable.
*
* @stack: The stack currently being unwound.
* @stacks: An array of stacks which can be unwound.
@@ -29,7 +31,11 @@ struct stack_info {
struct unwind_state {
unsigned long fp;
unsigned long pc;
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ unsigned long sp;
+#endif
+ bool unreliable;
struct stack_info stack;
struct stack_info *stacks;
int nr_stacks;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 3ebcf8c53fb0..c935323f393b 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -14,6 +14,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
+#include <linux/sframe.h>
#include <asm/efi.h>
#include <asm/irq.h>
@@ -26,6 +27,7 @@ enum kunwind_source {
KUNWIND_SOURCE_CALLER,
KUNWIND_SOURCE_TASK,
KUNWIND_SOURCE_REGS_PC,
+ KUNWIND_SOURCE_REGS_LR,
};
union unwind_flags {
@@ -85,6 +87,9 @@ kunwind_init_from_regs(struct kunwind_state *state,
state->regs = regs;
state->common.fp = regs->regs[29];
state->common.pc = regs->pc;
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ state->common.sp = regs->sp;
+#endif
state->source = KUNWIND_SOURCE_REGS_PC;
}
@@ -103,6 +108,9 @@ kunwind_init_from_caller(struct kunwind_state *state)
state->common.fp = (unsigned long)__builtin_frame_address(1);
state->common.pc = (unsigned long)__builtin_return_address(0);
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ state->common.sp = (unsigned long)__builtin_frame_address(0);
+#endif
state->source = KUNWIND_SOURCE_CALLER;
}
@@ -124,6 +132,9 @@ kunwind_init_from_task(struct kunwind_state *state,
state->common.fp = thread_saved_fp(task);
state->common.pc = thread_saved_pc(task);
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ state->common.sp = thread_saved_sp(task);
+#endif
state->source = KUNWIND_SOURCE_TASK;
}
@@ -181,7 +192,6 @@ int kunwind_next_regs_pc(struct kunwind_state *state)
state->regs = regs;
state->common.pc = regs->pc;
state->common.fp = regs->regs[29];
- state->regs = NULL;
state->source = KUNWIND_SOURCE_REGS_PC;
return 0;
}
@@ -237,6 +247,9 @@ kunwind_next_frame_record(struct kunwind_state *state)
unwind_consume_stack(&state->common, info, fp, sizeof(*record));
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ state->common.sp = state->common.fp;
+#endif
state->common.fp = new_fp;
state->common.pc = new_pc;
state->source = KUNWIND_SOURCE_FRAME;
@@ -244,6 +257,176 @@ kunwind_next_frame_record(struct kunwind_state *state)
return 0;
}
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+static __always_inline struct stack_info *
+get_word(struct unwind_state *state, unsigned long *word)
+{
+ unsigned long addr = *word;
+ struct stack_info *info;
+
+ info = unwind_find_stack(state, addr, sizeof(addr));
+ if (!info)
+ return info;
+
+ *word = READ_ONCE(*(unsigned long *)addr);
+
+ return info;
+}
+
+static __always_inline int
+get_consume_word(struct unwind_state *state, unsigned long *word)
+{
+ struct stack_info *info;
+ unsigned long addr = *word;
+
+ info = get_word(state, word);
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(state, info, addr, sizeof(addr));
+ return 0;
+}
+
+/*
+ * Unwind to the next frame according to sframe.
+ */
+static __always_inline int
+unwind_next_frame_sframe(struct kunwind_state *state)
+{
+ struct unwind_frame frame;
+ unsigned long cfa, fp, ra;
+ enum kunwind_source source = KUNWIND_SOURCE_FRAME;
+ struct pt_regs *regs = state->regs;
+
+ int err;
+
+ /* FP/SP alignment 8 bytes */
+ if (state->common.fp & 0x7 || state->common.sp & 0x7)
+ return -EINVAL;
+
+ /*
+ * Most/all outermost functions are not visible to sframe. So, check for
+ * a meta frame record if the sframe lookup fails.
+ */
+ err = sframe_find_kernel(state->common.pc, &frame);
+ if (err)
+ return kunwind_next_frame_record_meta(state);
+
+ if (frame.outermost)
+ return -ENOENT;
+
+ /* Get the Canonical Frame Address (CFA) */
+ switch (frame.cfa.rule) {
+ case UNWIND_CFA_RULE_SP_OFFSET:
+ cfa = state->common.sp;
+ break;
+ case UNWIND_CFA_RULE_FP_OFFSET:
+ if (state->common.fp < state->common.sp)
+ return -EINVAL;
+ cfa = state->common.fp;
+ break;
+ case UNWIND_CFA_RULE_REG_OFFSET:
+ case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
+ /* regs only available in topmost/interrupt frame */
+ if (!regs || frame.cfa.regnum > 30)
+ return -EINVAL;
+ cfa = regs->regs[frame.cfa.regnum];
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+ cfa += frame.cfa.offset;
+
+ /*
+ * CFA typically points to a higher address than RA or FP, so don't
+ * consume from the stack when we read it.
+ */
+ if (frame.cfa.rule & UNWIND_RULE_DEREF &&
+ !get_word(&state->common, &cfa))
+ return -EINVAL;
+
+ /* CFA alignment 8 bytes */
+ if (cfa & 0x7)
+ return -EINVAL;
+
+ /* Get the Return Address (RA) */
+ switch (frame.ra.rule) {
+ case UNWIND_RULE_RETAIN:
+ /* regs only available in topmost/interrupt frame */
+ if (!regs)
+ return -EINVAL;
+ ra = regs->regs[30];
+ source = KUNWIND_SOURCE_REGS_LR;
+ break;
+ /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
+ ra = cfa + frame.ra.offset;
+ break;
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
+ /* regs only available in topmost/interrupt frame */
+ if (!regs)
+ return -EINVAL;
+ ra = regs->regs[frame.cfa.regnum];
+ ra += frame.ra.offset;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ /* Get the Frame Pointer (FP) */
+ switch (frame.fp.rule) {
+ case UNWIND_RULE_RETAIN:
+ fp = state->common.fp;
+ break;
+ /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
+ fp = cfa + frame.fp.offset;
+ break;
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
+ /* regs only available in topmost/interrupt frame */
+ if (!regs)
+ return -EINVAL;
+ fp = regs->regs[frame.fp.regnum];
+ fp += frame.fp.offset;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ /*
+ * Consume RA and FP from the stack. The frame record puts FP at a lower
+ * address than RA, so we always read FP first.
+ */
+ if (frame.fp.rule & UNWIND_RULE_DEREF &&
+ !get_word(&state->common, &fp))
+ return -EINVAL;
+
+ if (frame.ra.rule & UNWIND_RULE_DEREF &&
+ get_consume_word(&state->common, &ra))
+ return -EINVAL;
+
+ state->common.pc = ra;
+ state->common.sp = cfa;
+ state->common.fp = fp;
+
+ state->source = source;
+
+ return 0;
+}
+
+#else /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+static __always_inline int
+unwind_next_frame_sframe(struct kunwind_state *state) { return -EINVAL; }
+
+#endif /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME*/
+
/*
* Unwind from one frame record (A) to the next frame record (B).
*
@@ -259,12 +442,25 @@ kunwind_next(struct kunwind_state *state)
state->flags.all = 0;
switch (state->source) {
+ case KUNWIND_SOURCE_REGS_PC:
+ err = unwind_next_frame_sframe(state);
+
+ if (err && err != -ENOENT) {
+ /* Fallback to FP based unwinder */
+ err = kunwind_next_frame_record(state);
+ state->common.unreliable = true;
+ }
+ state->regs = NULL;
+ break;
case KUNWIND_SOURCE_FRAME:
case KUNWIND_SOURCE_CALLER:
case KUNWIND_SOURCE_TASK:
- case KUNWIND_SOURCE_REGS_PC:
+ case KUNWIND_SOURCE_REGS_LR:
err = kunwind_next_frame_record(state);
+ if (err && err != -ENOENT)
+ err = unwind_next_frame_sframe(state);
break;
+
default:
err = -EINVAL;
}
@@ -350,6 +546,9 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
.common = {
.stacks = stacks,
.nr_stacks = ARRAY_SIZE(stacks),
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+ .sp = 0,
+#endif
},
};
@@ -390,34 +589,40 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+struct kunwind_reliable_consume_entry_data {
+ stack_trace_consume_fn consume_entry;
+ void *cookie;
+ bool unreliable;
+};
+
static __always_inline bool
-arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+arch_kunwind_reliable_consume_entry(const struct kunwind_state *state, void *cookie)
{
- /*
- * At an exception boundary we can reliably consume the saved PC. We do
- * not know whether the LR was live when the exception was taken, and
- * so we cannot perform the next unwind step reliably.
- *
- * All that matters is whether the *entire* unwind is reliable, so give
- * up as soon as we hit an exception boundary.
- */
- if (state->source == KUNWIND_SOURCE_REGS_PC)
- return false;
+ struct kunwind_reliable_consume_entry_data *data = cookie;
- return arch_kunwind_consume_entry(state, cookie);
+ if (state->common.unreliable) {
+ data->unreliable = true;
+ return false;
+ }
+ return data->consume_entry(data->cookie, state->common.pc);
}
-noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
- void *cookie,
- struct task_struct *task)
+noinline notrace int arch_stack_walk_reliable(
+ stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
- struct kunwind_consume_entry_data data = {
+ struct kunwind_reliable_consume_entry_data data = {
.consume_entry = consume_entry,
.cookie = cookie,
+ .unreliable = false,
};
- return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
- task, NULL);
+ kunwind_stack_walk(arch_kunwind_reliable_consume_entry, &data, task, NULL);
+
+ if (data.unreliable)
+ return -EINVAL;
+
+ return 0;
}
struct bpf_unwind_consume_entry_data {
@@ -452,6 +657,7 @@ static const char *state_source_string(const struct kunwind_state *state)
case KUNWIND_SOURCE_CALLER: return "C";
case KUNWIND_SOURCE_TASK: return "T";
case KUNWIND_SOURCE_REGS_PC: return "P";
+ case KUNWIND_SOURCE_REGS_LR: return "L";
default: return "U";
}
}
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames
2026-04-28 18:36 ` [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames Dylan Hatch
@ 2026-05-01 16:46 ` Mark Rutland
2026-05-04 8:47 ` Jens Remus
0 siblings, 1 reply; 17+ messages in thread
From: Mark Rutland @ 2026-05-01 16:46 UTC (permalink / raw)
To: Dylan Hatch
Cc: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Hi Dylan,
Thanks for putting this together. I think this is looking pretty good.
However, there are some things that aren't quite right and need some
work, which I've commented on below.
More generally, there are a few things that aren't addressed by this
series that we will also need to address. Importantly:
(1) For correctness, we'll need to address a latent issue with unwinding
across an fgraph return trampoline, where the return address is
transiently unrecoverable.
Before this series, that doesn't matter for livepatching because the
livepatching code isn't called synchronously within the fgraph
handler, and unwinds which cross an exception boundary are marked as
unreliable.
After this series, that does matter as we can unwind across an
exception boundary, and might happen to interrupt that transient
window.
I think we can solve that with some restructuring of that code,
restoring the original address *before* removing that from the
fgraph return stack, and ensuring that the unwinder can find it.
I'm not immediately sure whether kretprobes has a similar issue.
(2) To make unwinding generally possible, we'll need to annotate some
assembly functions as unwindable. We'll need to do that for string
routines under lib/, and probably some crypto code, but we don't
need to do that for most code in head.S, entry.S, etc.
The vast majority of relevant assembly functions are leaf functions
(where the return address is never moved out of the LR), so we can
probably get away with a simple annotation for those that avoids the
need for open-coded CFI directives everywhere.
I've pushed some reliable stacktrace tests to:
git://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git stacktrace/tests
That finds the fgraph issue (regardless of this series). When merged
with this series, it triggers a warning in kunwind_next_frame_record_meta(),
which unwind_next_frame_sframe() erroneously calls as a fallback.
As noted below, I think that fallback path should be removed, and
unwind_next_frame_sframe() should return an error in that case.
On Tue, Apr 28, 2026 at 06:36:43PM +0000, Dylan Hatch wrote:
> Add unwind_next_frame_sframe() function to unwind by sframe info if
> present. Use this method at exception boundaries, falling back to
> frame-pointer unwind only on failure. In such failure cases, the
> stacktrace is considered unreliable.
>
> During normal unwind, prefer frame pointer unwind (for better
> performance) with sframe as a backup.
We should certainly use SFrame at an exception boundary. However, when
frame pointer unwind fails, I do not think we should use SFrame as a backup.
That only fails when something is already wrong, and an SFrame unwind
isn't necessarily going to be better. I think we should immediately fail
in those cases.
> This change restores the LR behavior originally introduced in commit
> c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries"),
> But later removed in commit 32ed1205682e ("arm64: stacktrace: Skip
> reporting LR at exception boundaries")
>
> This can be done because the sframe data can be used to determine
> whether the LR is current for the PC value recovered from pt_regs at the
> exception boundary.
>
> Signed-off-by: Weinan Liu <wnliu@google.com>
> Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
> Reviewed-by: Jens Remus <jremus@linux.ibm.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> ---
> arch/arm64/include/asm/stacktrace/common.h | 6 +
> arch/arm64/kernel/stacktrace.c | 246 +++++++++++++++++++--
> 2 files changed, 232 insertions(+), 20 deletions(-)
>
> diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
> index 821a8fdd31af..4df68181e1b5 100644
> --- a/arch/arm64/include/asm/stacktrace/common.h
> +++ b/arch/arm64/include/asm/stacktrace/common.h
> @@ -21,6 +21,8 @@ struct stack_info {
> *
> * @fp: The fp value in the frame record (or the real fp)
> * @pc: The lr value in the frame record (or the real lr)
> + * @sp: The sp value at the call site of the current function.
> + * @unreliable: Stacktrace is unreliable.
> *
> * @stack: The stack currently being unwound.
> * @stacks: An array of stacks which can be unwound.
> @@ -29,7 +31,11 @@ struct stack_info {
> struct unwind_state {
> unsigned long fp;
> unsigned long pc;
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + unsigned long sp;
> +#endif
As this is only used by the kernel unwinder (and not the hyp unwinder),
this should live in struct kunwind_state in stacktrace.c.
That said, for unwinding across exception boundaries we should not need
this, as the SP value will be in the pt_regs. If we only use SFrame for
the exception boundary case, we can remove this entirely. I would
strongly prefer that we do that.
> + bool unreliable;
Likewise, this should live in struct kunwind_state.
> struct stack_info stack;
> struct stack_info *stacks;
> int nr_stacks;
> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
> index 3ebcf8c53fb0..c935323f393b 100644
> --- a/arch/arm64/kernel/stacktrace.c
> +++ b/arch/arm64/kernel/stacktrace.c
> @@ -14,6 +14,7 @@
> #include <linux/sched/debug.h>
> #include <linux/sched/task_stack.h>
> #include <linux/stacktrace.h>
> +#include <linux/sframe.h>
Nit: these are supposed to be ordered alphabetically, so the include of
<linux/sframe.h> should be just before <linux/stacktrace.h>.
>
> #include <asm/efi.h>
> #include <asm/irq.h>
> @@ -26,6 +27,7 @@ enum kunwind_source {
> KUNWIND_SOURCE_CALLER,
> KUNWIND_SOURCE_TASK,
> KUNWIND_SOURCE_REGS_PC,
> + KUNWIND_SOURCE_REGS_LR,
> };
>
> union unwind_flags {
> @@ -85,6 +87,9 @@ kunwind_init_from_regs(struct kunwind_state *state,
> state->regs = regs;
> state->common.fp = regs->regs[29];
> state->common.pc = regs->pc;
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + state->common.sp = regs->sp;
> +#endif
As above, I don't think we need to stash the SP, as it only matters when
performing the next unwind from the KUNWIND_SOURCE_REGS_PC state, and in
that state we have the regs.
> state->source = KUNWIND_SOURCE_REGS_PC;
> }
>
> @@ -103,6 +108,9 @@ kunwind_init_from_caller(struct kunwind_state *state)
>
> state->common.fp = (unsigned long)__builtin_frame_address(1);
> state->common.pc = (unsigned long)__builtin_return_address(0);
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + state->common.sp = (unsigned long)__builtin_frame_address(0);
> +#endif
> state->source = KUNWIND_SOURCE_CALLER;
> }
This is not correct. On arm64, __builtin_frame_address(0) returns the
address of the current function's frame record. That's not the same as
the SP of the caller (which would necessarily differ by at least the
size of that frame record).
For example, the following:
void *return_own_frame(void)
{
return __builtin_frame_address(0);
}
... is compiled by GCC 15.2.0 as:
0000000000000000 <return_own_frame>:
0: d503233f paciasp
4: a9bf7bfd stp x29, x30, [sp, #-16]!
8: 910003fd mov x29, sp
c: aa1d03e0 mov x0, x29
10: a8c17bfd ldp x29, x30, [sp], #16
14: d50323bf autiasp
18: d65f03c0 ret
1c: d503201f nop
As above, I think we can remove unwind_state:sp entirely, and omit this.
> @@ -124,6 +132,9 @@ kunwind_init_from_task(struct kunwind_state *state,
>
> state->common.fp = thread_saved_fp(task);
> state->common.pc = thread_saved_pc(task);
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + state->common.sp = thread_saved_sp(task);
> +#endif
> state->source = KUNWIND_SOURCE_TASK;
> }
As above, I think we can remove unwind_state:sp entirely, and omit this.
In contrast to kunwind_init_from_caller() above, given the way the
cpu_switch_to() assembly function saves the FP/PC/SP, those should all
be consistent with the values in the caller immediately after the caller
is returned to.
> @@ -181,7 +192,6 @@ int kunwind_next_regs_pc(struct kunwind_state *state)
> state->regs = regs;
> state->common.pc = regs->pc;
> state->common.fp = regs->regs[29];
> - state->regs = NULL;
> state->source = KUNWIND_SOURCE_REGS_PC;
> return 0;
> }
> @@ -237,6 +247,9 @@ kunwind_next_frame_record(struct kunwind_state *state)
>
> unwind_consume_stack(&state->common, info, fp, sizeof(*record));
>
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + state->common.sp = state->common.fp;
> +#endif
This is not correct. The caller's frame record can be anywhere in the
caller's stack frame, and we definitely have cases today where FP != SP
at a function call boundary.
For example, the add_random_kstack_offset() logic in invoke_syscall()
typically results in the SP being decremented after the frame record has
been placed on the stack.
That looks roughly like the following:
void callee(void *ptr);
void caller(size_t size)
{
void *ptr = __builtin_alloca(size);
callee(ptr);
}
... which GCC 15.2.0 can compile as:
0000000000000000 <caller>:
0: d503233f paciasp
4: 91003c00 add x0, x0, #0xf
8: a9bf7bfd stp x29, x30, [sp, #-16]!
c: 927cec00 and x0, x0, #0xfffffffffffffff0
10: 910003fd mov x29, sp
14: cb2063ff sub sp, sp, x0
18: 910003e0 mov x0, sp
1c: 94000000 bl 0 <callee>
20: 910003bf mov sp, x29
24: a8c17bfd ldp x29, x30, [sp], #16
28: d50323bf autiasp
2c: d65f03c0 ret
As above, I think we can remove unwind_state:sp entirely, and omit this.
> state->common.fp = new_fp;
> state->common.pc = new_pc;
> state->source = KUNWIND_SOURCE_FRAME;
> @@ -244,6 +257,176 @@ kunwind_next_frame_record(struct kunwind_state *state)
> return 0;
> }
>
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> +
> +static __always_inline struct stack_info *
> +get_word(struct unwind_state *state, unsigned long *word)
> +{
> + unsigned long addr = *word;
> + struct stack_info *info;
> +
> + info = unwind_find_stack(state, addr, sizeof(addr));
> + if (!info)
> + return info;
> +
> + *word = READ_ONCE(*(unsigned long *)addr);
> +
> + return info;
> +}
> +
> +static __always_inline int
> +get_consume_word(struct unwind_state *state, unsigned long *word)
> +{
> + struct stack_info *info;
> + unsigned long addr = *word;
> +
> + info = get_word(state, word);
> + if (!info)
> + return -EINVAL;
> +
> + unwind_consume_stack(state, info, addr, sizeof(addr));
> + return 0;
> +}
I was hoping that we wouldn't need these if we only used SFrame to
determine whether to use the LR or frame record, but I see that there
could be cases where the frame record might be partially constructed.
> +
> +/*
> + * Unwind to the next frame according to sframe.
> + */
> +static __always_inline int
> +unwind_next_frame_sframe(struct kunwind_state *state)
> +{
> + struct unwind_frame frame;
> + unsigned long cfa, fp, ra;
> + enum kunwind_source source = KUNWIND_SOURCE_FRAME;
> + struct pt_regs *regs = state->regs;
> +
> + int err;
As above, we should only use this for unwinding from the regs, after a
KUNWIND_SOURCE_REGS_PC step.
With that in mind, it would be good to:
(1) Rename this to something like kunwind_next_regs_sframe(). Note
'kunwind' rather than 'unwind' for consistency with the rest of this
file.
(2) Add the following sanity checks:
if (WARN_ON_ONCE(state->source != KUNWIND_SOURCE_REGS_PC))
return -EINVAL;
if (WARN_ON_ONCE(!state->regs))
return -EINVAL;
... which will also allow the code below to be simplified.
> +
> + /* FP/SP alignment 8 bytes */
> + if (state->common.fp & 0x7 || state->common.sp & 0x7)
> + return -EINVAL;
> +
> + /*
> + * Most/all outermost functions are not visible to sframe. So, check for
> + * a meta frame record if the sframe lookup fails.
> + */
> + err = sframe_find_kernel(state->common.pc, &frame);
> + if (err)
> + return kunwind_next_frame_record_meta(state);
> +
> + if (frame.outermost)
> + return -ENOENT;
I don't think we ever expect an outermost frame within the kernel. We
haven't added any annotations for that, and we expect to unwind all the
way to a FRAME_META_TYPE_FINAL frame.
We cannot fall back to kunwind_next_frame_record_meta() here. We don't
know that the next frame is a meta frame (and this results in a warning
noted above), and we don't know the result is going to be reliable if we
don't have SFrame data, so the right thing to do is return an error.
I think this should be:
/*
* A kernel unwind should always end at a FRAME_META_TYPE_FINAL
* frame. There should be no outermost frames within the kernel.
*/
if (frame.outermost)
return -EINVAL;
err = sframe_find_kernel(state->common.pc, &frame);
if (err)
return -EINVAL;
> + /* Get the Canonical Frame Address (CFA) */
> + switch (frame.cfa.rule) {
> + case UNWIND_CFA_RULE_SP_OFFSET:
> + cfa = state->common.sp;
> + break;
> + case UNWIND_CFA_RULE_FP_OFFSET:
> + if (state->common.fp < state->common.sp)
> + return -EINVAL;
> + cfa = state->common.fp;
> + break;
> + case UNWIND_CFA_RULE_REG_OFFSET:
> + case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
> + /* regs only available in topmost/interrupt frame */
> + if (!regs || frame.cfa.regnum > 30)
> + return -EINVAL;
> + cfa = regs->regs[frame.cfa.regnum];
> + break;
Do we ever expect to see UNWIND_CFA_RULE_REG_OFFSET or
UNWIND_CFA_RULE_REG_OFFSET_DEREF in practice for kernel code?
> + default:
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> + }
> + cfa += frame.cfa.offset;
> +
> + /*
> + * CFA typically points to a higher address than RA or FP, so don't
> + * consume from the stack when we read it.
> + */
> + if (frame.cfa.rule & UNWIND_RULE_DEREF &&
> + !get_word(&state->common, &cfa))
> + return -EINVAL;
Per the switch above, this could only be
UNWIND_CFA_RULE_REG_OFFSET_DEREF. As above, do we ever expect to
encounter that in practice for kernel code?
> +
> + /* CFA alignment 8 bytes */
> + if (cfa & 0x7)
> + return -EINVAL;
If the CFA is the SP upon entry to the function, then per AAPCS64 rules
it should be aligned to 16 bytes. Otherwise, where has this 8 byte
alignment requirement come from? Does SFrame mandate that?
> +
> + /* Get the Return Address (RA) */
> + switch (frame.ra.rule) {
> + case UNWIND_RULE_RETAIN:
> + /* regs only available in topmost/interrupt frame */
> + if (!regs)
> + return -EINVAL;
> + ra = regs->regs[30];
> + source = KUNWIND_SOURCE_REGS_LR;
> + break;
> + /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
It would be better for the comment to say *why* that's not implemented.
I assume that's because UNWIND_USER_RULE_CFA_OFFSET would mean that the return
address is a stack address, and that's obviously not legitimate.
> + case UNWIND_RULE_CFA_OFFSET_DEREF:
> + ra = cfa + frame.ra.offset;
> + break;
> + case UNWIND_RULE_REG_OFFSET:
> + case UNWIND_RULE_REG_OFFSET_DEREF:
> + /* regs only available in topmost/interrupt frame */
> + if (!regs)
> + return -EINVAL;
> + ra = regs->regs[frame.cfa.regnum];
> + ra += frame.ra.offset;
> + break;
Do we ever expect UNWIND_RULE_REG_OFFSET or UNWIND_RULE_REG_OFFSET_DEREF
in practice for kernel code?
I don't think we expect UNWIND_RULE_REG_OFFSET unless that's sometimes used
instead of UNWIND_RULE_RETAIN to express that the return address is in x30
(with zero offset).
Similarly, if the address is on the stack it should be in a frame
record. Would we ever expect UNWIND_RULE_REG_OFFSET_DEREF rather than
UNWIND_RULE_CFA_OFFSET_DEREF?
> + default:
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> + }
> +
> + /* Get the Frame Pointer (FP) */
> + switch (frame.fp.rule) {
> + case UNWIND_RULE_RETAIN:
> + fp = state->common.fp;
> + break;
> + /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
As for RA, the comment should explain why that's not implemented.
> + case UNWIND_RULE_CFA_OFFSET_DEREF:
> + fp = cfa + frame.fp.offset;
> + break;
> + case UNWIND_RULE_REG_OFFSET:
> + case UNWIND_RULE_REG_OFFSET_DEREF:
> + /* regs only available in topmost/interrupt frame */
> + if (!regs)
> + return -EINVAL;
> + fp = regs->regs[frame.fp.regnum];
> + fp += frame.fp.offset;
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> + }
> +
> + /*
> + * Consume RA and FP from the stack. The frame record puts FP at a lower
> + * address than RA, so we always read FP first.
> + */
> + if (frame.fp.rule & UNWIND_RULE_DEREF &&
> + !get_word(&state->common, &fp))
> + return -EINVAL;
Why is this get_word() rather than get_consume_word()?
> +
> + if (frame.ra.rule & UNWIND_RULE_DEREF &&
> + get_consume_word(&state->common, &ra))
> + return -EINVAL;
> +
> + state->common.pc = ra;
> + state->common.sp = cfa;
As above, the SP can be removed.
> + state->common.fp = fp;
> +
> + state->source = source;
> +
> + return 0;
> +}
> +
> +#else /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
> +
> +static __always_inline int
> +unwind_next_frame_sframe(struct kunwind_state *state) { return -EINVAL; }
> +
> +#endif /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME*/
> +
> /*
> * Unwind from one frame record (A) to the next frame record (B).
> *
> @@ -259,12 +442,25 @@ kunwind_next(struct kunwind_state *state)
> state->flags.all = 0;
>
> switch (state->source) {
> + case KUNWIND_SOURCE_REGS_PC:
> + err = unwind_next_frame_sframe(state);
> +
> + if (err && err != -ENOENT) {
> + /* Fallback to FP based unwinder */
> + err = kunwind_next_frame_record(state);
> + state->common.unreliable = true;
> + }
> + state->regs = NULL;
> + break;
This makes sense to me.
> case KUNWIND_SOURCE_FRAME:
> case KUNWIND_SOURCE_CALLER:
> case KUNWIND_SOURCE_TASK:
> - case KUNWIND_SOURCE_REGS_PC:
> + case KUNWIND_SOURCE_REGS_LR:
> err = kunwind_next_frame_record(state);
> + if (err && err != -ENOENT)
> + err = unwind_next_frame_sframe(state);
This isn't sound as we cannot track the SP reliably across all
transitions.
If a regular frame pointer unwind has failed, something is already
wrong, and we should give up immediately.
Please remove the fallback to sframe here.
> break;
> +
> default:
No need for this whitespace change.
> err = -EINVAL;
> }
> @@ -350,6 +546,9 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
> .common = {
> .stacks = stacks,
> .nr_stacks = ARRAY_SIZE(stacks),
> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
> + .sp = 0,
> +#endif
As above, this can go.
Thanks,
Mark.
> },
> };
>
> @@ -390,34 +589,40 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
> kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
> }
>
> +struct kunwind_reliable_consume_entry_data {
> + stack_trace_consume_fn consume_entry;
> + void *cookie;
> + bool unreliable;
> +};
> +
> static __always_inline bool
> -arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
> +arch_kunwind_reliable_consume_entry(const struct kunwind_state *state, void *cookie)
> {
> - /*
> - * At an exception boundary we can reliably consume the saved PC. We do
> - * not know whether the LR was live when the exception was taken, and
> - * so we cannot perform the next unwind step reliably.
> - *
> - * All that matters is whether the *entire* unwind is reliable, so give
> - * up as soon as we hit an exception boundary.
> - */
> - if (state->source == KUNWIND_SOURCE_REGS_PC)
> - return false;
> + struct kunwind_reliable_consume_entry_data *data = cookie;
>
> - return arch_kunwind_consume_entry(state, cookie);
> + if (state->common.unreliable) {
> + data->unreliable = true;
> + return false;
> + }
> + return data->consume_entry(data->cookie, state->common.pc);
> }
>
> -noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
> - void *cookie,
> - struct task_struct *task)
> +noinline notrace int arch_stack_walk_reliable(
> + stack_trace_consume_fn consume_entry,
> + void *cookie, struct task_struct *task)
> {
> - struct kunwind_consume_entry_data data = {
> + struct kunwind_reliable_consume_entry_data data = {
> .consume_entry = consume_entry,
> .cookie = cookie,
> + .unreliable = false,
> };
>
> - return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
> - task, NULL);
> + kunwind_stack_walk(arch_kunwind_reliable_consume_entry, &data, task, NULL);
> +
> + if (data.unreliable)
> + return -EINVAL;
> +
> + return 0;
> }
>
> struct bpf_unwind_consume_entry_data {
> @@ -452,6 +657,7 @@ static const char *state_source_string(const struct kunwind_state *state)
> case KUNWIND_SOURCE_CALLER: return "C";
> case KUNWIND_SOURCE_TASK: return "T";
> case KUNWIND_SOURCE_REGS_PC: return "P";
> + case KUNWIND_SOURCE_REGS_LR: return "L";
> default: return "U";
> }
> }
> --
> 2.54.0.545.g6539524ca2-goog
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames
2026-05-01 16:46 ` Mark Rutland
@ 2026-05-04 8:47 ` Jens Remus
0 siblings, 0 replies; 17+ messages in thread
From: Jens Remus @ 2026-05-04 8:47 UTC (permalink / raw)
To: Mark Rutland, Dylan Hatch
Cc: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
joe.lawrence, linux-toolchains, linux-kernel, live-patching,
linux-arm-kernel, Randy Dunlap, Heiko Carstens
Hello Mark,
I mostly have comments regarding your SFrame-related remarks.
On 5/1/2026 6:46 PM, Mark Rutland wrote:
> Thanks for putting this together. I think this is looking pretty good.
> However, there are some things that aren't quite right and need some
> work, which I've commented on below.
> (2) To make unwinding generally possible, we'll need to annotate some
> assembly functions as unwindable. We'll need to do that for string
> routines under lib/, and probably some crypto code, but we don't
> need to do that for most code in head.S, entry.S, etc.
>
> The vast majority of relevant assembly functions are leaf functions
> (where the return address is never moved out of the LR), so we can
> probably get away with a simple annotation for those that avoids the
> need for open-coded CFI directives everywhere.
Wrapping them in .cfi_startproc ... .cfi_endproc should do. For instance
by extending SYM_FUNC_START() and SYM_FUNC_END() or introducing flavors
that do. Or were you thinking of something else?
> On Tue, Apr 28, 2026 at 06:36:43PM +0000, Dylan Hatch wrote:
>> diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
>> @@ -21,6 +21,8 @@ struct stack_info {
>> *
>> * @fp: The fp value in the frame record (or the real fp)
>> * @pc: The lr value in the frame record (or the real lr)
>> + * @sp: The sp value at the call site of the current function.
>> + * @unreliable: Stacktrace is unreliable.
>> *
>> * @stack: The stack currently being unwound.
>> * @stacks: An array of stacks which can be unwound.
>> @@ -29,7 +31,11 @@ struct stack_info {
>> struct unwind_state {
>> unsigned long fp;
>> unsigned long pc;
>> +#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
>> + unsigned long sp;
>> +#endif
>
> As this is only used by the kernel unwinder (and not the hyp unwinder),
> this should live in struct kunwind_state in stacktrace.c.
>
> That said, for unwinding across exception boundaries we should not need
> this, as the SP value will be in the pt_regs. If we only use SFrame for
> the exception boundary case, we can remove this entirely. I would
> strongly prefer that we do that.
>> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
>> +/*
>> + * Unwind to the next frame according to sframe.
>> + */
>> +static __always_inline int
>> +unwind_next_frame_sframe(struct kunwind_state *state)
>> +{
>> + struct unwind_frame frame;
>> + unsigned long cfa, fp, ra;
>> + enum kunwind_source source = KUNWIND_SOURCE_FRAME;
>> + struct pt_regs *regs = state->regs;
>> +
>> + int err;
>
> As above, we should only use this for unwinding from the regs, after a
> KUNWIND_SOURCE_REGS_PC step.
>
> With that in mind, it would be good to:
>
> (1) Rename this to something like kunwind_next_regs_sframe(). Note
> 'kunwind' rather than 'unwind' for consistency with the rest of this
> file.
>
> (2) Add the following sanity checks:
>
> if (WARN_ON_ONCE(state->source != KUNWIND_SOURCE_REGS_PC))
> return -EINVAL;
> if (WARN_ON_ONCE(!state->regs))
> return -EINVAL;
>
> ... which will also allow the code below to be simplified.
>
>> +
>> + /* FP/SP alignment 8 bytes */
>> + if (state->common.fp & 0x7 || state->common.sp & 0x7)
>> + return -EINVAL;
>> +
>> + /*
>> + * Most/all outermost functions are not visible to sframe. So, check for
>> + * a meta frame record if the sframe lookup fails.
>> + */
>> + err = sframe_find_kernel(state->common.pc, &frame);
>> + if (err)
>> + return kunwind_next_frame_record_meta(state);
>> +
>> + if (frame.outermost)
>> + return -ENOENT;
>
> I don't think we ever expect an outermost frame within the kernel. We
> haven't added any annotations for that, and we expect to unwind all the
> way to a FRAME_META_TYPE_FINAL frame.
>
> We cannot fall back to kunwind_next_frame_record_meta() here. We don't
> know that the next frame is a meta frame (and this results in a warning
> noted above), and we don't know the result is going to be reliable if we
> don't have SFrame data, so the right thing to do is return an error.
>
> I think this should be:
>
> /*
> * A kernel unwind should always end at a FRAME_META_TYPE_FINAL
> * frame. There should be no outermost frames within the kernel.
> */
> if (frame.outermost)
> return -EINVAL;
Makes sense.
>
> err = sframe_find_kernel(state->common.pc, &frame);
> if (err)
> return -EINVAL;
>
>> + /* Get the Canonical Frame Address (CFA) */
>> + switch (frame.cfa.rule) {
>> + case UNWIND_CFA_RULE_SP_OFFSET:
>> + cfa = state->common.sp;
IIUC you suggest this to be changed as follows?
cfa = regs->regs[31];
>> + break;
>> + case UNWIND_CFA_RULE_FP_OFFSET:
>> + if (state->common.fp < state->common.sp)
>> + return -EINVAL;
>> + cfa = state->common.fp;
>> + break;
>> + case UNWIND_CFA_RULE_REG_OFFSET:
>> + case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
>> + /* regs only available in topmost/interrupt frame */
>> + if (!regs || frame.cfa.regnum > 30)
>> + return -EINVAL;
>> + cfa = regs->regs[frame.cfa.regnum];
>> + break;
>
> Do we ever expect to see UNWIND_CFA_RULE_REG_OFFSET or
> UNWIND_CFA_RULE_REG_OFFSET_DEREF in practice for kernel code?
No. Those can only occur with SFrame V3 flexible FDE, which are
currently not generated by GNU assembler for arm64/aarch64, and thus
could be omitted in the arm64-specific kernel sframe unwinder:
https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gas/config/tc-aarch64.h;hb=binutils-2_46#l342
I must admit that while reviewing I thought it would be future-proof to
have support for rules that can only be represented with SFrame V3
flexible FDE, even if they are currently not used on arm64. Ideally
kunwind_next_sframe() could be made common, if another architecture
would implement kernel unwinding using sframe.
>
>> + default:
>> + WARN_ON_ONCE(1);
>> + return -EINVAL;
>> + }
>> + cfa += frame.cfa.offset;
>> +
>> + /*
>> + * CFA typically points to a higher address than RA or FP, so don't
>> + * consume from the stack when we read it.
>> + */
>> + if (frame.cfa.rule & UNWIND_RULE_DEREF &&
>> + !get_word(&state->common, &cfa))
>> + return -EINVAL;
>
> Per the switch above, this could only be
> UNWIND_CFA_RULE_REG_OFFSET_DEREF. As above, do we ever expect to
> encounter that in practice for kernel code?
No. See above.
>
>> +
>> + /* CFA alignment 8 bytes */
>> + if (cfa & 0x7)
>> + return -EINVAL;
>
> If the CFA is the SP upon entry to the function, then per AAPCS64 rules
> it should be aligned to 16 bytes. Otherwise, where has this 8 byte
> alignment requirement come from? Does SFrame mandate that?
That originates from the common unwind user logic (see
kernel/unwind/user.c, unwind_user_next_common()), which currently
assumes 8-byte/4-byte SP alignment for all 64-bit/32-bit architectures.
So checking for 16-byte alignment here would make sense.
>
>> +
>> + /* Get the Return Address (RA) */
>> + switch (frame.ra.rule) {
>> + case UNWIND_RULE_RETAIN:
>> + /* regs only available in topmost/interrupt frame */
>> + if (!regs)
>> + return -EINVAL;
>> + ra = regs->regs[30];
>> + source = KUNWIND_SOURCE_REGS_LR;
>> + break;
>> + /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
Nit: s/UNWIND_USER_RULE_CFA_OFFSET/UNWIND_RULE_CFA_OFFSET/
>
> It would be better for the comment to say *why* that's not implemented.
>
> I assume that's because UNWIND_USER_RULE_CFA_OFFSET would mean that the return
> address is a stack address, and that's obviously not legitimate.
That and SFrame V3 currently cannot represent FP/RA as CFA + offset
(i.e. UNWIND_RULE_CFA_OFFSET; .cfi_val_offset FP/RA).
The comment originates from the common unwind user logic (see
kernel/unwind/user.c). I am open to improving that. What about:
/*
* UNWIND_RULE_CFA_OFFSET not implemented on purpose, as a stack
* address cannot be a legitimate return address value. It is
* also not used (e.g. not represented in sframe).
*/
>
>> + case UNWIND_RULE_CFA_OFFSET_DEREF:
>> + ra = cfa + frame.ra.offset;
>> + break;
>> + case UNWIND_RULE_REG_OFFSET:
>> + case UNWIND_RULE_REG_OFFSET_DEREF:
>> + /* regs only available in topmost/interrupt frame */
>> + if (!regs)
>> + return -EINVAL;
>> + ra = regs->regs[frame.cfa.regnum];
>> + ra += frame.ra.offset;
>> + break;
>
> Do we ever expect UNWIND_RULE_REG_OFFSET or UNWIND_RULE_REG_OFFSET_DEREF
> in practice for kernel code?
No. See above (SFrame V3 flexible FDE).
>
> I don't think we expect UNWIND_RULE_REG_OFFSET unless that's sometimes used
> instead of UNWIND_RULE_RETAIN to express that the return address is in x30
> (with zero offset).
No. Unless there would be nonsense .cfi_register 30, 30, which would
require SFrame V3 flexible FDE to be represented.
@Indu: We may consider treating .cfi_register <reg>, <reg> (for FP/RA)
like .cfi_restore <reg> in the GNU assembler?
>
> Similarly, if the address is on the stack it should be in a frame
> record. Would we ever expect UNWIND_RULE_REG_OFFSET_DEREF rather than
> UNWIND_RULE_CFA_OFFSET_DEREF?
No. See above (SFrame V3 flexible FDE).
>
>> + default:
>> + WARN_ON_ONCE(1);
>> + return -EINVAL;
>> + }
>> +
>> + /* Get the Frame Pointer (FP) */
>> + switch (frame.fp.rule) {
>> + case UNWIND_RULE_RETAIN:
>> + fp = state->common.fp;
>> + break;
>> + /* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
>
> As for RA, the comment should explain why that's not implemented.
I am open to improving the comment in the common unwind user logic.
What about:
/*
* UNWIND_RULE_CFA_OFFSET not implemented on purpose, as it is
* not used (e.g. not represented in sframe).
*/
>
>> + case UNWIND_RULE_CFA_OFFSET_DEREF:
>> + fp = cfa + frame.fp.offset;
>> + break;
>> + case UNWIND_RULE_REG_OFFSET:
>> + case UNWIND_RULE_REG_OFFSET_DEREF:
>> + /* regs only available in topmost/interrupt frame */
>> + if (!regs)
>> + return -EINVAL;
>> + fp = regs->regs[frame.fp.regnum];
>> + fp += frame.fp.offset;
>> + break;
Likewise neither UNWIND_RULE_REG_OFFSET nor UNWIND_RULE_REG_OFFSET_DEREF
can currently occur on arm64. See above (SFrame V3 flexible FDE).
>> + default:
>> + WARN_ON_ONCE(1);
>> + return -EINVAL;
>> + }
>> +
>> + /*
>> + * Consume RA and FP from the stack. The frame record puts FP at a lower
>> + * address than RA, so we always read FP first.
>> + */
>> + if (frame.fp.rule & UNWIND_RULE_DEREF &&
>> + !get_word(&state->common, &fp))
>> + return -EINVAL;
>
> Why is this get_word() rather than get_consume_word()?
>
>> +
>> + if (frame.ra.rule & UNWIND_RULE_DEREF &&
>> + get_consume_word(&state->common, &ra))
>> + return -EINVAL;
>> +
>> + state->common.pc = ra;
>> + state->common.sp = cfa;
>
> As above, the SP can be removed.
>
>> + state->common.fp = fp;
>> +
>> + state->source = source;
>> +
>> + return 0;
>> +}
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (7 preceding siblings ...)
2026-04-28 18:36 ` [PATCH v5 8/8] unwind: arm64: Use sframe to unwind interrupt frames Dylan Hatch
@ 2026-04-29 17:18 ` Mark Rutland
2026-04-30 10:11 ` Jens Remus
9 siblings, 0 replies; 17+ messages in thread
From: Mark Rutland @ 2026-04-29 17:18 UTC (permalink / raw)
To: Dylan Hatch
Cc: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina, Jens Remus, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, linux-arm-kernel, Randy Dunlap
Hi Dylan,
On Tue, Apr 28, 2026 at 06:36:35PM +0000, Dylan Hatch wrote:
> Implement a generic kernel sframe-based [1] unwinder. The main goal is
> to improve reliable stacktrace on arm64 by unwinding across exception
> boundaries.
Thanks for this!
Just as a holding reply: I'm going over the series now, and I have some
partially-written comments that I'll try to finish up and get out
tomorrow.
Mark.
> On x86, the ORC unwinder provides reliable stacktrace through similar
> methodology, but arm64 lacks the necessary support from objtool to
> create ORC unwind tables.
>
> Currently, there's already a sframe unwinder proposed for userspace: [2].
> To maintain common definitions and algorithms for sframe lookup, a
> substantial portion of this patch series aims to refactor the sframe
> lookup code to support both kernel and userspace sframe sections.
>
> Currently, only GNU Binutils support sframe. This series relies on the
> Sframe V3 format, which is supported in binutils 2.46.
>
> These patches are based on Steven Rostedt's sframe/core branch [3],
> which is and aggregation of existing work done for x86 sframe userspace
> unwind, and contains [2]. This branch is, in turn, based on Linux
> v7.0-rc3. This full series (applied to the sframe/core branch) is
> available on github: [4].
>
> Ref:
> [1]: https://sourceware.org/binutils/docs/sframe-spec.html
> [2]: https://lore.kernel.org/lkml/20260127150554.2760964-1-jremus@linux.ibm.com/
> [3]: https://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git/log/?h=sframe/core
> [4]: https://github.com/dylanbhatch/linux/tree/sframe-v5
>
> Changes since v4:
> - (Jens) Fix some minor nits.
> - Handle .init.text and .exit.text in function address validation.
>
> Changes since v3:
>
> - (Jens) Clean up patch summaries.
> - (Jens) Rename SFRAME_LOOKUP -> UNWIND_SFRAME_LOOKUP to fit existing
> naming convention.
> - (Randy) Correct typo errors in new config options.
> - (Jens) Move unwind types to a new unwind_types.h to match their
> usage.
> - (Jens) Update KERNEL_[COPY|GET] to use label-based error handling
> like their userspace counterparts.
> - (Jens) Rename SFRAME_UNWINDER -> HAVE_UNWIND_KERNEL_SFRAME and
> ARCH_SUPPORTS_SFRAME_UNWINDER -> ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
> to match existing naming convention.
> - (Jens) Move HAVE_UNWIND_KERNEL_SFRAME config option to arch/Kconfig.
> - (Jens) Rename/move extern definitions of __[start|end]_sframe into
> include/asm-generic/sections.h.
> - (Jens) Fix up CFI annotations at kernel entry.
> - (Jens) Fix error path for unsorted FDE lookup.
> - (Jens) Zero-out module sframe_section before init.
> - (Jens) For SFRAME_VALIDATION, use an arch-specific function-address
> validation helper so that .rodata.text can be correctly handled on
> arm64 vmlinux.
> - (Jens) Fixup and better comment kernel stacktrace code.
>
> Changes since v2:
>
> The biggest change from v2 is the switch from adding a dedicated,
> in-kernel sframe-lookup library, to refactoring/using the existing
> library developed by Josh, Jens, and Steve. Consequently, this series
> now depends on Sframe V3, though this upgrade would likely have been
> necessary anyway. Below is a full accounting of the changes since v2.
>
> - (Josh) Add stricter reliability checks during unwind.
> - (Puranjay, Indu, Jens) Update to use a common sframe library with
> userpace unwind, thus resolving the need to support
> SFRAME_F_FDE_FUNC_START_PCREL, added in binutils 2.45.
> - (Jens) Add check for sframe V3, thus resolving the prior need for V2
> and SFRAME_F_FDE_FUNC_START_PCREL support.
> - (Will) Add ARCH_SUPPORTS_SFRAME_UNWINDER, remove SFRAME_UNWIND_TABLE
> - (Indu) add support for unsorted FDE tables, allowing for module
> sframe lookups.
> - (Mark) Prefer frame-pointer unwind when possible, for better
> performance.
> - Simplify compile-time logic, adding stubbs when necessary.
> - Add support for in-kernel SFRAME_VALIDATION.
> - Rebase onto core/sframe (with v7.0-rc3 base)
>
> Dylan Hatch (7):
> sframe: Allow kernelspace sframe sections
> arm64, unwind: build kernel with sframe V3 info
> sframe: Provide PC lookup for vmlinux .sframe section
> sframe: Allow unsorted FDEs
> arm64/module, sframe: Add sframe support for modules
> sframe: Introduce in-kernel SFRAME_VALIDATION
> unwind: arm64: Use sframe to unwind interrupt frames
>
> Weinan Liu (1):
> arm64: entry: add unwind info for various kernel entries
>
> MAINTAINERS | 3 +-
> Makefile | 8 +
> arch/Kconfig | 27 +-
> arch/arm64/Kconfig | 1 +
> arch/arm64/include/asm/module.h | 6 +
> arch/arm64/include/asm/sections.h | 1 +
> arch/arm64/include/asm/stacktrace/common.h | 6 +
> arch/arm64/include/asm/unwind_sframe.h | 55 +++
> arch/arm64/kernel/entry.S | 23 +
> arch/arm64/kernel/module.c | 8 +
> arch/arm64/kernel/setup.c | 2 +
> arch/arm64/kernel/stacktrace.c | 246 ++++++++++-
> arch/arm64/kernel/vdso/Makefile | 2 +-
> arch/arm64/kernel/vmlinux.lds.S | 2 +
> .../{unwind_user_sframe.h => unwind_sframe.h} | 6 +-
> arch/x86/include/asm/unwind_user.h | 12 +-
> include/asm-generic/sections.h | 4 +
> include/asm-generic/vmlinux.lds.h | 15 +
> include/linux/sframe.h | 67 ++-
> include/linux/unwind_types.h | 46 ++
> include/linux/unwind_user_types.h | 41 --
> kernel/unwind/Makefile | 2 +-
> kernel/unwind/sframe.c | 410 ++++++++++++++----
> kernel/unwind/user.c | 41 +-
> 24 files changed, 827 insertions(+), 207 deletions(-)
> create mode 100644 arch/arm64/include/asm/unwind_sframe.h
> rename arch/x86/include/asm/{unwind_user_sframe.h => unwind_sframe.h} (50%)
> create mode 100644 include/linux/unwind_types.h
>
> --
> 2.54.0.545.g6539524ca2-goog
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel
2026-04-28 18:36 [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Dylan Hatch
` (8 preceding siblings ...)
2026-04-29 17:18 ` [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel Mark Rutland
@ 2026-04-30 10:11 ` Jens Remus
9 siblings, 0 replies; 17+ messages in thread
From: Jens Remus @ 2026-04-30 10:11 UTC (permalink / raw)
To: Dylan Hatch, Roman Gushchin, Weinan Liu, Will Deacon,
Josh Poimboeuf, Indu Bhagat, Peter Zijlstra, Steven Rostedt,
Catalin Marinas, Jiri Kosina
Cc: Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
joe.lawrence, linux-toolchains, linux-kernel, live-patching,
linux-arm-kernel, Randy Dunlap, Heiko Carstens
On 4/28/2026 8:36 PM, Dylan Hatch wrote:
> Implement a generic kernel sframe-based [1] unwinder. The main goal is
> to improve reliable stacktrace on arm64 by unwinding across exception
> boundaries.
Please add support to initialize the optional sframe unwinder debug
information, either in the appropriate patches in this series or as a
separate patch.
For the module case, I wonder whether it would be preferable to somehow
indicate in the string that it is a module name, e.g.
"(<module-name>)" or "<module-name> (module)".
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -1028,6 +1028,8 @@ void __init init_sframe_table(void)
kernel_sfsec.text_start = (unsigned long)_stext;
kernel_sfsec.text_end = (unsigned long)_etext;
+ dbg_init(&kernel_sfsec);
+
if (WARN_ON(sframe_read_header(&kernel_sfsec)))
return;
if (WARN_ON(sframe_validate_section(&kernel_sfsec)))
@@ -1047,6 +1049,8 @@ void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
sec->text_start = (unsigned long)text;
sec->text_end = (unsigned long)text + text_size;
+ dbg_init(sec);
+
if (WARN_ON(sframe_read_header(sec)))
return;
if (WARN_ON(sframe_validate_section(sec)))
diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h
--- a/kernel/unwind/sframe_debug.h
+++ b/kernel/unwind/sframe_debug.h
@@ -32,6 +32,18 @@ static inline void dbg_init(struct sframe_section *sec)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ if (sec->sec_type == SFRAME_KERNEL) {
+ if (sec == &kernel_sfsec) {
+ sec->filename = kstrdup("(vmlinux)", GFP_KERNEL);
+ } else {
+ struct module *mod = container_of(sec, struct module,
+ arch.sframe_sec);
+ sec->filename = kstrdup(mod->name, GFP_KERNEL);
+ }
+
+ return;
+ }
+
guard(mmap_read_lock)(mm);
vma = vma_lookup(mm, sec->sframe_start);
if (!vma)
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply [flat|nested] 17+ messages in thread