* [PATCH v3 1/8] sframe: Allow kernelspace sframe sections.
From: Dylan Hatch @ 2026-04-06 18:49 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, Jens Remus, linux-arm-kernel
In-Reply-To: <20260406185000.1378082-1-dylanbhatch@google.com>
Generalize the sframe lookup code to support kernelspace sections. This
is done by defining a SFRAME_LOOKUP option that can be activated
separately from UNWIND_USER_SFRAME, as this library will have clients
other than just userspace unwind.
Sframe section location is now tracked in a separate sec_type field to
determine whether user-access functions are necessary to read the sframe
data. Relevant type declarations are moved and renamed to reflect the
non-user sframe support.
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
MAINTAINERS | 2 +-
arch/Kconfig | 4 +
.../{unwind_user_sframe.h => unwind_sframe.h} | 6 +-
arch/x86/include/asm/unwind_user.h | 12 +-
include/linux/sframe.h | 88 ++++--
include/linux/unwind_user_types.h | 41 ---
kernel/unwind/Makefile | 2 +-
kernel/unwind/sframe.c | 270 ++++++++++++------
kernel/unwind/user.c | 40 +--
9 files changed, 286 insertions(+), 179 deletions(-)
rename arch/x86/include/asm/{unwind_user_sframe.h => unwind_sframe.h} (50%)
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c46465ee7a9..cfc7dec88da4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27557,7 +27557,7 @@ F: Documentation/driver-api/uio-howto.rst
F: drivers/uio/
F: include/linux/uio_driver.h
-USERSPACE STACK UNWINDING
+STACK UNWINDING
M: Josh Poimboeuf <jpoimboe@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index f1ed8bc0806d..6695c222c728 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -486,6 +486,9 @@ config AS_SFRAME3
def_bool $(as-instr,.cfi_startproc\n.cfi_endproc,-Wa$(comma)--gsframe-3)
select AS_SFRAME
+config SFRAME_LOOKUP
+ bool
+
config UNWIND_USER
bool
@@ -496,6 +499,7 @@ config HAVE_UNWIND_USER_FP
config HAVE_UNWIND_USER_SFRAME
bool
select UNWIND_USER
+ select SFRAME_LOOKUP
config SFRAME_VALIDATION
bool "Enable .sframe section debugging"
diff --git a/arch/x86/include/asm/unwind_user_sframe.h b/arch/x86/include/asm/unwind_sframe.h
similarity index 50%
rename from arch/x86/include/asm/unwind_user_sframe.h
rename to arch/x86/include/asm/unwind_sframe.h
index d828ae1a4aac..44d42e6ffde4 100644
--- a/arch/x86/include/asm/unwind_user_sframe.h
+++ b/arch/x86/include/asm/unwind_sframe.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_UNWIND_USER_SFRAME_H
-#define _ASM_X86_UNWIND_USER_SFRAME_H
+#ifndef _ASM_X86_UNWIND_SFRAME_H
+#define _ASM_X86_UNWIND_SFRAME_H
#ifdef CONFIG_X86_64
@@ -9,4 +9,4 @@
#endif
-#endif /* _ASM_X86_UNWIND_USER_SFRAME_H */
+#endif /* _ASM_X86_UNWIND_SFRAME_H */
diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h
index ae46906c3b39..8fdab3581b86 100644
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -55,30 +55,30 @@ static inline int unwind_user_get_reg(unsigned long *val, unsigned int regnum)
#define ARCH_INIT_USER_FP_FRAME(ws) \
.cfa = { \
- .rule = UNWIND_USER_CFA_RULE_FP_OFFSET,\
+ .rule = UNWIND_CFA_RULE_FP_OFFSET,\
.offset = 2*(ws), \
}, \
.ra = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -1*(ws), \
}, \
.fp = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -2*(ws), \
}, \
.outermost = false,
#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \
.cfa = { \
- .rule = UNWIND_USER_CFA_RULE_SP_OFFSET,\
+ .rule = UNWIND_CFA_RULE_SP_OFFSET,\
.offset = 1*(ws), \
}, \
.ra = { \
- .rule = UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+ .rule = UNWIND_RULE_CFA_OFFSET_DEREF,\
.offset = -1*(ws), \
}, \
.fp = { \
- .rule = UNWIND_USER_RULE_RETAIN,\
+ .rule = UNWIND_RULE_RETAIN,\
}, \
.outermost = false,
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index b79c5ec09229..673b9edfc921 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -4,36 +4,85 @@
#include <linux/mm_types.h>
#include <linux/srcu.h>
-#include <linux/unwind_user_types.h>
-#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+#define UNWIND_RULE_DEREF BIT(31)
+
+enum unwind_cfa_rule {
+ UNWIND_CFA_RULE_SP_OFFSET, /* CFA = SP + offset */
+ UNWIND_CFA_RULE_FP_OFFSET, /* CFA = FP + offset */
+ UNWIND_CFA_RULE_REG_OFFSET, /* CFA = reg + offset */
+ /* DEREF variants */
+ UNWIND_CFA_RULE_REG_OFFSET_DEREF = /* CFA = *(reg + offset) */
+ UNWIND_CFA_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_cfa_rule_data {
+ enum unwind_cfa_rule rule;
+ s32 offset;
+ unsigned int regnum;
+};
+
+enum unwind_rule {
+ UNWIND_RULE_RETAIN, /* entity = entity */
+ UNWIND_RULE_CFA_OFFSET, /* entity = CFA + offset */
+ UNWIND_RULE_REG_OFFSET, /* entity = register + offset */
+ /* DEREF variants */
+ UNWIND_RULE_CFA_OFFSET_DEREF = /* entity = *(CFA + offset) */
+ UNWIND_RULE_CFA_OFFSET | UNWIND_RULE_DEREF,
+ UNWIND_RULE_REG_OFFSET_DEREF = /* entity = *(register + offset) */
+ UNWIND_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_rule_data {
+ enum unwind_rule rule;
+ s32 offset;
+ unsigned int regnum;
+};
+
+struct unwind_frame {
+ struct unwind_cfa_rule_data cfa;
+ struct unwind_rule_data ra;
+ struct unwind_rule_data fp;
+ bool outermost;
+};
+
+#ifdef CONFIG_SFRAME_LOOKUP
+
+enum sframe_sec_type {
+ SFRAME_KERNEL,
+ SFRAME_USER,
+};
struct sframe_section {
- struct rcu_head rcu;
+ struct rcu_head rcu;
#ifdef CONFIG_DYNAMIC_DEBUG
- const char *filename;
+ const char *filename;
#endif
- unsigned long sframe_start;
- unsigned long sframe_end;
- unsigned long text_start;
- unsigned long text_end;
-
- unsigned long fdes_start;
- unsigned long fres_start;
- unsigned long fres_end;
- unsigned int num_fdes;
-
- signed char ra_off;
- signed char fp_off;
+ enum sframe_sec_type sec_type;
+ unsigned long sframe_start;
+ unsigned long sframe_end;
+ unsigned long text_start;
+ unsigned long text_end;
+
+ unsigned long fdes_start;
+ unsigned long fres_start;
+ unsigned long fres_end;
+ unsigned int num_fdes;
+
+ signed char ra_off;
+ signed char fp_off;
};
+#endif /* CONFIG_SFRAME_LOOKUP */
+
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
#define INIT_MM_SFRAME .sframe_mt = MTREE_INIT(sframe_mt, 0),
extern void sframe_free_mm(struct mm_struct *mm);
extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
unsigned long text_start, unsigned long text_end);
extern int sframe_remove_section(unsigned long sframe_addr);
-extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame);
static inline bool current_has_sframe(void)
{
@@ -42,6 +91,8 @@ static inline bool current_has_sframe(void)
return mm && !mtree_empty(&mm->sframe_mt);
}
+extern int sframe_find_user(unsigned long ip, struct unwind_frame *frame);
+
#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
#define INIT_MM_SFRAME
@@ -52,9 +103,10 @@ static inline int sframe_add_section(unsigned long sframe_start, unsigned long s
return -ENOSYS;
}
static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; }
-static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; }
static inline bool current_has_sframe(void) { return false; }
+static inline int sframe_find_user(unsigned long ip, struct unwind_frame *frame) { return -ENOSYS; }
+
#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
#endif /* _LINUX_SFRAME_H */
diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h
index 059e5c76f2f3..646e5fb774db 100644
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -27,47 +27,6 @@ struct unwind_stacktrace {
unsigned long *entries;
};
-#define UNWIND_USER_RULE_DEREF BIT(31)
-
-enum unwind_user_cfa_rule {
- UNWIND_USER_CFA_RULE_SP_OFFSET, /* CFA = SP + offset */
- UNWIND_USER_CFA_RULE_FP_OFFSET, /* CFA = FP + offset */
- UNWIND_USER_CFA_RULE_REG_OFFSET, /* CFA = reg + offset */
- /* DEREF variants */
- UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF = /* CFA = *(reg + offset) */
- UNWIND_USER_CFA_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_cfa_rule_data {
- enum unwind_user_cfa_rule rule;
- s32 offset;
- unsigned int regnum;
-};
-
-enum unwind_user_rule {
- UNWIND_USER_RULE_RETAIN, /* entity = entity */
- UNWIND_USER_RULE_CFA_OFFSET, /* entity = CFA + offset */
- UNWIND_USER_RULE_REG_OFFSET, /* entity = register + offset */
- /* DEREF variants */
- UNWIND_USER_RULE_CFA_OFFSET_DEREF = /* entity = *(CFA + offset) */
- UNWIND_USER_RULE_CFA_OFFSET | UNWIND_USER_RULE_DEREF,
- UNWIND_USER_RULE_REG_OFFSET_DEREF = /* entity = *(register + offset) */
- UNWIND_USER_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_rule_data {
- enum unwind_user_rule rule;
- s32 offset;
- unsigned int regnum;
-};
-
-struct unwind_user_frame {
- struct unwind_user_cfa_rule_data cfa;
- struct unwind_user_rule_data ra;
- struct unwind_user_rule_data fp;
- bool outermost;
-};
-
struct unwind_user_state {
unsigned long ip;
unsigned long sp;
diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile
index 146038165865..6b51302308d0 100644
--- a/kernel/unwind/Makefile
+++ b/kernel/unwind/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_UNWIND_USER) += user.o deferred.o
- obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME) += sframe.o
+ obj-$(CONFIG_SFRAME_LOOKUP) += sframe.o
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index f24997e84e05..cad4384dfb4f 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -12,8 +12,7 @@
#include <linux/mm.h>
#include <linux/string_helpers.h>
#include <linux/sframe.h>
-#include <asm/unwind_user_sframe.h>
-#include <linux/unwind_user_types.h>
+#include <asm/unwind_sframe.h>
#include "sframe.h"
#include "sframe_debug.h"
@@ -44,8 +43,6 @@ struct sframe_fre_internal {
unsigned char dw_size;
};
-DEFINE_STATIC_SRCU(sframe_srcu);
-
static __always_inline unsigned char fre_type_to_size(unsigned char fre_type)
{
if (fre_type > 2)
@@ -60,6 +57,78 @@ static __always_inline unsigned char dataword_size_enum_to_size(unsigned char da
return 1 << dataword_size;
}
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+DEFINE_STATIC_SRCU(sframe_srcu);
+
+#define UNSAFE_USER_COPY(to, from, size, label) \
+ unsafe_copy_from_user(to, (void __user *)from, size, label)
+
+#define UNSAFE_USER_GET(to, from, type, label) \
+ unsafe_get_user(to, (type __user *)from, label)
+
+#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#define UNSAFE_USER_COPY(to, from, size, label) do { \
+ (void)to; (void)from; (void)size; \
+ goto label; \
+} while (0)
+
+#define UNSAFE_USER_GET(to, from, type, label) do { \
+ (void)to; (void)from; \
+ goto label; \
+} while (0)
+
+#endif /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#ifdef CONFIG_SFRAME_UNWINDER
+
+#define KERNEL_COPY(to, from, size) memcpy(to, (void *)from, size)
+#define KERNEL_GET(to, from, type) ({ (to) = *(type *)(from); })
+
+#else /* !CONFIG_SFRAME_UNWINDER */
+
+#define KERNEL_COPY(to, from, size) do { \
+ (void)(to); (void)(from); (void)size; \
+ return -EFAULT; \
+} while (0)
+
+#define KERNEL_GET(to, from, type) do { \
+ (void)(to); (void)(from); \
+ return -EFAULT; \
+} while (0)
+
+
+#endif /* !CONFIG_SFRAME_UNWINDER */
+
+#define DATA_COPY(sec, to, from, size, label) \
+({ \
+ switch (sec->sec_type) { \
+ case SFRAME_KERNEL: \
+ KERNEL_COPY(to, from, size); \
+ break; \
+ case SFRAME_USER: \
+ UNSAFE_USER_COPY(to, from, size, label); \
+ break; \
+ default: \
+ return -EFAULT; \
+ } \
+})
+
+#define DATA_GET(sec, to, from, type, label) \
+({ \
+ switch (sec->sec_type) { \
+ case SFRAME_KERNEL: \
+ KERNEL_GET(to, from, type); \
+ break; \
+ case SFRAME_USER: \
+ UNSAFE_USER_GET(to, from, type, label); \
+ break; \
+ default: \
+ return -EFAULT; \
+ } \
+})
+
static __always_inline int __read_fde(struct sframe_section *sec,
unsigned int fde_num,
struct sframe_fde_internal *fde)
@@ -69,8 +138,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
struct sframe_fda_v3 _fda;
fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde_v3));
- unsafe_copy_from_user(&_fde, (void __user *)fde_addr,
- sizeof(struct sframe_fde_v3), Efault);
+ DATA_COPY(sec, &_fde, fde_addr,
+ sizeof(struct sframe_fde_v3), Efault);
func_addr = fde_addr + _fde.func_start_off;
if (func_addr < sec->text_start || func_addr > sec->text_end)
@@ -79,8 +148,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
fda_addr = sec->fres_start + _fde.fres_off;
if (fda_addr + sizeof(struct sframe_fda_v3) > sec->fres_end)
return -EINVAL;
- unsafe_copy_from_user(&_fda, (void __user *)fda_addr,
- sizeof(struct sframe_fda_v3), Efault);
+ DATA_COPY(sec, &_fda, fda_addr,
+ sizeof(struct sframe_fda_v3), Efault);
fde->func_addr = func_addr;
fde->func_size = _fde.func_size;
@@ -102,21 +171,21 @@ static __always_inline int __find_fde(struct sframe_section *sec,
struct sframe_fde_internal *fde)
{
unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX;
- struct sframe_fde_v3 __user *first, *low, *high, *found = NULL;
+ struct sframe_fde_v3 *first, *low, *high, *found = NULL;
int ret;
- first = (void __user *)sec->fdes_start;
+ first = (void *)sec->fdes_start;
low = first;
high = first + sec->num_fdes - 1;
while (low <= high) {
- struct sframe_fde_v3 __user *mid;
+ struct sframe_fde_v3 *mid;
s64 func_off;
unsigned long func_addr;
mid = low + ((high - low) / 2);
- unsafe_get_user(func_off, (s64 __user *)mid, Efault);
+ DATA_GET(sec, func_off, mid, s64, Efault);
func_addr = (unsigned long)mid + func_off;
if (ip >= func_addr) {
@@ -154,47 +223,47 @@ static __always_inline int __find_fde(struct sframe_section *sec,
return -EFAULT;
}
-#define ____UNSAFE_GET_USER_INC(to, from, type, label) \
+#define ____GET_INC(sec, to, from, type, label) \
({ \
type __to; \
- unsafe_get_user(__to, (type __user *)from, label); \
+ DATA_GET(sec, __to, from, type, label); \
from += sizeof(__to); \
to = __to; \
})
-#define __UNSAFE_GET_USER_INC(to, from, size, label, u_or_s) \
+#define __GET_INC(sec, to, from, size, label, u_or_s) \
({ \
switch (size) { \
case 1: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##8, label); \
+ ____GET_INC(sec, to, from, u_or_s##8, label); \
break; \
case 2: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##16, label); \
+ ____GET_INC(sec, to, from, u_or_s##16, label); \
break; \
case 4: \
- ____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label); \
+ ____GET_INC(sec, to, from, u_or_s##32, label); \
break; \
default: \
return -EFAULT; \
} \
})
-#define UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label) \
- __UNSAFE_GET_USER_INC(to, from, size, label, u)
+#define GET_UNSIGNED_INC(sec, to, from, size, label) \
+ __GET_INC(sec, to, from, size, label, u)
-#define UNSAFE_GET_USER_SIGNED_INC(to, from, size, label) \
- __UNSAFE_GET_USER_INC(to, from, size, label, s)
+#define GET_SIGNED_INC(sec, to, from, size, label) \
+ __GET_INC(sec, to, from, size, label, s)
-#define UNSAFE_GET_USER_INC(to, from, size, label) \
- _Generic(to, \
- u8 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u16 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u32 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- u64 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \
- s8 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s16 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s32 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \
- s64 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label))
+#define GET_INC(sec, to, from, size, label) \
+ _Generic(to, \
+ u8 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u16 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u32 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ u64 : GET_UNSIGNED_INC(sec, to, from, size, label), \
+ s8 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s16 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s32 : GET_SIGNED_INC(sec, to, from, size, label), \
+ s64 : GET_SIGNED_INC(sec, to, from, size, label))
static __always_inline int
__read_regular_fre_datawords(struct sframe_section *sec,
@@ -207,19 +276,19 @@ __read_regular_fre_datawords(struct sframe_section *sec,
s32 cfa_off, ra_off, fp_off;
unsigned int cfa_regnum;
- UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_off, cur, dataword_size, Efault);
dataword_count--;
ra_off = sec->ra_off;
if (!ra_off && dataword_count) {
dataword_count--;
- UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+ GET_INC(sec, ra_off, cur, dataword_size, Efault);
}
fp_off = sec->fp_off;
if (!fp_off && dataword_count) {
dataword_count--;
- UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+ GET_INC(sec, fp_off, cur, dataword_size, Efault);
}
if (dataword_count)
@@ -255,17 +324,17 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
if (dataword_count < 2)
return -EFAULT;
- UNSAFE_GET_USER_INC(cfa_ctl, cur, dataword_size, Efault);
- UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, cfa_off, cur, dataword_size, Efault);
dataword_count -= 2;
ra_off = sec->ra_off;
ra_ctl = ra_off ? 2 : 0; /* regnum=0, deref_p=(ra_off != 0), reg_p=0 */
if (dataword_count >= 2) {
- UNSAFE_GET_USER_INC(ra_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, ra_ctl, cur, dataword_size, Efault);
dataword_count--;
if (ra_ctl) {
- UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+ GET_INC(sec, ra_off, cur, dataword_size, Efault);
dataword_count--;
} else {
/* Padding RA location info */
@@ -276,10 +345,10 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
fp_off = sec->fp_off;
fp_ctl = fp_off ? 2 : 0; /* regnum=0, deref_p=(fp_off != 0), reg_p=0 */
if (dataword_count >= 2) {
- UNSAFE_GET_USER_INC(fp_ctl, cur, dataword_size, Efault);
+ GET_INC(sec, fp_ctl, cur, dataword_size, Efault);
dataword_count--;
if (fp_ctl) {
- UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+ GET_INC(sec, fp_off, cur, dataword_size, Efault);
dataword_count--;
} else {
/* Padding FP location info */
@@ -353,11 +422,11 @@ static __always_inline int __read_fre(struct sframe_section *sec,
if (fre_addr + addr_size + 1 > sec->fres_end)
return -EFAULT;
- UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
+ GET_INC(sec, ip_off, cur, addr_size, Efault);
if (fde_pctype == SFRAME_FDE_PCTYPE_INC && ip_off > fde->func_size)
return -EFAULT;
- UNSAFE_GET_USER_INC(info, cur, 1, Efault);
+ GET_INC(sec, info, cur, 1, Efault);
dataword_count = SFRAME_V3_FRE_DATAWORD_COUNT(info);
dataword_size = dataword_size_enum_to_size(SFRAME_V3_FRE_DATAWORD_SIZE(info));
if (!dataword_size)
@@ -380,7 +449,7 @@ static __always_inline int __read_fre(struct sframe_section *sec,
}
static __always_inline int
-sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
+sframe_init_cfa_rule_data(struct unwind_cfa_rule_data *cfa_rule_data,
u32 ctlword, s32 offset)
{
bool deref_p = SFRAME_V3_FLEX_FDE_CTLWORD_DEREF_P(ctlword);
@@ -391,13 +460,13 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
switch (regnum) {
case SFRAME_REG_SP:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_SP_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_SP_OFFSET;
break;
case SFRAME_REG_FP:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_FP_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_FP_OFFSET;
break;
default:
- cfa_rule_data->rule = UNWIND_USER_CFA_RULE_REG_OFFSET;
+ cfa_rule_data->rule = UNWIND_CFA_RULE_REG_OFFSET;
cfa_rule_data->regnum = regnum;
}
} else {
@@ -405,7 +474,7 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
}
if (deref_p)
- cfa_rule_data->rule |= UNWIND_USER_RULE_DEREF;
+ cfa_rule_data->rule |= UNWIND_RULE_DEREF;
cfa_rule_data->offset = offset;
@@ -413,27 +482,27 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
}
static __always_inline void
-sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
+sframe_init_rule_data(struct unwind_rule_data *rule_data,
u32 ctlword, s32 offset)
{
bool deref_p = SFRAME_V3_FLEX_FDE_CTLWORD_DEREF_P(ctlword);
bool reg_p = SFRAME_V3_FLEX_FDE_CTLWORD_REG_P(ctlword);
if (!ctlword && !offset) {
- rule_data->rule = UNWIND_USER_RULE_RETAIN;
+ rule_data->rule = UNWIND_RULE_RETAIN;
return;
}
if (reg_p) {
unsigned int regnum = SFRAME_V3_FLEX_FDE_CTLWORD_REGNUM(ctlword);
- rule_data->rule = UNWIND_USER_RULE_REG_OFFSET;
+ rule_data->rule = UNWIND_RULE_REG_OFFSET;
rule_data->regnum = regnum;
} else {
- rule_data->rule = UNWIND_USER_RULE_CFA_OFFSET;
+ rule_data->rule = UNWIND_RULE_CFA_OFFSET;
}
if (deref_p)
- rule_data->rule |= UNWIND_USER_RULE_DEREF;
+ rule_data->rule |= UNWIND_RULE_DEREF;
rule_data->offset = offset;
}
@@ -441,7 +510,7 @@ sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
static __always_inline int __find_fre(struct sframe_section *sec,
struct sframe_fde_internal *fde,
unsigned long ip,
- struct unwind_user_frame *frame)
+ struct unwind_frame *frame)
{
unsigned char fde_pctype = SFRAME_V3_FDE_PCTYPE(fde->info);
struct sframe_fre_internal *fre, *prev_fre = NULL;
@@ -501,40 +570,18 @@ static __always_inline int __find_fre(struct sframe_section *sec,
return 0;
}
-int sframe_find(unsigned long ip, struct unwind_user_frame *frame)
+static __always_inline int __sframe_find(struct sframe_section *sec,
+ unsigned long ip,
+ struct unwind_frame *frame)
{
- struct mm_struct *mm = current->mm;
- struct sframe_section *sec;
struct sframe_fde_internal fde;
int ret;
- if (!mm)
- return -EINVAL;
-
- guard(srcu)(&sframe_srcu);
-
- sec = mtree_load(&mm->sframe_mt, ip);
- if (!sec)
- return -EINVAL;
-
- if (!user_read_access_begin((void __user *)sec->sframe_start,
- sec->sframe_end - sec->sframe_start))
- return -EFAULT;
-
ret = __find_fde(sec, ip, &fde);
if (ret)
- goto end;
-
- ret = __find_fre(sec, &fde, ip, frame);
-end:
- user_read_access_end();
-
- if (ret == -EFAULT) {
- dbg_sec("removing bad .sframe section\n");
- WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
- }
+ return ret;
- return ret;
+ return __find_fre(sec, &fde, ip, frame);
}
#ifdef CONFIG_SFRAME_VALIDATION
@@ -657,20 +704,23 @@ static int sframe_validate_section(struct sframe_section *sec) { return 0; }
#endif /* !CONFIG_SFRAME_VALIDATION */
-static void free_section(struct sframe_section *sec)
-{
- dbg_free(sec);
- kfree(sec);
-}
-
static int sframe_read_header(struct sframe_section *sec)
{
unsigned long header_end, fdes_start, fdes_end, fres_start, fres_end;
struct sframe_header shdr;
unsigned int num_fdes;
- if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
- dbg_sec("header usercopy failed\n");
+ switch (sec->sec_type) {
+ case SFRAME_USER:
+ if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
+ dbg_sec("header usercopy failed\n");
+ return -EFAULT;
+ }
+ break;
+ case SFRAME_KERNEL:
+ shdr = *(struct sframe_header *)sec->sframe_start;
+ break;
+ default:
return -EFAULT;
}
@@ -717,6 +767,45 @@ static int sframe_read_header(struct sframe_section *sec)
return 0;
}
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+int sframe_find_user(unsigned long ip, struct unwind_frame *frame)
+{
+ struct mm_struct *mm = current->mm;
+ struct sframe_section *sec;
+ int ret;
+
+ if (!mm)
+ return -EINVAL;
+
+ guard(srcu)(&sframe_srcu);
+
+ sec = mtree_load(&mm->sframe_mt, ip);
+ if (!sec)
+ return -EINVAL;
+
+ if (!user_read_access_begin((void __user *)sec->sframe_start,
+ sec->sframe_end - sec->sframe_start))
+ return -EFAULT;
+
+ ret = __sframe_find(sec, ip, frame);
+
+ user_read_access_end();
+
+ if (ret == -EFAULT) {
+ dbg_sec("removing bad .sframe section\n");
+ WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
+ }
+
+ return ret;
+}
+
+static void free_section(struct sframe_section *sec)
+{
+ dbg_free(sec);
+ kfree(sec);
+}
+
int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
unsigned long text_start, unsigned long text_end)
{
@@ -753,6 +842,7 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
if (!sec)
return -ENOMEM;
+ sec->sec_type = SFRAME_USER;
sec->sframe_start = sframe_start;
sec->sframe_end = sframe_end;
sec->text_start = text_start;
@@ -838,3 +928,5 @@ void sframe_free_mm(struct mm_struct *mm)
mtree_destroy(&mm->sframe_mt);
}
+
+#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index eb7d9489f671..f9abd08ed83b 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -28,7 +28,7 @@ get_user_word(unsigned long *word, unsigned long base, int off, unsigned int ws)
}
static int unwind_user_next_common(struct unwind_user_state *state,
- const struct unwind_user_frame *frame)
+ const struct unwind_frame *frame)
{
unsigned long cfa, fp, ra;
@@ -40,16 +40,16 @@ static int unwind_user_next_common(struct unwind_user_state *state,
/* Get the Canonical Frame Address (CFA) */
switch (frame->cfa.rule) {
- case UNWIND_USER_CFA_RULE_SP_OFFSET:
+ case UNWIND_CFA_RULE_SP_OFFSET:
cfa = state->sp;
break;
- case UNWIND_USER_CFA_RULE_FP_OFFSET:
+ case UNWIND_CFA_RULE_FP_OFFSET:
if (state->fp < state->sp)
return -EINVAL;
cfa = state->fp;
break;
- case UNWIND_USER_CFA_RULE_REG_OFFSET:
- case UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF:
+ case UNWIND_CFA_RULE_REG_OFFSET:
+ case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&cfa, frame->cfa.regnum))
return -EINVAL;
break;
@@ -58,7 +58,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
return -EINVAL;
}
cfa += frame->cfa.offset;
- if (frame->cfa.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->cfa.rule & UNWIND_RULE_DEREF &&
get_user_word(&cfa, cfa, 0, state->ws))
return -EINVAL;
@@ -76,16 +76,16 @@ static int unwind_user_next_common(struct unwind_user_state *state,
/* Get the Return Address (RA) */
switch (frame->ra.rule) {
- case UNWIND_USER_RULE_RETAIN:
+ case UNWIND_RULE_RETAIN:
if (!state->topmost || unwind_user_get_ra_reg(&ra))
return -EINVAL;
break;
/* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
- case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
ra = cfa + frame->ra.offset;
break;
- case UNWIND_USER_RULE_REG_OFFSET:
- case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&ra, frame->ra.regnum))
return -EINVAL;
ra += frame->ra.offset;
@@ -94,21 +94,21 @@ static int unwind_user_next_common(struct unwind_user_state *state,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (frame->ra.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->ra.rule & UNWIND_RULE_DEREF &&
get_user_word(&ra, ra, 0, state->ws))
return -EINVAL;
/* Get the Frame Pointer (FP) */
switch (frame->fp.rule) {
- case UNWIND_USER_RULE_RETAIN:
+ case UNWIND_RULE_RETAIN:
fp = state->fp;
break;
/* UNWIND_USER_RULE_CFA_OFFSET not implemented on purpose */
- case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+ case UNWIND_RULE_CFA_OFFSET_DEREF:
fp = cfa + frame->fp.offset;
break;
- case UNWIND_USER_RULE_REG_OFFSET:
- case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+ case UNWIND_RULE_REG_OFFSET:
+ case UNWIND_RULE_REG_OFFSET_DEREF:
if (!state->topmost || unwind_user_get_reg(&fp, frame->fp.regnum))
return -EINVAL;
fp += frame->fp.offset;
@@ -117,7 +117,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (frame->fp.rule & UNWIND_USER_RULE_DEREF &&
+ if (frame->fp.rule & UNWIND_RULE_DEREF &&
get_user_word(&fp, fp, 0, state->ws))
return -EINVAL;
@@ -133,13 +133,13 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
struct pt_regs *regs = task_pt_regs(current);
if (state->topmost && unwind_user_at_function_start(regs)) {
- const struct unwind_user_frame fp_entry_frame = {
+ const struct unwind_frame fp_entry_frame = {
ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
};
return unwind_user_next_common(state, &fp_entry_frame);
}
- const struct unwind_user_frame fp_frame = {
+ const struct unwind_frame fp_frame = {
ARCH_INIT_USER_FP_FRAME(state->ws)
};
return unwind_user_next_common(state, &fp_frame);
@@ -147,10 +147,10 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
static int unwind_user_next_sframe(struct unwind_user_state *state)
{
- struct unwind_user_frame frame;
+ struct unwind_frame frame;
/* sframe expects the frame to be local storage */
- if (sframe_find(state->ip, &frame))
+ if (sframe_find_user(state->ip, &frame))
return -ENOENT;
return unwind_user_next_common(state, &frame);
}
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related
* [PATCH v3 0/8] unwind, arm64: add sframe unwinder for kernel
From: Dylan Hatch @ 2026-04-06 18:49 UTC (permalink / raw)
To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
Jiri Kosina
Cc: Dylan Hatch, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
live-patching, Jens Remus, linux-arm-kernel
Implement a generic kernel sframe-based [1] unwinder. The main goal is
to improve reliable stacktrace on arm64 by unwinding across exception
boundaries.
On x86, the ORC unwinder provides reliable stacktrace through similar
methodology, but arm64 lacks the necessary support from objtool to
create ORC unwind tables.
Currently, there's already a sframe unwinder proposed for userspace: [2].
To maintain common definitions and algorithms for sframe lookup, a
substantial portion of this patch series aims to refactor the sframe
lookup code to support both kernel and userspace sframe sections.
Currently, only GNU Binutils support sframe. This series relies on the
Sframe V3 format, which is supported in binutils 2.46.
These patches are based on Steven Rostedt's sframe/core branch [3],
which is an aggregation of existing work done for x86 sframe userspace
unwind, and contains [2]. This branch is, in turn, based on Linux
v7.0-rc3. This full series (applied to the sframe/core branch) is
available on github: [4].
Ref:
[1]: https://sourceware.org/binutils/docs/sframe-spec.html
[2]: https://lore.kernel.org/lkml/20260127150554.2760964-1-jremus@linux.ibm.com/
[3]: https://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git/log/?h=sframe/core
[4]: https://github.com/dylanbhatch/linux/tree/sframe-v3-with-v3
Changes since v2:
The biggest change from v2 is the switch from adding a dedicated,
in-kernel sframe-lookup library, to refactoring/using the existing
library developed by Josh, Jens, and Steve. Consequently, this series
now depends on Sframe V3, though this upgrade would likely have been
necessary anyway. Below is a full accounting of the changes since v2.
- (Josh) Add stricter reliability checks during unwind.
- (Puranjay, Indu, Jens) Update to use a common sframe library with
userspace unwind, thus resolving the need to support
SFRAME_F_FDE_FUNC_START_PCREL, added in binutils 2.45.
- (Jens) Add check for sframe V3, thus resolving the prior need for V2
and SFRAME_F_FDE_FUNC_START_PCREL support.
- (Will) Add ARCH_SUPPORTS_SFRAME_UNWINDER, remove SFRAME_UNWIND_TABLE
- (Indu) add support for unsorted FDE tables, allowing for module
sframe lookups.
- (Mark) Prefer frame-pointer unwind when possible, for better
performance.
- Simplify compile-time logic, adding stubs when necessary.
- Add support for in-kernel SFRAME_VALIDATION.
- Rebase onto core/sframe (with v7.0-rc3 base)
Dylan Hatch (7):
sframe: Allow kernelspace sframe sections.
arm64, unwind: build kernel with sframe V3 info
sframe: Provide PC lookup for vmlinux .sframe section.
sframe: Allow unsorted FDEs.
arm64/module, sframe: Add sframe support for modules.
sframe: Introduce in-kernel SFRAME_VALIDATION.
unwind: arm64: Use sframe to unwind interrupt frames.
Weinan Liu (1):
arm64: entry: add unwind info for various kernel entries
MAINTAINERS | 3 +-
Makefile | 8 +
arch/Kconfig | 13 +-
arch/arm64/Kconfig | 1 +
arch/arm64/Kconfig.debug | 13 +
arch/arm64/include/asm/module.h | 6 +
arch/arm64/include/asm/stacktrace/common.h | 6 +
arch/arm64/include/asm/unwind_sframe.h | 12 +
arch/arm64/kernel/entry.S | 10 +
arch/arm64/kernel/module.c | 8 +
arch/arm64/kernel/setup.c | 2 +
arch/arm64/kernel/stacktrace.c | 242 ++++++++++-
arch/arm64/kernel/vdso/Makefile | 2 +-
.../{unwind_user_sframe.h => unwind_sframe.h} | 6 +-
arch/x86/include/asm/unwind_user.h | 12 +-
include/asm-generic/vmlinux.lds.h | 15 +
include/linux/sframe.h | 105 ++++-
include/linux/unwind_user_types.h | 41 --
kernel/unwind/Makefile | 2 +-
kernel/unwind/sframe.c | 408 ++++++++++++++----
kernel/unwind/user.c | 40 +-
21 files changed, 749 insertions(+), 206 deletions(-)
create mode 100644 arch/arm64/include/asm/unwind_sframe.h
rename arch/x86/include/asm/{unwind_user_sframe.h => unwind_sframe.h} (50%)
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply
* Re: [PATCH 0/2] Noinstr fixes for K[CA]SAN with GCOV
From: Marco Elver @ 2026-01-27 23:21 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Peter Zijlstra, Ard Biesheuvel, Kees Cook, Brendan Jackman,
Andrey Ryabinin, Alexander Potapenko, Andrey Konovalov,
Dmitry Vyukov, Vincenzo Frascino, kasan-dev, linux-kernel,
linux-toolchains
In-Reply-To: <aUP5j7W8S7koM13M@gate>
On Thu, 18 Dec 2025 at 13:54, Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> Hi!
>
> On Thu, Dec 18, 2025 at 01:18:13PM +0100, Peter Zijlstra wrote:
> > On Thu, Dec 18, 2025 at 05:58:44AM -0600, Segher Boessenkool wrote:
> >
> > > You might have more success getting the stuff backported to some
> > > distro(s) you care about? Or get people to use newer compilers more
> > > quickly of course, "five years" before people have it is pretty
> > > ridiculous, two years is at the tail end of things already.
> >
> > There is a difference between having and requiring it :/ Our current
> > minimum compiler version is gcc-8 or clang-15 (IIRC).
>
> Very much so. If you have good reasons for requiring it, make sure you
> voice that with your backport request!
>
> Nothing we (again, GCC) do is *only* motivated by procedures. We can do
> unusual things in unusual situations. But you need extraordinary
> evidence for why extraordinary things would be needed, of course. Does
> that apply here, you think?
>
> > On the bright side, I think we can be more aggressively with compiler
> > versions for debug builds vs regular builds. Not being able to build a
> > KASAN/UBSAN/whateverSAN kernel isn't too big of a problem (IMO).
>
> Absolutely. Just document the feature as needing a recent compiler!
For future reference:
https://discourse.llvm.org/t/explicit-sanitizer-checks-with-builtin-allow-sanitize-check/89383
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123442
Clang 22 should have the builtin.
^ permalink raw reply
* Re: [PATCH 0/2] Noinstr fixes for K[CA]SAN with GCOV
From: Segher Boessenkool @ 2025-12-18 12:54 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Marco Elver, Ard Biesheuvel, Kees Cook, Brendan Jackman,
Andrey Ryabinin, Alexander Potapenko, Andrey Konovalov,
Dmitry Vyukov, Vincenzo Frascino, kasan-dev, linux-kernel,
linux-toolchains
In-Reply-To: <20251218121813.GA2378051@noisy.programming.kicks-ass.net>
Hi!
On Thu, Dec 18, 2025 at 01:18:13PM +0100, Peter Zijlstra wrote:
> On Thu, Dec 18, 2025 at 05:58:44AM -0600, Segher Boessenkool wrote:
>
> > You might have more success getting the stuff backported to some
> > distro(s) you care about? Or get people to use newer compilers more
> > quickly of course, "five years" before people have it is pretty
> > ridiculous, two years is at the tail end of things already.
>
> There is a difference between having and requiring it :/ Our current
> minimum compiler version is gcc-8 or clang-15 (IIRC).
Very much so. If you have good reasons for requiring it, make sure you
voice that with your backport request!
Nothing we (again, GCC) do is *only* motivated by procedures. We can do
unusual things in unusual situations. But you need extraordinary
evidence for why extraordinary things would be needed, of course. Does
that apply here, you think?
> On the bright side, I think we can be more aggressively with compiler
> versions for debug builds vs regular builds. Not being able to build a
> KASAN/UBSAN/whateverSAN kernel isn't too big of a problem (IMO).
Absolutely. Just document the feature as needing a recent compiler!
Segher
^ permalink raw reply
* Re: [PATCH 0/2] Noinstr fixes for K[CA]SAN with GCOV
From: Peter Zijlstra @ 2025-12-18 12:18 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Marco Elver, Ard Biesheuvel, Kees Cook, Brendan Jackman,
Andrey Ryabinin, Alexander Potapenko, Andrey Konovalov,
Dmitry Vyukov, Vincenzo Frascino, kasan-dev, linux-kernel,
linux-toolchains
In-Reply-To: <aUPsdDY09Jzn3ILf@gate>
On Thu, Dec 18, 2025 at 05:58:44AM -0600, Segher Boessenkool wrote:
> You might have more success getting the stuff backported to some
> distro(s) you care about? Or get people to use newer compilers more
> quickly of course, "five years" before people have it is pretty
> ridiculous, two years is at the tail end of things already.
There is a difference between having and requiring it :/ Our current
minimum compiler version is gcc-8 or clang-15 (IIRC).
On the bright side, I think we can be more aggressive with compiler
versions for debug builds vs regular builds. Not being able to build a
KASAN/UBSAN/whateverSAN kernel isn't too big of a problem (IMO).
^ permalink raw reply
* Re: [PATCH 0/2] Noinstr fixes for K[CA]SAN with GCOV
From: Segher Boessenkool @ 2025-12-18 11:58 UTC (permalink / raw)
To: Marco Elver
Cc: Peter Zijlstra, Ard Biesheuvel, Kees Cook, Brendan Jackman,
Andrey Ryabinin, Alexander Potapenko, Andrey Konovalov,
Dmitry Vyukov, Vincenzo Frascino, kasan-dev, linux-kernel,
linux-toolchains
In-Reply-To: <CANpmjNOQJVRf5Ffk0-WMcFkTfAuh5J-ZoPHC+4BdXgLLf22Rjg@mail.gmail.com>
Hi!
On Thu, Dec 18, 2025 at 10:56:48AM +0100, Marco Elver wrote:
> On Thu, 18 Dec 2025 at 10:51, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Sat, Dec 13, 2025 at 08:59:44AM +0900, Ard Biesheuvel wrote:
> >
> > > > After that I sat down and finally got around to implement the builtin
> > > > that should solve this once and for all, regardless of where it's
> > > > called: https://github.com/llvm/llvm-project/pull/172030
> > > > What this will allow us to do is to remove the
> > > > "K[AC]SAN_SANITIZE_noinstr.o := n" lines from the Makefile, and purely
> > > > rely on the noinstr attribute, even in the presence of explicit
> > > > instrumentation calls.
> > > >
> > >
> > > Excellent! Thanks for the quick fix. Happy to test and/or look into
> > > the kernel side of this once this lands.
> >
> > Well, would not GCC need to grow the same thing and then we must wait
> > until these versions are the minimum supported versions for sanitizer
> > builds.
> >
> > I mean, the extension is nice, but I'm afraid we can't really use it
> > until much later :/
>
> Unfortunately, yes. But let's try to get the builtin into Clang and
> GCC now (for the latter, need to Cc GCC folks to help).
>
> Then we wait for 5 years. :-)
>
> There's a possibility to try and backport it to stable Clang and GCC
> versions, but it's a long stretch (extremely unlikely).
We (GCC) do not generally want to backport features; even for
bugfixes the risk/reward ratio comes into the picture. It *can* be done
if some feature is important enough of course. If you have to wonder or
ask if your feature is important enough, it is not.
The reason we do not want backports of features is that it increases
maintenance cost a lot, and so, development costs as well.
I guess LLVM has a similar policy, but I of course do not speak for
them.
You might have more success getting the stuff backported to some
distro(s) you care about? Or get people to use newer compilers more
quickly of course, "five years" before people have it is pretty
ridiculous, two years is at the tail end of things already.
Segher
^ permalink raw reply
* Re: [PATCH 0/2] Noinstr fixes for K[CA]SAN with GCOV
From: Marco Elver @ 2025-12-18 9:56 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Ard Biesheuvel, Kees Cook, Brendan Jackman, Andrey Ryabinin,
Alexander Potapenko, Andrey Konovalov, Dmitry Vyukov,
Vincenzo Frascino, kasan-dev, linux-kernel, linux-toolchains
In-Reply-To: <20251218095112.GX3707837@noisy.programming.kicks-ass.net>
On Thu, 18 Dec 2025 at 10:51, Peter Zijlstra <peterz@infradead.org> wrote:
> On Sat, Dec 13, 2025 at 08:59:44AM +0900, Ard Biesheuvel wrote:
>
> > > After that I sat down and finally got around to implement the builtin
> > > that should solve this once and for all, regardless of where it's
> > > called: https://github.com/llvm/llvm-project/pull/172030
> > > What this will allow us to do is to remove the
> > > "K[AC]SAN_SANITIZE_noinstr.o := n" lines from the Makefile, and purely
> > > rely on the noinstr attribute, even in the presence of explicit
> > > instrumentation calls.
> > >
> >
> > Excellent! Thanks for the quick fix. Happy to test and/or look into
> > the kernel side of this once this lands.
>
> Well, would not GCC need to grow the same thing and then we must wait
> until these versions are the minimum supported versions for sanitizer
> builds.
>
> I mean, the extension is nice, but I'm afraid we can't really use it
> until much later :/
Unfortunately, yes. But let's try to get the builtin into Clang and
GCC now (for the latter, need to Cc GCC folks to help).
Then we wait for 5 years. :-)
There's a possibility to try and backport it to stable Clang and GCC
versions, but it's a long stretch (extremely unlikely).
^ permalink raw reply
* Re: Concerns about SFrame viability for userspace stack walking
From: Fangrui Song @ 2025-12-01 9:04 UTC (permalink / raw)
To: linux-toolchains, linux-perf-users, linux-kernel
In-Reply-To: <3xd4fqvwflefvsjjoagytoi3y3sf7lxqjremhe2zo5tounihe4@3ftafgryadsr>
On 2025-10-29, Fangrui Song wrote:
>I've been following the SFrame discussion and wanted to share some concerns about its viability for userspace adoption, based on concrete measurements and comparison with existing compact unwind implementations in LLVM.
>
>**Size overhead concerns**
>
>Measurements on a x86-64 clang binary show that .sframe (8.87 MiB) is approximately 10% larger than the combined size of .eh_frame and .eh_frame_hdr (8.06 MiB total).
>This is problematic because .eh_frame cannot be eliminated - it contains essential information for restoring callee-saved registers, LSDA, and personality information needed for debugging (e.g. reading local variables in a coredump) and C++ exception handling.
>
>This means adopting SFrame would result in carrying both formats, with a large net size increase.
>
>**Learning from existing compact unwind implementations**
>
>It's worth noting that LLVM has had a battle-tested compact unwind format in production use since 2009 with OS X 10.6, which transitioned to using CFI directives in 2013 [1]. The efficiency gains are dramatic:
>
> __text section: 0x4a55470 bytes
> __unwind_info section: 0x79060 bytes (0.6% of __text)
> __eh_frame section: 0x58 bytes
>
> (On macOS you can check the section size with objdump --arch x86_64 -h clang and dump the unwind info with objdump --arch x86_64 --unwind-info clang)
>
>OpenVMS's x86-64 port, which is ELF-based, also adopted this format as documented in their "VSI OpenVMS Calling Standard" and their 2018 post: https://discourse.llvm.org/t/rfc-asynchronous-unwind-tables-attribute/59282
>
>The compact unwind format achieves this efficiency through a two-level page table structure. It describes common frame layouts compactly and falls back to DWARF only when necessary, allowing most DWARF CFI entries to be eliminated while maintaining full functionality. For more details, see: https://faultlore.com/blah/compact-unwinding/ and the lld/MachO implementation https://github.com/llvm/llvm-project/blob/main/lld/MachO/UnwindInfoSection.cpp
>
>**The AArch64 case: size matters even more**
>
>The size consideration becomes even more critical for AArch64, which is heavily deployed on mobile phones.
>There's an active feature request for compact unwind support in the AArch64 ABI: https://github.com/ARM-software/abi-aa/issues/344
>This underscores the broader industry need for efficient unwind information that doesn't duplicate data or significantly increase binary size.
>
>There are at least two formats the ELF one can learn from: LLVM's compact unwind format (aarch64) and Windows ARM64 Frame Unwind Code.
>
>**Path forward**
>
>Unless SFrame can actually replace .eh_frame (rather than supplementing it as an accelerator for linux-perf) and demonstrate sizes smaller than .eh_frame - matching the efficiency of existing compact unwind approaches — I question its practical viability for userspace.
>The current design appears to add overhead rather than reduce it.
>This isn't to suggest we should simply adopt the existing compact unwind format wholesale.
>The x86-64 design dates back to 2009 or earlier, and there are likely improvements we can make. However, we should aim for similar or better efficiency gains.
>
>For additional context, I've documented my detailed analysis at:
>
>- https://maskray.me/blog/2025-09-28-remarks-on-sframe (covering mandatory index building problems, section group compliance and garbage collection issues, and version compatibility challenges)
>- https://maskray.me/blog/2025-10-26-stack-walking-space-and-time-trade-offs (size analysis)
>
>Best regards,
>Fangrui
>
>[1]: https://github.com/llvm/llvm-project/commit/58e2d3d856b7dc7b97a18cfa2aeeb927bc7e6bd5 ("Generate compact unwind encoding from CFI directives.")
>
tl;dr I believe a compact unwind scheme demonstrates significant promise over SFrame.
The MIPS compact exception tables as implemented in Binutils are also
worth considering (the structure can be shared among all architectures
while unwind code has to be arch-specific)
I've ported the Mach-O compact unwind format to ELF in a branch, establishing a baseline for improvements to the compact unwind format.
```
% ~/Dev/object-file-size-analyzer/section_size.rb /tmp/out/custom-{fp,sframe,compact,fp-gcc,sframe-gcc}/bin/{llvm-mc,opt}
Filename | .text size | EH size | .sframe size | VM size | VM increase
---------------------------------------+------------------+----------------+----------------+----------+------------
/tmp/out/custom-fp/bin/llvm-mc | 2120895 (23.5%) | 301528 (3.3%) | 0 (0.0%) | 9043221 | -
/tmp/out/custom-sframe/bin/llvm-mc | 2109231 (22.3%) | 367424 (3.9%) | 348041 (3.7%) | 9474085 | +4.8%
/tmp/out/custom-compact/bin/llvm-mc | 2109519 (24.4%) | 106288 (1.2%) | 0 (0.0%) | 8639637 | -4.5%
/tmp/out/custom-fp-gcc/bin/llvm-mc | 2744214 (29.2%) | 301836 (3.2%) | 0 (0.0%) | 9389677 | +3.8%
/tmp/out/custom-sframe-gcc/bin/llvm-mc | 2705860 (27.7%) | 354292 (3.6%) | 356073 (3.6%) | 9780985 | +8.2%
/tmp/out/custom-fp/bin/opt | 38769545 (69.9%) | 3547688 (6.4%) | 0 (0.0%) | 55425217 | -
/tmp/out/custom-sframe/bin/opt | 38891295 (62.4%) | 4559644 (7.3%) | 4448874 (7.1%) | 62292133 | +12.4%
/tmp/out/custom-compact/bin/opt | 38898415 (74.8%) | 1200764 (2.3%) | 0 (0.0%) | 52020449 | -6.1%
/tmp/out/custom-fp-gcc/bin/opt | 54654215 (78.1%) | 3631196 (5.2%) | 0 (0.0%) | 70001373 | +26.3%
/tmp/out/custom-sframe-gcc/bin/opt | 53644895 (70.4%) | 4857364 (6.4%) | 5263676 (6.9%) | 76206149 | +37.5%
```
**Evaluation results**
With the current implementation, 4937 out of 77648 FDEs (6.36%) require a DWARF escape, while the remaining FDEs can be replaced with unwind descriptors, yielding a huge size saving.
.eh_frame_hdr will become significantly smaller if we implement a two-level page table structure similar to Mach-O __unwind_info to deduplicate entries.
**Build configurations**
```
#!/bin/zsh
conf() {
configure-llvm $1 -DCMAKE_EXE_LINKER_FLAGS='-fuse-ld=bfd -pie -Wl,-z,pack-relative-relocs' \
-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=bfd -DLLVM_ENABLE_UNWIND_TABLES=on -DLLVM_ENABLE_LLD=off ${@:2}
}
clang=(-DCMAKE_CXX_COMPILER=/tmp/Rel/bin/clang++ -DCMAKE_C_COMPILER=/tmp/Rel/bin/clang)
gcc=("-DCMAKE_C_COMPILER=$HOME/opt/gcc-15/bin/gcc" "-DCMAKE_CXX_COMPILER=$HOME/opt/gcc-15/bin/g++")
compact="-fomit-frame-pointer -momit-leaf-frame-pointer -B$HOME/opt/binutils/bin -mllvm -elf-compact-unwind -mllvm -x86-epilog-cfi=0"
fp="-fno-omit-frame-pointer -momit-leaf-frame-pointer -B$HOME/opt/binutils/bin -Wa,--gsframe=no"
sframe="-fomit-frame-pointer -momit-leaf-frame-pointer -B$HOME/opt/binutils/bin -Wa,--gsframe"
conf custom-compact -DCMAKE_{C,CXX}_FLAGS="$compact" ${clang[@]} \
-DCMAKE_EXE_LINKER_FLAGS='-fuse-ld=lld -pie -Wl,-z,pack-relative-relocs' \
-DCMAKE_SHARED_LINKER_FLAGS=-fuse-ld=lld
conf custom-fp -DCMAKE_{C,CXX}_FLAGS="-fno-integrated-as $fp" ${clang[@]}
conf custom-sframe -DCMAKE_{C,CXX}_FLAGS="-fno-integrated-as $sframe" ${clang[@]}
conf custom-fp-gcc -DCMAKE_{C,CXX}_FLAGS="$fp" ${gcc[@]}
conf custom-sframe-gcc -DCMAKE_{C,CXX}_FLAGS="$sframe" ${gcc[@]}
for i in compact fp sframe fp-gcc sframe-gcc; do ninja -C /tmp/out/custom-$i llvm-mc opt; done
```
The `/tmp/out/custom-compact` build uses my llvm-project branch
(<http://github.com/MaskRay/llvm-project/tree/demo-unwind>) that ports
Mach-O compact unwind to ELF, allowing the majority of `.eh_frame` FDEs
to replace CFI instructions with unwind descriptors.
-mllvm -x86-epilog-cfi=0: Disables epilogue CFI for x86 (primarily
implemented by D42848 in 2018, notably disabled for Darwin and Windows).
Without this option most frames will not utilize unwind descriptors
because the current Mach-O compact unwind implementation does not
support popq %rbp; .cfi_def_cfa %rsp, 8; ret. I believe this is still
fair as we expect to use an 8-byte descriptor, sufficient to describe
epilogue CFI.
If you still think custom-compact using -x86-epilog-cfi is not entirely
fair to other builds, this is the table using -fno-asynchronous-unwind-tables -funwind-tables -mllvm -x86-epilog-cfi=0
for all builds:
% ~/Dev/object-file-size-analyzer/section_size.rb /tmp/out/custom-{fp-sync,sframe-sync,compact-sync}/bin/{llvm-mc,opt}
Filename | .text size | EH size | .sframe size | VM size | VM increase
-----------------------------------------+------------------+----------------+----------------+----------+------------
/tmp/out/custom-fp-sync/bin/llvm-mc | 2120895 (24.1%) | 263396 (3.0%) | 0 (0.0%) | 8802093 | -
/tmp/out/custom-sframe-sync/bin/llvm-mc | 2109231 (23.2%) | 291084 (3.2%) | 248654 (2.7%) | 9090325 | +3.3%
/tmp/out/custom-compact-sync/bin/llvm-mc | 2109519 (24.4%) | 106288 (1.2%) | 0 (0.0%) | 8639637 | -1.8%
/tmp/out/custom-fp-sync/bin/opt | 38769545 (72.2%) | 2997572 (5.6%) | 0 (0.0%) | 53706041 | -
/tmp/out/custom-sframe-sync/bin/opt | 38891295 (66.9%) | 3425116 (5.9%) | 2951292 (5.1%) | 58091421 | +8.2%
/tmp/out/custom-compact-sync/bin/opt | 38898415 (74.8%) | 1200764 (2.3%) | 0 (0.0%) | 52020449 | -3.1%
---
After I had implemented this, I then investigated the MIPS compact
exception tables. I can now finalize the ‘in construction’ chapter of my
blog post,
https://maskray.me/blog/2020-11-08-stack-unwinding#mips-compact-exception-tables
Designed around 2015, it is actually a very good format.
Compiler output. The directive .cfi_sections .eh_frame_entry instructs
the assembler to emit index table entries to the .eh_frame_entry
section. .cfi_fde_data opcode1, ... between a pair of .cfi_startproc
and .cfi_endproc describes the frame unwind opcodes where each opcode
takes one byte. The frame unwind opcodes describe the semantics of
prologue instructions, similar to Windows ARM64 Frame Unwind Codes.
Assembler processing. The assembler generates a .eh_frame_entry.* section for each section with compact unwind information.
Each .eh_frame_entry is a pair of 4 bytes, where the first word is like the first word in a .eh_frame_hdr entry.
An .eh_frame_entry entry takes one of three forms:
Inline compact: (even pc, unwind_data). This form can be used when there are at most 3 opcodes (3 bytes) and no personality routine.
Out-of-line compact: (odd pc, even unwind_ptr) where unwind_ptr points to unwind data in the .gnu_extab section.
Legacy: (odd pc, odd legacy_unwind_ptr) where legacy_unwind_ptr points to the legacy .eh_frame section.
TODO: Describe .cfi_inline_lsda, which appears related to __gnu_compact_pr[1-3].
Linker processing. GNU ld concatenates .eh_frame_entry and .eh_frame_entry.* sections, sorting them by address.
The following internal linker script fragment adds a header before the entries:
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
Although the section name remains the traditional .eh_frame_hdr, the version is set to 2.
The linker also defines the symbol __GNU_EH_FRAME_HDR to hold the .eh_frame_hdr address.
---
I've studied numerous stack unwinding/walking formats. DWARF CFI is
essential to achieve near 100% coverage. Other formats, such as compact
unwind formats and SFrame, have limitations. The ideal future solution,
as an alternative to frame pointer chains, will be a stack unwinding
format that supports C++ exceptions and can use DWARF CFI as a fallback.
^ permalink raw reply
* Re: [PATCH v2 1/6] unwind: build kernel with sframe info
From: Jens Remus @ 2025-11-19 14:59 UTC (permalink / raw)
To: Dylan Hatch, Josh Poimboeuf, Steven Rostedt, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina
Cc: Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan, Song Liu, Prasanna Kumar T S M, Heiko Carstens,
Vasily Gorbik
In-Reply-To: <20250904223850.884188-2-dylanbhatch@google.com>
Hello Dylan!
On 9/5/2025 12:38 AM, Dylan Hatch wrote:
> Use the -Wa,--gsframe flags to build the code, so GAS will generate
> a new .sframe section for the stack trace information.
> Currently, the sframe format only supports arm64 and x86_64
> architectures. Add this configuration on arm64 to enable sframe
> unwinder in the future.
>
> Signed-off-by: Weinan Liu <wnliu@google.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
> diff --git a/arch/Kconfig b/arch/Kconfig
> @@ -1782,4 +1782,10 @@ config ARCH_WANTS_PRE_LINK_VMLINUX
> config ARCH_HAS_CPU_ATTACK_VECTORS
> bool
>
> +config AS_SFRAME
> + def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc)
As you will soon be requiring SFrame V2 with the new PC-relative FDE
function start address encoding, you may want to extend this check as
follows:
config AS_SFRAME
def_bool y
depends on $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc)
depends on $(success,printf "%b\n" ".cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" - && $(OBJDUMP) --sframe "$$TMP" | grep -q "SFRAME_VERSION_2")
depends on $(success,printf "%b\n" ".cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" - && $(OBJDUMP) --sframe "$$TMP" | grep -q "SFRAME_F_FDE_FUNC_START_PCREL")
Or you could change it into multiple config options, which might be
overkill:
config AS_SFRAME
def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc)
config AS_SFRAME_V2
def_bool y
depends on AS_SFRAME
depends on $(success,printf "%b\n" ".cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" - && $(OBJDUMP) --sframe "$$TMP" | grep -q "SFRAME_VERSION_2")
config AS_SFRAME_V2_PCREL_FDE
def_bool y
depends on AS_SFRAME_V2
depends on $(success,printf "%b\n" ".cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" - && $(OBJDUMP) --sframe "$$TMP" | grep -q "SFRAME_F_FDE_FUNC_START_PCREL")
> +
> +config SFRAME_UNWIND_TABLE
> + bool
> +
> endmenu
Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303)
+49-7031-16-1128 Office
jremus@de.ibm.com
IBM
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
^ permalink raw reply
* Re: [PATCH v2 6/6] unwind: arm64: Add reliable stacktrace with sframe unwinder.
From: Indu Bhagat @ 2025-11-19 7:12 UTC (permalink / raw)
To: Dylan Hatch, Josh Poimboeuf
Cc: Steven Rostedt, Peter Zijlstra, Will Deacon, Catalin Marinas,
Jiri Kosina, Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan, Song Liu, Prasanna Kumar T S M
In-Reply-To: <CADBMgpzmzyQgs4K3XoYf5h=C7vv-FDfNb5wharucyeoxUKo4bg@mail.gmail.com>
On 11/18/25 7:17 PM, Dylan Hatch wrote:
>> For sframe v3, I believe Indu is planning to add support for marking the
>> outermost frame. That would be one definitive way to know that the
>> stack trace made it to the end.
> How would this work? Is there a way of determining at compile time
> which functions would end up being the outermost frame?
No, the compiler does not emit such a marker.
SFrame information is generated by assembler using the .cfi_*
directives. For the outermost functions, they need to be marked with a:
.cfi_undefined RA
where RA is the default return address register for the ABI.
This mechanism is formalised in the DWARF standard:
"If a Return Address register is defined in the virtual unwind table,
and its rule is undefined (for example, by DW_CFA_undefined), then there
is no return address and no call address, and the virtual unwind of
stack activations is complete."
SFrame relies on this to emit a marker for identifying the outermost frame.
^ permalink raw reply
* Re: [PATCH v2 6/6] unwind: arm64: Add reliable stacktrace with sframe unwinder.
From: Dylan Hatch @ 2025-11-19 3:17 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Steven Rostedt, Indu Bhagat, Peter Zijlstra, Will Deacon,
Catalin Marinas, Jiri Kosina, Roman Gushchin, Weinan Liu,
Mark Rutland, Ian Rogers, linux-toolchains, linux-kernel,
live-patching, joe.lawrence, Puranjay Mohan, Song Liu,
Prasanna Kumar T S M
In-Reply-To: <eo5fod6csuininieur2lm6bxunmpbk6n3wtxajamrwqqpae3ja@o3eqwfp3u6su>
On Mon, Nov 17, 2025 at 3:01 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Fri, Nov 14, 2025 at 10:44:20PM -0800, Dylan Hatch wrote:
> > Sorry for the slow reply on this, I'm going to try and get a v3 out
> > sometime after next week.
> >
> > On Wed, Sep 17, 2025 at 4:41 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > >
> > > As far as I can tell, the *only* error condition being checked is if it
> > > (successfully) fell back to frame pointers.
> >
> > By checking/handling error conditions, do you mean just marking the
> > state as unreliable in any case where the unwind isn't successful with
> > SFrame?
>
> Right, any sframe error it encounters along the way (including missing
> sframe) would be a reason to mark it as unreliable.
>
> > I'm thinking if I can make the unwind_next_frame_sframe() code
> > path handle the end of the stack correctly on its own, I can more
> > strictly mark the trace as unreliable if it encounters any error.
> >
> > >
> > > What if there was some bad or missing sframe data? Or some unexpected
> > > condition on the stack?
> > >
> > > Also, does the exception handling code have correct cfi/sframe metadata?
> > >
> > > In order for it to be "reliable", we need to know the unwind reached the
> > > end of the stack (e.g., the task pt_regs frame, from entry-from-user).
> >
> > It looks like the frame-pointer based method of handling the end of
> > the stack involves calling kunwind_next_frame_record_meta() to extract
> > and check frame_record_meta::type for FRAME_META_TYPE_FINAL. I think
> > this currently assumes (based on the definition of 'struct
> > frame_record') that the next FP and PC are right next to each other,
> > alongside the meta type. But the sframe format stores separate entries
> > for the FP and RA offsets, which makes extracting the meta type from
> > this information a little bit murky to me.
> >
> > Would it make sense to fall back to the frame pointer method for the
> > final stack frame? Or I guess I could define a new sframe-friendly
> > meta frame record format?
>
> For sframe v3, I believe Indu is planning to add support for marking the
> outermost frame. That would be one definitive way to know that the
> stack trace made it to the end.
How would this work? Is there a way of determining at compile time
which functions would end up being the outermost frame?
>
> Or, if the entry-from-user pt_regs frame is always stored at a certain
> offset compared to the end of the task stack page, that might be another
> way.
It looks like kunwind_next_frame_record_meta() uses this strategy
already. It checks that 'fp == &task_pt_regs(tsk)->stackframe' to
validate that it has in fact reached the end of the stack. It seems
like we need alternate versions of kunwind_next_frame_record_meta()
and kunwind_next_regs_pc() that use the CFA calculated from the sframe
data (instead of the frame pointer). Does that sound right?
Thanks,
Dylan
On Mon, Nov 17, 2025 at 3:01 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Fri, Nov 14, 2025 at 10:44:20PM -0800, Dylan Hatch wrote:
> > Sorry for the slow reply on this, I'm going to try and get a v3 out
> > sometime after next week.
> >
> > On Wed, Sep 17, 2025 at 4:41 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > >
> > > As far as I can tell, the *only* error condition being checked is if it
> > > (successfully) fell back to frame pointers.
> >
> > By checking/handling error conditions, do you mean just marking the
> > state as unreliable in any case where the unwind isn't successful with
> > SFrame?
>
> Right, any sframe error it encounters along the way (including missing
> sframe) would be a reason to mark it as unreliable.
>
> > I'm thinking if I can make the unwind_next_frame_sframe() code
> > path handle the end of the stack correctly on its own, I can more
> > strictly mark the trace as unreliable if it encounters any error.
> >
> > >
> > > What if there was some bad or missing sframe data? Or some unexpected
> > > condition on the stack?
> > >
> > > Also, does the exception handling code have correct cfi/sframe metadata?
> > >
> > > In order for it to be "reliable", we need to know the unwind reached the
> > > end of the stack (e.g., the task pt_regs frame, from entry-from-user).
> >
> > It looks like the frame-pointer based method of handling the end of
> > the stack involves calling kunwind_next_frame_record_meta() to extract
> > and check frame_record_meta::type for FRAME_META_TYPE_FINAL. I think
> > this currently assumes (based on the definition of 'struct
> > frame_record') that the next FP and PC are right next to each other,
> > alongside the meta type. But the sframe format stores separate entries
> > for the FP and RA offsets, which makes extracting the meta type from
> > this information a little bit murky to me.
> >
> > Would it make sense to fall back to the frame pointer method for the
> > final stack frame? Or I guess I could define a new sframe-friendly
> > meta frame record format?
>
> For sframe v3, I believe Indu is planning to add support for marking the
> outermost frame. That would be one definitive way to know that the
> stack trace made it to the end.
>
> Or, if the entry-from-user pt_regs frame is always stored at a certain
> offset compared to the end of the task stack page, that might be another
> way.
>
> --
> Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Indu Bhagat @ 2025-11-18 18:29 UTC (permalink / raw)
To: Puranjay Mohan, Josh Poimboeuf
Cc: Steven Rostedt, Dylan Hatch, Song Liu, Peter Zijlstra,
Will Deacon, Catalin Marinas, Jiri Kosina, Roman Gushchin,
Weinan Liu, Mark Rutland, Ian Rogers, linux-toolchains,
linux-kernel, live-patching, joe.lawrence, Puranjay Mohan
In-Reply-To: <CANk7y0hKH6vvWf3Lyc678uvF9YWStMzO-Sj8yb3sbS4=4dxC6Q@mail.gmail.com>
On 11/17/25 4:49 PM, Puranjay Mohan wrote:
> On Tue, Nov 18, 2025 at 1:10 AM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>>
>> On Mon, Nov 17, 2025 at 06:42:23PM -0500, Steven Rostedt wrote:
>>> On Mon, 17 Nov 2025 15:06:32 -0800
>>> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>>>
>>>> The ORC unwinder marks the unwind "unreliable" if it has to fall back to
>>>> frame pointers.
>>>>
>>>> But that's not a problem for livepatch because it only[*] unwinds
>>>> blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
>>>>
>>>> [*] with one exception: the task calling into livepatch
>>>
>>> It may be a problem with preempted tasks right? I believe with PREEMPT_LAZY
>>> (and definitely with PREEMPT_RT) BPF programs can be preempted.
>>
>> In that case, then yes, that stack would be marked unreliable and
>> livepatch would have to go try and patch the task later.
>>
>> If it were an isolated case, that would be fine, but if BPF were
>> consistently on the same task's stack, it could stall the completion of
>> the livepatch indefinitely.
>>
>> I haven't (yet?) heard of BPF-induced livepatch stalls happening in
>> reality, but maybe it's only a matter of time :-/
>>
>> To fix that, I suppose we would need some kind of dynamic ORC
>> registration interface. Similar to what has been discussed with
>> sframe+JIT.
>
> I work with the BPF JITs and would be interested in exploring this further,
> can you point me to this discussion if it happened on the list.
>
We discussed the SFrame/JIT topic earlier this year in our monthly SFrame
meetings. I can point you to the meeting notes in a separate email. We
had some discussion around:
- SFrame specification: Allow efficient addition, removal and update
of data in SFrame sections. A part of the challenge is in representing
the variety of frames a JIT may use.
- SFrame APIs with JIT: Efficient SFrame stack trace data
manipulation by JIT.
- Interface with the Linux kernel: Efficient registration and update of
SFrame stack trace data.
It will be great to have more collaboration and brainstorming, and to
include BPF/JIT in the discussions.
>>
>> If BPF were to always use frame pointers then there would be only a very
>> limited set of ORC entries (either "frame pointer" or "undefined") for a
>> given BPF function and it shouldn't be too complicated.
>>
>> --
>> Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Steven Rostedt @ 2025-11-18 18:20 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Puranjay Mohan, Dylan Hatch, Song Liu, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <bcxe5hhenqdodutgp7vd7b7aqn7emrlsezlu7stjjmfxgwc3gw@q3ggnid7ooyd>
On Mon, 17 Nov 2025 21:18:41 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > > To fix that, I suppose we would need some kind of dynamic ORC
> > > registration interface. Similar to what has been discussed with
> > > sframe+JIT.
> >
> > I work with the BPF JITs and would be interested in exploring this further,
> > can you point me to this discussion if it happened on the list.
>
> Sorry, nothing specific has been discussed that I'm aware of :-)
Right, the only discussions have been at the monthly Sframe meetings about
needing to be able to handle this. But the actual implementation details
have not been figured out yet.
-- Steve
^ permalink raw reply
* Re: [PATCH 2/4] media: atomisp: Fix startup() section placement with -ffunction-sections
From: Peter Zijlstra @ 2025-11-18 8:57 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: x86, linux-kernel, live-patching, Hans de Goede,
Mauro Carvalho Chehab, linux-toolchains
In-Reply-To: <2a3d4d7fco4szxyrw33lorkhckjq4styfsaljxxwd3v4o42i5z@qdavomj5i2mu>
On Fri, Nov 14, 2025 at 12:43:10PM -0800, Josh Poimboeuf wrote:
> On Fri, Nov 14, 2025 at 09:56:57AM +0100, Peter Zijlstra wrote:
> > On Wed, Nov 12, 2025 at 03:47:49PM -0800, Josh Poimboeuf wrote:
> > > When compiling the kernel with -ffunction-sections (e.g., for LTO,
> > > livepatch, dead code elimination, AutoFDO, or Propeller), the startup()
> > > function gets compiled into the .text.startup section. In some cases it
> > > can even be cloned into .text.startup.constprop.0 or
> > > .text.startup.isra.0.
> > >
> > > However, the .text.startup and .text.startup.* section names are already
> > > reserved for use by the compiler for __attribute__((constructor)) code.
> > >
> >
> > Urgh, that's a 'fun' one. Is this not a -ffunction-sections bug? I mean,
> > the compiler should never put regular non-reserved user symbols in a
> > section it has reserved for itself, right?
>
> Right, so there's no ambiguity *IF* we know in advance whether it was
> compiled with -ffunction-sections. If so, constructor code goes in
> .text.startup.*, and startup() goes in .text.startup or
> .text.startup.constprop.0 or .text.startup.isra.0.
>
> So it's not really a compiler bug because it's possible to disambiguate
> those.
>
> Problem is, we can have some objects compiled with -ffunction-sections,
> and some compiled without, in the same kernel. So the disambiguation
> isn't possible at link time, since for example .text.startup could be
> startup() with -ffunction-sections, or it could be
> __attribute__((constructor)) without -ffunction-sections.
>
> I attempted to describe all that in patch 4.
Egads, what a mess :-(
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Josh Poimboeuf @ 2025-11-18 5:18 UTC (permalink / raw)
To: Puranjay Mohan
Cc: Steven Rostedt, Dylan Hatch, Song Liu, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <CANk7y0hKH6vvWf3Lyc678uvF9YWStMzO-Sj8yb3sbS4=4dxC6Q@mail.gmail.com>
On Tue, Nov 18, 2025 at 01:49:06AM +0100, Puranjay Mohan wrote:
> On Tue, Nov 18, 2025 at 1:10 AM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > On Mon, Nov 17, 2025 at 06:42:23PM -0500, Steven Rostedt wrote:
> > > On Mon, 17 Nov 2025 15:06:32 -0800
> > > Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> > >
> > > > The ORC unwinder marks the unwind "unreliable" if it has to fall back to
> > > > frame pointers.
> > > >
> > > > But that's not a problem for livepatch because it only[*] unwinds
> > > > blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
> > > >
> > > > [*] with one exception: the task calling into livepatch
> > >
> > > It may be a problem with preempted tasks right? I believe with PREEMPT_LAZY
> > > (and definitely with PREEMPT_RT) BPF programs can be preempted.
> >
> > In that case, then yes, that stack would be marked unreliable and
> > livepatch would have to go try and patch the task later.
> >
> > If it were an isolated case, that would be fine, but if BPF were
> > consistently on the same task's stack, it could stall the completion of
> > the livepatch indefinitely.
> >
> > I haven't (yet?) heard of BPF-induced livepatch stalls happening in
> > reality, but maybe it's only a matter of time :-/
> >
> > To fix that, I suppose we would need some kind of dynamic ORC
> > registration interface. Similar to what has been discussed with
> > sframe+JIT.
>
> I work with the BPF JITs and would be interested in exploring this further,
> can you point me to this discussion if it happened on the list.
Sorry, nothing specific has been discussed that I'm aware of :-)
--
Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Puranjay Mohan @ 2025-11-18 0:49 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Steven Rostedt, Dylan Hatch, Song Liu, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <cxxj6lzs226ost6js5vslm52bxblknjwd6llmu24h3bk742zjh@7iwwi5bafysq>
On Tue, Nov 18, 2025 at 1:10 AM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Mon, Nov 17, 2025 at 06:42:23PM -0500, Steven Rostedt wrote:
> > On Mon, 17 Nov 2025 15:06:32 -0800
> > Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > > The ORC unwinder marks the unwind "unreliable" if it has to fall back to
> > > frame pointers.
> > >
> > > But that's not a problem for livepatch because it only[*] unwinds
> > > blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
> > >
> > > [*] with one exception: the task calling into livepatch
> >
> > It may be a problem with preempted tasks right? I believe with PREEMPT_LAZY
> > (and definitely with PREEMPT_RT) BPF programs can be preempted.
>
> In that case, then yes, that stack would be marked unreliable and
> livepatch would have to go try and patch the task later.
>
> If it were an isolated case, that would be fine, but if BPF were
> consistently on the same task's stack, it could stall the completion of
> the livepatch indefinitely.
>
> I haven't (yet?) heard of BPF-induced livepatch stalls happening in
> reality, but maybe it's only a matter of time :-/
>
> To fix that, I suppose we would need some kind of dynamic ORC
> registration interface. Similar to what has been discussed with
> sframe+JIT.
I work with the BPF JITs and would be interested in exploring this further,
can you point me to this discussion if it happened on the list?
>
> If BPF were to always use frame pointers then there would be only a very
> limited set of ORC entries (either "frame pointer" or "undefined") for a
> given BPF function and it shouldn't be too complicated.
>
> --
> Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Josh Poimboeuf @ 2025-11-18 0:10 UTC (permalink / raw)
To: Steven Rostedt
Cc: Dylan Hatch, Puranjay Mohan, Song Liu, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <20251117184223.3c03fe92@gandalf.local.home>
On Mon, Nov 17, 2025 at 06:42:23PM -0500, Steven Rostedt wrote:
> On Mon, 17 Nov 2025 15:06:32 -0800
> Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> > The ORC unwinder marks the unwind "unreliable" if it has to fall back to
> > frame pointers.
> >
> > But that's not a problem for livepatch because it only[*] unwinds
> > blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
> >
> > [*] with one exception: the task calling into livepatch
>
> It may be a problem with preempted tasks right? I believe with PREEMPT_LAZY
> (and definitely with PREEMPT_RT) BPF programs can be preempted.
In that case, then yes, that stack would be marked unreliable and
livepatch would have to go try and patch the task later.
If it were an isolated case, that would be fine, but if BPF were
consistently on the same task's stack, it could stall the completion of
the livepatch indefinitely.
I haven't (yet?) heard of BPF-induced livepatch stalls happening in
reality, but maybe it's only a matter of time :-/
To fix that, I suppose we would need some kind of dynamic ORC
registration interface. Similar to what has been discussed with
sframe+JIT.
If BPF were to always use frame pointers then there would be only a very
limited set of ORC entries (either "frame pointer" or "undefined") for a
given BPF function and it shouldn't be too complicated.
--
Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Puranjay Mohan @ 2025-11-17 23:50 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Dylan Hatch, Song Liu, Steven Rostedt, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <nzmtsafrx5vjitgfpducjaa7kq747a3sler2vvyvfbxecutn3v@7ffl2ycnaoo2>
On Tue, Nov 18, 2025 at 12:06 AM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On Fri, Nov 14, 2025 at 10:50:16PM -0800, Dylan Hatch wrote:
> > On Mon, Sep 29, 2025 at 12:55 PM Puranjay Mohan <puranjay12@gmail.com> wrote:
> > >
> > > I will try to debug this more but am just curious about BPF's
> > > interactions with sframe.
> > > The sframe data for bpf programs doesn't exist, so we would need to
> > > add that support
> > > and that wouldn't be trivial, given the BPF programs are JITed.
> > >
> > > Thanks,
> > > Puranjay
> >
> > From what I can tell, the ORC unwinder in x86 falls back to using
> > frame pointers in cases of generated code, like BPF. Would matching
> > this behavior in the sframe unwinder be a reasonable approach, at
> > least for the purposes of enabling reliable unwind for livepatch?
>
> The ORC unwinder marks the unwind "unreliable" if it has to fall back to
> frame pointers.
>
> But that's not a problem for livepatch because it only[*] unwinds
> blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
>
BPF programs can sleep, so wouldn't they show up in the stack?
Like if I am tracing a syscall with a bpf program attached using
fentry and the BPF program calls a bpf_arena_alloc_pages(), which can
sleep.
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Steven Rostedt @ 2025-11-17 23:42 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Dylan Hatch, Puranjay Mohan, Song Liu, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <nzmtsafrx5vjitgfpducjaa7kq747a3sler2vvyvfbxecutn3v@7ffl2ycnaoo2>
On Mon, 17 Nov 2025 15:06:32 -0800
Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> The ORC unwinder marks the unwind "unreliable" if it has to fall back to
> frame pointers.
>
> But that's not a problem for livepatch because it only[*] unwinds
> blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
>
> [*] with one exception: the task calling into livepatch
It may be a problem with preempted tasks right? I believe with PREEMPT_LAZY
(and definitely with PREEMPT_RT) BPF programs can be preempted.
-- Steve
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Josh Poimboeuf @ 2025-11-17 23:06 UTC (permalink / raw)
To: Dylan Hatch
Cc: Puranjay Mohan, Song Liu, Steven Rostedt, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <CADBMgpwZ32+shSa0SwO8y4G-Zw14ae-FcoWreA_ptMf08Mu9dA@mail.gmail.com>
On Fri, Nov 14, 2025 at 10:50:16PM -0800, Dylan Hatch wrote:
> On Mon, Sep 29, 2025 at 12:55 PM Puranjay Mohan <puranjay12@gmail.com> wrote:
> >
> > I will try to debug this more but am just curious about BPF's
> > interactions with sframe.
> > The sframe data for bpf programs doesn't exist, so we would need to
> > add that support
> > and that wouldn't be trivial, given the BPF programs are JITed.
> >
> > Thanks,
> > Puranjay
>
> From what I can tell, the ORC unwinder in x86 falls back to using
> frame pointers in cases of generated code, like BPF. Would matching
> this behavior in the sframe unwinder be a reasonable approach, at
> least for the purposes of enabling reliable unwind for livepatch?
The ORC unwinder marks the unwind "unreliable" if it has to fall back to
frame pointers.
But that's not a problem for livepatch because it only[*] unwinds
blocked/sleeping tasks, which shouldn't have BPF on their stack anyway.
[*] with one exception: the task calling into livepatch
--
Josh
^ permalink raw reply
* Re: [PATCH v2 6/6] unwind: arm64: Add reliable stacktrace with sframe unwinder.
From: Josh Poimboeuf @ 2025-11-17 23:01 UTC (permalink / raw)
To: Dylan Hatch
Cc: Steven Rostedt, Indu Bhagat, Peter Zijlstra, Will Deacon,
Catalin Marinas, Jiri Kosina, Roman Gushchin, Weinan Liu,
Mark Rutland, Ian Rogers, linux-toolchains, linux-kernel,
live-patching, joe.lawrence, Puranjay Mohan, Song Liu,
Prasanna Kumar T S M
In-Reply-To: <CADBMgpyVis+fRHLOv6BRPrT+0r8846MOutkmOgMbqytLVXh9Ag@mail.gmail.com>
On Fri, Nov 14, 2025 at 10:44:20PM -0800, Dylan Hatch wrote:
> Sorry for the slow reply on this, I'm going to try and get a v3 out
> sometime after next week.
>
> On Wed, Sep 17, 2025 at 4:41 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > As far as I can tell, the *only* error condition being checked is if it
> > (successfully) fell back to frame pointers.
>
> By checking/handling error conditions, do you mean just marking the
> state as unreliable in any case where the unwind isn't successful with
> SFrame?
Right, any sframe error it encounters along the way (including missing
sframe) would be a reason to mark it as unreliable.
> I'm thinking if I can make the unwind_next_frame_sframe() code
> path handle the end of the stack correctly on its own, I can more
> strictly mark the trace as unreliable if it encounters any error.
>
> >
> > What if there was some bad or missing sframe data? Or some unexpected
> > condition on the stack?
> >
> > Also, does the exception handling code have correct cfi/sframe metadata?
> >
> > In order for it to be "reliable", we need to know the unwind reached the
> > end of the stack (e.g., the task pt_regs frame, from entry-from-user).
>
> It looks like the frame-pointer based method of handling the end of
> the stack involves calling kunwind_next_frame_record_meta() to extract
> and check frame_record_meta::type for FRAME_META_TYPE_FINAL. I think
> this currently assumes (based on the definition of 'struct
> frame_record') that the next FP and PC are right next to each other,
> alongside the meta type. But the sframe format stores separate entries
> for the FP and RA offsets, which makes extracting the meta type from
> this information a little bit murky to me.
>
> Would it make sense to fall back to the frame pointer method for the
> final stack frame? Or I guess I could define a new sframe-friendly
> meta frame record format?
For sframe v3, I believe Indu is planning to add support for marking the
outermost frame. That would be one definitive way to know that the
stack trace made it to the end.
Or, if the entry-from-user pt_regs frame is always stored at a certain
offset compared to the end of the task stack page, that might be another
way.
--
Josh
^ permalink raw reply
* Re: [PATCH v2 0/6] unwind, arm64: add sframe unwinder for kernel
From: Dylan Hatch @ 2025-11-15 6:50 UTC (permalink / raw)
To: Puranjay Mohan
Cc: Song Liu, Josh Poimboeuf, Steven Rostedt, Indu Bhagat,
Peter Zijlstra, Will Deacon, Catalin Marinas, Jiri Kosina,
Roman Gushchin, Weinan Liu, Mark Rutland, Ian Rogers,
linux-toolchains, linux-kernel, live-patching, joe.lawrence,
Puranjay Mohan
In-Reply-To: <CANk7y0hUKOVXRKoJ5Ufmg-5DGSe2F5nBH+O7tLVvLRs9Oe54uA@mail.gmail.com>
On Mon, Sep 29, 2025 at 12:55 PM Puranjay Mohan <puranjay12@gmail.com> wrote:
>
> I will try to debug this more but am just curious about BPF's
> interactions with sframe.
> The sframe data for bpf programs doesn't exist, so we would need to
> add that support
> and that wouldn't be trivial, given the BPF programs are JITed.
>
> Thanks,
> Puranjay
From what I can tell, the ORC unwinder in x86 falls back to using
frame pointers in cases of generated code, like BPF. Would matching
this behavior in the sframe unwinder be a reasonable approach, at
least for the purposes of enabling reliable unwind for livepatch?
Thanks,
Dylan
^ permalink raw reply
* Re: [PATCH v2 6/6] unwind: arm64: Add reliable stacktrace with sframe unwinder.
From: Dylan Hatch @ 2025-11-15 6:44 UTC (permalink / raw)
To: Josh Poimboeuf
Cc: Steven Rostedt, Indu Bhagat, Peter Zijlstra, Will Deacon,
Catalin Marinas, Jiri Kosina, Roman Gushchin, Weinan Liu,
Mark Rutland, Ian Rogers, linux-toolchains, linux-kernel,
live-patching, joe.lawrence, Puranjay Mohan, Song Liu,
Prasanna Kumar T S M
In-Reply-To: <xo2ro446awhsd7i55shx6tlz6s2azuown4xk6zfm7ie4zz2nqc@244onpurkvy3>
Sorry for the slow reply on this, I'm going to try and get a v3 out
sometime after next week.
On Wed, Sep 17, 2025 at 4:41 PM Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> As far as I can tell, the *only* error condition being checked is if it
> (successfully) fell back to frame pointers.
By checking/handling error conditions, do you mean just marking the
state as unreliable in any case where the unwind isn't successful with
SFrame? I'm thinking if I can make the unwind_next_frame_sframe() code
path handle the end of the stack correctly on its own, I can more
strictly mark the trace as unreliable if it encounters any error.
>
> What if there was some bad or missing sframe data? Or some unexpected
> condition on the stack?
>
> Also, does the exception handling code have correct cfi/sframe metadata?
>
> In order for it to be "reliable", we need to know the unwind reached the
> end of the stack (e.g., the task pt_regs frame, from entry-from-user).
It looks like the frame-pointer based method of handling the end of
the stack involves calling kunwind_next_frame_record_meta() to extract
and check frame_record_meta::type for FRAME_META_TYPE_FINAL. I think
this currently assumes (based on the definition of 'struct
frame_record') that the next FP and PC are right next to each other,
alongside the meta type. But the sframe format stores separate entries
for the FP and RA offsets, which makes extracting the meta type from
this information a little bit murky to me.
Would it make sense to fall back to the frame pointer method for the
final stack frame? Or I guess I could define a new sframe-friendly
meta frame record format?
Thanks,
Dylan
^ permalink raw reply
* Re: [PATCH 2/4] media: atomisp: Fix startup() section placement with -ffunction-sections
From: Josh Poimboeuf @ 2025-11-14 20:43 UTC (permalink / raw)
To: Peter Zijlstra
Cc: x86, linux-kernel, live-patching, Hans de Goede,
Mauro Carvalho Chehab, linux-toolchains
In-Reply-To: <20251114085657.GR278048@noisy.programming.kicks-ass.net>
On Fri, Nov 14, 2025 at 09:56:57AM +0100, Peter Zijlstra wrote:
> On Wed, Nov 12, 2025 at 03:47:49PM -0800, Josh Poimboeuf wrote:
> > When compiling the kernel with -ffunction-sections (e.g., for LTO,
> > livepatch, dead code elimination, AutoFDO, or Propeller), the startup()
> > function gets compiled into the .text.startup section. In some cases it
> > can even be cloned into .text.startup.constprop.0 or
> > .text.startup.isra.0.
> >
> > However, the .text.startup and .text.startup.* section names are already
> > reserved for use by the compiler for __attribute__((constructor)) code.
> >
>
> Urgh, that's a 'fun' one. Is this not a -ffunction-sections bug? I mean,
> the compiler should never put regular non-reserved user symbols in a
> section it has reserved for itself, right?
Right, so there's no ambiguity *IF* we know in advance whether it was
compiled with -ffunction-sections. If so, constructor code goes in
.text.startup.*, and startup() goes in .text.startup or
.text.startup.constprop.0 or .text.startup.isra.0.
So it's not really a compiler bug because it's possible to disambiguate
those.
Problem is, we can have some objects compiled with -ffunction-sections,
and some compiled without, in the same kernel. So the disambiguation
isn't possible at link time, since for example .text.startup could be
startup() with -ffunction-sections, or it could be
__attribute__((constructor)) without -ffunction-sections.
I attempted to describe all that in patch 4.
--
Josh
^ permalink raw reply
* Re: [PATCH v2 1/6] unwind: build kernel with sframe info
From: Will Deacon @ 2025-11-14 13:34 UTC (permalink / raw)
To: Dylan Hatch
Cc: Josh Poimboeuf, Steven Rostedt, Indu Bhagat, Peter Zijlstra,
Catalin Marinas, Jiri Kosina, Roman Gushchin, Weinan Liu,
Mark Rutland, Ian Rogers, linux-toolchains, linux-kernel,
live-patching, joe.lawrence, Puranjay Mohan, Song Liu,
Prasanna Kumar T S M
In-Reply-To: <20250904223850.884188-2-dylanbhatch@google.com>
On Thu, Sep 04, 2025 at 10:38:45PM +0000, Dylan Hatch wrote:
> Use the -Wa,--gsframe flags to build the code, so GAS will generate
> a new .sframe section for the stack trace information.
> Currently, the sframe format only supports arm64 and x86_64
> architectures. Add this configuration on arm64 to enable sframe
> unwinder in the future.
>
> Signed-off-by: Weinan Liu <wnliu@google.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
> Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
> ---
> Makefile | 8 ++++++++
> arch/Kconfig | 6 ++++++
> arch/arm64/Kconfig.debug | 10 ++++++++++
> arch/arm64/kernel/vdso/Makefile | 2 +-
> include/asm-generic/vmlinux.lds.h | 15 +++++++++++++++
> 5 files changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/Makefile b/Makefile
> index b9c661913250..09972c71a3e8 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1078,6 +1078,14 @@ endif
> # Ensure compilers do not transform certain loops into calls to wcslen()
> KBUILD_CFLAGS += -fno-builtin-wcslen
>
> +# build with sframe table
> +ifdef CONFIG_SFRAME_UNWIND_TABLE
> +CC_FLAGS_SFRAME := -Wa,--gsframe
> +KBUILD_CFLAGS += $(CC_FLAGS_SFRAME)
> +KBUILD_AFLAGS += $(CC_FLAGS_SFRAME)
> +export CC_FLAGS_SFRAME
> +endif
> +
> # change __FILE__ to the relative path to the source directory
> ifdef building_out_of_srctree
> KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=)
> diff --git a/arch/Kconfig b/arch/Kconfig
> index d1b4ffd6e085..4362d2f49d91 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1782,4 +1782,10 @@ config ARCH_WANTS_PRE_LINK_VMLINUX
> config ARCH_HAS_CPU_ATTACK_VECTORS
> bool
>
> +config AS_SFRAME
> + def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc)
Is it possible to extend this check so that we reject assemblers that
emit the unsupported "sframe version one" format?
> +config SFRAME_UNWIND_TABLE
> + bool
Is this extra option actually needed for anything?
> endmenu
> diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
> index 265c4461031f..d64bf58457de 100644
> --- a/arch/arm64/Kconfig.debug
> +++ b/arch/arm64/Kconfig.debug
> @@ -20,4 +20,14 @@ config ARM64_RELOC_TEST
> depends on m
> tristate "Relocation testing module"
>
> +config SFRAME_UNWINDER
> + bool "Sframe unwinder"
> + depends on AS_SFRAME
> + depends on 64BIT
Shouldn't there be an arch dependency here as well? Since architectures
need to make use of sframe in their unwinders, I was expecting something
like 'depends on ARCH_SUPPORTS_SFRAME_UNWINDER' here.
Will
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox