Live Patching

Live Patching
 help / color / mirror / Atom feed

* [PATCH v6 1/9] sframe: Allow kernelspace sframe sections
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Generalize the sframe lookup code to support kernelspace sections. This
is done by defining a SFRAME_LOOKUP option that can be activated
separate from HAVE_UNWIND_USER_SFRAME, as there will be other client to
this library than just userspace unwind.

Sframe section location is now tracked in a separate sec_type field to
determine whether user-access functions are necessary to read the sframe
data. Relevant type delarations are moved and renamed to reflect the
non-user sframe support.

Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 MAINTAINERS                                   |   2 +-
 arch/Kconfig                                  |   4 +
 .../{unwind_user_sframe.h => unwind_sframe.h} |   6 +-
 arch/x86/include/asm/unwind_user.h            |  12 +-
 include/linux/sframe.h                        |  48 ++--
 include/linux/unwind_types.h                  |  46 +++
 include/linux/unwind_user_types.h             |  41 ---
 kernel/unwind/Makefile                        |   2 +-
 kernel/unwind/sframe.c                        | 270 ++++++++++++------
 kernel/unwind/user.c                          |  45 +--
 10 files changed, 295 insertions(+), 181 deletions(-)
 rename arch/x86/include/asm/{unwind_user_sframe.h => unwind_sframe.h} (50%)
 create mode 100644 include/linux/unwind_types.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6812f581d44b..54613c683fdb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27858,7 +27858,7 @@ F:	Documentation/driver-api/uio-howto.rst
 F:	drivers/uio/
 F:	include/linux/uio_driver.h
 
-USERSPACE STACK UNWINDING
+STACK UNWINDING
 M:	Josh Poimboeuf <jpoimboe@kernel.org>
 M:	Steven Rostedt <rostedt@goodmis.org>
 S:	Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 78dad97bf2a4..6eeafd86347b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -486,6 +486,9 @@ config AS_SFRAME3
 	def_bool $(as-instr,.cfi_startproc\n.cfi_endproc,-Wa$(comma)--gsframe-3)
 	select AS_SFRAME
 
+config UNWIND_SFRAME_LOOKUP
+	bool
+
 config UNWIND_USER
 	bool
 
@@ -496,6 +499,7 @@ config HAVE_UNWIND_USER_FP
 config HAVE_UNWIND_USER_SFRAME
 	bool
 	select UNWIND_USER
+	select UNWIND_SFRAME_LOOKUP
 
 config SFRAME_VALIDATION
 	bool "Enable .sframe section debugging"
diff --git a/arch/x86/include/asm/unwind_user_sframe.h b/arch/x86/include/asm/unwind_sframe.h
similarity index 50%
rename from arch/x86/include/asm/unwind_user_sframe.h
rename to arch/x86/include/asm/unwind_sframe.h
index d828ae1a4aac..44d42e6ffde4 100644
--- a/arch/x86/include/asm/unwind_user_sframe.h
+++ b/arch/x86/include/asm/unwind_sframe.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_UNWIND_USER_SFRAME_H
-#define _ASM_X86_UNWIND_USER_SFRAME_H
+#ifndef _ASM_X86_UNWIND_SFRAME_H
+#define _ASM_X86_UNWIND_SFRAME_H
 
 #ifdef CONFIG_X86_64
 
@@ -9,4 +9,4 @@
 
 #endif
 
-#endif /* _ASM_X86_UNWIND_USER_SFRAME_H */
+#endif /* _ASM_X86_UNWIND_SFRAME_H */
diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwind_user.h
index b80f0ec0f7a7..1c7e31ca5d8e 100644
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -54,30 +54,30 @@ static inline int unwind_user_get_reg(unsigned long *val, unsigned int regnum)
 
 #define ARCH_INIT_USER_FP_FRAME(ws)			\
 	.cfa		= {				\
-		.rule		= UNWIND_USER_CFA_RULE_FP_OFFSET,\
+		.rule		= UNWIND_CFA_RULE_FP_OFFSET,\
 		.offset		=  2*(ws),		\
 			},				\
 	.ra		= {				\
-		.rule		= UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+		.rule		= UNWIND_RULE_CFA_OFFSET_DEREF,\
 		.offset		= -1*(ws),		\
 			},				\
 	.fp		= {				\
-		.rule		= UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+		.rule		= UNWIND_RULE_CFA_OFFSET_DEREF,\
 		.offset		= -2*(ws),		\
 			},				\
 	.outermost	= false,
 
 #define ARCH_INIT_USER_FP_ENTRY_FRAME(ws)		\
 	.cfa		= {				\
-		.rule		= UNWIND_USER_CFA_RULE_SP_OFFSET,\
+		.rule		= UNWIND_CFA_RULE_SP_OFFSET,\
 		.offset		=  1*(ws),		\
 			},				\
 	.ra		= {				\
-		.rule		= UNWIND_USER_RULE_CFA_OFFSET_DEREF,\
+		.rule		= UNWIND_RULE_CFA_OFFSET_DEREF,\
 		.offset		= -1*(ws),		\
 			},				\
 	.fp		= {				\
-		.rule		= UNWIND_USER_RULE_RETAIN,\
+		.rule		= UNWIND_RULE_RETAIN,\
 			},				\
 	.outermost	= false,
 
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index b79c5ec09229..0cb2924367bc 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -3,37 +3,46 @@
 #define _LINUX_SFRAME_H
 
 #include <linux/mm_types.h>
+#include <linux/unwind_types.h>
 #include <linux/srcu.h>
-#include <linux/unwind_user_types.h>
 
-#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+#ifdef CONFIG_UNWIND_SFRAME_LOOKUP
+
+enum sframe_sec_type {
+	SFRAME_KERNEL,
+	SFRAME_USER,
+};
 
 struct sframe_section {
-	struct rcu_head	rcu;
+	struct rcu_head  rcu;
 #ifdef CONFIG_DYNAMIC_DEBUG
-	const char	*filename;
+	const char		*filename;
 #endif
-	unsigned long	sframe_start;
-	unsigned long	sframe_end;
-	unsigned long	text_start;
-	unsigned long	text_end;
-
-	unsigned long	fdes_start;
-	unsigned long	fres_start;
-	unsigned long	fres_end;
-	unsigned int	num_fdes;
-
-	signed char	ra_off;
-	signed char	fp_off;
+	enum sframe_sec_type	sec_type;
+	unsigned long		sframe_start;
+	unsigned long		sframe_end;
+	unsigned long		text_start;
+	unsigned long		text_end;
+
+	unsigned long		fdes_start;
+	unsigned long		fres_start;
+	unsigned long		fres_end;
+	unsigned int		num_fdes;
+
+	signed char		ra_off;
+	signed char		fp_off;
 };
 
+#endif /* CONFIG_UNWIND_SFRAME_LOOKUP */
+
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
 #define INIT_MM_SFRAME .sframe_mt = MTREE_INIT(sframe_mt, 0),
 extern void sframe_free_mm(struct mm_struct *mm);
 
 extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 			      unsigned long text_start, unsigned long text_end);
 extern int sframe_remove_section(unsigned long sframe_addr);
-extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame);
 
 static inline bool current_has_sframe(void)
 {
@@ -42,6 +51,8 @@ static inline bool current_has_sframe(void)
 	return mm && !mtree_empty(&mm->sframe_mt);
 }
 
+extern int sframe_find_user(unsigned long ip, struct unwind_frame *frame);
+
 #else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
 
 #define INIT_MM_SFRAME
@@ -52,9 +63,10 @@ static inline int sframe_add_section(unsigned long sframe_start, unsigned long s
 	return -ENOSYS;
 }
 static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; }
-static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; }
 static inline bool current_has_sframe(void) { return false; }
 
+static inline int sframe_find_user(unsigned long ip, struct unwind_frame *frame) { return -ENOSYS; }
+
 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
 
 #endif /* _LINUX_SFRAME_H */
diff --git a/include/linux/unwind_types.h b/include/linux/unwind_types.h
new file mode 100644
index 000000000000..08bcb0aa04aa
--- /dev/null
+++ b/include/linux/unwind_types.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_TYPES_H
+#define _LINUX_UNWIND_TYPES_H
+
+#define UNWIND_RULE_DEREF			BIT(31)
+
+enum unwind_cfa_rule {
+	UNWIND_CFA_RULE_SP_OFFSET,		/* CFA = SP + offset */
+	UNWIND_CFA_RULE_FP_OFFSET,		/* CFA = FP + offset */
+	UNWIND_CFA_RULE_REG_OFFSET,	/* CFA = reg + offset */
+	/* DEREF variants */
+	UNWIND_CFA_RULE_REG_OFFSET_DEREF =	/* CFA = *(reg + offset) */
+		UNWIND_CFA_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_cfa_rule_data {
+	enum unwind_cfa_rule rule;
+	s32 offset;
+	unsigned int regnum;
+};
+
+enum unwind_rule {
+	UNWIND_RULE_RETAIN,		/* entity = entity */
+	UNWIND_RULE_CFA_OFFSET,		/* entity = CFA + offset */
+	UNWIND_RULE_REG_OFFSET,		/* entity = register + offset */
+	/* DEREF variants */
+	UNWIND_RULE_CFA_OFFSET_DEREF =	/* entity = *(CFA + offset) */
+		UNWIND_RULE_CFA_OFFSET | UNWIND_RULE_DEREF,
+	UNWIND_RULE_REG_OFFSET_DEREF =	/* entity = *(register + offset) */
+		UNWIND_RULE_REG_OFFSET | UNWIND_RULE_DEREF,
+};
+
+struct unwind_rule_data {
+	enum unwind_rule rule;
+	s32 offset;
+	unsigned int regnum;
+};
+
+struct unwind_frame {
+	struct unwind_cfa_rule_data cfa;
+	struct unwind_rule_data ra;
+	struct unwind_rule_data fp;
+	bool outermost;
+};
+
+#endif /* _LINUX_UNWIND_TYPES_H */
diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h
index 059e5c76f2f3..646e5fb774db 100644
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -27,47 +27,6 @@ struct unwind_stacktrace {
 	unsigned long	*entries;
 };
 
-#define UNWIND_USER_RULE_DEREF			BIT(31)
-
-enum unwind_user_cfa_rule {
-	UNWIND_USER_CFA_RULE_SP_OFFSET,		/* CFA = SP + offset */
-	UNWIND_USER_CFA_RULE_FP_OFFSET,		/* CFA = FP + offset */
-	UNWIND_USER_CFA_RULE_REG_OFFSET,	/* CFA = reg + offset */
-	/* DEREF variants */
-	UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF =	/* CFA = *(reg + offset) */
-		UNWIND_USER_CFA_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_cfa_rule_data {
-	enum unwind_user_cfa_rule rule;
-	s32 offset;
-	unsigned int regnum;
-};
-
-enum unwind_user_rule {
-	UNWIND_USER_RULE_RETAIN,		/* entity = entity */
-	UNWIND_USER_RULE_CFA_OFFSET,		/* entity = CFA + offset */
-	UNWIND_USER_RULE_REG_OFFSET,		/* entity = register + offset */
-	/* DEREF variants */
-	UNWIND_USER_RULE_CFA_OFFSET_DEREF =	/* entity = *(CFA + offset) */
-		UNWIND_USER_RULE_CFA_OFFSET | UNWIND_USER_RULE_DEREF,
-	UNWIND_USER_RULE_REG_OFFSET_DEREF =	/* entity = *(register + offset) */
-		UNWIND_USER_RULE_REG_OFFSET | UNWIND_USER_RULE_DEREF,
-};
-
-struct unwind_user_rule_data {
-	enum unwind_user_rule rule;
-	s32 offset;
-	unsigned int regnum;
-};
-
-struct unwind_user_frame {
-	struct unwind_user_cfa_rule_data cfa;
-	struct unwind_user_rule_data ra;
-	struct unwind_user_rule_data fp;
-	bool outermost;
-};
-
 struct unwind_user_state {
 	unsigned long				ip;
 	unsigned long				sp;
diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile
index 146038165865..c5f9f8124564 100644
--- a/kernel/unwind/Makefile
+++ b/kernel/unwind/Makefile
@@ -1,2 +1,2 @@
  obj-$(CONFIG_UNWIND_USER)		+= user.o deferred.o
- obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME)	+= sframe.o
+ obj-$(CONFIG_UNWIND_SFRAME_LOOKUP)	+= sframe.o
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index 5400f481b05d..a2ab9a3e07b4 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -13,8 +13,8 @@
 #include <linux/string_helpers.h>
 #include <linux/sframe.h>
 #include <linux/syscalls.h>
-#include <asm/unwind_user_sframe.h>
-#include <linux/unwind_user_types.h>
+#include <linux/unwind_types.h>
+#include <asm/unwind_sframe.h>
 #include <uapi/linux/stacktrace.h>
 
 #include "sframe.h"
@@ -46,8 +46,6 @@ struct sframe_fre_internal {
 	unsigned char	dw_size;
 };
 
-DEFINE_STATIC_SRCU(sframe_srcu);
-
 static __always_inline unsigned char fre_type_to_size(unsigned char fre_type)
 {
 	if (fre_type > 2)
@@ -62,6 +60,77 @@ static __always_inline unsigned char dataword_size_enum_to_size(unsigned char da
 	return 1 << dataword_size;
 }
 
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+DEFINE_STATIC_SRCU(sframe_srcu);
+
+#define UNSAFE_USER_COPY(to, from, size, label)				\
+	unsafe_copy_from_user(to, (void __user *)from, size, label)
+
+#define UNSAFE_USER_GET(to, from, type, label)				\
+	unsafe_get_user(to, (type __user *)from, label)
+
+#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#define UNSAFE_USER_COPY(to, from, size, label) do {			\
+	(void)to; (void)from; (void)size;				\
+	goto label;							\
+} while (0)
+
+#define UNSAFE_USER_GET(to, from, type, label) do {			\
+	(void)to; (void)from;						\
+	goto label;							\
+} while (0)
+
+#endif /* !CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+#define KERNEL_COPY(to, from, size, label) memcpy(to, (void *)from, size)
+#define KERNEL_GET(to, from, type, label) ({ (to) = *(type *)(from); })
+
+#else /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+#define KERNEL_COPY(to, from, size, label) do {				\
+	(void)(to); (void)(from); (void)size;				\
+	goto label;							\
+} while (0)
+
+#define KERNEL_GET(to, from, type, label) do {				\
+	(void)(to); (void)(from);					\
+	goto label;							\
+} while (0)
+
+#endif /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+#define DATA_COPY(sec, to, from, size, label)			\
+({								\
+	switch (sec->sec_type) {				\
+	case SFRAME_KERNEL:					\
+		KERNEL_COPY(to, from, size, label);		\
+		break;						\
+	case SFRAME_USER:					\
+		UNSAFE_USER_COPY(to, from, size, label);	\
+		break;						\
+	default:						\
+		goto label;					\
+	}							\
+})
+
+#define DATA_GET(sec, to, from, type, label)			\
+({								\
+	switch (sec->sec_type) {				\
+	case SFRAME_KERNEL:					\
+		KERNEL_GET(to, from, type, label);		\
+		break;						\
+	case SFRAME_USER:					\
+		UNSAFE_USER_GET(to, from, type, label);		\
+		break;						\
+	default:						\
+		goto label;					\
+	}							\
+})
+
 static __always_inline int __read_fde(struct sframe_section *sec,
 				      unsigned int fde_num,
 				      struct sframe_fde_internal *fde)
@@ -71,8 +140,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
 	struct sframe_fda_v3 _fda;
 
 	fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde_v3));
-	unsafe_copy_from_user(&_fde, (void __user *)fde_addr,
-			      sizeof(struct sframe_fde_v3), Efault);
+	DATA_COPY(sec, &_fde, fde_addr,
+		  sizeof(struct sframe_fde_v3), Efault);
 
 	func_addr = fde_addr + _fde.func_start_off;
 	if (func_addr < sec->text_start || func_addr >= sec->text_end)
@@ -81,8 +150,8 @@ static __always_inline int __read_fde(struct sframe_section *sec,
 	fda_addr = sec->fres_start + _fde.fres_off;
 	if (fda_addr + sizeof(struct sframe_fda_v3) > sec->fres_end)
 		return -EINVAL;
-	unsafe_copy_from_user(&_fda, (void __user *)fda_addr,
-			      sizeof(struct sframe_fda_v3), Efault);
+	DATA_COPY(sec, &_fda, fda_addr,
+		  sizeof(struct sframe_fda_v3), Efault);
 
 	fde->func_addr	= func_addr;
 	fde->func_size	= _fde.func_size;
@@ -104,21 +173,21 @@ static __always_inline int __find_fde(struct sframe_section *sec,
 				      struct sframe_fde_internal *fde)
 {
 	unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX;
-	struct sframe_fde_v3 __user *first, *low, *high, *found = NULL;
+	struct sframe_fde_v3 *first, *low, *high, *found = NULL;
 	int ret;
 
-	first = (void __user *)sec->fdes_start;
+	first = (void *)sec->fdes_start;
 	low = first;
 	high = first + sec->num_fdes - 1;
 
 	while (low <= high) {
-		struct sframe_fde_v3 __user *mid;
+		struct sframe_fde_v3 *mid;
 		s64 func_off;
 		unsigned long func_addr;
 
 		mid = low + ((high - low) / 2);
 
-		unsafe_get_user(func_off, (s64 __user *)mid, Efault);
+		DATA_GET(sec, func_off, mid, s64, Efault);
 		func_addr = (unsigned long)mid + func_off;
 
 		if (ip >= func_addr) {
@@ -156,47 +225,47 @@ static __always_inline int __find_fde(struct sframe_section *sec,
 	return -EFAULT;
 }
 
-#define ____UNSAFE_GET_USER_INC(to, from, type, label)			\
+#define ____GET_INC(sec, to, from, type, label)				\
 ({									\
 	type __to;							\
-	unsafe_get_user(__to, (type __user *)from, label);		\
+	DATA_GET(sec, __to, from, type, label);				\
 	from += sizeof(__to);						\
 	to = __to;							\
 })
 
-#define __UNSAFE_GET_USER_INC(to, from, size, label, u_or_s)		\
+#define __GET_INC(sec, to, from, size, label, u_or_s)			\
 ({									\
 	switch (size) {							\
 	case 1:								\
-		____UNSAFE_GET_USER_INC(to, from, u_or_s##8, label);	\
+		____GET_INC(sec, to, from, u_or_s##8, label);		\
 		break;							\
 	case 2:								\
-		____UNSAFE_GET_USER_INC(to, from, u_or_s##16, label);	\
+		____GET_INC(sec, to, from, u_or_s##16, label);		\
 		break;							\
 	case 4:								\
-		____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label);	\
+		____GET_INC(sec, to, from, u_or_s##32, label);		\
 		break;							\
 	default:							\
 		return -EFAULT;						\
 	}								\
 })
 
-#define UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label)		\
-	__UNSAFE_GET_USER_INC(to, from, size, label, u)
+#define GET_UNSIGNED_INC(sec, to, from, size, label)			\
+	__GET_INC(sec, to, from, size, label, u)
 
-#define UNSAFE_GET_USER_SIGNED_INC(to, from, size, label)		\
-	__UNSAFE_GET_USER_INC(to, from, size, label, s)
+#define GET_SIGNED_INC(sec, to, from, size, label)			\
+	__GET_INC(sec, to, from, size, label, s)
 
-#define UNSAFE_GET_USER_INC(to, from, size, label)				\
-	_Generic(to,								\
-		 u8 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
-		 u16 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
-		 u32 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
-		 u64 :	UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label),	\
-		 s8 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label),	\
-		 s16 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label),	\
-		 s32 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label),	\
-		 s64 :	UNSAFE_GET_USER_SIGNED_INC(to, from, size, label))
+#define GET_INC(sec, to, from, size, label)				\
+	_Generic(to,							\
+		 u8 :	GET_UNSIGNED_INC(sec, to, from, size, label),	\
+		 u16 :	GET_UNSIGNED_INC(sec, to, from, size, label),	\
+		 u32 :	GET_UNSIGNED_INC(sec, to, from, size, label),	\
+		 u64 :	GET_UNSIGNED_INC(sec, to, from, size, label),	\
+		 s8 :	GET_SIGNED_INC(sec, to, from, size, label),	\
+		 s16 :	GET_SIGNED_INC(sec, to, from, size, label),	\
+		 s32 :	GET_SIGNED_INC(sec, to, from, size, label),	\
+		 s64 :	GET_SIGNED_INC(sec, to, from, size, label))
 
 static __always_inline int
 __read_default_fre_datawords(struct sframe_section *sec,
@@ -209,19 +278,19 @@ __read_default_fre_datawords(struct sframe_section *sec,
 	s32 cfa_off, ra_off, fp_off;
 	unsigned int cfa_regnum;
 
-	UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+	GET_INC(sec, cfa_off, cur, dataword_size, Efault);
 	dataword_count--;
 
 	ra_off = sec->ra_off;
 	if (!ra_off && dataword_count) {
 		dataword_count--;
-		UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+		GET_INC(sec, ra_off, cur, dataword_size, Efault);
 	}
 
 	fp_off = sec->fp_off;
 	if (!fp_off && dataword_count) {
 		dataword_count--;
-		UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+		GET_INC(sec, fp_off, cur, dataword_size, Efault);
 	}
 
 	if (dataword_count)
@@ -257,17 +326,17 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
 
 	if (dataword_count < 2)
 		return -EFAULT;
-	UNSAFE_GET_USER_INC(cfa_ctl, cur, dataword_size, Efault);
-	UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
+	GET_INC(sec, cfa_ctl, cur, dataword_size, Efault);
+	GET_INC(sec, cfa_off, cur, dataword_size, Efault);
 	dataword_count -= 2;
 
 	ra_off = sec->ra_off;
 	ra_ctl = ra_off ? 2 : 0; /* regnum=0, deref_p=(ra_off != 0), reg_p=0 */
 	if (dataword_count >= 2) {
-		UNSAFE_GET_USER_INC(ra_ctl, cur, dataword_size, Efault);
+		GET_INC(sec, ra_ctl, cur, dataword_size, Efault);
 		dataword_count--;
 		if (ra_ctl) {
-			UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
+			GET_INC(sec, ra_off, cur, dataword_size, Efault);
 			dataword_count--;
 		} else {
 			/* Padding RA location info */
@@ -278,10 +347,10 @@ __read_flex_fde_fre_datawords(struct sframe_section *sec,
 	fp_off = sec->fp_off;
 	fp_ctl = fp_off ? 2 : 0; /* regnum=0, deref_p=(fp_off != 0), reg_p=0 */
 	if (dataword_count >= 2) {
-		UNSAFE_GET_USER_INC(fp_ctl, cur, dataword_size, Efault);
+		GET_INC(sec, fp_ctl, cur, dataword_size, Efault);
 		dataword_count--;
 		if (fp_ctl) {
-			UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
+			GET_INC(sec, fp_off, cur, dataword_size, Efault);
 			dataword_count--;
 		} else {
 			/* Padding FP location info */
@@ -355,11 +424,11 @@ static __always_inline int __read_fre(struct sframe_section *sec,
 	if (fre_addr + addr_size + 1 > sec->fres_end)
 		return -EFAULT;
 
-	UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
+	GET_INC(sec, ip_off, cur, addr_size, Efault);
 	if (fde_pctype == SFRAME_FDE_PCTYPE_INC && ip_off > fde->func_size)
 		return -EFAULT;
 
-	UNSAFE_GET_USER_INC(info, cur, 1, Efault);
+	GET_INC(sec, info, cur, 1, Efault);
 	dataword_count = SFRAME_V3_FRE_DATAWORD_COUNT(info);
 	dataword_size  = dataword_size_enum_to_size(SFRAME_V3_FRE_DATAWORD_SIZE(info));
 	if (!dataword_size)
@@ -382,7 +451,7 @@ static __always_inline int __read_fre(struct sframe_section *sec,
 }
 
 static __always_inline int
-sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
+sframe_init_cfa_rule_data(struct unwind_cfa_rule_data *cfa_rule_data,
 			  u32 ctlword, s32 offset)
 {
 	bool deref_p = SFRAME_V3_FLEX_FDE_CTRLWORD_DEREF_P(ctlword);
@@ -393,13 +462,13 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
 
 		switch (regnum) {
 		case SFRAME_REG_SP:
-			cfa_rule_data->rule = UNWIND_USER_CFA_RULE_SP_OFFSET;
+			cfa_rule_data->rule = UNWIND_CFA_RULE_SP_OFFSET;
 			break;
 		case SFRAME_REG_FP:
-			cfa_rule_data->rule = UNWIND_USER_CFA_RULE_FP_OFFSET;
+			cfa_rule_data->rule = UNWIND_CFA_RULE_FP_OFFSET;
 			break;
 		default:
-			cfa_rule_data->rule = UNWIND_USER_CFA_RULE_REG_OFFSET;
+			cfa_rule_data->rule = UNWIND_CFA_RULE_REG_OFFSET;
 			cfa_rule_data->regnum = regnum;
 		}
 	} else {
@@ -407,7 +476,7 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
 	}
 
 	if (deref_p)
-		cfa_rule_data->rule |= UNWIND_USER_RULE_DEREF;
+		cfa_rule_data->rule |= UNWIND_RULE_DEREF;
 
 	cfa_rule_data->offset = offset;
 
@@ -415,27 +484,27 @@ sframe_init_cfa_rule_data(struct unwind_user_cfa_rule_data *cfa_rule_data,
 }
 
 static __always_inline void
-sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
+sframe_init_rule_data(struct unwind_rule_data *rule_data,
 		      u32 ctlword, s32 offset)
 {
 	bool deref_p = SFRAME_V3_FLEX_FDE_CTRLWORD_DEREF_P(ctlword);
 	bool reg_p = SFRAME_V3_FLEX_FDE_CTRLWORD_REG_P(ctlword);
 
 	if (!ctlword && !offset) {
-		rule_data->rule = UNWIND_USER_RULE_RETAIN;
+		rule_data->rule = UNWIND_RULE_RETAIN;
 		return;
 	}
 	if (reg_p) {
 		unsigned int regnum = SFRAME_V3_FLEX_FDE_CTRLWORD_REGNUM(ctlword);
 
-		rule_data->rule = UNWIND_USER_RULE_REG_OFFSET;
+		rule_data->rule = UNWIND_RULE_REG_OFFSET;
 		rule_data->regnum = regnum;
 	} else {
-		rule_data->rule = UNWIND_USER_RULE_CFA_OFFSET;
+		rule_data->rule = UNWIND_RULE_CFA_OFFSET;
 	}
 
 	if (deref_p)
-		rule_data->rule |= UNWIND_USER_RULE_DEREF;
+		rule_data->rule |= UNWIND_RULE_DEREF;
 
 	rule_data->offset = offset;
 }
@@ -443,7 +512,7 @@ sframe_init_rule_data(struct unwind_user_rule_data *rule_data,
 static __always_inline int __find_fre(struct sframe_section *sec,
 				      struct sframe_fde_internal *fde,
 				      unsigned long ip,
-				      struct unwind_user_frame *frame)
+				      struct unwind_frame *frame)
 {
 	unsigned char fde_pctype = SFRAME_V3_FDE_PCTYPE(fde->info);
 	struct sframe_fre_internal *fre, *prev_fre = NULL;
@@ -503,40 +572,18 @@ static __always_inline int __find_fre(struct sframe_section *sec,
 	return 0;
 }
 
-int sframe_find(unsigned long ip, struct unwind_user_frame *frame)
+static __always_inline int __sframe_find(struct sframe_section *sec,
+					 unsigned long ip,
+					 struct unwind_frame *frame)
 {
-	struct mm_struct *mm = current->mm;
-	struct sframe_section *sec;
 	struct sframe_fde_internal fde;
 	int ret;
 
-	if (!mm)
-		return -EINVAL;
-
-	guard(srcu)(&sframe_srcu);
-
-	sec = mtree_load(&mm->sframe_mt, ip);
-	if (!sec)
-		return -EINVAL;
-
-	if (!user_read_access_begin((void __user *)sec->sframe_start,
-				    sec->sframe_end - sec->sframe_start))
-		return -EFAULT;
-
 	ret = __find_fde(sec, ip, &fde);
 	if (ret)
-		goto end;
-
-	ret = __find_fre(sec, &fde, ip, frame);
-end:
-	user_read_access_end();
-
-	if (ret == -EFAULT) {
-		dbg_sec("removing bad .sframe section\n");
-		WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
-	}
+		return ret;
 
-	return ret;
+	return __find_fre(sec, &fde, ip, frame);
 }
 
 #ifdef CONFIG_SFRAME_VALIDATION
@@ -661,20 +708,23 @@ static int sframe_validate_section(struct sframe_section *sec) { return 0; }
 #endif /* !CONFIG_SFRAME_VALIDATION */
 
 
-static void free_section(struct sframe_section *sec)
-{
-	dbg_free(sec);
-	kfree(sec);
-}
-
 static int sframe_read_header(struct sframe_section *sec)
 {
 	unsigned long header_end, fdes_start, fdes_end, fres_start, fres_end;
 	struct sframe_header shdr;
 	unsigned int num_fdes;
 
-	if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
-		dbg_sec("header usercopy failed\n");
+	switch (sec->sec_type) {
+	case SFRAME_USER:
+		if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))) {
+			dbg_sec("header usercopy failed\n");
+			return -EFAULT;
+		}
+		break;
+	case SFRAME_KERNEL:
+		shdr = *(struct sframe_header *)sec->sframe_start;
+		break;
+	default:
 		return -EFAULT;
 	}
 
@@ -721,6 +771,45 @@ static int sframe_read_header(struct sframe_section *sec)
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
+
+int sframe_find_user(unsigned long ip, struct unwind_frame *frame)
+{
+	struct mm_struct *mm = current->mm;
+	struct sframe_section *sec;
+	int ret;
+
+	if (!mm)
+		return -EINVAL;
+
+	guard(srcu)(&sframe_srcu);
+
+	sec = mtree_load(&mm->sframe_mt, ip);
+	if (!sec)
+		return -EINVAL;
+
+	if (!user_read_access_begin((void __user *)sec->sframe_start,
+				    sec->sframe_end - sec->sframe_start))
+		return -EFAULT;
+
+	ret = __sframe_find(sec, ip, frame);
+
+	user_read_access_end();
+
+	if (ret == -EFAULT) {
+		dbg_sec("removing bad .sframe section\n");
+		WARN_ON_ONCE(sframe_remove_section(sec->sframe_start));
+	}
+
+	return ret;
+}
+
+static void free_section(struct sframe_section *sec)
+{
+	dbg_free(sec);
+	kfree(sec);
+}
+
 int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 		       unsigned long text_start, unsigned long text_end)
 {
@@ -757,6 +846,7 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end,
 	if (!sec)
 		return -ENOMEM;
 
+	sec->sec_type		= SFRAME_USER;
 	sec->sframe_start	= sframe_start;
 	sec->sframe_end		= sframe_end;
 	sec->text_start		= text_start;
@@ -877,3 +967,5 @@ SYSCALL_DEFINE5(stacktrace_setup, int, op, unsigned long, addr_start,
 	}
 	return -EINVAL;
 }
+
+#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index 3d596da588d0..5670579e3990 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -8,6 +8,7 @@
 #include <linux/unwind_user.h>
 #include <linux/uaccess.h>
 #include <linux/sframe.h>
+#include <linux/unwind_types.h>
 
 #define for_each_user_frame(state) \
 	for (unwind_user_start(state); !(state)->done; unwind_user_next(state))
@@ -28,7 +29,7 @@ get_user_word(unsigned long *word, unsigned long base, int off, unsigned int ws)
 }
 
 static int unwind_user_next_common(struct unwind_user_state *state,
-				   const struct unwind_user_frame *frame)
+				   const struct unwind_frame *frame)
 {
 	unsigned long cfa, fp, ra;
 
@@ -40,16 +41,16 @@ static int unwind_user_next_common(struct unwind_user_state *state,
 
 	/* Get the Canonical Frame Address (CFA) */
 	switch (frame->cfa.rule) {
-	case UNWIND_USER_CFA_RULE_SP_OFFSET:
+	case UNWIND_CFA_RULE_SP_OFFSET:
 		cfa = state->sp;
 		break;
-	case UNWIND_USER_CFA_RULE_FP_OFFSET:
+	case UNWIND_CFA_RULE_FP_OFFSET:
 		if (state->fp < state->sp)
 			return -EINVAL;
 		cfa = state->fp;
 		break;
-	case UNWIND_USER_CFA_RULE_REG_OFFSET:
-	case UNWIND_USER_CFA_RULE_REG_OFFSET_DEREF:
+	case UNWIND_CFA_RULE_REG_OFFSET:
+	case UNWIND_CFA_RULE_REG_OFFSET_DEREF:
 		if (!state->topmost || unwind_user_get_reg(&cfa, frame->cfa.regnum))
 			return -EINVAL;
 		break;
@@ -58,7 +59,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
 		return -EINVAL;
 	}
 	cfa += frame->cfa.offset;
-	if (frame->cfa.rule & UNWIND_USER_RULE_DEREF &&
+	if (frame->cfa.rule & UNWIND_RULE_DEREF &&
 	    get_user_word(&cfa, cfa, 0, state->ws))
 		return -EINVAL;
 
@@ -76,19 +77,19 @@ static int unwind_user_next_common(struct unwind_user_state *state,
 
 	/* Get the Return Address (RA) */
 	switch (frame->ra.rule) {
-	case UNWIND_USER_RULE_RETAIN:
+	case UNWIND_RULE_RETAIN:
 		if (!state->topmost || unwind_user_get_ra_reg(&ra))
 			return -EINVAL;
 		break;
 	/*
-	 * UNWIND_USER_RULE_CFA_OFFSET doesn't make sense for RA.
+	 * UNWIND_RULE_CFA_OFFSET doesn't make sense for RA.
 	 * A return address cannot legitimately be a stack address.
 	 */
-	case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+	case UNWIND_RULE_CFA_OFFSET_DEREF:
 		ra = cfa + frame->ra.offset;
 		break;
-	case UNWIND_USER_RULE_REG_OFFSET:
-	case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+	case UNWIND_RULE_REG_OFFSET:
+	case UNWIND_RULE_REG_OFFSET_DEREF:
 		if (!state->topmost || unwind_user_get_reg(&ra, frame->ra.regnum))
 			return -EINVAL;
 		ra += frame->ra.offset;
@@ -97,24 +98,24 @@ static int unwind_user_next_common(struct unwind_user_state *state,
 		WARN_ON_ONCE(1);
 		return -EINVAL;
 	}
-	if (frame->ra.rule & UNWIND_USER_RULE_DEREF &&
+	if (frame->ra.rule & UNWIND_RULE_DEREF &&
 	    get_user_word(&ra, ra, 0, state->ws))
 		return -EINVAL;
 
 	/* Get the Frame Pointer (FP) */
 	switch (frame->fp.rule) {
-	case UNWIND_USER_RULE_RETAIN:
+	case UNWIND_RULE_RETAIN:
 		fp = state->fp;
 		break;
 	/*
-	 * UNWIND_USER_RULE_CFA_OFFSET is currently not used for FP
+	 * UNWIND_RULE_CFA_OFFSET is currently not used for FP
 	 * (e.g. SFrame cannot represent this rule).
 	 */
-	case UNWIND_USER_RULE_CFA_OFFSET_DEREF:
+	case UNWIND_RULE_CFA_OFFSET_DEREF:
 		fp = cfa + frame->fp.offset;
 		break;
-	case UNWIND_USER_RULE_REG_OFFSET:
-	case UNWIND_USER_RULE_REG_OFFSET_DEREF:
+	case UNWIND_RULE_REG_OFFSET:
+	case UNWIND_RULE_REG_OFFSET_DEREF:
 		if (!state->topmost || unwind_user_get_reg(&fp, frame->fp.regnum))
 			return -EINVAL;
 		fp += frame->fp.offset;
@@ -123,7 +124,7 @@ static int unwind_user_next_common(struct unwind_user_state *state,
 		WARN_ON_ONCE(1);
 		return -EINVAL;
 	}
-	if (frame->fp.rule & UNWIND_USER_RULE_DEREF &&
+	if (frame->fp.rule & UNWIND_RULE_DEREF &&
 	    get_user_word(&fp, fp, 0, state->ws))
 		return -EINVAL;
 
@@ -139,13 +140,13 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
 	struct pt_regs *regs = task_pt_regs(current);
 
 	if (state->topmost && unwind_user_at_function_start(regs)) {
-		const struct unwind_user_frame fp_entry_frame = {
+		const struct unwind_frame fp_entry_frame = {
 			ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
 		};
 		return unwind_user_next_common(state, &fp_entry_frame);
 	}
 
-	const struct unwind_user_frame fp_frame = {
+	const struct unwind_frame fp_frame = {
 		ARCH_INIT_USER_FP_FRAME(state->ws)
 	};
 	return unwind_user_next_common(state, &fp_frame);
@@ -153,10 +154,10 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
 
 static int unwind_user_next_sframe(struct unwind_user_state *state)
 {
-	struct unwind_user_frame frame;
+	struct unwind_frame frame;
 
 	/* sframe expects the frame to be local storage */
-	if (sframe_find(state->ip, &frame))
+	if (sframe_find_user(state->ip, &frame))
 		return -ENOENT;
 	return unwind_user_next_common(state, &frame);
 }
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 2/9] arm64, unwind: build kernel with sframe V3 info
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Build with -Wa,--gsframe-3 flags to generate a .sframe section. This
will be used for in-kernel reliable stacktrace in cases where the frame
pointer alone is insufficient.

Currently, the sframe format only supports arm64, x86_64 and s390x
architectures.

Signed-off-by: Weinan Liu <wnliu@google.com>
Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 MAINTAINERS                            |  2 +-
 Makefile                               |  8 ++++++++
 arch/Kconfig                           | 21 +++++++++++++++++++++
 arch/arm64/Kconfig                     |  1 +
 arch/arm64/include/asm/unwind_sframe.h |  8 ++++++++
 arch/arm64/kernel/vdso/Makefile        |  2 +-
 include/asm-generic/sections.h         |  4 ++++
 include/asm-generic/vmlinux.lds.h      | 15 +++++++++++++++
 8 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/unwind_sframe.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 54613c683fdb..046d06dcdb86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27862,8 +27862,8 @@ STACK UNWINDING
 M:	Josh Poimboeuf <jpoimboe@kernel.org>
 M:	Steven Rostedt <rostedt@goodmis.org>
 S:	Maintained
+F:	arch/*/include/asm/unwind_sframe.h
 F:	arch/*/include/asm/unwind_user.h
-F:	arch/*/include/asm/unwind_user_sframe.h
 F:	include/asm-generic/unwind_user.h
 F:	include/linux/sframe.h
 F:	include/linux/unwind*.h
diff --git a/Makefile b/Makefile
index 9f88dcaae382..227fda16deb1 100644
--- a/Makefile
+++ b/Makefile
@@ -1147,6 +1147,14 @@ endif
 # Ensure compilers do not transform certain loops into calls to wcslen()
 KBUILD_CFLAGS += -fno-builtin-wcslen
 
+# build with sframe table
+ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+CC_FLAGS_SFRAME := -Wa,--gsframe-3
+KBUILD_CFLAGS	+= $(CC_FLAGS_SFRAME)
+KBUILD_AFLAGS	+= $(CC_FLAGS_SFRAME)
+export CC_FLAGS_SFRAME
+endif
+
 # change __FILE__ to the relative path to the source directory
 ifdef building_out_of_srctree
 KBUILD_CPPFLAGS += -fmacro-prefix-map=$(srcroot)/=
diff --git a/arch/Kconfig b/arch/Kconfig
index 6eeafd86347b..f931b5848593 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -520,6 +520,27 @@ config SFRAME_VALIDATION
 
 	  If unsure, say N.
 
+config ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
+	bool
+	help
+	  An architecture can select this if it enables the SFrame (Simple
+	  Frame) unwinder for unwinding kernel stack traces. It uses an unwind
+	  table that is directly generated by the toolchain based on DWARF CFI
+	  information.
+
+config HAVE_UNWIND_KERNEL_SFRAME
+	bool "Sframe unwinder"
+	depends on AS_SFRAME3
+	depends on 64BIT
+	depends on ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
+	select UNWIND_SFRAME_LOOKUP
+	help
+	  This option enables the SFrame (Simple Frame) unwinder for unwinding
+	  kernel stack traces. It uses unwind an table that is directly
+	  generated by the toolchain based on DWARF CFI information. In
+	  practice, this can provide more reliable stacktrace results than
+	  unwinding with frame pointers alone.
+
 config HAVE_PERF_REGS
 	bool
 	help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fe60738e5943..c3ef478469e5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -86,6 +86,7 @@ config ARM64
 	select ARCH_SUPPORTS_SCHED_SMT
 	select ARCH_SUPPORTS_SCHED_CLUSTER
 	select ARCH_SUPPORTS_SCHED_MC
+	select ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
 	select ARCH_WANT_DEFAULT_BPF_JIT
diff --git a/arch/arm64/include/asm/unwind_sframe.h b/arch/arm64/include/asm/unwind_sframe.h
new file mode 100644
index 000000000000..876412881196
--- /dev/null
+++ b/arch/arm64/include/asm/unwind_sframe.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_UNWIND_SFRAME_H
+#define _ASM_ARM64_UNWIND_SFRAME_H
+
+#define SFRAME_REG_SP	31
+#define SFRAME_REG_FP	29
+
+#endif /* _ASM_ARM64_UNWIND_SFRAME_H */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 7dec05dd33b7..c60ef921956f 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -38,7 +38,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
 			$(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
 			$(GCC_PLUGINS_CFLAGS) \
-			$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+			$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(CC_FLAGS_SFRAME) \
 			-Wmissing-prototypes -Wmissing-declarations
 
 CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 0755bc39b0d8..336d27011a58 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -31,6 +31,7 @@
  *	__irqentry_text_start, __irqentry_text_end
  *	__softirqentry_text_start, __softirqentry_text_end
  *	__start_opd, __end_opd
+ *	__start_sframe, __end_sframe
  */
 extern char _text[], _stext[], _etext[];
 extern char _data[], _sdata[], _edata[];
@@ -53,6 +54,9 @@ extern char __ctors_start[], __ctors_end[];
 /* Start and end of .opd section - used for function descriptors. */
 extern char __start_opd[], __end_opd[];
 
+/* Start and end of .sframe section - used for stack unwinding. */
+extern char __start_sframe[], __end_sframe[];
+
 /* Start and end of instrumentation protected text section */
 extern char __noinstr_text_start[], __noinstr_text_end[];
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 60c8c22fd3e4..6aeed39097dd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -491,6 +491,8 @@
 		*(.rodata1)						\
 	}								\
 									\
+	SFRAME								\
+									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
 		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_early,  _pci_fixups_early,  __start, __end) \
@@ -904,6 +906,19 @@
 #define TRACEDATA
 #endif
 
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+#define SFRAME							\
+	/* sframe */						\
+	.sframe : AT(ADDR(.sframe) - LOAD_OFFSET) {		\
+		__start_sframe = .;			\
+		KEEP(*(.sframe))				\
+		KEEP(*(.init.sframe))				\
+		__end_sframe = .;			\
+	}
+#else
+#define SFRAME
+#endif
+
 #ifdef CONFIG_PRINTK_INDEX
 #define PRINTK_INDEX							\
 	.printk_index : AT(ADDR(.printk_index) - LOAD_OFFSET) {		\
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 3/9] arm64: entry: add unwind info for call_on_irq_stack()
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

From: Weinan Liu <wnliu@google.com>

DWARF CFI (Call Frame Information) specifies how to recover the return
address and callee-saved registers at each PC in a given function.
Compilers are able to generate the CFI annotations when they compile
the code to assembly language. For handcrafted assembly, we need to
annotate them by hand.

Frame pointers alone are usually sufficient to recover stack frames
(without CFI), except at the exception boundary, where more information
is needed to determine if the LR is live.

Since an exception can be taken from call_on_irq_stack(), annotate it
with CFI. The actual entry assembly functions are left untouched, since
they are not expected to take exceptions themselves.

Signed-off-by: Weinan Liu <wnliu@google.com>
Suggested-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/kernel/entry.S | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e0db14e9c843..5f4172ba4274 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,12 @@
 #include <asm/asm-uaccess.h>
 #include <asm/unistd.h>
 
+/*
+ * Do not generate .eh_frame.  Only generate .debug_frame and optionally
+ * .sframe (via assembler option --gsframe[-N]).
+ */
+	.cfi_sections .debug_frame
+
 	.macro	clear_gp_regs
 	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
 	mov	x\n, xzr
@@ -870,6 +876,7 @@ NOKPROBE(ret_from_fork)
  * Calls func(regs) using this CPU's irq stack and shadow irq stack.
  */
 SYM_FUNC_START(call_on_irq_stack)
+	.cfi_startproc
 	save_and_disable_daif x9
 #ifdef CONFIG_SHADOW_CALL_STACK
 	get_current_task x16
@@ -880,6 +887,9 @@ SYM_FUNC_START(call_on_irq_stack)
 	/* Create a frame record to save our LR and SP (implicit in FP) */
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
+	.cfi_def_cfa 29, 16
+	.cfi_offset 29, -16
+	.cfi_offset 30, -8
 
 	ldr_this_cpu x16, irq_stack_ptr, x17
 
@@ -895,9 +905,13 @@ SYM_FUNC_START(call_on_irq_stack)
 	 */
 	mov	sp, x29
 	ldp	x29, x30, [sp], #16
+	.cfi_restore 29
+	.cfi_restore 30
+	.cfi_def_cfa 31, 0
 	scs_load_current
 	restore_irq x9
 	ret
+	.cfi_endproc
 SYM_FUNC_END(call_on_irq_stack)
 NOKPROBE(call_on_irq_stack)
 
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 4/9] arm64, crypto/lib: Annotate leaf functions with CFI info.
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

DWARF CFI (Call Frame Information) specifies how to recover return
address and callee-saved registers for annotated functions. These
annotations are generated by the compiler, but for assembly, they must
be annotated by hand.

Add simple CFI annotations to assembly leaf functions so that the LR can
be recovered by the unwinder when an exception is taken from one of them.

Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/crypto/aes-ce-ccm-core.S    | 12 +++----
 arch/arm64/crypto/aes-neonbs-core.S    | 40 +++++++++++------------
 arch/arm64/crypto/ghash-ce-core.S      | 20 ++++++------
 arch/arm64/crypto/sm4-ce-ccm-core.S    | 16 +++++-----
 arch/arm64/crypto/sm4-ce-cipher-core.S |  4 +--
 arch/arm64/crypto/sm4-ce-core.S        | 44 +++++++++++++-------------
 arch/arm64/crypto/sm4-ce-gcm-core.S    | 16 +++++-----
 arch/arm64/crypto/sm4-neon-core.S      | 12 +++----
 arch/arm64/include/asm/linkage.h       | 30 ++++++++++++++++++
 arch/arm64/lib/clear_page.S            |  4 +--
 arch/arm64/lib/clear_user.S            |  4 +--
 arch/arm64/lib/copy_from_user.S        |  4 +--
 arch/arm64/lib/copy_page.S             |  4 +--
 arch/arm64/lib/copy_to_user.S          |  4 +--
 arch/arm64/lib/memchr.S                |  4 +--
 arch/arm64/lib/memcmp.S                |  4 +--
 arch/arm64/lib/memcpy.S                |  8 ++---
 arch/arm64/lib/memset.S                |  8 ++---
 arch/arm64/lib/mte.S                   | 28 ++++++++--------
 arch/arm64/lib/strchr.S                |  4 +--
 arch/arm64/lib/strcmp.S                |  4 +--
 arch/arm64/lib/strlen.S                |  4 +--
 arch/arm64/lib/strncmp.S               |  4 +--
 arch/arm64/lib/strnlen.S               |  4 +--
 arch/arm64/lib/tishift.S               | 12 +++----
 25 files changed, 164 insertions(+), 134 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index f2624238fd95..519309c886b9 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -80,7 +80,7 @@ CPU_LE(	rev	x8, x8			)
 	ret
 	.endm
 
-SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
+SYM_LEAF_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
 	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
 
@@ -113,7 +113,7 @@ SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
 0:	st1	{v0.16b}, [x5]			/* store result */
 	ret
-SYM_FUNC_END(ce_aes_ccm_crypt_tail)
+SYM_LEAF_FUNC_END(ce_aes_ccm_crypt_tail)
 
 	/*
 	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
@@ -123,15 +123,15 @@ SYM_FUNC_END(ce_aes_ccm_crypt_tail)
 	 * 			   u8 const rk[], u32 rounds, u8 mac[],
 	 * 			   u8 ctr[], u8 const final_iv[]);
 	 */
-SYM_FUNC_START(ce_aes_ccm_encrypt)
+SYM_LEAF_FUNC_START(ce_aes_ccm_encrypt)
 	movi	v22.16b, #255
 	aes_ccm_do_crypt	1
-SYM_FUNC_END(ce_aes_ccm_encrypt)
+SYM_LEAF_FUNC_END(ce_aes_ccm_encrypt)
 
-SYM_FUNC_START(ce_aes_ccm_decrypt)
+SYM_LEAF_FUNC_START(ce_aes_ccm_decrypt)
 	movi	v22.16b, #0
 	aes_ccm_do_crypt	0
-SYM_FUNC_END(ce_aes_ccm_decrypt)
+SYM_LEAF_FUNC_END(ce_aes_ccm_decrypt)
 
 	.section ".rodata", "a"
 	.align	6
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index baf450717b24..34b5c3c63c22 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -381,7 +381,7 @@ ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
 	/*
 	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 	 */
-SYM_FUNC_START(aesbs_convert_key)
+SYM_LEAF_FUNC_START(aesbs_convert_key)
 	ld1		{v7.4s}, [x1], #16		// load round 0 key
 	ld1		{v17.4s}, [x1], #16		// load round 1 key
 
@@ -426,10 +426,10 @@ SYM_FUNC_START(aesbs_convert_key)
 	eor		v17.16b, v17.16b, v7.16b
 	str		q17, [x0]
 	ret
-SYM_FUNC_END(aesbs_convert_key)
+SYM_LEAF_FUNC_END(aesbs_convert_key)
 
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_encrypt8)
+SYM_LEAF_FUNC_START_LOCAL(aesbs_encrypt8)
 	ldr		q9, [bskey], #16		// round 0 key
 	ldr		q8, M0SR
 	ldr		q24, SR
@@ -489,10 +489,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	eor		v2.16b, v2.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_encrypt8)
+SYM_LEAF_FUNC_END(aesbs_encrypt8)
 
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_decrypt8)
+SYM_LEAF_FUNC_START_LOCAL(aesbs_decrypt8)
 	lsl		x9, rounds, #7
 	add		bskey, bskey, x9
 
@@ -554,7 +554,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	eor		v3.16b, v3.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_decrypt8)
+SYM_LEAF_FUNC_END(aesbs_decrypt8)
 
 	/*
 	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -621,21 +621,21 @@ SYM_FUNC_END(aesbs_decrypt8)
 	.endm
 
 	.align		4
-SYM_TYPED_FUNC_START(aesbs_ecb_encrypt)
+SYM_TYPED_LEAF_FUNC_START(aesbs_ecb_encrypt)
 	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_ecb_encrypt)
+SYM_LEAF_FUNC_END(aesbs_ecb_encrypt)
 
 	.align		4
-SYM_TYPED_FUNC_START(aesbs_ecb_decrypt)
+SYM_TYPED_LEAF_FUNC_START(aesbs_ecb_decrypt)
 	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-SYM_FUNC_END(aesbs_ecb_decrypt)
+SYM_LEAF_FUNC_END(aesbs_ecb_decrypt)
 
 	/*
 	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
 	.align		4
-SYM_FUNC_START(aesbs_cbc_decrypt)
+SYM_LEAF_FUNC_START(aesbs_cbc_decrypt)
 	frame_push	6
 
 	mov		x19, x0
@@ -719,7 +719,7 @@ SYM_FUNC_START(aesbs_cbc_decrypt)
 
 2:	frame_pop
 	ret
-SYM_FUNC_END(aesbs_cbc_decrypt)
+SYM_LEAF_FUNC_END(aesbs_cbc_decrypt)
 
 	.macro		next_tweak, out, in, const, tmp
 	sshr		\tmp\().2d,  \in\().2d,   #63
@@ -735,7 +735,7 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
 	 */
-SYM_FUNC_START_LOCAL(__xts_crypt8)
+SYM_LEAF_FUNC_START_LOCAL(__xts_crypt8)
 	movi		v18.2s, #0x1
 	movi		v19.2s, #0x87
 	uzp1		v18.4s, v18.4s, v19.4s
@@ -766,7 +766,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
 	mov		bskey, x2
 	mov		rounds, x3
 	br		x16
-SYM_FUNC_END(__xts_crypt8)
+SYM_LEAF_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	frame_push	0, 32
@@ -800,13 +800,13 @@ SYM_FUNC_END(__xts_crypt8)
 	ret
 	.endm
 
-SYM_TYPED_FUNC_START(aesbs_xts_encrypt)
+SYM_TYPED_LEAF_FUNC_START(aesbs_xts_encrypt)
 	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
-SYM_FUNC_END(aesbs_xts_encrypt)
+SYM_LEAF_FUNC_END(aesbs_xts_encrypt)
 
-SYM_TYPED_FUNC_START(aesbs_xts_decrypt)
+SYM_TYPED_LEAF_FUNC_START(aesbs_xts_decrypt)
 	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
-SYM_FUNC_END(aesbs_xts_decrypt)
+SYM_LEAF_FUNC_END(aesbs_xts_decrypt)
 
 	.macro		next_ctr, v
 	mov		\v\().d[1], x8
@@ -820,7 +820,7 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 	 *		     int rounds, int blocks, u8 iv[])
 	 */
-SYM_FUNC_START(aesbs_ctr_encrypt)
+SYM_LEAF_FUNC_START(aesbs_ctr_encrypt)
 	frame_push	0
 	ldp		x7, x8, [x5]
 	ld1		{v0.16b}, [x5]
@@ -863,4 +863,4 @@ CPU_LE(	rev		x8, x8		)
 	st1		{v0.16b}, [x5]
 	frame_pop
 	ret
-SYM_FUNC_END(aesbs_ctr_encrypt)
+SYM_LEAF_FUNC_END(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 33772d8fe6b5..3471430c2a53 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -66,7 +66,7 @@
 	 * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
 	 *			       u64 const h[4][2], const char *head)
 	 */
-SYM_FUNC_START(pmull_ghash_update_p64)
+SYM_LEAF_FUNC_START(pmull_ghash_update_p64)
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
 
@@ -173,7 +173,7 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 
 5:	st1		{XL.2d}, [x1]
 	ret
-SYM_FUNC_END(pmull_ghash_update_p64)
+SYM_LEAF_FUNC_END(pmull_ghash_update_p64)
 
 	KS0		.req	v8
 	KS1		.req	v9
@@ -417,9 +417,9 @@ CPU_LE(	rev		w8, w8		)
 	 *			  u64 const h[4][2], u64 dg[], u8 ctr[],
 	 *			  u32 const rk[], int rounds, u8 tag[])
 	 */
-SYM_FUNC_START(pmull_gcm_encrypt)
+SYM_LEAF_FUNC_START(pmull_gcm_encrypt)
 	pmull_gcm_do_crypt	1
-SYM_FUNC_END(pmull_gcm_encrypt)
+SYM_LEAF_FUNC_END(pmull_gcm_encrypt)
 
 	/*
 	 * int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
@@ -427,11 +427,11 @@ SYM_FUNC_END(pmull_gcm_encrypt)
 	 *			 u32 const rk[], int rounds, const u8 l[],
 	 *			 const u8 tag[], u64 authsize)
 	 */
-SYM_FUNC_START(pmull_gcm_decrypt)
+SYM_LEAF_FUNC_START(pmull_gcm_decrypt)
 	pmull_gcm_do_crypt	0
-SYM_FUNC_END(pmull_gcm_decrypt)
+SYM_LEAF_FUNC_END(pmull_gcm_decrypt)
 
-SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
+SYM_LEAF_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
 	movi		MASK.16b, #0xe1
 	shl		MASK.2d, MASK.2d, #57
 
@@ -512,9 +512,9 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
 	eor		XL.16b, XL.16b, T2.16b
 
 	ret
-SYM_FUNC_END(pmull_gcm_ghash_4x)
+SYM_LEAF_FUNC_END(pmull_gcm_ghash_4x)
 
-SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
+SYM_LEAF_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	ld1		{KS0.16b}, [x5]			// load upper counter
 	sub		w10, w8, #4
 	sub		w11, w8, #3
@@ -577,7 +577,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	eor		INP3.16b, INP3.16b, KS3.16b
 
 	ret
-SYM_FUNC_END(pmull_gcm_enc_4x)
+SYM_LEAF_FUNC_END(pmull_gcm_enc_4x)
 
 	.section	".rodata", "a"
 	.align		6
diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S
index fa85856f33ce..20a8853609e0 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-core.S
+++ b/arch/arm64/crypto/sm4-ce-ccm-core.S
@@ -37,7 +37,7 @@
 
 
 .align 3
-SYM_FUNC_START(sm4_ce_cbcmac_update)
+SYM_LEAF_FUNC_START(sm4_ce_cbcmac_update)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: mac
@@ -81,10 +81,10 @@ SYM_FUNC_START(sm4_ce_cbcmac_update)
 .Lcbcmac_end:
 	st1		{RMAC.16b}, [x1]
 	ret
-SYM_FUNC_END(sm4_ce_cbcmac_update)
+SYM_LEAF_FUNC_END(sm4_ce_cbcmac_update)
 
 .align 3
-SYM_FUNC_START(sm4_ce_ccm_final)
+SYM_LEAF_FUNC_START(sm4_ce_ccm_final)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: ctr0 (big endian, 128 bit)
@@ -102,10 +102,10 @@ SYM_FUNC_START(sm4_ce_ccm_final)
 	st1		{RMAC.16b}, [x2]
 
 	ret
-SYM_FUNC_END(sm4_ce_ccm_final)
+SYM_LEAF_FUNC_END(sm4_ce_ccm_final)
 
 .align 3
-SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
+SYM_TYPED_LEAF_FUNC_START(sm4_ce_ccm_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -214,10 +214,10 @@ SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
 
 .Lccm_enc_ret:
 	ret
-SYM_FUNC_END(sm4_ce_ccm_enc)
+SYM_LEAF_FUNC_END(sm4_ce_ccm_enc)
 
 .align 3
-SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
+SYM_TYPED_LEAF_FUNC_START(sm4_ce_ccm_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -326,4 +326,4 @@ SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
 
 .Lccm_dec_ret:
 	ret
-SYM_FUNC_END(sm4_ce_ccm_dec)
+SYM_LEAF_FUNC_END(sm4_ce_ccm_dec)
diff --git a/arch/arm64/crypto/sm4-ce-cipher-core.S b/arch/arm64/crypto/sm4-ce-cipher-core.S
index 4ac6cfbc5797..7aea346cb14c 100644
--- a/arch/arm64/crypto/sm4-ce-cipher-core.S
+++ b/arch/arm64/crypto/sm4-ce-cipher-core.S
@@ -15,7 +15,7 @@
 	 * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
 	 */
 	.text
-SYM_FUNC_START(sm4_ce_do_crypt)
+SYM_LEAF_FUNC_START(sm4_ce_do_crypt)
 	ld1		{v8.4s}, [x2]
 	ld1		{v0.4s-v3.4s}, [x0], #64
 CPU_LE(	rev32		v8.16b, v8.16b		)
@@ -33,4 +33,4 @@ CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v8.16b, v8.16b		)
 	st1		{v8.4s}, [x1]
 	ret
-SYM_FUNC_END(sm4_ce_do_crypt)
+SYM_LEAF_FUNC_END(sm4_ce_do_crypt)
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 1f3625c2c67e..6af5b10859b8 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -40,7 +40,7 @@
 
 
 .align 3
-SYM_FUNC_START(sm4_ce_expand_key)
+SYM_LEAF_FUNC_START(sm4_ce_expand_key)
 	/* input:
 	 *   x0: 128-bit key
 	 *   x1: rkey_enc
@@ -86,10 +86,10 @@ SYM_FUNC_START(sm4_ce_expand_key)
 	st1		{v20.16b-v23.16b}, [x2]
 
 	ret;
-SYM_FUNC_END(sm4_ce_expand_key)
+SYM_LEAF_FUNC_END(sm4_ce_expand_key)
 
 .align 3
-SYM_FUNC_START(sm4_ce_crypt_block)
+SYM_LEAF_FUNC_START(sm4_ce_crypt_block)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -102,10 +102,10 @@ SYM_FUNC_START(sm4_ce_crypt_block)
 	st1		{v0.16b}, [x1];
 
 	ret;
-SYM_FUNC_END(sm4_ce_crypt_block)
+SYM_LEAF_FUNC_END(sm4_ce_crypt_block)
 
 .align 3
-SYM_FUNC_START(sm4_ce_crypt)
+SYM_LEAF_FUNC_START(sm4_ce_crypt)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -153,10 +153,10 @@ SYM_FUNC_START(sm4_ce_crypt)
 
 .Lcrypt_end:
 	ret;
-SYM_FUNC_END(sm4_ce_crypt)
+SYM_LEAF_FUNC_END(sm4_ce_crypt)
 
 .align 3
-SYM_FUNC_START(sm4_ce_cbc_enc)
+SYM_LEAF_FUNC_START(sm4_ce_cbc_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -208,10 +208,10 @@ SYM_FUNC_START(sm4_ce_cbc_enc)
 	st1		{RIV.16b}, [x3]
 
 	ret
-SYM_FUNC_END(sm4_ce_cbc_enc)
+SYM_LEAF_FUNC_END(sm4_ce_cbc_enc)
 
 .align 3
-SYM_FUNC_START(sm4_ce_cbc_dec)
+SYM_LEAF_FUNC_START(sm4_ce_cbc_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -306,10 +306,10 @@ SYM_FUNC_START(sm4_ce_cbc_dec)
 	st1		{RIV.16b}, [x3]
 
 	ret
-SYM_FUNC_END(sm4_ce_cbc_dec)
+SYM_LEAF_FUNC_END(sm4_ce_cbc_dec)
 
 .align 3
-SYM_FUNC_START(sm4_ce_cbc_cts_enc)
+SYM_LEAF_FUNC_START(sm4_ce_cbc_cts_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -354,10 +354,10 @@ SYM_FUNC_START(sm4_ce_cbc_cts_enc)
 	st1		{v1.16b}, [x1]
 
 	ret
-SYM_FUNC_END(sm4_ce_cbc_cts_enc)
+SYM_LEAF_FUNC_END(sm4_ce_cbc_cts_enc)
 
 .align 3
-SYM_FUNC_START(sm4_ce_cbc_cts_dec)
+SYM_LEAF_FUNC_START(sm4_ce_cbc_cts_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -400,10 +400,10 @@ SYM_FUNC_START(sm4_ce_cbc_cts_dec)
 	st1		{v0.16b}, [x1]
 
 	ret
-SYM_FUNC_END(sm4_ce_cbc_cts_dec)
+SYM_LEAF_FUNC_END(sm4_ce_cbc_cts_dec)
 
 .align 3
-SYM_FUNC_START(sm4_ce_ctr_enc)
+SYM_LEAF_FUNC_START(sm4_ce_ctr_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -506,7 +506,7 @@ SYM_FUNC_START(sm4_ce_ctr_enc)
 	stp		x7, x8, [x3]
 
 	ret
-SYM_FUNC_END(sm4_ce_ctr_enc)
+SYM_LEAF_FUNC_END(sm4_ce_ctr_enc)
 
 
 #define tweak_next(vt, vin, RTMP)					\
@@ -517,7 +517,7 @@ SYM_FUNC_END(sm4_ce_ctr_enc)
 		eor		vt.16b, vt.16b, RTMP.16b;
 
 .align 3
-SYM_FUNC_START(sm4_ce_xts_enc)
+SYM_LEAF_FUNC_START(sm4_ce_xts_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -681,10 +681,10 @@ SYM_FUNC_START(sm4_ce_xts_enc)
 
 .Lxts_enc_ret:
 	ret
-SYM_FUNC_END(sm4_ce_xts_enc)
+SYM_LEAF_FUNC_END(sm4_ce_xts_enc)
 
 .align 3
-SYM_FUNC_START(sm4_ce_xts_dec)
+SYM_LEAF_FUNC_START(sm4_ce_xts_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -848,10 +848,10 @@ SYM_FUNC_START(sm4_ce_xts_dec)
 
 .Lxts_dec_ret:
 	ret
-SYM_FUNC_END(sm4_ce_xts_dec)
+SYM_LEAF_FUNC_END(sm4_ce_xts_dec)
 
 .align 3
-SYM_FUNC_START(sm4_ce_mac_update)
+SYM_LEAF_FUNC_START(sm4_ce_mac_update)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: digest
@@ -917,7 +917,7 @@ SYM_FUNC_START(sm4_ce_mac_update)
 .Lmac_ret:
 	st1		{RMAC.16b}, [x1]
 	ret
-SYM_FUNC_END(sm4_ce_mac_update)
+SYM_LEAF_FUNC_END(sm4_ce_mac_update)
 
 
 	.section	".rodata", "a"
diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S
index 347f25d75727..dac6db8160f2 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-core.S
+++ b/arch/arm64/crypto/sm4-ce-gcm-core.S
@@ -259,7 +259,7 @@
 #define	RH4	v19
 
 .align 3
-SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
+SYM_LEAF_FUNC_START(sm4_ce_pmull_ghash_setup)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: ghash table
@@ -293,10 +293,10 @@ SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
 	st1		{RH1.16b-RH4.16b}, [x1]
 
 	ret
-SYM_FUNC_END(sm4_ce_pmull_ghash_setup)
+SYM_LEAF_FUNC_END(sm4_ce_pmull_ghash_setup)
 
 .align 3
-SYM_FUNC_START(pmull_ghash_update)
+SYM_LEAF_FUNC_START(pmull_ghash_update)
 	/* input:
 	 *   x0: ghash table
 	 *   x1: ghash result
@@ -368,10 +368,10 @@ SYM_FUNC_START(pmull_ghash_update)
 	st1		{RHASH.2d}, [x1]
 
 	ret
-SYM_FUNC_END(pmull_ghash_update)
+SYM_LEAF_FUNC_END(pmull_ghash_update)
 
 .align 3
-SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)
+SYM_TYPED_LEAF_FUNC_START(sm4_ce_pmull_gcm_enc)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -534,7 +534,7 @@ SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)
 	st1		{RHASH.2d}, [x5]
 
 	ret
-SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
+SYM_LEAF_FUNC_END(sm4_ce_pmull_gcm_enc)
 
 #undef	RR1
 #undef	RR3
@@ -582,7 +582,7 @@ SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
 #define	RH3	v20
 
 .align 3
-SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)
+SYM_TYPED_LEAF_FUNC_START(sm4_ce_pmull_gcm_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -726,7 +726,7 @@ SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)
 	st1		{RHASH.2d}, [x5]
 
 	ret
-SYM_FUNC_END(sm4_ce_pmull_gcm_dec)
+SYM_LEAF_FUNC_END(sm4_ce_pmull_gcm_dec)
 
 	.section	".rodata", "a"
 	.align 4
diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S
index 734dc7193610..d1fe37fce13a 100644
--- a/arch/arm64/crypto/sm4-neon-core.S
+++ b/arch/arm64/crypto/sm4-neon-core.S
@@ -257,7 +257,7 @@
 
 
 .align 3
-SYM_FUNC_START(sm4_neon_crypt)
+SYM_LEAF_FUNC_START(sm4_neon_crypt)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -318,10 +318,10 @@ SYM_FUNC_START(sm4_neon_crypt)
 
 .Lcrypt_end:
 	ret
-SYM_FUNC_END(sm4_neon_crypt)
+SYM_LEAF_FUNC_END(sm4_neon_crypt)
 
 .align 3
-SYM_FUNC_START(sm4_neon_cbc_dec)
+SYM_LEAF_FUNC_START(sm4_neon_cbc_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -435,10 +435,10 @@ SYM_FUNC_START(sm4_neon_cbc_dec)
 	st1		{RIV.16b}, [x3]
 
 	ret
-SYM_FUNC_END(sm4_neon_cbc_dec)
+SYM_LEAF_FUNC_END(sm4_neon_cbc_dec)
 
 .align 3
-SYM_FUNC_START(sm4_neon_ctr_crypt)
+SYM_LEAF_FUNC_START(sm4_neon_ctr_crypt)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
@@ -563,4 +563,4 @@ SYM_FUNC_START(sm4_neon_ctr_crypt)
 	stp		x7, x8, [x3]
 
 	ret
-SYM_FUNC_END(sm4_neon_ctr_crypt)
+SYM_LEAF_FUNC_END(sm4_neon_ctr_crypt)
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 40bd17add539..6b8cb36a3343 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -3,6 +3,12 @@
 
 #ifdef __ASSEMBLER__
 #include <asm/assembler.h>
+
+/*
+ * Do not generate .eh_frame.  Only generate .debug_frame and optionally
+ * .sframe (via assembler option --gsframe[-N]).
+ */
+	.cfi_sections .debug_frame
 #endif
 
 #define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT
@@ -43,4 +49,28 @@
 	SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)	\
 	bti c ;
 
+
+/*
+ * SYM_[TYPED_]LEAF_FUNC_[START|END] macros add CFI minimal CFI directives
+ * allowing .sframe data to be generated for functions which do not modify the
+ * LR (x30). Unwind data will not be correct if these macros are used on
+ * non-leaf functions, as additional CFI directives would be necessary in such
+ * cases.
+ */
+#define SYM_LEAF_FUNC_START(name)			\
+	.cfi_startproc ;				\
+	SYM_FUNC_START(name)
+
+#define SYM_LEAF_FUNC_END(name)				\
+	.cfi_endproc ;					\
+	SYM_FUNC_END(name)
+
+#define SYM_LEAF_FUNC_START_LOCAL(name)			\
+	.cfi_startproc ;				\
+	SYM_FUNC_START_LOCAL(name)
+
+#define SYM_TYPED_LEAF_FUNC_START(name)			\
+	.cfi_startproc ;				\
+	SYM_TYPED_FUNC_START(name)
+
 #endif
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index bd6f7d5eb6eb..fceb875c3570 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -14,7 +14,7 @@
  * Parameters:
  *	x0 - dest
  */
-SYM_FUNC_START(__pi_clear_page)
+SYM_LEAF_FUNC_START(__pi_clear_page)
 #ifdef CONFIG_AS_HAS_MOPS
 	.arch_extension mops
 alternative_if_not ARM64_HAS_MOPS
@@ -48,6 +48,6 @@ alternative_else_nop_endif
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	2b
 	ret
-SYM_FUNC_END(__pi_clear_page)
+SYM_LEAF_FUNC_END(__pi_clear_page)
 SYM_FUNC_ALIAS(clear_page, __pi_clear_page)
 EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index de9a303b6ad0..cf07c010ca92 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,7 +17,7 @@
  * Alignment fixed up by hardware.
  */
 
-SYM_FUNC_START(__arch_clear_user)
+SYM_LEAF_FUNC_START(__arch_clear_user)
 	add	x2, x0, x1
 
 #ifdef CONFIG_AS_HAS_MOPS
@@ -68,5 +68,5 @@ USER(7f, sttrb	wzr, [x2, #-1])
 8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
 9:	sub	x0, x2, x0
 	ret
-SYM_FUNC_END(__arch_clear_user)
+SYM_LEAF_FUNC_END(__arch_clear_user)
 EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 400057d607ec..e9bb9c2dd8e1 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -61,7 +61,7 @@
 
 end	.req	x5
 srcin	.req	x15
-SYM_FUNC_START(__arch_copy_from_user)
+SYM_LEAF_FUNC_START(__arch_copy_from_user)
 	add	end, x0, x2
 	mov	srcin, x1
 #include "copy_template.S"
@@ -79,5 +79,5 @@ USER(9998f, ldtrb tmp1w, [srcin])
 	strb	tmp1w, [dst], #1
 9998:	sub	x0, end, dst			// bytes not copied
 	ret
-SYM_FUNC_END(__arch_copy_from_user)
+SYM_LEAF_FUNC_END(__arch_copy_from_user)
 EXPORT_SYMBOL(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index e6374e7e5511..b6048d648306 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,7 +17,7 @@
  *	x0 - dest
  *	x1 - src
  */
-SYM_FUNC_START(__pi_copy_page)
+SYM_LEAF_FUNC_START(__pi_copy_page)
 #ifdef CONFIG_AS_HAS_MOPS
 	.arch_extension mops
 alternative_if_not ARM64_HAS_MOPS
@@ -77,6 +77,6 @@ alternative_else_nop_endif
 	stnp	x16, x17, [x0, #112 - 256]
 
 	ret
-SYM_FUNC_END(__pi_copy_page)
+SYM_LEAF_FUNC_END(__pi_copy_page)
 SYM_FUNC_ALIAS(copy_page, __pi_copy_page)
 EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 819f2e3fc7a9..aea10a5b3cba 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -60,7 +60,7 @@
 
 end	.req	x5
 srcin	.req	x15
-SYM_FUNC_START(__arch_copy_to_user)
+SYM_LEAF_FUNC_START(__arch_copy_to_user)
 	add	end, x0, x2
 	mov	srcin, x1
 #include "copy_template.S"
@@ -79,5 +79,5 @@ USER(9998f, sttrb tmp1w, [dst])
 	add	dst, dst, #1
 9998:	sub	x0, end, dst			// bytes not copied
 	ret
-SYM_FUNC_END(__arch_copy_to_user)
+SYM_LEAF_FUNC_END(__arch_copy_to_user)
 EXPORT_SYMBOL(__arch_copy_to_user)
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 37a9f2a4f7f4..909599bba5bf 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -38,7 +38,7 @@
 
 	.p2align 4
 	nop
-SYM_FUNC_START(__pi_memchr)
+SYM_LEAF_FUNC_START(__pi_memchr)
 	and	chrin, chrin, #0xff
 	lsr	wordcnt, cntin, #3
 	cbz	wordcnt, L(byte_loop)
@@ -71,6 +71,6 @@ CPU_LE(	rev	tmp, tmp)
 L(not_found):
 	mov	result, #0
 	ret
-SYM_FUNC_END(__pi_memchr)
+SYM_LEAF_FUNC_END(__pi_memchr)
 SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr)
 EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index a5ccf2c55f91..91ee3a00e664 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -32,7 +32,7 @@
 #define tmp1		x7
 #define tmp2		x8
 
-SYM_FUNC_START(__pi_memcmp)
+SYM_LEAF_FUNC_START(__pi_memcmp)
 	subs	limit, limit, 8
 	b.lo	L(less8)
 
@@ -134,6 +134,6 @@ L(byte_loop):
 	b.eq	L(byte_loop)
 	sub	result, data1w, data2w
 	ret
-SYM_FUNC_END(__pi_memcmp)
+SYM_LEAF_FUNC_END(__pi_memcmp)
 SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
 EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 9b99106fb95f..90caf402ea7d 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -57,7 +57,7 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-SYM_FUNC_START_LOCAL(__pi_memcpy_generic)
+SYM_LEAF_FUNC_START_LOCAL(__pi_memcpy_generic)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
@@ -238,11 +238,11 @@ L(copy64_from_start):
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 	ret
-SYM_FUNC_END(__pi_memcpy_generic)
+SYM_LEAF_FUNC_END(__pi_memcpy_generic)
 
 #ifdef CONFIG_AS_HAS_MOPS
 	.arch_extension mops
-SYM_FUNC_START(__pi_memcpy)
+SYM_LEAF_FUNC_START(__pi_memcpy)
 alternative_if_not ARM64_HAS_MOPS
 	b	__pi_memcpy_generic
 alternative_else_nop_endif
@@ -252,7 +252,7 @@ alternative_else_nop_endif
 	cpym	[dst]!, [src]!, count!
 	cpye	[dst]!, [src]!, count!
 	ret
-SYM_FUNC_END(__pi_memcpy)
+SYM_LEAF_FUNC_END(__pi_memcpy)
 #else
 SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic)
 #endif
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 97157da65ec6..8ee307f5891b 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -43,7 +43,7 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
-SYM_FUNC_START_LOCAL(__pi_memset_generic)
+SYM_LEAF_FUNC_START_LOCAL(__pi_memset_generic)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
 	orr	A_lw, A_lw, A_lw, lsl #8
@@ -202,11 +202,11 @@ SYM_FUNC_START_LOCAL(__pi_memset_generic)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-SYM_FUNC_END(__pi_memset_generic)
+SYM_LEAF_FUNC_END(__pi_memset_generic)
 
 #ifdef CONFIG_AS_HAS_MOPS
 	.arch_extension mops
-SYM_FUNC_START(__pi_memset)
+SYM_LEAF_FUNC_START(__pi_memset)
 alternative_if_not ARM64_HAS_MOPS
 	b	__pi_memset_generic
 alternative_else_nop_endif
@@ -216,7 +216,7 @@ alternative_else_nop_endif
 	setm	[dst]!, count!, val_x
 	sete	[dst]!, count!, val_x
 	ret
-SYM_FUNC_END(__pi_memset)
+SYM_LEAF_FUNC_END(__pi_memset)
 #else
 SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic)
 #endif
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 5018ac03b6bf..442202cd02a3 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -27,14 +27,14 @@
  * Clear the tags in a page
  *   x0 - address of the page to be cleared
  */
-SYM_FUNC_START(mte_clear_page_tags)
+SYM_LEAF_FUNC_START(mte_clear_page_tags)
 	multitag_transfer_size x1, x2
 1:	stgm	xzr, [x0]
 	add	x0, x0, x1
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	1b
 	ret
-SYM_FUNC_END(mte_clear_page_tags)
+SYM_LEAF_FUNC_END(mte_clear_page_tags)
 
 /*
  * Zero the page and tags at the same time
@@ -42,7 +42,7 @@ SYM_FUNC_END(mte_clear_page_tags)
  * Parameters:
  *	x0 - address to the beginning of the page
  */
-SYM_FUNC_START(mte_zero_clear_page_tags)
+SYM_LEAF_FUNC_START(mte_zero_clear_page_tags)
 	and	x0, x0, #(1 << MTE_TAG_SHIFT) - 1	// clear the tag
 	mrs	x1, dczid_el0
 	tbnz	x1, #4, 2f	// Branch if DC GZVA is prohibited
@@ -60,14 +60,14 @@ SYM_FUNC_START(mte_zero_clear_page_tags)
 	tst	x0, #(PAGE_SIZE - 1)
 	b.ne	2b
 	ret
-SYM_FUNC_END(mte_zero_clear_page_tags)
+SYM_LEAF_FUNC_END(mte_zero_clear_page_tags)
 
 /*
  * Copy the tags from the source page to the destination one
  *   x0 - address of the destination page
  *   x1 - address of the source page
  */
-SYM_FUNC_START(mte_copy_page_tags)
+SYM_LEAF_FUNC_START(mte_copy_page_tags)
 	mov	x2, x0
 	mov	x3, x1
 	multitag_transfer_size x5, x6
@@ -78,7 +78,7 @@ SYM_FUNC_START(mte_copy_page_tags)
 	tst	x2, #(PAGE_SIZE - 1)
 	b.ne	1b
 	ret
-SYM_FUNC_END(mte_copy_page_tags)
+SYM_LEAF_FUNC_END(mte_copy_page_tags)
 
 /*
  * Read tags from a user buffer (one tag per byte) and set the corresponding
@@ -89,7 +89,7 @@ SYM_FUNC_END(mte_copy_page_tags)
  * Returns:
  *   x0 - number of tags read/set
  */
-SYM_FUNC_START(mte_copy_tags_from_user)
+SYM_LEAF_FUNC_START(mte_copy_tags_from_user)
 	mov	x3, x1
 	cbz	x2, 2f
 1:
@@ -103,7 +103,7 @@ USER(2f, ldtrb	w4, [x1])
 	// exception handling and function return
 2:	sub	x0, x1, x3		// update the number of tags set
 	ret
-SYM_FUNC_END(mte_copy_tags_from_user)
+SYM_LEAF_FUNC_END(mte_copy_tags_from_user)
 
 /*
  * Get the tags from a kernel address range and write the tag values to the
@@ -114,7 +114,7 @@ SYM_FUNC_END(mte_copy_tags_from_user)
  * Returns:
  *   x0 - number of tags read/set
  */
-SYM_FUNC_START(mte_copy_tags_to_user)
+SYM_LEAF_FUNC_START(mte_copy_tags_to_user)
 	mov	x3, x0
 	cbz	x2, 2f
 1:
@@ -129,14 +129,14 @@ USER(2f, sttrb	w4, [x0])
 	// exception handling and function return
 2:	sub	x0, x0, x3		// update the number of tags copied
 	ret
-SYM_FUNC_END(mte_copy_tags_to_user)
+SYM_LEAF_FUNC_END(mte_copy_tags_to_user)
 
 /*
  * Save the tags in a page
  *   x0 - page address
  *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
-SYM_FUNC_START(mte_save_page_tags)
+SYM_LEAF_FUNC_START(mte_save_page_tags)
 	multitag_transfer_size x7, x5
 1:
 	mov	x2, #0
@@ -153,14 +153,14 @@ SYM_FUNC_START(mte_save_page_tags)
 	b.ne	1b
 
 	ret
-SYM_FUNC_END(mte_save_page_tags)
+SYM_LEAF_FUNC_END(mte_save_page_tags)
 
 /*
  * Restore the tags in a page
  *   x0 - page address
  *   x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes
  */
-SYM_FUNC_START(mte_restore_page_tags)
+SYM_LEAF_FUNC_START(mte_restore_page_tags)
 	multitag_transfer_size x7, x5
 1:
 	ldr	x2, [x1], #8
@@ -174,4 +174,4 @@ SYM_FUNC_START(mte_restore_page_tags)
 	b.ne	1b
 
 	ret
-SYM_FUNC_END(mte_restore_page_tags)
+SYM_LEAF_FUNC_END(mte_restore_page_tags)
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index 94ee67a6b212..455582efd07a 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -18,7 +18,7 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
-SYM_FUNC_START(__pi_strchr)
+SYM_LEAF_FUNC_START(__pi_strchr)
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 	cmp	w2, w1
@@ -28,7 +28,7 @@ SYM_FUNC_START(__pi_strchr)
 	cmp	w2, w1
 	csel	x0, x0, xzr, eq
 	ret
-SYM_FUNC_END(__pi_strchr)
+SYM_LEAF_FUNC_END(__pi_strchr)
 
 SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr)
 EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 9b89b4533607..d0ce2040a32b 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -53,7 +53,7 @@
    NUL too in big-endian, byte-reverse the data before the NUL check.  */
 
 
-SYM_FUNC_START(__pi_strcmp)
+SYM_LEAF_FUNC_START(__pi_strcmp)
 	sub	off2, src2, src1
 	mov	zeroones, REP8_01
 	and	tmp, src1, 7
@@ -185,6 +185,6 @@ L(tail):
 L(done):
 	sub	result, data1, data2
 	ret
-SYM_FUNC_END(__pi_strcmp)
+SYM_LEAF_FUNC_END(__pi_strcmp)
 SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
 EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 4919fe81ae54..a5d4151548b5 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -79,7 +79,7 @@
 	   whether the first fetch, which may be misaligned, crosses a page
 	   boundary.  */
 
-SYM_FUNC_START(__pi_strlen)
+SYM_LEAF_FUNC_START(__pi_strlen)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
@@ -208,6 +208,6 @@ L(page_cross):
 	csel	data1, data1, tmp4, eq
 	csel	data2, data2, tmp2, eq
 	b	L(page_cross_entry)
-SYM_FUNC_END(__pi_strlen)
+SYM_LEAF_FUNC_END(__pi_strlen)
 SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)
 EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index fe7bbc0b42a7..8fd5c5d7dc2a 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -58,7 +58,7 @@
 #define LS_BK lsl
 #endif
 
-SYM_FUNC_START(__pi_strncmp)
+SYM_LEAF_FUNC_START(__pi_strncmp)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
@@ -305,6 +305,6 @@ L(syndrome_check):
 L(ret0):
 	mov	result, #0
 	ret
-SYM_FUNC_END(__pi_strncmp)
+SYM_LEAF_FUNC_END(__pi_strncmp)
 SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
 EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index d5ac0e10a01d..9f3f02e3f7e2 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -47,7 +47,7 @@ limit_wd	.req	x14
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-SYM_FUNC_START(__pi_strnlen)
+SYM_LEAF_FUNC_START(__pi_strnlen)
 	cbz	limit, .Lhit_limit
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
@@ -156,7 +156,7 @@ CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift (tmp1 & 63).  */
 .Lhit_limit:
 	mov	len, limit
 	ret
-SYM_FUNC_END(__pi_strnlen)
+SYM_LEAF_FUNC_END(__pi_strnlen)
 
 SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
 EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index a88613834fb0..b12d7f6a6003 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -7,7 +7,7 @@
 
 #include <asm/assembler.h>
 
-SYM_FUNC_START(__ashlti3)
+SYM_LEAF_FUNC_START(__ashlti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -26,10 +26,10 @@ SYM_FUNC_START(__ashlti3)
 	lsl	x1, x0, x1
 	mov	x0, x2
 	ret
-SYM_FUNC_END(__ashlti3)
+SYM_LEAF_FUNC_END(__ashlti3)
 EXPORT_SYMBOL(__ashlti3)
 
-SYM_FUNC_START(__ashrti3)
+SYM_LEAF_FUNC_START(__ashrti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -48,10 +48,10 @@ SYM_FUNC_START(__ashrti3)
 	asr	x0, x1, x0
 	mov	x1, x2
 	ret
-SYM_FUNC_END(__ashrti3)
+SYM_LEAF_FUNC_END(__ashrti3)
 EXPORT_SYMBOL(__ashrti3)
 
-SYM_FUNC_START(__lshrti3)
+SYM_LEAF_FUNC_START(__lshrti3)
 	cbz	x2, 1f
 	mov	x3, #64
 	sub	x3, x3, x2
@@ -70,5 +70,5 @@ SYM_FUNC_START(__lshrti3)
 	lsr	x0, x1, x0
 	mov	x1, x2
 	ret
-SYM_FUNC_END(__lshrti3)
+SYM_LEAF_FUNC_END(__lshrti3)
 EXPORT_SYMBOL(__lshrti3)
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 5/9] sframe: Provide PC lookup for vmlinux .sframe section
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

With HAVE_UNWIND_KERNEL_SFRAME, read in the .sframe section at boot.
This provides unwind data as an alternative/supplement to frame pointer
based unwinding.

Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/kernel/setup.c |  2 ++
 include/linux/sframe.h    | 14 ++++++++++++++
 kernel/unwind/sframe.c    | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 23c05dc7a8f2..4a633bc7aefb 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -32,6 +32,7 @@
 #include <linux/sched/task.h>
 #include <linux/scs.h>
 #include <linux/mm.h>
+#include <linux/sframe.h>
 
 #include <asm/acpi.h>
 #include <asm/fixmap.h>
@@ -375,6 +376,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 			"This indicates a broken bootloader or old kernel\n",
 			boot_args[1], boot_args[2], boot_args[3]);
 	}
+	init_sframe_table();
 }
 
 static inline bool cpu_can_disable(unsigned int cpu)
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 0cb2924367bc..5b7341b61a7c 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -69,4 +69,18 @@ static inline int sframe_find_user(unsigned long ip, struct unwind_frame *frame)
 
 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
 
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+void __init init_sframe_table(void);
+void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+			void *text, size_t text_size);
+
+extern int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame);
+
+#else
+
+static inline void __init init_sframe_table(void) {}
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
 #endif /* _LINUX_SFRAME_H */
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index a2ab9a3e07b4..c8ec1e9989fc 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -16,10 +16,20 @@
 #include <linux/unwind_types.h>
 #include <asm/unwind_sframe.h>
 #include <uapi/linux/stacktrace.h>
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+#include <linux/kallsyms.h>
+#endif
 
 #include "sframe.h"
 #include "sframe_debug.h"
 
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+static bool sframe_init __ro_after_init;
+static struct sframe_section kernel_sfsec __ro_after_init;
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
 struct sframe_fde_internal {
 	unsigned long	func_addr;
 	u32		func_size;
@@ -969,3 +979,29 @@ SYSCALL_DEFINE5(stacktrace_setup, int, op, unsigned long, addr_start,
 }
 
 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */
+
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame)
+{
+	if (!frame || !sframe_init)
+		return -EINVAL;
+
+	return  __sframe_find(&kernel_sfsec, ip, frame);
+}
+
+void __init init_sframe_table(void)
+{
+	kernel_sfsec.sec_type		= SFRAME_KERNEL;
+	kernel_sfsec.sframe_start	= (unsigned long)__start_sframe;
+	kernel_sfsec.sframe_end		= (unsigned long)__end_sframe;
+	kernel_sfsec.text_start		= (unsigned long)_stext;
+	kernel_sfsec.text_end		= (unsigned long)_etext;
+
+	if (WARN_ON(sframe_read_header(&kernel_sfsec)))
+		return;
+
+	sframe_init = true;
+}
+
+#endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 6/9] arm64/module, sframe: Add sframe support for modules
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Add sframe table to mod_arch_specific and support sframe PC lookups when
an .sframe section can be found on incoming modules. SFRAME_F_FDE_SORTED
is not set for module .sframe, so FDES are sorted right after the sframe
header is read.

Signed-off-by: Weinan Liu <wnliu@google.com>
Suggested-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/include/asm/module.h |  6 +++
 arch/arm64/kernel/module.c      |  8 +++
 include/linux/sframe.h          |  3 ++
 kernel/unwind/sframe.c          | 90 +++++++++++++++++++++++++++++++--
 4 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index fb9b88eebeb1..07f309c51eee 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -6,6 +6,7 @@
 #define __ASM_MODULE_H
 
 #include <asm-generic/module.h>
+#include <linux/sframe.h>
 
 struct mod_plt_sec {
 	int			plt_shndx;
@@ -17,6 +18,11 @@ struct mod_arch_specific {
 	struct mod_plt_sec	core;
 	struct mod_plt_sec	init;
 
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+	struct sframe_section sframe_sec;
+	bool sframe_init;
+#endif
+
 	/* for CONFIG_DYNAMIC_FTRACE */
 	struct plt_entry	*ftrace_trampolines;
 	struct plt_entry	*init_ftrace_trampolines;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 24adb581af0e..427f187e9531 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -18,6 +18,7 @@
 #include <linux/moduleloader.h>
 #include <linux/random.h>
 #include <linux/scs.h>
+#include <linux/sframe.h>
 
 #include <asm/alternative.h>
 #include <asm/insn.h>
@@ -515,5 +516,12 @@ int module_finalize(const Elf_Ehdr *hdr,
 		}
 	}
 
+	s = find_section(hdr, sechdrs, ".sframe");
+	if (s) {
+		struct module_memory *t = &me->mem[MOD_TEXT];
+
+		sframe_module_init(me, (void *)s->sh_addr, s->sh_size,
+				   t->base, t->size);
+	}
 	return module_init_ftrace_plt(hdr, sechdrs, me);
 }
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 5b7341b61a7c..27f5a66190af 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -28,6 +28,7 @@ struct sframe_section {
 	unsigned long		fres_start;
 	unsigned long		fres_end;
 	unsigned int		num_fdes;
+	bool			fdes_sorted;
 
 	signed char		ra_off;
 	signed char		fp_off;
@@ -80,6 +81,8 @@ extern int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame);
 #else
 
 static inline void __init init_sframe_table(void) {}
+static inline void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+				      void *text, size_t text_size) {}
 
 #endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
 
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index c8ec1e9989fc..dfa013450705 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/string_helpers.h>
 #include <linux/sframe.h>
+#include <linux/sort.h>
 #include <linux/syscalls.h>
 #include <linux/unwind_types.h>
 #include <asm/unwind_sframe.h>
@@ -186,6 +187,9 @@ static __always_inline int __find_fde(struct sframe_section *sec,
 	struct sframe_fde_v3 *first, *low, *high, *found = NULL;
 	int ret;
 
+	if (!sec->fdes_sorted)
+		return -EINVAL;
+
 	first = (void *)sec->fdes_start;
 	low = first;
 	high = first + sec->num_fdes - 1;
@@ -740,7 +744,6 @@ static int sframe_read_header(struct sframe_section *sec)
 
 	if (shdr.preamble.magic != SFRAME_MAGIC ||
 	    shdr.preamble.version != SFRAME_VERSION_3 ||
-	    !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) ||
 	    !(shdr.preamble.flags & SFRAME_F_FDE_FUNC_START_PCREL) ||
 	    shdr.auxhdr_len) {
 		dbg_sec("bad/unsupported sframe header\n");
@@ -770,6 +773,7 @@ static int sframe_read_header(struct sframe_section *sec)
 		return -EINVAL;
 	}
 
+	sec->fdes_sorted	= shdr.preamble.flags & SFRAME_F_FDE_SORTED;
 	sec->num_fdes		= num_fdes;
 	sec->fdes_start		= fdes_start;
 	sec->fres_start		= fres_start;
@@ -984,10 +988,27 @@ SYSCALL_DEFINE5(stacktrace_setup, int, op, unsigned long, addr_start,
 
 int sframe_find_kernel(unsigned long ip, struct unwind_frame *frame)
 {
-	if (!frame || !sframe_init)
+	struct sframe_section *sec;
+
+	if (!frame)
 		return -EINVAL;
 
-	return  __sframe_find(&kernel_sfsec, ip, frame);
+	if (is_ksym_addr(ip)) {
+		if (!sframe_init)
+			return -EINVAL;
+
+		sec = &kernel_sfsec;
+	} else {
+		struct module *mod;
+
+		mod = __module_address(ip);
+		if (!mod || !mod->arch.sframe_init)
+			return -EINVAL;
+
+		sec = &mod->arch.sframe_sec;
+	}
+
+	return  __sframe_find(sec, ip, frame);
 }
 
 void __init init_sframe_table(void)
@@ -1004,4 +1025,67 @@ void __init init_sframe_table(void)
 	sframe_init = true;
 }
 
+static int sframe_sort_cmp_fde(const void *a, const void *b)
+{
+	const struct sframe_fde_v3 *fde_a = a, *fde_b = b;
+	unsigned long func_start_a, func_start_b;
+
+	func_start_a = (unsigned long)fde_a + fde_a->func_start_off;
+	func_start_b = (unsigned long)fde_b + fde_b->func_start_off;
+
+	return cmp_int(func_start_a, func_start_b);
+}
+
+static void sframe_sort_swap_fde(void *a, void *b, int size)
+{
+	struct sframe_fde_v3 *fde_a = a, *fde_b = b;
+	struct sframe_fde_v3 temp;
+	long delta;
+
+	/* Swap potentially unaligned FDE */
+	memcpy(&temp, fde_a, sizeof(struct sframe_fde_v3));
+	memcpy(fde_a, fde_b, sizeof(struct sframe_fde_v3));
+	memcpy(fde_b, &temp, sizeof(struct sframe_fde_v3));
+
+	/* Adjust FDE function start offset from FDE */
+	delta = (long)((unsigned long)fde_b - (unsigned long)fde_a);
+	fde_a->func_start_off += delta;
+	fde_b->func_start_off -= delta;
+}
+
+static int sframe_sort_fdes(struct sframe_section *sec)
+{
+	void *fdes = (void *)sec->fdes_start;
+	size_t num_fdes = sec->num_fdes;
+
+	if (sec->sec_type != SFRAME_KERNEL)
+		return -EINVAL;
+	if (sec->fdes_sorted)
+		return 0;
+
+	sort(fdes, num_fdes, sizeof(struct sframe_fde_v3),
+	     sframe_sort_cmp_fde, sframe_sort_swap_fde);
+	sec->fdes_sorted = true;
+	return 0;
+}
+
+void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
+			void *text, size_t text_size)
+{
+	struct sframe_section *sec = &mod->arch.sframe_sec;
+
+	sec->sec_type	 = SFRAME_KERNEL;
+	sec->sframe_start = (unsigned long)sframe;
+	sec->sframe_end   = (unsigned long)sframe + sframe_size;
+	sec->text_start   = (unsigned long)text;
+	sec->text_end     = (unsigned long)text + text_size;
+
+	if (WARN_ON(sframe_read_header(sec)))
+		return;
+	if (WARN_ON(sframe_sort_fdes(sec)))
+		return;
+
+	mod->arch.sframe_init = true;
+}
+
 #endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 7/9] sframe: Introduce in-kernel SFRAME_VALIDATION
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Generalize the __safe* helpers to support a non-user-access code path.

This requires arch-specific function address validation. This is because
arm64 vmlinux keeps .exit.text (normally discarded), and .rodata.text
sections both of which lie outside the bounds of the normal .text.
.rodata.text contains code that is never executed by the kernel mapping,
but for which the toolchain nonetheless generates sframe data, and needs
to be considered valid for a PC lookup.

Additionally .init.text lies outside .text for all arches and must be
accounted for as well.

Suggested-by: Jens Remus <jremus@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/Kconfig                           |  2 +-
 arch/arm64/include/asm/sections.h      |  1 +
 arch/arm64/include/asm/unwind_sframe.h | 46 ++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S        |  2 ++
 include/linux/sframe.h                 |  2 ++
 kernel/unwind/sframe.c                 | 25 ++++++++++++--
 6 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index f931b5848593..fa1f43f47a53 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -503,7 +503,7 @@ config HAVE_UNWIND_USER_SFRAME
 
 config SFRAME_VALIDATION
 	bool "Enable .sframe section debugging"
-	depends on HAVE_UNWIND_USER_SFRAME
+	depends on UNWIND_SFRAME_LOOKUP
 	depends on DYNAMIC_DEBUG
 	help
 	  When adding an .sframe section for a task, validate the entire
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 51b0d594239e..5edb4304f661 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -23,6 +23,7 @@ extern char __irqentry_text_start[], __irqentry_text_end[];
 extern char __mmuoff_data_start[], __mmuoff_data_end[];
 extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
 extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+extern char _srodatatext[], _erodatatext[];
 
 static inline size_t entry_tramp_text_size(void)
 {
diff --git a/arch/arm64/include/asm/unwind_sframe.h b/arch/arm64/include/asm/unwind_sframe.h
index 876412881196..eb269a54b9ef 100644
--- a/arch/arm64/include/asm/unwind_sframe.h
+++ b/arch/arm64/include/asm/unwind_sframe.h
@@ -2,7 +2,53 @@
 #ifndef _ASM_ARM64_UNWIND_SFRAME_H
 #define _ASM_ARM64_UNWIND_SFRAME_H
 
+#include <linux/module.h>
+#include <linux/sframe.h>
+#include <asm/sections.h>
+
 #define SFRAME_REG_SP	31
 #define SFRAME_REG_FP	29
 
+static inline bool sframe_func_start_addr_valid(struct sframe_section *sec,
+						unsigned long func_addr)
+{
+	/* Common case for unwinding */
+	if (sec->text_start <= func_addr && func_addr < sec->text_end)
+		return true;
+
+	if (sec->sec_type != SFRAME_KERNEL)
+		return false;
+
+	/*
+	 * Account for vmlinux and module code outside the normal .text section.
+	 * The toolchain still generates sframe data for these functions, so
+	 * sframe lookups on them should be allowed.
+	 */
+	if (sec == &kernel_sfsec) {
+		if (is_kernel_inittext(func_addr))
+			return true;
+
+		/* .exit.text is retained in vmlinux on arm64. */
+		if (func_addr >= (unsigned long)__exittext_begin &&
+		    func_addr < (unsigned long)__exittext_end)
+			return true;
+
+		/*
+		 * .rodata.text is never executed from the kernel mapping, but
+		 * still has sframe data
+		 */
+		if (func_addr >= (unsigned long)_srodatatext &&
+		    func_addr < (unsigned long)_erodatatext)
+			return true;
+	} else {
+		struct module *mod = container_of(sec, struct module,
+						  arch.sframe_sec);
+		if (within_module_mem_type(func_addr, mod, MOD_INIT_TEXT))
+			return true;
+	}
+
+	return false;
+}
+#define sframe_func_start_addr_valid sframe_func_start_addr_valid
+
 #endif /* _ASM_ARM64_UNWIND_SFRAME_H */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e1ac876200a3..68700b4d5070 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -225,12 +225,14 @@ SECTIONS
 
 	/* code sections that are never executed via the kernel mapping */
 	.rodata.text : {
+		_srodatatext = .;
 		TRAMP_TEXT
 		HIBERNATE_TEXT
 		KEXEC_TEXT
 		IDMAP_TEXT
 		. = ALIGN(PAGE_SIZE);
 	}
+	_erodatatext = .;
 
 	idmap_pg_dir = .;
 	. += PAGE_SIZE;
diff --git a/include/linux/sframe.h b/include/linux/sframe.h
index 27f5a66190af..ac3aa9db7d91 100644
--- a/include/linux/sframe.h
+++ b/include/linux/sframe.h
@@ -34,6 +34,8 @@ struct sframe_section {
 	signed char		fp_off;
 };
 
+extern struct sframe_section kernel_sfsec __ro_after_init;
+
 #endif /* CONFIG_UNWIND_SFRAME_LOOKUP */
 
 #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME
diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index dfa013450705..e8ede0343cb2 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -24,10 +24,18 @@
 #include "sframe.h"
 #include "sframe_debug.h"
 
+#ifndef sframe_func_start_addr_valid
+static inline bool sframe_func_start_addr_valid(struct sframe_section *sec,
+						unsigned long func_addr)
+{
+	return (sec->text_start <= func_addr && func_addr < sec->text_end);
+}
+#endif
+
 #ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
 
 static bool sframe_init __ro_after_init;
-static struct sframe_section kernel_sfsec __ro_after_init;
+struct sframe_section kernel_sfsec __ro_after_init;
 
 #endif /* CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
 
@@ -155,7 +163,7 @@ static __always_inline int __read_fde(struct sframe_section *sec,
 		  sizeof(struct sframe_fde_v3), Efault);
 
 	func_addr = fde_addr + _fde.func_start_off;
-	if (func_addr < sec->text_start || func_addr >= sec->text_end)
+	if (!sframe_func_start_addr_valid(sec, func_addr))
 		return -EINVAL;
 
 	fda_addr = sec->fres_start + _fde.fres_off;
@@ -607,6 +615,9 @@ static int safe_read_fde(struct sframe_section *sec,
 {
 	int ret;
 
+	if (sec->sec_type == SFRAME_KERNEL)
+		return __read_fde(sec, fde_num, fde);
+
 	if (!user_read_access_begin((void __user *)sec->sframe_start,
 				    sec->sframe_end - sec->sframe_start))
 		return -EFAULT;
@@ -622,6 +633,9 @@ static int safe_read_fre(struct sframe_section *sec,
 {
 	int ret;
 
+	if (sec->sec_type == SFRAME_KERNEL)
+		return __read_fre(sec, fde, fre_addr, fre);
+
 	if (!user_read_access_begin((void __user *)sec->sframe_start,
 				    sec->sframe_end - sec->sframe_start))
 		return -EFAULT;
@@ -636,6 +650,9 @@ static int safe_read_fre_datawords(struct sframe_section *sec,
 {
 	int ret;
 
+	if (sec->sec_type == SFRAME_KERNEL)
+		return __read_fre_datawords(sec, fde, fre);
+
 	if (!user_read_access_begin((void __user *)sec->sframe_start,
 				    sec->sframe_end - sec->sframe_start))
 		return -EFAULT;
@@ -1021,6 +1038,8 @@ void __init init_sframe_table(void)
 
 	if (WARN_ON(sframe_read_header(&kernel_sfsec)))
 		return;
+	if (WARN_ON(sframe_validate_section(&kernel_sfsec)))
+		return;
 
 	sframe_init = true;
 }
@@ -1084,6 +1103,8 @@ void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
 		return;
 	if (WARN_ON(sframe_sort_fdes(sec)))
 		return;
+	if (WARN_ON(sframe_validate_section(sec)))
+		return;
 
 	mod->arch.sframe_init = true;
 }
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 8/9] sframe: Initialize debug info for kernel sections
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Setup the optional unwinder debug information for kernel .sframe
sections. Modules are indicated by the format "(<module-name>)".

Suggested-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 kernel/unwind/sframe.c       |  4 ++++
 kernel/unwind/sframe_debug.h | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c
index e8ede0343cb2..d256e72620fe 100644
--- a/kernel/unwind/sframe.c
+++ b/kernel/unwind/sframe.c
@@ -1036,6 +1036,8 @@ void __init init_sframe_table(void)
 	kernel_sfsec.text_start		= (unsigned long)_stext;
 	kernel_sfsec.text_end		= (unsigned long)_etext;
 
+	dbg_init(&kernel_sfsec);
+
 	if (WARN_ON(sframe_read_header(&kernel_sfsec)))
 		return;
 	if (WARN_ON(sframe_validate_section(&kernel_sfsec)))
@@ -1099,6 +1101,8 @@ void sframe_module_init(struct module *mod, void *sframe, size_t sframe_size,
 	sec->text_start   = (unsigned long)text;
 	sec->text_end     = (unsigned long)text + text_size;
 
+	dbg_init(sec);
+
 	if (WARN_ON(sframe_read_header(sec)))
 		return;
 	if (WARN_ON(sframe_sort_fdes(sec)))
diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h
index e568be4172b1..6c7ab3aa7c9e 100644
--- a/kernel/unwind/sframe_debug.h
+++ b/kernel/unwind/sframe_debug.h
@@ -32,6 +32,19 @@ static inline void dbg_init(struct sframe_section *sec)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 
+	if (sec->sec_type == SFRAME_KERNEL) {
+		if (sec == &kernel_sfsec) {
+			sec->filename = kstrdup("(vmlinux)", GFP_KERNEL);
+		} else {
+			struct module *mod = container_of(sec, struct module,
+							  arch.sframe_sec);
+			sec->filename = kasprintf(GFP_KERNEL, "(%s)",
+						  mod->name);
+		}
+
+		return;
+	}
+
 	guard(mmap_read_lock)(mm);
 	vma = vma_lookup(mm, sec->sframe_start);
 	if (!vma)
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* [PATCH v6 9/9] unwind: arm64: Use sframe to unwind interrupt frames
From: Dylan Hatch @ 2026-05-19  6:49 UTC (permalink / raw)
  To: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Jens Remus
  Cc: Dylan Hatch, Prasanna Kumar T S M, Puranjay Mohan, Song Liu,
	joe.lawrence, linux-toolchains, linux-kernel, live-patching,
	linux-arm-kernel, Randy Dunlap, Mostafa Saleh, Herbert Xu,
	David S. Miller
In-Reply-To: <20260519064950.493949-1-dylanbhatch@google.com>

Add unwind_next_frame_sframe() function to unwind by sframe info if
present. Use this method at exception boundaries, falling back to
frame-pointer unwind only on failure. In such failure cases, the
stacktrace is considered unreliable.

During normal unwind, prefer frame pointer unwind (for better
performance) with sframe as a backup.

This change restores the LR behavior originally introduced in commit
c2c6b27b5aa14fa2 ("arm64: stacktrace: unwind exception boundaries"),
But later removed in commit 32ed1205682e ("arm64: stacktrace: Skip
reporting LR at exception boundaries")

This can be done because the sframe data can be used to determine
whether the LR is current for the PC value recovered from pt_regs at the
exception boundary.

Signed-off-by: Weinan Liu <wnliu@google.com>
Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
---
 arch/arm64/kernel/stacktrace.c | 222 ++++++++++++++++++++++++++++++---
 1 file changed, 202 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 3ebcf8c53fb0..cee860ca8ce5 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sframe.h>
 #include <linux/stacktrace.h>
 
 #include <asm/efi.h>
@@ -26,6 +27,7 @@ enum kunwind_source {
 	KUNWIND_SOURCE_CALLER,
 	KUNWIND_SOURCE_TASK,
 	KUNWIND_SOURCE_REGS_PC,
+	KUNWIND_SOURCE_REGS_LR,
 };
 
 union unwind_flags {
@@ -45,6 +47,7 @@ union unwind_flags {
  * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
  *               associated with the most recently encountered replacement lr
  *               value.
+ * @unreliable:  Stacktrace is unreliable.
  */
 struct kunwind_state {
 	struct unwind_state common;
@@ -56,6 +59,7 @@ struct kunwind_state {
 	enum kunwind_source source;
 	union unwind_flags flags;
 	struct pt_regs *regs;
+	bool unreliable;
 };
 
 static __always_inline void
@@ -181,7 +185,6 @@ int kunwind_next_regs_pc(struct kunwind_state *state)
 	state->regs = regs;
 	state->common.pc = regs->pc;
 	state->common.fp = regs->regs[29];
-	state->regs = NULL;
 	state->source = KUNWIND_SOURCE_REGS_PC;
 	return 0;
 }
@@ -244,6 +247,168 @@ kunwind_next_frame_record(struct kunwind_state *state)
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_UNWIND_KERNEL_SFRAME
+
+static __always_inline struct stack_info *
+get_word(struct unwind_state *state, unsigned long *word)
+{
+	unsigned long addr = *word;
+	struct stack_info *info;
+
+	info = unwind_find_stack(state, addr, sizeof(addr));
+	if (!info)
+		return info;
+
+	*word = READ_ONCE(*(unsigned long *)addr);
+
+	return info;
+}
+
+static __always_inline int
+get_consume_word(struct unwind_state *state, unsigned long *word)
+{
+	struct stack_info *info;
+	unsigned long addr = *word;
+
+	info = get_word(state, word);
+	if (!info)
+		return -EINVAL;
+
+	unwind_consume_stack(state, info, addr, sizeof(addr));
+	return 0;
+}
+
+/*
+ * Unwind from a pt_regs according to sframe.
+ */
+static __always_inline int
+kunwind_next_regs_sframe(struct kunwind_state *state)
+{
+	struct unwind_frame frame;
+	unsigned long cfa, fp, ra;
+	enum kunwind_source source = KUNWIND_SOURCE_FRAME;
+	struct pt_regs *regs = state->regs;
+
+	int err;
+
+	if (WARN_ON_ONCE(state->source != KUNWIND_SOURCE_REGS_PC))
+		return -EINVAL;
+	if (WARN_ON_ONCE(!state->regs))
+		return -EINVAL;
+
+	/* FP/SP alignment 8 bytes */
+	if (state->common.fp & 0x7)
+		return -EINVAL;
+
+	err = sframe_find_kernel(state->common.pc, &frame);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * A kernel unwind should always end at a FRAME_META_TYPE_FINAL
+	 * frame. There should be no outermost frames within the kernel.
+	 */
+	if (frame.outermost)
+		return -EINVAL;
+
+	/* Get the Canonical Frame Address (CFA) */
+	switch (frame.cfa.rule) {
+	case UNWIND_CFA_RULE_SP_OFFSET:
+		cfa = state->regs->sp;
+		break;
+	case UNWIND_CFA_RULE_FP_OFFSET:
+		if (state->common.fp < state->regs->sp)
+			return -EINVAL;
+		cfa = state->common.fp;
+		break;
+	/*
+	 * UNWIND_CFA_RULE_REG_OFFSET and UNWIND_CFA_RULE_REG_OFFSET_DEREF not
+	 * implemented -- flexible FDEs are not currently generated by assembler
+	 * for arm64.
+	 */
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+	cfa += frame.cfa.offset;
+
+	/* CFA alignment 16 bytes */
+	if (cfa & 0x15)
+		return -EINVAL;
+
+	/* Get the Return Address (RA) */
+	switch (frame.ra.rule) {
+	case UNWIND_RULE_RETAIN:
+		ra = regs->regs[30];
+		source = KUNWIND_SOURCE_REGS_LR;
+		break;
+
+	/*
+	 * UNWIND_RULE_CFA_OFFSET doesn't make sense for RA.
+	 * The return address cannot legitimately be a stack address.
+	 */
+	case UNWIND_RULE_CFA_OFFSET_DEREF:
+		ra = cfa + frame.ra.offset;
+		break;
+	/*
+	 * UNWIND_RULE_REG_OFFSET and UNWIND_RULE_REG_OFFSET_DEREF not
+	 * implemented -- flexible FDEs are not currently generated by assembler
+	 * for arm64.
+	 */
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	/* Get the Frame Pointer (FP) */
+	switch (frame.fp.rule) {
+	case UNWIND_RULE_RETAIN:
+		fp = state->common.fp;
+		break;
+	/*
+	 * UNWIND_RULE_CFA_OFFSET is currently not used for FP
+	 * (e.g. SFrame cannot represent this rule).
+	 */
+	case UNWIND_RULE_CFA_OFFSET_DEREF:
+		fp = cfa + frame.fp.offset;
+		break;
+	/*
+	 * UNWIND_RULE_REG_OFFSET and UNWIND_RULE_REG_OFFSET_DEREF not
+	 * implemented -- flexible FDEs are not currently generated by assembler
+	 * for arm64.
+	 */
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	/*
+	 * Consume RA and FP from the stack. The frame record puts FP at a lower
+	 * address than RA, so we always read FP first.
+	 */
+	if (frame.fp.rule & UNWIND_RULE_DEREF &&
+	    !get_word(&state->common, &fp))
+		return -EINVAL;
+
+	if (frame.ra.rule & UNWIND_RULE_DEREF &&
+	    get_consume_word(&state->common, &ra))
+		return -EINVAL;
+
+	state->common.pc = ra;
+	state->common.fp = fp;
+
+	state->source = source;
+
+	return 0;
+}
+
+#else /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME */
+
+static __always_inline int
+unwind_next_frame_sframe(struct kunwind_state *state) { return -EINVAL; }
+
+#endif /* !CONFIG_HAVE_UNWIND_KERNEL_SFRAME*/
+
 /*
  * Unwind from one frame record (A) to the next frame record (B).
  *
@@ -259,10 +424,20 @@ kunwind_next(struct kunwind_state *state)
 	state->flags.all = 0;
 
 	switch (state->source) {
+	case KUNWIND_SOURCE_REGS_PC:
+		err = kunwind_next_regs_sframe(state);
+
+		if (err && err != -ENOENT) {
+			/* Fallback to FP based unwinder */
+			err = kunwind_next_frame_record(state);
+			state->unreliable = true;
+		}
+		state->regs = NULL;
+		break;
 	case KUNWIND_SOURCE_FRAME:
 	case KUNWIND_SOURCE_CALLER:
 	case KUNWIND_SOURCE_TASK:
-	case KUNWIND_SOURCE_REGS_PC:
+	case KUNWIND_SOURCE_REGS_LR:
 		err = kunwind_next_frame_record(state);
 		break;
 	default:
@@ -390,34 +565,40 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
 	kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
 }
 
+struct kunwind_reliable_consume_entry_data {
+	stack_trace_consume_fn consume_entry;
+	void *cookie;
+	bool unreliable;
+};
+
 static __always_inline bool
-arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+arch_kunwind_reliable_consume_entry(const struct kunwind_state *state, void *cookie)
 {
-	/*
-	 * At an exception boundary we can reliably consume the saved PC. We do
-	 * not know whether the LR was live when the exception was taken, and
-	 * so we cannot perform the next unwind step reliably.
-	 *
-	 * All that matters is whether the *entire* unwind is reliable, so give
-	 * up as soon as we hit an exception boundary.
-	 */
-	if (state->source == KUNWIND_SOURCE_REGS_PC)
-		return false;
+	struct kunwind_reliable_consume_entry_data *data = cookie;
 
-	return arch_kunwind_consume_entry(state, cookie);
+	if (state->unreliable) {
+		data->unreliable = true;
+		return false;
+	}
+	return data->consume_entry(data->cookie, state->common.pc);
 }
 
-noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
-					      void *cookie,
-					      struct task_struct *task)
+noinline notrace int arch_stack_walk_reliable(
+				stack_trace_consume_fn consume_entry,
+				void *cookie, struct task_struct *task)
 {
-	struct kunwind_consume_entry_data data = {
+	struct kunwind_reliable_consume_entry_data data = {
 		.consume_entry = consume_entry,
 		.cookie = cookie,
+		.unreliable = false,
 	};
 
-	return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
-				  task, NULL);
+	kunwind_stack_walk(arch_kunwind_reliable_consume_entry, &data, task, NULL);
+
+	if (data.unreliable)
+		return -EINVAL;
+
+	return 0;
 }
 
 struct bpf_unwind_consume_entry_data {
@@ -452,6 +633,7 @@ static const char *state_source_string(const struct kunwind_state *state)
 	case KUNWIND_SOURCE_CALLER:	return "C";
 	case KUNWIND_SOURCE_TASK:	return "T";
 	case KUNWIND_SOURCE_REGS_PC:	return "P";
+	case KUNWIND_SOURCE_REGS_LR:	return "L";
 	default:			return "U";
 	}
 }
-- 
2.54.0.563.g4f69b47b94-goog


^ permalink raw reply related

* Re: [RFC PATCH 6/6] livepatch: Support replace_set in shadow variable API
From: Yafang Shao @ 2026-05-19  6:56 UTC (permalink / raw)
  To: sashiko-reviews
  Cc: jpoimboe, joe.lawrence, song, mbenes, jikos, pmladek,
	live-patching
In-Reply-To: <20260514230124.A4236C2BCB3@smtp.kernel.org>

On Fri, May 15, 2026 at 7:01 AM <sashiko-bot@kernel.org> wrote:
>
> Thank you for your contribution! Sashiko AI review found 4 potential issue(s) to consider:
> - [High] Newly allocated shadow variables fail to store the `replace_set` identifier, breaking all subsequent lookups and leading to unbounded memory leaks.
> - [High] The `klp_shadow_combined_id()` function invokes undefined behavior and loses data on 32-bit architectures by shifting a 32-bit integer by 32 bits.
> - [High] Changing the shadow API `id` parameter to `unsigned int` silently truncates 64-bit `klp_state` identifiers, leading to memory leaks.
> - [Low] Modifying the `klp_shadow_*` function signatures breaks the build for in-tree callers.
> --
>
> commit 91116e31e667d0beea437b5048ab5a62099a5eed
> Author: Yafang Shao <laoar.shao@gmail.com>
>
> livepatch: Support replace_set in shadow variable API
>
> To support more complex livepatching scenarios where multiple
> replacement sets might coexist, extend the klp_shadow API to
> include a replace_set identifier.
>
> > diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
> > index c9f2036dfefcf..cee35f5cae9ad 100644
> > --- a/include/linux/livepatch.h
> > +++ b/include/linux/livepatch.h
> > @@ -192,15 +192,17 @@ static inline bool klp_have_reliable_stack(void)
> >              IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE);
> >  }
> >
> > -void *klp_shadow_get(void *obj, unsigned long id);
> > -void *klp_shadow_alloc(void *obj, unsigned long id,
> > +void *klp_shadow_get(void *obj, unsigned int replace_set, unsigned int id);
> > +void *klp_shadow_alloc(void *obj, unsigned int replace_set, unsigned int id,
> >                      size_t size, gfp_t gfp_flags,
> >                      klp_shadow_ctor_t ctor, void *ctor_data);
>
> Does this signature change break the build for existing in-tree callers?
>
> It looks like samples/livepatch/livepatch-shadow-fix1.c and
> tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
> might not have been updated to pass the new replace_set argument.

selftests will be changed accordingly once the RFC is removed.

>
> [ ... ]
>
> > diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c
> > index c2e724d97ddf2..35e507fae4458 100644
> > --- a/kernel/livepatch/shadow.c
> > +++ b/kernel/livepatch/shadow.c
> > @@ -59,6 +60,11 @@ struct klp_shadow {
> >       char data[];
> >  };
> >
> > +static unsigned long klp_shadow_combined_id(unsigned int set, unsigned int id)
> > +{
> > +     return ((unsigned long)set << 32) | id;
> > +}
>
> On 32-bit architectures that support livepatching (such as 32-bit PowerPC),
> unsigned long is 32 bits wide. Does shifting a 32-bit integer by 32 bits
> invoke undefined behavior here?

Ah, that is a problem on 32-bit PPC. I'll fix this in v2.

>
> Furthermore, since struct klp_shadow->id remains defined as an unsigned long,
> it cannot physically store a 64-bit combined ID on these systems. The
> replace_set value might be entirely truncated, resulting in hash collisions
> and subsequent issues when multiple sets manage the same resource.
>
> [ ... ]
>
> > @@ -126,7 +134,7 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id,
> >
> >       /* Look for <obj, id> again under the lock */
> >       spin_lock_irqsave(&klp_shadow_lock, flags);
> > -     shadow_data = klp_shadow_get(obj, id);
> > +     shadow_data = klp_shadow_get(obj, set, id);
> >       if (unlikely(shadow_data)) {
> >               /*
> >                * Shadow variable was found, throw away speculative
> > @@ -147,8 +155,8 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id,
> [ ... ]
> >       }
> >
> >       new_shadow->obj = obj;
> >       new_shadow->id = id;
>
> Because the id argument is now a 32-bit integer, the upper 32 bits of
> new_shadow->id will be zero. Should this be initialized using
> klp_shadow_combined_id(set, id) instead?

Right. will fix it.

>
> Lookup functions like klp_shadow_get() and klp_shadow_free() search using
> the 64-bit composite ID klp_shadow_combined_id(replace_set, id). When
> replace_set is non-zero, this lookup will always fail.
>
> This means klp_shadow_get() will erroneously return NULL, and
> klp_shadow_get_or_alloc() could duplicate variables indefinitely, leading
> to unbounded memory leaks.
>
> [ ... ]
>
> > diff --git a/kernel/livepatch/state.c b/kernel/livepatch/state.c
> > index 43115e8e8453c..6e3d6fb92e64a 100644
> > --- a/kernel/livepatch/state.c
> > +++ b/kernel/livepatch/state.c
> > @@ -203,7 +203,8 @@ void klp_states_post_unpatch(struct klp_patch *patch)
> >                       state->callbacks.post_unpatch(patch, state);
> >
> >               if (state->is_shadow)
> > -                     klp_shadow_free_all(state->id, state->callbacks.shadow_dtor);
> > +                     klp_shadow_free_all(patch->replace_set, state->id,
> > +                                         state->callbacks.shadow_dtor);
>
> The shadow variable APIs previously accepted an unsigned long for the id,
> allowing 64-bit identifiers. The new klp_shadow_free_all() now accepts an
> unsigned int, but struct klp_state still defines id as an unsigned long.

Right, we should clearly document this API change.

>
> Will the implicit cast from unsigned long to unsigned int silently truncate
> the upper 32 bits of state->id on 64-bit platforms?
>
> If the original ID exceeded 32 bits, the core might fail to locate and free
> the correct shadow variables, causing a permanent memory leak during
> garbage collection.
>
> --
> Sashiko AI review · https://sashiko.dev/#/patchset/20260513143321.26185-1-laoar.shao@gmail.com?part=6



-- 
Regards
Yafang

^ permalink raw reply

* Re: [PATCH v5 0/8] unwind, arm64: add sframe unwinder for kernel
From: Jens Remus @ 2026-05-19 11:05 UTC (permalink / raw)
  To: Dylan Hatch, Mostafa Saleh
  Cc: Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland, Prasanna Kumar T S M, Puranjay Mohan,
	Song Liu, joe.lawrence, linux-toolchains, linux-kernel,
	live-patching, linux-arm-kernel, Randy Dunlap
In-Reply-To: <CADBMgpx38SUUuYYCm612STqh01jqv817WnJeeXYTD7Uc1r-fug@mail.gmail.com>

Hi Dylan and Mostafa!

On 5/18/2026 7:55 PM, Dylan Hatch wrote:
> On Fri, May 15, 2026 at 4:32 AM Mostafa Saleh <smostafa@google.com> wrote:
>> On Tue, Apr 28, 2026 at 06:36:35PM +0000, Dylan Hatch wrote:
>>> Implement a generic kernel sframe-based [1] unwinder. The main goal is
>>> to improve reliable stacktrace on arm64 by unwinding across exception
>>> boundaries.
>>>
>>> On x86, the ORC unwinder provides reliable stacktrace through similar
>>> methodology, but arm64 lacks the necessary support from objtool to
>>> create ORC unwind tables.
>>>
>>> Currently, there's already a sframe unwinder proposed for userspace: [2].
>>> To maintain common definitions and algorithms for sframe lookup, a
>>> substantial portion of this patch series aims to refactor the sframe
>>> lookup code to support both kernel and userspace sframe sections.
>>>
>>> Currently, only GNU Binutils support sframe. This series relies on the
>>> Sframe V3 format, which is supported in binutils 2.46.
>>>
>>> These patches are based on Steven Rostedt's sframe/core branch [3],
>>> which is and aggregation of existing work done for x86 sframe userspace
>>> unwind, and contains [2]. This branch is, in turn, based on Linux
>>> v7.0-rc3. This full series (applied to the sframe/core branch) is
>>> available on github: [4].
>>>
>>
>> Not sure if related, but after updating my toolchain
>> (aarch64-linux-gnu-gcc (Debian 15.2.0-4) 15.2.0), I hit link errors:
>> ld.lld: error: arch/arm64/kernel/vdso/vgettimeofday.o:(.sframe) is being placed in '.sframe'
>> ld.lld: error: arch/arm64/kernel/vdso/vgetrandom.o:(.sframe) is being placed in '.sframe`
> 
> Previously when developing against the SFrame V2 format, I had fixed
> these warnings with the VDSO Makefile change currently in this series:
> 
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index 7dec05dd33b7..c60ef921956f 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -38,7 +38,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
>  CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>                         $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
>                         $(GCC_PLUGINS_CFLAGS) \
> -                       $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
> +                       $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(CC_FLAGS_SFRAME) \
>                         -Wmissing-prototypes -Wmissing-declarations
> 
>  CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
> 
> But the warnings seem to have returned after upgrading my toolchain,
> possibly due to SFrame V3 or some confounding change in GCC. The
> --gsframe in the assembler should be set to 'no' by default, so
> perhaps GCC is providing an override --gsframe internally?

Could it be that your build of binutils was configured with
--enable-default-sframe, so that the GNU assembler defaults to generate
.sframe?  AFAIK this configure option was meant for distributors and
package maintainers.

You can check as follows whether --gsframe defaults to "no" or "yes":

$ as --help | grep -A1 gsframe
  --gsframe[={no|yes}]    whether to generate SFrame stack trace information
                          (default: no)
...

> 
>>
>> I applied this series hoping that fix it, but it doesn't, so far I
>> have this hack :
>> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
>> index 52314be29191..53bdf757ee44 100644
>> --- a/arch/arm64/kernel/vdso/vdso.lds.S
>> +++ b/arch/arm64/kernel/vdso/vdso.lds.S
>> @@ -77,7 +77,7 @@ SECTIONS
>>         /DISCARD/       : {
>>                 *(.data .data.* .gnu.linkonce.d.* .sdata*)
>>                 *(.bss .sbss .dynbss .dynsbss)
>> -               *(.eh_frame .eh_frame_hdr)
>> +               *(.eh_frame .eh_frame_hdr .sframe)
>>         }
>>  }
>>
>> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
>> index 60c8c22fd3e4..759903acd6fc 100644
>> --- a/include/asm-generic/vmlinux.lds.h
>> +++ b/include/asm-generic/vmlinux.lds.h
>> @@ -1064,6 +1064,7 @@
>>         /* ld.bfd warns about .gnu.version* even when not emitted */    \
>>         *(.gnu.version*)                                                \
>>         *(__tracepoint_check)                                           \
>> +       *(.sframe)                                                      \
>>
>>  #define DISCARDS                                                       \
>>         /DISCARD/ : {                                                   \
> 
> Since this series only handles kernel stacktrace, I believe it's
> better to omit the .sframe section entirely in the case where only
> ARCH_SUPPORTS_UNWIND_KERNEL_SFRAME is enabled. I think this hack may
> work better for this purpose:
> 
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index c60ef921956f..29f802bfedb1 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -41,7 +41,7 @@ CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os
> $(CC_FLAGS_SCS) \
>                         $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(CC_FLAGS_SFRAME) \
>                         -Wmissing-prototypes -Wmissing-declarations
> 
> -CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
> +CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
> -Wa,--gsframe=no
> 
>  CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
>  CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
> 
> Though, I don't understand why it is necessary to provide --gsframe=no
> explicitly. If this approach seems ok to other folks/maintainers, I
> can fold this into my series.

Maybe build the VDSO separately with V=1 to see what assembler/compiler
options are effectively used (e.g. for vgettimeofday.o and vgetrandom.o
mentioned in the linker error message above)?

$ make mrproper
$ make defconfig
$ ./scripts/config --enable HAVE_UNWIND_KERNEL_SFRAME   # enable kernel sframe unwinder
$ make arch/arm64/kernel/vdso/ V=1

> 
> On the topic of SFrame for VDSO, Jens has a patch adding support for
> this as part of a series to support userspace SFrame unwinding for
> arm64:
> 
> https://lore.kernel.org/lkml/20260417150827.1183376-4-jremus@linux.ibm.com/

Any feedback is very welcome. :-)

Regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/


^ permalink raw reply

* Re: [PATCH v6 8/9] sframe: Initialize debug info for kernel sections
From: Jens Remus @ 2026-05-19 11:20 UTC (permalink / raw)
  To: Dylan Hatch
  Cc: Prasanna Kumar T S M, Puranjay Mohan, Song Liu, joe.lawrence,
	linux-toolchains, linux-kernel, live-patching, linux-arm-kernel,
	Randy Dunlap, Mostafa Saleh, Herbert Xu, David S. Miller,
	Roman Gushchin, Weinan Liu, Will Deacon, Josh Poimboeuf,
	Indu Bhagat, Peter Zijlstra, Steven Rostedt, Catalin Marinas,
	Jiri Kosina, Mark Rutland
In-Reply-To: <20260519064950.493949-9-dylanbhatch@google.com>

On 5/19/2026 8:49 AM, Dylan Hatch wrote:
> Setup the optional unwinder debug information for kernel .sframe
> sections. Modules are indicated by the format "(<module-name>)".
> 
> Suggested-by: Jens Remus <jremus@linux.ibm.com>
> Signed-off-by: Dylan Hatch <dylanbhatch@google.com>

Reviewed-by: Jens Remus <jremus@linux.ibm.com>

> ---
>  kernel/unwind/sframe.c       |  4 ++++
>  kernel/unwind/sframe_debug.h | 13 +++++++++++++
>  2 files changed, 17 insertions(+)
Regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303)
jremus@de.ibm.com / jremus@linux.ibm.com

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/


^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Daniel Borkmann @ 2026-05-19 12:13 UTC (permalink / raw)
  To: Song Liu, Sasha Levin
  Cc: linux-kernel, linux-doc, linux-kselftest, bpf, live-patching,
	Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner
In-Reply-To: <CAPhsuW44UX663Au=WwHz8MVwnQgLkjxOqpJSCKxNiv3=RpZvqw@mail.gmail.com>

On 5/19/26 1:59 AM, Song Liu wrote:
> On Mon, May 18, 2026 at 6:33 AM Sasha Levin <sashal@kernel.org> wrote:
>> On Sun, May 17, 2026 at 11:37:36PM -0700, Song Liu wrote:
>>> On Sun, May 17, 2026 at 6:49 AM Sasha Levin <sashal@kernel.org> wrote:
>>>> * fail_function (CONFIG_FUNCTION_ERROR_INJECTION) is disabled in
>>>>    most production kernels. Even where enabled, it only works on
>>>>    functions pre-annotated with ALLOW_ERROR_INJECTION() in source -
>>>>    no help for a freshly-disclosed CVE. The debugfs UI is blocked by
>>>>    lockdown=integrity and the override is probabilistic.
>>>>
>>>> * BPF override (bpf_override_return) honors the same
>>>>    ALLOW_ERROR_INJECTION() whitelist, and BPF itself is off in many
>>>>    production kernels. Even where on, the operator interface is
>>>>    "load a verified BPF program," not a one-line write.
>>>
>>> If it is OK for killswitch to attach to any kernel functions, do we still
>>> need ALLOW_ERROR_INJECTION() for fail_function and BPF
>>> override? Shall we instead also allow fail_function and BPF override
>>> to attach to any kernel functions?
>>
>> I don't think so. ALLOW_ERROR_INJECTION is not a security mechanism, it's an
>> integrity/safety mechanism for both bpf and fault injection.
>>
>> It protects against a "developer or CI script doing legitimate fault injection
>> accidentally panics the box" scenario, not an "attacker gets in" one.
> 
> There really isn't a clear boundary between "security mechanism" and
> "non-security mechanism". As we are making killswitch available
> everywhere under root, users will soon learn to use it to do fault injection,
> and potentially much more scary things. (Think about agents with sudo
> access).

Fully agree with Song here that there is no clear boundary, and that the
killswitch could lead to arbitrary, hard to debug breakage if applied to
the wrong function.. introducing worse bugs than the one being mitigated
or even /short-circuit LSM enforcement/ (engage security_file_open 0,
engage cap_capable 0, engage apparmor_* etc).

The ALLOW_ERROR_INJECTION() provides a curated white-list where you may
return with an error without causing more severe damage (assuming the
error handling code is right). The right thing would be to more widely
apply ALLOW_ERROR_INJECTION() or to figure out a better way to safely
enable the latter without explicit function annotation.

Wrt BPF:

>>>> * BPF override (bpf_override_return) honors the same
>>>>    ALLOW_ERROR_INJECTION() whitelist, and BPF itself is off in many
>>>>    production kernels. Even where on, the operator interface is
>>>>    "load a verified BPF program," not a one-line write.

The claim that BPF itself is off in many production kernels is not really
true, where did you get that from? All the major distros and cloud providers
have BPF enabled these days, and even systemd ships BPF programs for
custom service firewalling etc.

The operator interface is to load a program vs. one-line write.. so we're
disregarding existing infra where you can already achieve the same for a
less safe one-liner convenience? (similarly for the livepatch infra..)

If you need a one-liner: bpftrace -e 'kprobe:FUNC { override(RETVAL); }'
Alternatively, add an extension to systemd where you can just deploy a
list of functions, and it does the necessary work in the background and
persistently.

Also, what about other classes of bugs, like OOB access, UAFs, locking
issues, etc which then could be used as a means for privilege escalations?
It feels like this proposal is a quick'n'dirty prototype via Claude as a
reaction to copy fail bug, but the right solution would be to improve
the user space tooling as mentioned and existing infra we have in kernel.

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Sasha Levin @ 2026-05-19 19:57 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Song Liu, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner
In-Reply-To: <b342c38b-7323-4b72-a239-8a574d6bc36b@iogearbox.net>

On Tue, May 19, 2026 at 02:13:26PM +0200, Daniel Borkmann wrote:
>On 5/19/26 1:59 AM, Song Liu wrote:
>>On Mon, May 18, 2026 at 6:33 AM Sasha Levin <sashal@kernel.org> wrote:
>>>On Sun, May 17, 2026 at 11:37:36PM -0700, Song Liu wrote:
>>>>On Sun, May 17, 2026 at 6:49 AM Sasha Levin <sashal@kernel.org> wrote:
>>>>>* fail_function (CONFIG_FUNCTION_ERROR_INJECTION) is disabled in
>>>>>   most production kernels. Even where enabled, it only works on
>>>>>   functions pre-annotated with ALLOW_ERROR_INJECTION() in source -
>>>>>   no help for a freshly-disclosed CVE. The debugfs UI is blocked by
>>>>>   lockdown=integrity and the override is probabilistic.
>>>>>
>>>>>* BPF override (bpf_override_return) honors the same
>>>>>   ALLOW_ERROR_INJECTION() whitelist, and BPF itself is off in many
>>>>>   production kernels. Even where on, the operator interface is
>>>>>   "load a verified BPF program," not a one-line write.
>>>>
>>>>If it is OK for killswitch to attach to any kernel functions, do we still
>>>>need ALLOW_ERROR_INJECTION() for fail_function and BPF
>>>>override? Shall we instead also allow fail_function and BPF override
>>>>to attach to any kernel functions?
>>>
>>>I don't think so. ALLOW_ERROR_INJECTION is not a security mechanism, it's an
>>>integrity/safety mechanism for both bpf and fault injection.
>>>
>>>It protects against a "developer or CI script doing legitimate fault injection
>>>accidentally panics the box" scenario, not an "attacker gets in" one.
>>
>>There really isn't a clear boundary between "security mechanism" and
>>"non-security mechanism". As we are making killswitch available
>>everywhere under root, users will soon learn to use it to do fault injection,
>>and potentially much more scary things. (Think about agents with sudo
>>access).
>
>Fully agree with Song here that there is no clear boundary, and that the
>killswitch could lead to arbitrary, hard to debug breakage if applied to
>the wrong function.. introducing worse bugs than the one being mitigated
>or even /short-circuit LSM enforcement/ (engage security_file_open 0,
>engage cap_capable 0, engage apparmor_* etc).

This is similar to livepatch, right? Do we need guardrails there too?

Or do we just trust root to do the right thing for it's systems without needing
to be it's babysitter?

>The ALLOW_ERROR_INJECTION() provides a curated white-list where you may
>return with an error without causing more severe damage (assuming the
>error handling code is right). The right thing would be to more widely
>apply ALLOW_ERROR_INJECTION() or to figure out a better way to safely
>enable the latter without explicit function annotation.

Sure, this would also work. How do you see this happening? Can we let a certain
user/pid/etc disable the allowlist if they choose to?

>Wrt BPF:
>
>>>>>* BPF override (bpf_override_return) honors the same
>>>>>   ALLOW_ERROR_INJECTION() whitelist, and BPF itself is off in many
>>>>>   production kernels. Even where on, the operator interface is
>>>>>   "load a verified BPF program," not a one-line write.
>
>The claim that BPF itself is off in many production kernels is not really
>true, where did you get that from? All the major distros and cloud providers
>have BPF enabled these days, and even systemd ships BPF programs for
>custom service firewalling etc.

The world is a bit bigger than home distros and cloud providers, but sure - bpf
is enabled widely enough at this point.

How do you see this working with the allowlist?

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Song Liu @ 2026-05-19 22:00 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Daniel Borkmann, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner
In-Reply-To: <agzAwjKhOhuANz_P@laps>

On Tue, May 19, 2026 at 12:57 PM Sasha Levin <sashal@kernel.org> wrote:
[...]
> >Fully agree with Song here that there is no clear boundary, and that the
> >killswitch could lead to arbitrary, hard to debug breakage if applied to
> >the wrong function.. introducing worse bugs than the one being mitigated
> >or even /short-circuit LSM enforcement/ (engage security_file_open 0,
> >engage cap_capable 0, engage apparmor_* etc).
>
> This is similar to livepatch, right? Do we need guardrails there too?

livepatch has the same guardrails as other kernel modules:
CONFIG_MODULE_SIG, CONFIG_MODULE_SIG_FORCE, etc.

Thanks,
Song

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Daniel Borkmann @ 2026-05-21  9:11 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Song Liu, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <agzAwjKhOhuANz_P@laps>

On 5/19/26 9:57 PM, Sasha Levin wrote:
> On Tue, May 19, 2026 at 02:13:26PM +0200, Daniel Borkmann wrote:
>> On 5/19/26 1:59 AM, Song Liu wrote:
>>> On Mon, May 18, 2026 at 6:33 AM Sasha Levin <sashal@kernel.org> wrote:
>>>> On Sun, May 17, 2026 at 11:37:36PM -0700, Song Liu wrote:
>>>>> On Sun, May 17, 2026 at 6:49 AM Sasha Levin <sashal@kernel.org> wrote:
>>>>>> * fail_function (CONFIG_FUNCTION_ERROR_INJECTION) is disabled in
>>>>>>   most production kernels. Even where enabled, it only works on
>>>>>>   functions pre-annotated with ALLOW_ERROR_INJECTION() in source -
>>>>>>   no help for a freshly-disclosed CVE. The debugfs UI is blocked by
>>>>>>   lockdown=integrity and the override is probabilistic.
>>>>>>
>>>>>> * BPF override (bpf_override_return) honors the same
>>>>>>   ALLOW_ERROR_INJECTION() whitelist, and BPF itself is off in many
>>>>>>   production kernels. Even where on, the operator interface is
>>>>>>   "load a verified BPF program," not a one-line write.
>>>>>
>>>>> If it is OK for killswitch to attach to any kernel functions, do we still
>>>>> need ALLOW_ERROR_INJECTION() for fail_function and BPF
>>>>> override? Shall we instead also allow fail_function and BPF override
>>>>> to attach to any kernel functions?
>>>>
>>>> I don't think so. ALLOW_ERROR_INJECTION is not a security mechanism, it's an
>>>> integrity/safety mechanism for both bpf and fault injection.
>>>>
>>>> It protects against a "developer or CI script doing legitimate fault injection
>>>> accidentally panics the box" scenario, not an "attacker gets in" one.
>>>
>>> There really isn't a clear boundary between "security mechanism" and
>>> "non-security mechanism". As we are making killswitch available
>>> everywhere under root, users will soon learn to use it to do fault injection,
>>> and potentially much more scary things. (Think about agents with sudo
>>> access).
>>
>> Fully agree with Song here that there is no clear boundary, and that the
>> killswitch could lead to arbitrary, hard to debug breakage if applied to
>> the wrong function.. introducing worse bugs than the one being mitigated
>> or even /short-circuit LSM enforcement/ (engage security_file_open 0,
>> engage cap_capable 0, engage apparmor_* etc).
> 
> This is similar to livepatch, right? Do we need guardrails there too?
> 
> Or do we just trust root to do the right thing for it's systems without needing
> to be it's babysitter?

[See Song's reply.]

>> The ALLOW_ERROR_INJECTION() provides a curated white-list where you may
>> return with an error without causing more severe damage (assuming the
>> error handling code is right). The right thing would be to more widely
>> apply ALLOW_ERROR_INJECTION() or to figure out a better way to safely
>> enable the latter without explicit function annotation.
> 
> Sure, this would also work. How do you see this happening? Can we let a certain
> user/pid/etc disable the allowlist if they choose to?

I don't think we should, given then we're back to square one where root
or some other user would be able to just override/bypass an LSM.

[...]
> How do you see this working with the allowlist?

We should look at the underlying areas where most of the CVE-like fixes
took place (these days should be more easily doable given Claude and friends)
and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
new hooks which BPF LSM can consume where you can then have a policy to reject
requests and tighten the attack surface. For example, the AF_ALG stuff you
can already easily cover today ...

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#define AF_ALG	38
#define EPERM	1

char _license[] SEC("license") = "Dual BSD/GPL";

SEC("lsm/socket_create")
int BPF_PROG(block_af_alg, int family, int type, int protocol, int kern)
{
	if (family == AF_ALG)
		return -EPERM;
	return 0;
}

... the problem is that distros enable and pull in all sort of crap which
then non-root could pull in via request_module() as an example; similarly
for netlink we want to have a BPF LSM policy to parse into netlink requests
and then reject based on certain attribute matching (both on our todo list)
which would have helped in case of exotic tc cls/act/qdisc modules to prevent
them to be pulled from userns. I bet there are a ton more examples once we
look further into the data.

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH] livepatch: Improve the accuracy of symbol search
From: Miroslav Benes @ 2026-05-21 11:53 UTC (permalink / raw)
  To: luhao
  Cc: jpoimboe, jikos, pmladek, joe.lawrence, live-patching,
	linux-kernel, zhang.chunA, wang.shijie
In-Reply-To: <20260516080833.218948-1-lu.haoA@h3c.com>

Hi,

thank you for the patch...

> module_kallsyms_on_each_symbol, when the input parameter modname is not
>  empty, only searches for symbols within the current module.

Yes, correct.

> When
> patching a kernel object (ko), if the patched function calls
> functions from vmlinux or other ko modules, symbol lookup may fail.

dtto, expected behaviour.

> When patching a ko, the current approach first searches for symbols
> within the module itself. If not found, it uses
> kallsyms_on_each_match_symbol to search in vmlinux. If still not
> found, it calls module_kallsyms_on_each_symbol with modname set to
> NULL to search across all ko modules. The reason for not searching
> across all ko modules from the start is to avoid issues with
> duplicate symbol names.

No, your patch would break things. What are you trying to achieve? Is it 
motivated by a failure or an issue that you met? Could you share it, 
please? There may be a bug somewhere but it is difficult to judge without 
data.

> Reviewed-by: zhangchun <zhang.chunA@h3c.com>
> Reviewed-by: wangshijie <wang.shijie@h3c.com>

Drop these tags next time, please. The review happens here in the open.

> Signed-off-by: luhao <lu.haoA@h3c.com>
> ---
>  kernel/livepatch/core.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
> index 28d15ba58a26..9c587cc4896b 100644
> --- a/kernel/livepatch/core.c
> +++ b/kernel/livepatch/core.c
> @@ -167,9 +167,14 @@ static int klp_find_object_symbol(const char *objname, const char *name,
>                 .pos = sympos,
>         };
> 
> -       if (objname)
> +       if (objname) {
>                 module_kallsyms_on_each_symbol(objname, klp_find_callback, &args);
> -       else
> +
> +               if (args.addr == 0)
> +                       kallsyms_on_each_match_symbol(klp_match_callback, name, &args);
> +               if (args.addr == 0)
> +                       module_kallsyms_on_each_symbol(NULL, klp_find_callback, &args);
> +       } else
>                 kallsyms_on_each_match_symbol(klp_match_callback, name, &args);
> 
>         /*
> --
> 2.51.0
> 
> -------------------------------------------------------------------------------------------------------------------------------------
> ????????????????????????????????????????????????????????????????????????????????
> ??????????????????????????????????????????????????????????????????????????????????????????
> ??????????????????????????????????????????????????????????????
> This e-mail and its attachments contain confidential information from New H3C, which is intended only for the person or entity whose address is listed above.
> Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited.
> If you receive this e-mail in error, please notify the sender by phone or email immediately and delete it!

I think that you do not want the above disclaimer when you submit a patch 
to an open source project. Could you fix your email client, please?

Regards
Miroslav


^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Sasha Levin @ 2026-05-21 14:38 UTC (permalink / raw)
  To: Song Liu
  Cc: Daniel Borkmann, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner
In-Reply-To: <CAPhsuW6C3hyciA4=z+V0BkQ9EEubuNCKLwoxtXorSbnhkUxdJQ@mail.gmail.com>

On Tue, May 19, 2026 at 03:00:15PM -0700, Song Liu wrote:
>On Tue, May 19, 2026 at 12:57 PM Sasha Levin <sashal@kernel.org> wrote:
>[...]
>> >Fully agree with Song here that there is no clear boundary, and that the
>> >killswitch could lead to arbitrary, hard to debug breakage if applied to
>> >the wrong function.. introducing worse bugs than the one being mitigated
>> >or even /short-circuit LSM enforcement/ (engage security_file_open 0,
>> >engage cap_capable 0, engage apparmor_* etc).
>>
>> This is similar to livepatch, right? Do we need guardrails there too?
>
>livepatch has the same guardrails as other kernel modules:
>CONFIG_MODULE_SIG, CONFIG_MODULE_SIG_FORCE, etc.

Which the user can choose to enable or disable. Livepatches will work just fine
with CONFIG_MODULE_SIG=n, right?

With the whitelist approach, the user has no choice but to accept it.

Would it make sense to allow disabling the whitelist via a kernel config or
some runtime flag?

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Sasha Levin @ 2026-05-21 15:31 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Song Liu, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <3dd6d852-18fb-4c64-a1ae-0d79ef7c061f@iogearbox.net>

On Thu, May 21, 2026 at 11:11:16AM +0200, Daniel Borkmann wrote:
>On 5/19/26 9:57 PM, Sasha Levin wrote:
>>Sure, this would also work. How do you see this happening? Can we let a certain
>>user/pid/etc disable the allowlist if they choose to?
>
>I don't think we should, given then we're back to square one where root
>or some other user would be able to just override/bypass an LSM.

killswitch already disables itself when lockdown is active. We can easily
disable it too when one of the LSMs that cares about this is active.

>[...]
>>How do you see this working with the allowlist?
>
>We should look at the underlying areas where most of the CVE-like fixes
>took place (these days should be more easily doable given Claude and friends)
>and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
>new hooks which BPF LSM can consume where you can then have a policy to reject
>requests and tighten the attack surface. For example, the AF_ALG stuff you

So we could grow the LSM tentacles deeper into the kernel, and we can see where
current CVEs are happening, which I suspect is the darker corners of the kernel
(old unmaintained, rarely used code), but this definitely won't stay the case,
right? Newer and better LLMs will discover issues elsewhere, and once the low
hanging fruits are picked off of the current target subsystems, researchers
will move elsewhere. We will be dooming ourselves to an endless cat and mouse
game where we go add LSM hooks after some big security issue goes public.

One question I had here: how would we tackle security issues with BPF itself?

>can already easily cover today ...
>
>#include "vmlinux.h"
>#include <bpf/bpf_helpers.h>
>#include <bpf/bpf_tracing.h>
>
>#define AF_ALG	38
>#define EPERM	1
>
>char _license[] SEC("license") = "Dual BSD/GPL";
>
>SEC("lsm/socket_create")
>int BPF_PROG(block_af_alg, int family, int type, int protocol, int kern)
>{
>	if (family == AF_ALG)
>		return -EPERM;
>	return 0;
>}
>
>... the problem is that distros enable and pull in all sort of crap which
>then non-root could pull in via request_module() as an example; similarly
>for netlink we want to have a BPF LSM policy to parse into netlink requests
>and then reject based on certain attribute matching (both on our todo list)
>which would have helped in case of exotic tc cls/act/qdisc modules to prevent
>them to be pulled from userns. I bet there are a ton more examples once we
>look further into the data.

I definitely agree that BPF is a much nicer hammer than the simple killswitch
implementation. I've actually been (privately) playing with an out of tree
killswitch that also supports BPF. I've pushed the (hacky) code I have to
https://github.com/sashalevin/killswitch , and you can see an example of a BPF
mitigation similar to the one you have above:

https://github.com/sashalevin/killswitch/blob/master/mitigations/cve-2025-21703.sh

My concern is mostly with the whitelist approach.

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Song Liu @ 2026-05-21 18:02 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Daniel Borkmann, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner
In-Reply-To: <ag8Y8L2WCcSEDPkG@laps>

On Thu, May 21, 2026 at 7:38 AM Sasha Levin <sashal@kernel.org> wrote:
>
> On Tue, May 19, 2026 at 03:00:15PM -0700, Song Liu wrote:
> >On Tue, May 19, 2026 at 12:57 PM Sasha Levin <sashal@kernel.org> wrote:
> >[...]
> >> >Fully agree with Song here that there is no clear boundary, and that the
> >> >killswitch could lead to arbitrary, hard to debug breakage if applied to
> >> >the wrong function.. introducing worse bugs than the one being mitigated
> >> >or even /short-circuit LSM enforcement/ (engage security_file_open 0,
> >> >engage cap_capable 0, engage apparmor_* etc).
> >>
> >> This is similar to livepatch, right? Do we need guardrails there too?
> >
> >livepatch has the same guardrails as other kernel modules:
> >CONFIG_MODULE_SIG, CONFIG_MODULE_SIG_FORCE, etc.
>
> Which the user can choose to enable or disable. Livepatches will work just fine
> with CONFIG_MODULE_SIG=n, right?
>
> With the whitelist approach, the user has no choice but to accept it.
>
> Would it make sense to allow disabling the whitelist via a kernel config or
> some runtime flag?

I personally think it makes sense to have options to allow bypassing/blocking
more kernel functions than the current allow list. But I don't know whether
we would like to go all the way to allow it for all the ftrace-able functions.
I think we will need some careful analysis on this.

Thanks,
Song

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Song Liu @ 2026-05-21 18:16 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Daniel Borkmann, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <ag8lOe6dAOgnWmsQ@laps>

On Thu, May 21, 2026 at 8:31 AM Sasha Levin <sashal@kernel.org> wrote:
>
> On Thu, May 21, 2026 at 11:11:16AM +0200, Daniel Borkmann wrote:
> >On 5/19/26 9:57 PM, Sasha Levin wrote:
> >>Sure, this would also work. How do you see this happening? Can we let a certain
> >>user/pid/etc disable the allowlist if they choose to?
> >
> >I don't think we should, given then we're back to square one where root
> >or some other user would be able to just override/bypass an LSM.
>
> killswitch already disables itself when lockdown is active. We can easily
> disable it too when one of the LSMs that cares about this is active.
>
> >[...]
> >>How do you see this working with the allowlist?
> >
> >We should look at the underlying areas where most of the CVE-like fixes
> >took place (these days should be more easily doable given Claude and friends)
> >and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
> >new hooks which BPF LSM can consume where you can then have a policy to reject
> >requests and tighten the attack surface. For example, the AF_ALG stuff you
>
> So we could grow the LSM tentacles deeper into the kernel, and we can see where
> current CVEs are happening, which I suspect is the darker corners of the kernel
> (old unmaintained, rarely used code), but this definitely won't stay the case,
> right? Newer and better LLMs will discover issues elsewhere, and once the low
> hanging fruits are picked off of the current target subsystems, researchers
> will move elsewhere. We will be dooming ourselves to an endless cat and mouse
> game where we go add LSM hooks after some big security issue goes public.

Do we really need to add new LSM hooks for recent CVEs?

The LSM hooks are designed to cover all the user-kernel interfaces. Then
with properly designed policies, we should have coverage for potential CVEs.
Existing LSM hooks may not be perfect, but we can improve the hooks,
potentially with the help of smart LLMs, so that these hooks can cover
future security issues. In some cases, we will need new policies, but I don't
think new hooks will be needed for most of these CVEs.

Thanks,
Song

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Sasha Levin @ 2026-05-23 13:41 UTC (permalink / raw)
  To: Song Liu
  Cc: Daniel Borkmann, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <CAPhsuW7sbt5B+ZeGW8O2JMJ0ELPU-vhZFNvbB+0Q8XhZg6pKYw@mail.gmail.com>

On Thu, May 21, 2026 at 11:16:46AM -0700, Song Liu wrote:
>On Thu, May 21, 2026 at 8:31 AM Sasha Levin <sashal@kernel.org> wrote:
>>
>> On Thu, May 21, 2026 at 11:11:16AM +0200, Daniel Borkmann wrote:
>> >On 5/19/26 9:57 PM, Sasha Levin wrote:
>> >>Sure, this would also work. How do you see this happening? Can we let a certain
>> >>user/pid/etc disable the allowlist if they choose to?
>> >
>> >I don't think we should, given then we're back to square one where root
>> >or some other user would be able to just override/bypass an LSM.
>>
>> killswitch already disables itself when lockdown is active. We can easily
>> disable it too when one of the LSMs that cares about this is active.
>>
>> >[...]
>> >>How do you see this working with the allowlist?
>> >
>> >We should look at the underlying areas where most of the CVE-like fixes
>> >took place (these days should be more easily doable given Claude and friends)
>> >and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
>> >new hooks which BPF LSM can consume where you can then have a policy to reject
>> >requests and tighten the attack surface. For example, the AF_ALG stuff you
>>
>> So we could grow the LSM tentacles deeper into the kernel, and we can see where
>> current CVEs are happening, which I suspect is the darker corners of the kernel
>> (old unmaintained, rarely used code), but this definitely won't stay the case,
>> right? Newer and better LLMs will discover issues elsewhere, and once the low
>> hanging fruits are picked off of the current target subsystems, researchers
>> will move elsewhere. We will be dooming ourselves to an endless cat and mouse
>> game where we go add LSM hooks after some big security issue goes public.
>
>Do we really need to add new LSM hooks for recent CVEs?
>
>The LSM hooks are designed to cover all the user-kernel interfaces. Then
>with properly designed policies, we should have coverage for potential CVEs.
>Existing LSM hooks may not be perfect, but we can improve the hooks,
>potentially with the help of smart LLMs, so that these hooks can cover
>future security issues. In some cases, we will need new policies, but I don't
>think new hooks will be needed for most of these CVEs.

Running a quick LLM evaluation on the last ~70 severe CVEs, it seems that about
40% is doable with the current hooks.

-- 
Thanks,
Sasha

^ permalink raw reply

* [PATCH 0/4] selftests: livepatch: Support 4.12 kernels
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos

One issue that exists since the inception of the livepatch selftests is
the capability of recovering from a failed test: the tested livepatch
modules were kept loaded, interfering with the next tests. The first two
commits introduce a simple mechanism that tracks all loaded modules,
and if the test is aborted, the trap function will disable/unload
the remaining modules. With these changes the test
"livepatch interaction with kprobed function without post_handler" fails
on 4.12 kernels (lacking the feature to have a kprobe a livepatches
to be used alongside), but the loaded modules are unloaded, not
affecting the next tests.

The other patches adapt the test_klp_mod_target.c file to use
module_param_cb and kernel_param_ops structures to test livepatching
of functions in modules. This was done to allow the test to be compiled
on 4.12 kernels. Older kernels lacks proc_create_single function, and
to adapt the code for older versions would require ifdefs, which are
not desirable.

I tested these changes using the current upstream kernel and kernel
from SLE 12-SP5 (kernel 4.12). The test-kprobe.sh test fails on 4.12 due
to the missing capability of livepatch and kprobes to be used together.
The result is that now selftests is able to unload the modules loaded,
and continue with the next tests. For the livepatch target test, it works
the same for 4.12 and current upstream.

Please test and review!

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
Marcos Paulo de Souza (4):
      selftests: livepatch: Introduce _remove_mod function
      selftests: livepatch: Remove leftover modules when a testcase fails
      selftests: livepatch: Adapt mod_target module to pass on 4.12 kernels
      selftests: livepatch: Add information about minimum kernel support

 tools/testing/selftests/livepatch/README           |  3 ++
 tools/testing/selftests/livepatch/functions.sh     | 35 ++++++++++++++++++++--
 .../testing/selftests/livepatch/test-livepatch.sh  | 23 +++++++-------
 .../livepatch/test_modules/test_klp_mod_patch.c    | 11 ++++---
 .../livepatch/test_modules/test_klp_mod_target.c   | 22 +++++++-------
 5 files changed, 62 insertions(+), 32 deletions(-)
---
base-commit: 8f7168335cb2e438668c5d94eea76621c9a10edd
change-id: 20260506-livepatch-unload-on-fail-56d30a4e45ca

Best regards,
--  
Marcos Paulo de Souza <mpdesouza@suse.com>

^ permalink raw reply

* [PATCH 1/4] selftests: livepatch: Introduce _remove_mod function
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com>

This new function will be used in the next patch to remove loaded
modules when a testcase fails.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 tools/testing/selftests/livepatch/functions.sh | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 2bc50271729c..3ec0b7962fc5 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -241,9 +241,10 @@ function load_failing_mod() {
 	log "$ret"
 }
 
-# unload_mod(modname) - unload a kernel module
+# _remove_mod(modname) - Internal function to remove a loaded module.
+#                        Use unload_mod() instead, which also updates TEST_MODS tracking.
 #	modname - module name to unload
-function unload_mod() {
+function _remove_mod() {
 	local mod="$1"
 
 	# Wait for module reference count to clear ...
@@ -261,6 +262,14 @@ function unload_mod() {
 		die "failed to unload module $mod (/sys/module)"
 }
 
+# unload_mod(modname) - unload a kernel module
+#	modname - module name to unload
+function unload_mod() {
+	local mod="$1"
+
+	_remove_mod "$mod"
+}
+
 # unload_lp(modname) - unload a kernel module with a livepatch
 #	modname - module name to unload
 function unload_lp() {

-- 
2.54.0


^ permalink raw reply related

* [PATCH 2/4] selftests: livepatch: Remove leftover modules when a testcase fails
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com>

The current livepatch selftest scripts load modules, run tests and
unloads them. If the test fails, it can leave loaded modules behind, and
in some cases making it impossible to run the next tests.

This approach tracks down the loaded modules, and in case of a test
failure, or premature exit of the script, the cleanup function will
be called by the trap installed on setup_config function.

The cleanup function iterates over the list of leftover loaded modules,
unloading them. The function also checks if the given module is a
livepatch, properly disabling it before unloading.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 tools/testing/selftests/livepatch/functions.sh | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 3ec0b7962fc5..25f137003865 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -15,6 +15,8 @@ if [[ -e /sys/kernel/tracing/trace ]]; then
 else
 	SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
 fi
+# List of loaded modules used in tests
+TEST_MODS=()
 
 # Kselftest framework requirement - SKIP code is 4
 ksft_skip=4
@@ -125,6 +127,14 @@ function set_ftrace_enabled() {
 }
 
 function cleanup() {
+	# Remove leftover modules in reverse order to handle dependencies
+	for mod_item in "${TEST_MODS[@]}"; do
+		if is_livepatch_mod "$mod_item"; then
+			disable_lp "$mod_item"
+		fi
+		_remove_mod "$mod_item"
+	done
+
 	pop_config
 }
 
@@ -181,6 +191,9 @@ function __load_mod() {
 	# Wait for module in sysfs ...
 	loop_until '[[ -e "/sys/module/$mod" ]]' ||
 		die "failed to load module $mod"
+
+	# Store the module in the modules list
+	TEST_MODS+=("$mod")
 }
 
 
@@ -262,12 +275,20 @@ function _remove_mod() {
 		die "failed to unload module $mod (/sys/module)"
 }
 
-# unload_mod(modname) - unload a kernel module
+# unload_mod(modname) - unload a kernel module and remove it from TEST_MODS
 #	modname - module name to unload
 function unload_mod() {
 	local mod="$1"
 
 	_remove_mod "$mod"
+
+	# Remove from TEST_MODS array
+	for i in "${!TEST_MODS[@]}"; do
+		if [[ "${TEST_MODS[$i]}" == "$mod" ]]; then
+			unset 'TEST_MODS[$i]'
+			break
+		fi
+	done
 }
 
 # unload_lp(modname) - unload a kernel module with a livepatch

-- 
2.54.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox