Live Patching

Live Patching
 help / color / mirror / Atom feed

* [PATCH 5/8] riscv: stacktrace: introduce stack-bound tracking helpers
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users
In-Reply-To: <20260527123530.2593918-1-wanghan@linux.alibaba.com>

A reliable unwinder needs to validate that every frame record it reads
is fully contained in a known kernel stack, and it needs to refuse to
walk back into a stack it has already left. Add the building blocks
for that:

  * struct stack_info / struct unwind_state in a new
    asm/stacktrace/common.h, modelled on the arm64 reference
    implementation.
  * stackinfo_get_irq() / stackinfo_get_task() / stackinfo_get_overflow()
    plus the corresponding on_*_stack() predicates in asm/stacktrace.h,
    so callers can ask "is this object on stack X?" by stack kind
    rather than open-coded address arithmetic.
  * unwind_init_common(), unwind_find_stack() and
    unwind_consume_stack() helpers that enforce the
    forward-progress-only invariant required for reliability.

No existing user is wired up to these helpers in this commit; the
unwinder switch comes in a follow-up. The header changes leave
on_thread_stack() with the same semantics as before, just expressed in
terms of the new helpers.

Signed-off-by: Wang Han <wanghan@linux.alibaba.com>
---
 arch/riscv/include/asm/stacktrace.h        |  65 ++++++++-
 arch/riscv/include/asm/stacktrace/common.h | 159 +++++++++++++++++++++
 2 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/include/asm/stacktrace/common.h

diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
index b1495a7e06ce..bc87c4940379 100644
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -3,8 +3,13 @@
 #ifndef _ASM_RISCV_STACKTRACE_H
 #define _ASM_RISCV_STACKTRACE_H
 
+#include <linux/percpu.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/irq_stack.h>
 #include <asm/ptrace.h>
+#include <asm/stacktrace/common.h>
 
 struct stackframe {
 	unsigned long fp;
@@ -16,14 +21,70 @@ extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *re
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
 			   const char *loglvl);
 
-static inline bool on_thread_stack(void)
+/*
+ * IRQ stack accessors
+ */
+static inline struct stack_info stackinfo_get_irq(void)
+{
+	unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
+	unsigned long high = low + IRQ_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static inline bool on_irq_stack(unsigned long sp, unsigned long size)
+{
+	struct stack_info info = stackinfo_get_irq();
+
+	return stackinfo_on_stack(&info, sp, size);
+}
+
+/*
+ * Task stack accessors
+ */
+static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk)
 {
-	return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+	unsigned long low = (unsigned long)task_stack_page(tsk);
+	unsigned long high = low + THREAD_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static inline bool on_task_stack(const struct task_struct *tsk,
+				 unsigned long sp, unsigned long size)
+{
+	struct stack_info info = stackinfo_get_task(tsk);
+
+	return stackinfo_on_stack(&info, sp, size);
 }
 
+/*
+ * Cast is necessary since current->stack is an opaque ptr.
+ */
+#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, 1))
 
+/*
+ * Overflow stack accessors
+ */
 #ifdef CONFIG_VMAP_STACK
 DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+
+static inline struct stack_info stackinfo_get_overflow(void)
+{
+	unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
+	unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
 #endif /* CONFIG_VMAP_STACK */
 
 #endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/stacktrace/common.h b/arch/riscv/include/asm/stacktrace/common.h
new file mode 100644
index 000000000000..87d6d40672f3
--- /dev/null
+++ b/arch/riscv/include/asm/stacktrace/common.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * RISC-V common stack unwinder types and helpers.
+ *
+ * See: arch/arm64/include/asm/stacktrace/common.h for the reference
+ * implementation.
+ *
+ * Copyright (C) 2024
+ */
+#ifndef __ASM_RISCV_STACKTRACE_COMMON_H
+#define __ASM_RISCV_STACKTRACE_COMMON_H
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+
+#include <asm/stacktrace/frame.h>
+
+/**
+ * struct stack_info - describes the bounds of a stack.
+ *
+ * @low:  The lowest valid address on the stack.
+ * @high: The highest valid address on the stack.
+ */
+struct stack_info {
+	unsigned long low;
+	unsigned long high;
+};
+
+/**
+ * struct unwind_state - state used for robust unwinding.
+ *
+ * @fp:        The fp value in the frame record (or the real fp).
+ * @pc:        The ra value in the frame record (or the real ra).
+ *
+ * @stack:     The stack currently being unwound.
+ * @stacks:    An array of stacks which can be unwound.
+ * @nr_stacks: The number of stacks in @stacks.
+ */
+struct unwind_state {
+	unsigned long fp;
+	unsigned long pc;
+
+	struct stack_info stack;
+	struct stack_info *stacks;
+	int nr_stacks;
+};
+
+/**
+ * stackinfo_get_unknown() - Get an unknown stack_info.
+ *
+ * Return: a stack_info with low and high set to 0.
+ */
+static inline struct stack_info stackinfo_get_unknown(void)
+{
+	return (struct stack_info) {
+		.low = 0,
+		.high = 0,
+	};
+}
+
+/**
+ * stackinfo_on_stack() - Check whether an object is fully within a stack.
+ *
+ * @info: The stack to check against.
+ * @sp:   The base address of the object.
+ * @size: The size of the object.
+ *
+ * Return: true if the object is fully contained within the stack.
+ */
+static inline bool stackinfo_on_stack(const struct stack_info *info,
+				      unsigned long sp, unsigned long size)
+{
+	if (!info->low)
+		return false;
+
+	if (sp < info->low || sp + size < sp || sp + size > info->high)
+		return false;
+
+	return true;
+}
+
+/**
+ * unwind_init_common() - Initialize the common parts of the unwind state.
+ *
+ * @state: the unwind state to initialize.
+ */
+static inline void unwind_init_common(struct unwind_state *state)
+{
+	state->stack = stackinfo_get_unknown();
+}
+
+/**
+ * unwind_find_stack() - Find the accessible stack which entirely contains an
+ * object.
+ *
+ * @state: the current unwind state.
+ * @sp:    the base address of the object.
+ * @size:  the size of the object.
+ *
+ * Return: a pointer to the relevant stack_info if found; NULL otherwise.
+ */
+static inline struct stack_info *unwind_find_stack(struct unwind_state *state,
+						   unsigned long sp,
+						   unsigned long size)
+{
+	struct stack_info *info = &state->stack;
+
+	if (stackinfo_on_stack(info, sp, size))
+		return info;
+
+	for (int i = 0; i < state->nr_stacks; i++) {
+		info = &state->stacks[i];
+		if (stackinfo_on_stack(info, sp, size))
+			return info;
+	}
+
+	return NULL;
+}
+
+/**
+ * unwind_consume_stack() - Update stack boundaries so that future unwind steps
+ * cannot consume this object again.
+ *
+ * @state: the current unwind state.
+ * @info:  the stack_info of the stack containing the object.
+ * @sp:    the base address of the object.
+ * @size:  the size of the object.
+ *
+ * Stack transitions are strictly one-way, and once we've
+ * transitioned from one stack to another, it's never valid to
+ * unwind back to the old stack.
+ *
+ * Note that stacks can nest in several valid orders, e.g.
+ *
+ *   TASK -> IRQ -> OVERFLOW
+ *
+ * ... so we do not check the specific order of stack
+ * transitions.
+ */
+static inline void unwind_consume_stack(struct unwind_state *state,
+					struct stack_info *info,
+					unsigned long sp,
+					unsigned long size)
+{
+	struct stack_info tmp;
+
+	tmp = *info;
+	*info = stackinfo_get_unknown();
+	state->stack = tmp;
+
+	/*
+	 * Future unwind steps can only consume stack above this frame record.
+	 * Update the current stack to start immediately above it.
+	 */
+	state->stack.low = sp + size;
+}
+
+#endif /* __ASM_RISCV_STACKTRACE_COMMON_H */
-- 
2.43.0


^ permalink raw reply related

* [PATCH 4/8] riscv: ftrace: always preserve s0 in dynamic ftrace register frame
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users
In-Reply-To: <20260527123530.2593918-1-wanghan@linux.alibaba.com>

The dynamic ftrace entry/exit only saved s0 (the architectural frame
pointer) when HAVE_FUNCTION_GRAPH_FP_TEST was selected. The upcoming
reliable frame-pointer unwinder needs s0 to be present in
ftrace_regs unconditionally so it can use the frame pointer as the
function-graph return-address cookie regardless of FP_TEST.

Save and restore s0 unconditionally in the dynamic ftrace ABI register
frame. The cost is one extra REG_S/REG_L pair per traced call, which is
negligible compared to the overall ftrace cost; the benefit is a
consistent ftrace_regs layout for the unwinder.

Signed-off-by: Wang Han <wanghan@linux.alibaba.com>
---
 arch/riscv/kernel/mcount-dyn.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 082fe0b0e3c0..26c55fba8fec 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -85,9 +85,7 @@
 	addi	sp, sp, -FREGS_SIZE_ON_STACK
 	REG_S	t0,  FREGS_EPC(sp)
 	REG_S	x1,  FREGS_RA(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	REG_S	x8,  FREGS_S0(sp)
-#endif
 	REG_S	x6,  FREGS_T1(sp)
 #ifdef CONFIG_CC_IS_CLANG
 	REG_S	x7,  FREGS_T2(sp)
@@ -113,9 +111,7 @@
 	.macro RESTORE_ABI_REGS
 	REG_L	t0, FREGS_EPC(sp)
 	REG_L	x1, FREGS_RA(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	REG_L	x8, FREGS_S0(sp)
-#endif
 	REG_L	x6,  FREGS_T1(sp)
 #ifdef CONFIG_CC_IS_CLANG
 	REG_L	x7,  FREGS_T2(sp)
-- 
2.43.0


^ permalink raw reply related

* [PATCH 3/8] riscv: stacktrace: disable KASAN instrumentation for stacktrace.o
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users
In-Reply-To: <20260527123530.2593918-1-wanghan@linux.alibaba.com>

KASAN records stack traces for every alloc/free, which means it walks
the unwinder very frequently. Instrumenting the stack trace collection
code itself adds substantial overhead and makes the traces themselves
noisier.

Mark stacktrace.o as not KASAN-instrumented, matching the arm, arm64
and x86 treatment of their stack unwinding code. This is a prerequisite
preference for the upcoming reliable unwinder, but the change is valid
on its own.

Signed-off-by: Wang Han <wanghan@linux.alibaba.com>
---
 arch/riscv/kernel/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index cabb99cadfb6..1cb6c9ab2981 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -44,6 +44,11 @@ CFLAGS_REMOVE_return_address.o	= $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE)
 endif
 
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
 always-$(KBUILD_BUILTIN) += vmlinux.lds
 
 obj-y	+= head.o
-- 
2.43.0


^ permalink raw reply related

* [PATCH 2/8] riscv: stacktrace: Add frame record metadata
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users
In-Reply-To: <20260527123530.2593918-1-wanghan@linux.alibaba.com>

Reliable frame-pointer unwinding needs an explicit way to identify
exception boundaries and the final entry frame. The existing unwinder
infers those boundaries from return addresses, which is too loose for a
future reliable unwinder.

Add a small metadata frame record to pt_regs and initialize it on
exception entry, kernel thread fork, user fork, and early idle task
setup. The record uses a zero {fp, ra} sentinel plus a type field so a
later unwinder can distinguish a final user-to-kernel boundary from a
nested kernel pt_regs boundary.

This follows the arm64 metadata frame-record model, adapted to the
RISC-V {fp, ra} frame record convention.

The metadata is established at the RISC-V entry boundaries that need an
explicit unwind marker:

  * exception entry clears the metadata {fp, ra} pair and uses SPP
    (or MPP in M-mode) to record whether the pt_regs frame is the final
    user-to-kernel boundary or a nested kernel boundary;
  * _start_kernel builds the init task's final metadata record, while
    the secondary CPU path sets up s0 before smp_callin() so idle-task
    unwinding does not inherit an undefined caller frame;
  * copy_thread creates matching final metadata records for new kernel
    and user tasks, and keeps s0 available for the frame-pointer chain;
  * call_on_irq_stack still reserves an aligned stack slot, but links the
    saved {fp, ra} with the raw frame-record size so s0 points at the
    RISC-V frame record rather than past the alignment padding.

These changes keep s0 reserved for the frame-pointer chain at task and
stack-switch boundaries.

Signed-off-by: Wang Han <wanghan@linux.alibaba.com>
---
 arch/riscv/include/asm/ptrace.h           |  9 ++++
 arch/riscv/include/asm/stacktrace/frame.h | 53 +++++++++++++++++++++++
 arch/riscv/kernel/asm-offsets.c           |  4 ++
 arch/riscv/kernel/entry.S                 | 30 +++++++++++--
 arch/riscv/kernel/head.S                  | 23 ++++++++++
 arch/riscv/kernel/process.c               | 31 ++++++++++++-
 6 files changed, 144 insertions(+), 6 deletions(-)
 create mode 100644 arch/riscv/include/asm/stacktrace/frame.h

diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index addc8188152f..4b9b0f279214 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -8,6 +8,7 @@
 
 #include <uapi/asm/ptrace.h>
 #include <asm/csr.h>
+#include <asm/stacktrace/frame.h>
 #include <linux/compiler.h>
 
 #ifndef __ASSEMBLER__
@@ -53,6 +54,14 @@ struct pt_regs {
 	unsigned long cause;
 	/* a0 value before the syscall */
 	unsigned long orig_a0;
+
+	/*
+	 * This frame record is entirely zeroed on exception entry, allowing the
+	 * unwinder to identify exception boundaries. The type field encodes
+	 * whether the exception was taken from user (FINAL) or kernel (PT_REGS)
+	 * mode.
+	 */
+	struct frame_record_meta stackframe;
 };
 
 #define PTRACE_SYSEMU			0x1f
diff --git a/arch/riscv/include/asm/stacktrace/frame.h b/arch/riscv/include/asm/stacktrace/frame.h
new file mode 100644
index 000000000000..5720a6c65fe8
--- /dev/null
+++ b/arch/riscv/include/asm/stacktrace/frame.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_RISCV_STACKTRACE_FRAME_H
+#define __ASM_RISCV_STACKTRACE_FRAME_H
+
+/*
+ * See: arch/arm64/include/asm/stacktrace/frame.h for the reference
+ * implementation.
+ */
+
+/*
+ * - FRAME_META_TYPE_NONE
+ *
+ *   This value is reserved.
+ *
+ * - FRAME_META_TYPE_FINAL
+ *
+ *   The record is the last entry on the stack.
+ *   Unwinding should terminate successfully.
+ *
+ * - FRAME_META_TYPE_PT_REGS
+ *
+ *   The record is embedded within a struct pt_regs, recording the registers at
+ *   an arbitrary point in time.
+ *   Unwinding should consume pt_regs::epc, followed by pt_regs::ra.
+ *
+ * Note: all other values are reserved and should result in unwinding
+ * terminating with an error.
+ */
+#define FRAME_META_TYPE_NONE		0
+#define FRAME_META_TYPE_FINAL		1
+#define FRAME_META_TYPE_PT_REGS		2
+
+#ifndef __ASSEMBLER__
+/*
+ * A standard RISC-V frame record.
+ */
+struct frame_record {
+	unsigned long fp;
+	unsigned long ra;
+};
+
+/*
+ * A metadata frame record indicating a special unwind.
+ * The record::{fp,ra} fields must be zero to indicate the presence of
+ * metadata.
+ */
+struct frame_record_meta {
+	struct frame_record record;
+	unsigned long type;
+};
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_RISCV_STACKTRACE_FRAME_H */
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index af827448a609..8dfcb5a44bb8 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -131,6 +131,9 @@ void asm_offsets(void)
 	OFFSET(PT_BADADDR, pt_regs, badaddr);
 	OFFSET(PT_CAUSE, pt_regs, cause);
 
+	DEFINE(S_STACKFRAME,		offsetof(struct pt_regs, stackframe));
+	DEFINE(S_STACKFRAME_TYPE,	offsetof(struct pt_regs, stackframe.type));
+
 	OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs);
 
 	OFFSET(HIBERN_PBE_ADDR, pbe, address);
@@ -501,6 +504,7 @@ void asm_offsets(void)
 	OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
 
 	DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
+	DEFINE(STACKFRAME_RECORD_SIZE, sizeof(struct stackframe));
 	OFFSET(STACKFRAME_FP, stackframe, fp);
 	OFFSET(STACKFRAME_RA, stackframe, ra);
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index d011fb51c59a..9cae0e1eba1c 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -11,6 +11,7 @@
 #include <asm/asm.h>
 #include <asm/csr.h>
 #include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
 #include <asm/unistd.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
@@ -193,6 +194,27 @@ SYM_CODE_START(handle_exception)
 	REG_S s4, PT_CAUSE(sp)
 	REG_S s5, PT_TP(sp)
 
+	/*
+	 * Create a metadata frame record. The unwinder will use this to
+	 * identify and unwind exception boundaries.
+	 */
+	REG_S zero, (S_STACKFRAME + STACKFRAME_FP)(sp) /* stackframe.record.fp = 0 */
+	REG_S zero, (S_STACKFRAME + STACKFRAME_RA)(sp) /* stackframe.record.ra = 0 */
+#ifdef CONFIG_RISCV_M_MODE
+	li t0, SR_MPP
+	and t0, s1, t0
+#else
+	andi t0, s1, SR_SPP
+#endif
+	bnez t0, 1f
+	li t0, FRAME_META_TYPE_FINAL
+	j 2f
+1:
+	li t0, FRAME_META_TYPE_PT_REGS
+2:
+	REG_S t0, S_STACKFRAME_TYPE(sp)
+	addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE
+
 	/*
 	 * Set the scratch register to 0, so that if a recursive exception
 	 * occurs, the exception vector knows it came from the kernel
@@ -357,8 +379,8 @@ ASM_NOKPROBE(handle_kernel_stack_overflow)
 
 SYM_CODE_START(ret_from_fork_kernel_asm)
 	call schedule_tail
-	move a0, s1 /* fn_arg */
-	move a1, s0 /* fn */
+	move a0, s3 /* fn_arg */
+	move a1, s2 /* fn */
 	move a2, sp /* pt_regs */
 	call ret_from_fork_kernel
 	j ret_from_exception
@@ -383,7 +405,7 @@ SYM_FUNC_START(call_on_irq_stack)
 	addi	sp, sp, -STACKFRAME_SIZE_ON_STACK
 	REG_S	ra, STACKFRAME_RA(sp)
 	REG_S	s0, STACKFRAME_FP(sp)
-	addi	s0, sp, STACKFRAME_SIZE_ON_STACK
+	addi	s0, sp, STACKFRAME_RECORD_SIZE
 
 	/* Switch to the per-CPU shadow call stack */
 	scs_save_current
@@ -399,7 +421,7 @@ SYM_FUNC_START(call_on_irq_stack)
 	scs_load_current
 
 	/* Switch back to the thread stack and restore ra and s0 */
-	addi	sp, s0, -STACKFRAME_SIZE_ON_STACK
+	addi	sp, s0, -STACKFRAME_RECORD_SIZE
 	REG_L	ra, STACKFRAME_RA(sp)
 	REG_L	s0, STACKFRAME_FP(sp)
 	addi	sp, sp, STACKFRAME_SIZE_ON_STACK
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index f6a8ca49e627..00e16a24f149 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -14,6 +14,7 @@
 #include <asm/hwcap.h>
 #include <asm/image.h>
 #include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
 #include <asm/usercfi.h>
 #include "efi-header.S"
 
@@ -177,6 +178,14 @@ secondary_start_sbi:
 	REG_S a0, (a1)
 1:
 #endif
+
+	/*
+	 * Set up the frame pointer for the secondary idle task so reliable
+	 * stack unwinding terminates at the metadata frame in task_pt_regs().
+	 * Without this, the first frame records can inherit an undefined caller
+	 * fp and unwind past smp_callin() into .Lsecondary_park.
+	 */
+	addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE
 	scs_load_current
 	call smp_callin
 #endif /* CONFIG_SMP */
@@ -305,6 +314,20 @@ SYM_CODE_START(_start_kernel)
 	la tp, init_task
 	la sp, init_thread_union + THREAD_SIZE
 	addi sp, sp, -PT_SIZE_ON_STACK
+
+	/*
+	 * Set up a metadata frame record for the init task so that
+	 * the unwinder can identify the outermost frame by its
+	 * {fp, ra} = {0, 0} sentinel at the bottom of pt_regs.
+	 * fp/s0 points above the metadata record (RISC-V
+	 * convention).
+	 */
+	REG_S zero, (S_STACKFRAME + STACKFRAME_FP)(sp)
+	REG_S zero, (S_STACKFRAME + STACKFRAME_RA)(sp)
+	li t0, FRAME_META_TYPE_FINAL
+	REG_S t0, S_STACKFRAME_TYPE(sp)
+	addi s0, sp, S_STACKFRAME + STACKFRAME_RECORD_SIZE
+
 #if defined(CONFIG_RISCV_SBI) && defined(CONFIG_RISCV_USER_CFI)
 	li a7, SBI_EXT_FWFT
 	li a6, SBI_EXT_FWFT_SET
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index b2df7f72241a..5212926b926b 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -258,8 +258,23 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		/* Supervisor/Machine, irqs on: */
 		childregs->status = SR_PP | SR_PIE;
 
-		p->thread.s[0] = (unsigned long)args->fn;
-		p->thread.s[1] = (unsigned long)args->fn_arg;
+		/*
+		 * Set up a metadata frame record at the bottom of the
+		 * stack for the unwinder. Use FRAME_META_TYPE_FINAL
+		 * since this is the outermost kernel entry for the new
+		 * task. The frame_record::{fp,ra} are already zero from
+		 * memset().
+		 *
+		 * fp/s0 points above the metadata record (RISC-V
+		 * convention). fn and fn_arg are passed via s2/s3,
+		 * keeping s0 available for the frame pointer chain.
+		 */
+		childregs->stackframe.type = FRAME_META_TYPE_FINAL;
+
+		p->thread.s[0] = (unsigned long)(&childregs->stackframe)
+				+ sizeof(struct frame_record);
+		p->thread.s[2] = (unsigned long)args->fn;
+		p->thread.s[3] = (unsigned long)args->fn_arg;
 		p->thread.ra = (unsigned long)ret_from_fork_kernel_asm;
 	} else {
 		/* allocate new shadow stack if needed. In case of CLONE_VM we have to */
@@ -278,6 +293,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		if (clone_flags & CLONE_SETTLS)
 			childregs->tp = tls;
 		childregs->a0 = 0; /* Return value of fork() */
+
+		/*
+		 * Set up the unwind boundary: ensure the metadata
+		 * frame record has its {fp,ra} sentinel zeroed and
+		 * point fp/s0 above the metadata record. The type
+		 * field is inherited from the parent's pt_regs.
+		 */
+		childregs->stackframe.record.fp = 0;
+		childregs->stackframe.record.ra = 0;
+		p->thread.s[0] = (unsigned long)(&childregs->stackframe)
+				+ sizeof(struct frame_record);
+
 		p->thread.ra = (unsigned long)ret_from_fork_user_asm;
 	}
 	p->thread.riscv_v_flags = 0;
-- 
2.43.0


^ permalink raw reply related

* [PATCH 1/8] scripts/sorttable: Handle RISC-V patchable ftrace entries
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users
In-Reply-To: <20260527123530.2593918-1-wanghan@linux.alibaba.com>

RISC-V uses -fpatchable-function-entry=8,4 when the compressed ISA is
enabled and -fpatchable-function-entry=4,2 otherwise. In both cases, the
patchable NOP area starts 8 bytes before the function symbol address.
The __mcount_loc entries therefore point at the patchable NOP area
associated with a function, while nm reports the function symbol at the
entry address used for the function range check.

After RISC-V selected HAVE_BUILDTIME_MCOUNT_SORT, sorttable started
applying that range check at build time. Without allowing entries just
before the reported function address, the mcount sorter treats valid
RISC-V ftrace callsites as invalid weak-function entries and writes
them back as zero. The resulting kernel boots with no ftrace entries,
breaking dynamic ftrace and users such as livepatch.

The failure is silent during the final link because zeroing weak-function
entries is an expected sorttable operation. At boot, those zero entries
are skipped by ftrace_process_locs(), so the only obvious symptom is that
the vmlinux ftrace table has lost valid callsites and ftrace users cannot
attach to them.

CONFIG_FTRACE_SORT_STARTUP_TEST also reports the table as sorted in this
state: it only checks that the __mcount_loc entries are in ascending
order, which a fully zeroed table trivially satisfies. The original
commit relied on this check and did not see the regression.

On an affected RISC-V QEMU boot with both CONFIG_FTRACE_SORT_STARTUP_TEST
and CONFIG_FTRACE_STARTUP_TEST enabled, the sort check still passes
while ftrace reports zero usable entries and the early selftests fail:

  [    0.000000] ftrace section at ffffffff8101da98 sorted properly
  [    0.000000] ftrace: allocating 0 entries in 128 pages
  [    0.054999] Testing tracer function: .. no entries found ..FAILED!
  [    0.172407] tracer: function failed selftest, disabling
  [    0.178186] Failed to init function_graph tracer, init returned -19

Handle RISC-V like arm64 for the function-range check and allow
patchable entries up to 8 bytes before the function address.

With this fix, a RISC-V QEMU smoke boot with ftrace startup tests shows
the vmlinux ftrace table is populated and dynamic ftrace still works:

  [    0.000000] ftrace: allocating 46749 entries in 184 pages
  [    0.051115] Testing tracer function: PASSED
  [    1.283782] Testing dynamic ftrace: PASSED
  [    6.275456] Testing tracer function_graph: PASSED

Fixes: 0ca1724b56af ("riscv: ftrace: select HAVE_BUILDTIME_MCOUNT_SORT")

Signed-off-by: Wang Han <wanghan@linux.alibaba.com>
---
 scripts/sorttable.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index e8ed11c680c6..b4061c2c03e1 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -901,11 +901,17 @@ static int do_file(char const *const fname, void *addr)
 		/* fallthrough */
 	case EM_386:
 	case EM_LOONGARCH:
-	case EM_RISCV:
 	case EM_S390:
 	case EM_X86_64:
 		custom_sort = sort_relative_table_with_data;
 		break;
+	case EM_RISCV:
+#ifdef MCOUNT_SORT_ENABLED
+		/* RISC-V uses patchable function entries before function entry. */
+		before_func = 8;
+#endif
+		custom_sort = sort_relative_table_with_data;
+		break;
 	case EM_PARISC:
 	case EM_PPC:
 	case EM_PPC64:
-- 
2.43.0

^ permalink raw reply related

* [PATCH 0/8] riscv: Add reliable stack unwinding for livepatch
From: Wang Han @ 2026-05-27 12:35 UTC (permalink / raw)
  To: Paul Walmsley, Palmer Dabbelt, Albert Ou
  Cc: Alexandre Ghiti, Steven Rostedt, Masami Hiramatsu, Mark Rutland,
	Catalin Marinas, Chen Pei, Andy Chiu, Björn Töpel,
	Deepak Gupta, Puranjay Mohan, Conor Dooley, Josh Poimboeuf,
	Jiri Kosina, Miroslav Benes, Petr Mladek, Joe Lawrence,
	Shuah Khan, Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, linux-riscv, linux-kernel, linux-trace-kernel,
	live-patching, linux-kselftest, linux-perf-users

Problem
=======

Livepatch relies on HAVE_RELIABLE_STACKTRACE to decide whether a task
can safely switch to a patched implementation. RISC-V has a
frame-pointer stack walker, but it is not yet reliable enough for
livepatch. Three pieces are missing:

  * arch_stack_walk_reliable() itself, plus the strict stack-bound
    checks and forward-progress invariants a reliable unwinder needs.
  * Explicit unwind metadata at exception, task-entry and IRQ-stack
    boundaries, so the unwinder can distinguish a final user-to-kernel
    transition from a nested kernel pt_regs frame instead of guessing
    from return addresses.
  * Agreement between the ftrace function-graph, perf callchain and
    mcount paths and the same frame-record assumptions used by the
    reliable unwinder.

There is also a prerequisite ftrace issue on the current riscv/for-next
base. Commit 0ca1724b56af ("riscv: ftrace: select
HAVE_BUILDTIME_MCOUNT_SORT") enabled build-time sorting of the mcount
table. RISC-V uses patchable function entries, and the recorded patch
site is placed before the function symbol. scripts/sorttable currently
does not take that RISC-V layout into account, so valid ftrace sites
can be filtered out before the kernel boots.

Solution
========

Patch 1 fixes scripts/sorttable so the RISC-V build-time mcount sort
path accepts patchable function entries which precede the function
symbol. The fix carries a Fixes: tag for commit 0ca1724b56af ("riscv:
ftrace: select HAVE_BUILDTIME_MCOUNT_SORT") and is otherwise
independent; it can be picked into the RISC-V tree on its own if
preferred.

Patches 2-7 add the reliable unwinder in small, individually
reviewable steps. The design follows the same FP + metadata model
arm64 already uses for livepatch in production: the metadata frame
record in pt_regs, the unwind-state stack-bound bookkeeping, the
exception boundary handling, and the fgraph / kretprobe return-address
recovery are direct adaptations of arch/arm64/kernel/stacktrace.c,
retargeted to the RISC-V {fp, ra} frame record convention.

  * Patch 2 adds frame-record metadata for the RISC-V stack walker.
    Low-level entry and task setup code records whether a frame is a
    normal frame, an exception frame, or a task-entry boundary, so the
    reliable unwinder can validate what it is walking instead of
    guessing from the return address.
  * Patch 3 stops KASAN from instrumenting stacktrace.o, matching the
    arm, arm64 and x86 treatment of their stack unwinding code.
  * Patch 4 always preserves s0 in the dynamic ftrace register frame so
    the unwinder can use the architectural frame pointer as the
    function-graph return-address cookie regardless of FP_TEST.
  * Patch 5 introduces stack_info / unwind_state and the
    forward-progress-only stack-bound helpers that the reliable
    unwinder is built on. No caller is wired up yet.
  * Patch 6 switches arch_stack_walk() to the new frame-pointer based
    unwinder, adds arch_stack_walk_reliable() (still without an
    in-tree caller), routes perf callchains through arch_stack_walk(),
    and updates the function-graph cookie to match.
  * Patch 7 selects HAVE_RELIABLE_STACKTRACE and HAVE_LIVEPATCH under
    FRAME_POINTER && 64BIT and exposes the livepatch menu, finally
    enabling livepatch on RISC-V.

Two alternative directions were considered and deferred:

  * ORC, as used on x86, gives reliable unwinding without runtime FP
    cost, but requires RISC-V objtool stack validation, ORC metadata
    generation, and the runtime ORC unwinder. That is a much larger
    dependency chain than what this series adds.

  * SFrame is the more likely long-term replacement for FP-based
    unwinding on architectures without ORC. Kernel SFrame support is
    still under development and the currently documented SFrame ABI
    set does not cover RISC-V, so making RISC-V livepatch depend on
    SFrame would block it on toolchain and kernel infrastructure that
    is not available yet. SFrame is a replacement rather than an
    extension of the metadata frame record introduced here, so when it
    lands the metadata can be retired together with the FP unwinder.
    The interim cost (~24 bytes added to pt_regs and a handful of
    instructions on exception entry, fork and early init) is bounded
    and limited to FRAME_POINTER=y configurations, which is what the
    RISC-V kernel already builds with for stack tracing today.
    Selecting HAVE_RELIABLE_STACKTRACE under FRAME_POINTER && 64BIT
    therefore does not introduce a new build-time dependency relative
    to the status quo.

This is useful now because livepatch is increasingly important for
long-running server deployments where rebooting for critical fixes is
expensive, and recent RISC-V work (dynamic ftrace and patchable
function entries) has put the rest of the livepatch infrastructure in
place.

Module-side klp relocations rely on the existing RISC-V
apply_relocate_add(); the syscall livepatch selftest exercises the
full klp_apply_section_relocs() -> apply_relocate_add() path on RISC-V.

Patch 8 adds the RISC-V syscall wrapper prefix used by the livepatch
syscall selftest module. Without this, the syscall livepatch selftest
cannot resolve the expected target symbol on RISC-V.

Testing
=======

The series is based on riscv/for-next commit 0ca1724b56af ("riscv:
ftrace: select HAVE_BUILDTIME_MCOUNT_SORT").

Build and static checks:

  * git diff --check riscv/for-next..HEAD
  * scripts/checkpatch.pl --strict for each patch
  * RISC-V Image and modules build clean with:
      - gcc 15.2 (riscv64-unknown-linux-gnu-)
      - LLVM=1 clang 18.1.3
      - LLVM=1 clang 21.1.1
  * Each intermediate commit (patches 1-7) was built individually on
    riscv/for-next to confirm bisectability; all 7 intermediate trees
    plus the final HEAD compile clean.
  * livepatch selftest module build

The unfixed build-time sort path was reproduced under QEMU:

  ftrace: allocating 0 entries in 128 pages
  Testing tracer function: .. no entries found ..FAILED!
  Failed to init function_graph tracer, init returned -19

With the sorttable fix applied, the same QEMU boot finds the expected
ftrace entries and the ftrace startup tests pass:

  ftrace: allocating 46749 entries in 184 pages
  Testing tracer function: PASSED
  Testing dynamic ftrace: PASSED
  Testing tracer function_graph: PASSED

With all eight patches applied, RISC-V QEMU virt boots with SMP=2,
SMP=4, and SMP=8 completed the livepatch and tracing smoke tests. The
livepatch selftest result was the same in all runs:

  livepatch selftests: PASS: 7, SKIP: 1, FAIL: 0

Across these boots, the kernel brought up the requested CPU count and
the startup ftrace tests passed, including dynamic ftrace and
function_graph. The function graph selftests reported passed: 3,
failed: 0, unsupported: 3, and LKDTM WARNING_MESSAGE produced the
expected Call Trace and powered off normally.

The livepatch selftest skip is test-kprobe.sh. The test requires
CONFIG_KPROBES_ON_FTRACE, which is not provided by the current RISC-V
configuration.

Wang Han (8):
  scripts/sorttable: Handle RISC-V patchable ftrace entries
  riscv: stacktrace: Add frame record metadata
  riscv: stacktrace: disable KASAN instrumentation for stacktrace.o
  riscv: ftrace: always preserve s0 in dynamic ftrace register frame
  riscv: stacktrace: introduce stack-bound tracking helpers
  riscv: stacktrace: switch to frame-pointer based unwinder
  riscv: Kconfig: enable HAVE_RELIABLE_STACKTRACE and HAVE_LIVEPATCH
  selftests/livepatch: Add RISC-V syscall wrapper prefix

 arch/riscv/Kconfig                            |   4 +
 arch/riscv/include/asm/ptrace.h               |   9 +
 arch/riscv/include/asm/stacktrace.h           |  65 +-
 arch/riscv/include/asm/stacktrace/common.h    | 159 +++++
 arch/riscv/include/asm/stacktrace/frame.h     |  53 ++
 arch/riscv/kernel/Makefile                    |   5 +
 arch/riscv/kernel/asm-offsets.c               |   4 +
 arch/riscv/kernel/entry.S                     |  30 +-
 arch/riscv/kernel/ftrace.c                    |   6 +-
 arch/riscv/kernel/head.S                      |  23 +
 arch/riscv/kernel/mcount-dyn.S                |   4 -
 arch/riscv/kernel/perf_callchain.c            |   2 +-
 arch/riscv/kernel/process.c                   |  31 +-
 arch/riscv/kernel/stacktrace.c                | 560 +++++++++++++++---
 scripts/sorttable.c                           |   8 +-
 .../livepatch/test_modules/test_klp_syscall.c |   2 +
 16 files changed, 856 insertions(+), 109 deletions(-)
 create mode 100644 arch/riscv/include/asm/stacktrace/common.h
 create mode 100644 arch/riscv/include/asm/stacktrace/frame.h

-- 
2.43.0

^ permalink raw reply

* Re: [PATCH] selftests: livepatch: set LC_ALL=C to fix locale-dependent test failure
From: Marcos Paulo de Souza @ 2026-05-27 12:09 UTC (permalink / raw)
  To: Qiang Ma, jpoimboe, jikos, mbenes, pmladek, joe.lawrence, shuah
  Cc: live-patching, linux-kselftest, linux-kernel
In-Reply-To: <20260527095929.1504032-1-maqianga@uniontech.com>

On Wed, 2026-05-27 at 17:59 +0800, Qiang Ma wrote:
> When executing the command
> "make -C tools/testing/selftests TARGETS=livepatch run_tests",
> the following error message was reported.
> 
> TEST: livepatch interaction with ftrace_enabled sysctl ... not ok
> ...
> livepatch: sysctlo
> : setting key "kernel.ftrace_enabled": Device or resource busy
> livepatch: sysctl: setting key "kernel.ftrace_enabled": 设备或资源忙
> ...
> ERROR: livepatch kselftest(s) failed
> not ok 5 selftests: livepatch: test-ftrace.sh # exit=1
> 
> To fix it, set LC_ALL=C.

Would you mind adding more context here? Can you point exactly why is
this failing inside test-ftrace.sh script?

Have you double checked if you had any previous loaded livepatches why
trying to disable/enable livepatching?

I'll test in my environment, but I'm pretty sure that it used to work
not so long ago.

> 
> Signed-off-by: Qiang Ma <maqianga@uniontech.com>
> ---
>  tools/testing/selftests/livepatch/functions.sh | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/tools/testing/selftests/livepatch/functions.sh
> b/tools/testing/selftests/livepatch/functions.sh
> index 8ec0cb64ad94..ecf27c1120f1 100644
> --- a/tools/testing/selftests/livepatch/functions.sh
> +++ b/tools/testing/selftests/livepatch/functions.sh
> @@ -4,6 +4,8 @@
>  
>  # Shell functions for the rest of the scripts.
>  
> +export LC_ALL=C
> +
>  MAX_RETRIES=600
>  RETRY_INTERVAL=".1"	# seconds
>  SYSFS_KERNEL_DIR="/sys/kernel"

^ permalink raw reply

* [PATCH] selftests: livepatch: set LC_ALL=C to fix locale-dependent test failure
From: Qiang Ma @ 2026-05-27  9:59 UTC (permalink / raw)
  To: jpoimboe, jikos, mbenes, pmladek, joe.lawrence, shuah
  Cc: live-patching, linux-kselftest, linux-kernel, Qiang Ma

When executing the command
"make -C tools/testing/selftests TARGETS=livepatch run_tests",
the following error message was reported.

TEST: livepatch interaction with ftrace_enabled sysctl ... not ok
...
livepatch: sysctlo
: setting key "kernel.ftrace_enabled": Device or resource busy
livepatch: sysctl: setting key "kernel.ftrace_enabled": 设备或资源忙
...
ERROR: livepatch kselftest(s) failed
not ok 5 selftests: livepatch: test-ftrace.sh # exit=1

To fix it, set LC_ALL=C.

Signed-off-by: Qiang Ma <maqianga@uniontech.com>
---
 tools/testing/selftests/livepatch/functions.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 8ec0cb64ad94..ecf27c1120f1 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -4,6 +4,8 @@

 # Shell functions for the rest of the scripts.

+export LC_ALL=C
+
 MAX_RETRIES=600
 RETRY_INTERVAL=".1"	# seconds
 SYSFS_KERNEL_DIR="/sys/kernel"
-- 
2.20.1

^ permalink raw reply related

* Re: [RFC PATCH 1/6] livepatch: Support scoped atomic replace using replace set
From: Yafang Shao @ 2026-05-27  2:37 UTC (permalink / raw)
  To: Petr Mladek; +Cc: jpoimboe, jikos, mbenes, joe.lawrence, song, live-patching
In-Reply-To: <ahWXfHRFpvQBWgsa@pathway.suse.cz>

On Tue, May 26, 2026 at 8:52 PM Petr Mladek <pmladek@suse.com> wrote:
>
> On Wed 2026-05-13 22:33:16, Yafang Shao wrote:
> > Convert the replace attribute from a boolean to a u32 to function as a
> > "replace set." A newly loaded livepatch will now atomically replace
> > existing patches that belong to the same set.
> >
> > This change currently supports function replacement only; support for
> > state and shadow variables will be introduced in subsequent patches.
> >
> > --- a/Documentation/livepatch/cumulative-patches.rst
> > +++ b/Documentation/livepatch/cumulative-patches.rst
> > @@ -17,18 +17,20 @@ from all older livepatches and completely replace them in one transition.
> >  Usage
> >  -----
> >
> > -The atomic replace can be enabled by setting "replace" flag in struct klp_patch,
> > -for example::
> > +The "replace_set" attribute in ``struct klp_patch`` acts as a **replace set**,
> > +defining the scope of the replacement. By default, the replace set is 1.
>
> Why "1" by default, please?
>
> I guess that you wanted to make it "compatible" with the original
> "replace" flag. It makes some sense. But it is weird in the long term.
>
> This patchset is changing the whole semantic. Every livepatch is able
> to replace an older one. It is not longer "no replace" vs "replace
> all". Instead, a livepatch with a particular "replace_set" number
> replaces an older livepatch with the same "replace_set" number.
>
> It brings the question whether "replace_set" is a good name. There
> is always only one enabled livepatch with a particular "replace_set"
> number. It would make sense to call it "replace_tag" or "replace_id".
>
> That said, the "set" might mean a set of livepatched functions.
> And we should make sure that each set is separate. We should refuse
> loading a livepatch which would patch a function already patched
> by another livepatch with another another "replace_set".
>
> Summary:
>
> I would keep "replace_set" name. But I would use "0" by default.

I will update it.

>
> > +For example::
> >
> >       static struct klp_patch patch = {
> >               .mod = THIS_MODULE,
> >               .objs = objs,
> > -             .replace = true,
> > +             .replace_set = 1,
> >       };
> >
> >  All processes are then migrated to use the code only from the new patch.
> > -Once the transition is finished, all older patches are automatically
> > -disabled.
> > +Once the transition is finished, all older patches with the same replace
> > +set are automatically disabled. Patches with different tags remain active.
> >
> >  Ftrace handlers are transparently removed from functions that are no
> >  longer modified by the new cumulative patch.
> > @@ -62,9 +64,10 @@ Limitations:
> >  ------------
> >
> >    - Once the operation finishes, there is no straightforward way
> > -    to reverse it and restore the replaced patches atomically.
> > +    to reverse it and restore the replaced patches (with the same set)
> > +    atomically.
> >
> > -    A good practice is to set .replace flag in any released livepatch.
> > +    A good practice is to set a consistent .replace set in related livepatches.
>
> I would say something like:
>
>      "A good practice is to use only one (default) "replace_set". It
>      makes sure that there always will be only one enabled livepatch
>      on the system. The consistency model will ensure a safe update
>      between two versions. It prevents potential problems with installing
>      two livepatches doing incompatible functional changes."

Thanks for your suggestion.

>
> >      Then re-adding an older livepatch is equivalent to downgrading
> >      to that patch. This is safe as long as the livepatches do _not_ do
> >      extra modifications in (un)patching callbacks or in the module_init()
> > diff --git a/Documentation/livepatch/livepatch.rst b/Documentation/livepatch/livepatch.rst
> > index acb90164929e..07c8d5a13003 100644
> > --- a/Documentation/livepatch/livepatch.rst
> > +++ b/Documentation/livepatch/livepatch.rst
> > @@ -347,15 +347,20 @@ to '0'.
> >  5.3. Replacing
> >  --------------
> >
> > -All enabled patches might get replaced by a cumulative patch that
> > -has the .replace flag set.
> > -
> > -Once the new patch is enabled and the 'transition' finishes then
> > -all the functions (struct klp_func) associated with the replaced
> > -patches are removed from the corresponding struct klp_ops. Also
> > -the ftrace handler is unregistered and the struct klp_ops is
> > -freed when the related function is not modified by the new patch
> > -and func_stack list becomes empty.
> > +All currently enabled patches may be superseded by a cumulative patch that
>
> In fact, there always can be only one livepatch with a given
> "replace_set" number. They always replace each other.

Thanks for the clarification.

>
> > +has the same ``.replace_set`` attribute. Once the new patch is enabled and
> > +the transition finishes, the livepatching core identifies all existing
> > +patches that share the same replace set.
> > +
> > +Once the transition is complete, all functions (``struct klp_func``)
> > +associated with the matching replaced patches are removed from the
> > +corresponding ``struct klp_ops``. If a function is no longer modified by
> > +the new patch and its ``func_stack`` list becomes empty, the ftrace
> > +handler is unregistered and the ``struct klp_ops`` is freed.
> > +
> > +Patches with a different replace set are not affected by this process
> > +and remain active. This allows for the independent management and
> > +stacking of multiple, non-conflicting livepatch sets.
> >
> >  See Documentation/livepatch/cumulative-patches.rst for more details.
> >
> > --- a/kernel/livepatch/core.c
> > +++ b/kernel/livepatch/core.c
> > @@ -454,7 +454,7 @@ static ssize_t replace_show(struct kobject *kobj,
>
> The function should get renamed to replace_set_show()...

sure.

>
> >       struct klp_patch *patch;
> >
> >       patch = container_of(kobj, struct klp_patch, kobj);
> > -     return sysfs_emit(buf, "%d\n", patch->replace);
> > +     return sysfs_emit(buf, "%d\n", patch->replace_set);
> >  }
> >
> >  static ssize_t stack_order_show(struct kobject *kobj,
> > --- a/kernel/livepatch/state.c
> > +++ b/kernel/livepatch/state.c
> > @@ -85,24 +85,25 @@ EXPORT_SYMBOL_GPL(klp_get_prev_state);
> >
> >  /* Check if the patch is able to deal with the existing system state. */
> >  static bool klp_is_state_compatible(struct klp_patch *patch,
> > +                                 struct klp_patch *old_patch,
> >                                   struct klp_state *old_state)
> >  {
> >       struct klp_state *state;
> >
> >       state = klp_get_state(patch, old_state->id);
> >
> > -     /* A cumulative livepatch must handle all already modified states. */
> > +     /*
> > +      * If the new livepatch shares a state set with an existing one, it
> > +      * must maintain compatibility with all states modified by the old
> > +      * patch.
> > +      */
> >       if (!state)
> > -             return !patch->replace;
> > +             return patch->replace_set != old_patch->replace_set;
>
>
> >       return state->version >= old_state->version;
>
> Also I would enforce that two livepatches with a different "replace_set"
> must _not_ use the same "state->id".

I will update it.

>
> >  }
> >
> > -/*
> > - * Check that the new livepatch will not break the existing system states.
> > - * Cumulative patches must handle all already modified states.
> > - * Non-cumulative patches can touch already modified states.
> > - */
> > +/* Check that the new livepatch will not break the existing system states. */
> >  bool klp_is_patch_compatible(struct klp_patch *patch)
> >  {
> >       struct klp_patch *old_patch;
> > @@ -110,7 +111,7 @@ bool klp_is_patch_compatible(struct klp_patch *patch)
> >
> >       klp_for_each_patch(old_patch) {
> >               klp_for_each_state(old_patch, old_state) {
> > -                     if (!klp_is_state_compatible(patch, old_state))
> > +                     if (!klp_is_state_compatible(patch, old_patch, old_state))
> >                               return false;
> >               }
> >       }
>
> In addition, I strictly recommend to compare the set of livepatched
> functions. We should refuse loading a livepatch which would want to modify
> a function which is already livepatched with the livepatch with
> another "replace_set".

I will update it.

>
> Aka, the "set" means a set of livepatched functions. And the sets
> should be independent.
>
> > --- a/kernel/livepatch/transition.c
> > +++ b/kernel/livepatch/transition.c
> > @@ -720,11 +720,11 @@ void klp_force_transition(void)
> >               klp_update_patch_state(idle_task(cpu));
> >
> >       /* Set forced flag for patches being removed. */
> > -     if (klp_target_state == KLP_TRANSITION_UNPATCHED)
> > +     if (klp_target_state == KLP_TRANSITION_UNPATCHED) {
> >               klp_transition_patch->forced = true;
> > -     else if (klp_transition_patch->replace) {
> > +     } else {
> >               klp_for_each_patch(patch) {
> > -                     if (patch != klp_transition_patch)
> > +                     if (patch->replace_set == klp_transition_patch->replace_set)
>
> We still need to skip klp_transition patch as suggested by Sashiko AI.

sure

>
> >                               patch->forced = true;
> >               }
> >       }
> > --- a/scripts/livepatch/init.c
> > +++ b/scripts/livepatch/init.c
> > @@ -72,12 +72,7 @@ static int __init livepatch_mod_init(void)
> >
> >       /* TODO patch->states */
> >
> > -#ifdef KLP_NO_REPLACE
> > -     patch->replace = false;
> > -#else
> > -     patch->replace = true;
> > -#endif
> > -
> > +     patch->replace_set = KLP_REPLACE_TAG;
>
> It should be KLP_REPLACE_SET to keep the naming consistent.

I will fix it.

>
> Is KLP_REPLACE_SET always defined, please?

Right, it is always defined:

  cflags+=("-DKLP_REPLACE_SET=$REPLACE")

>
> >       return klp_enable_patch(patch);
> >
> >  err_free_objs:
> > diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build
> > index 7b82c7503c2b..66d4a0631f1b 100755
> > --- a/scripts/livepatch/klp-build
> > +++ b/scripts/livepatch/klp-build
> > @@ -172,9 +172,9 @@ process_args() {
> >                               NAME="$(module_name_string "$NAME")"
> >                               shift 2
> >                               ;;
> > -                     --no-replace)
> > -                             REPLACE=0
> > -                             shift
> > +                     -s | --replace-set)
> > +                             REPLACE="$2"
>
> I would rename it to REPLACE_SET.

sure

>
> > +                             shift 2
> >                               ;;
> >                       -v | --verbose)
> >                               VERBOSE="V=1"
> > @@ -759,7 +759,7 @@ build_patch_module() {
> >
> >       cflags=("-ffunction-sections")
> >       cflags+=("-fdata-sections")
> > -     [[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE")
> > +     cflags+=("-DKLP_REPLACE_TAG=$REPLACE")
>
> with a consisten naming scheme:
>
>         cflags+=("-DKLP_REPLACE_SET=$REPLACE_SET")
>
> Is there a default value?

The default value is currently 1, but I will update it to 0 as suggested.

  REPLACE=1

>
>
> >       cmd=("make")
> >       cmd+=("$VERBOSE")
>
>
> In general, I am fine with this change. Well, it would require also
> adding/fixing selftests.

I will update it.

>
> That said, I would prefer to rework the klp callbacks, shadow, and
> state API first. But it is not a strict requirement.

I will submit this as a standalone patch, as function replacement
works seamlessly with it.

-- 
Regards
Yafang

^ permalink raw reply

* Re: [RFC PATCH 1/6] livepatch: Support scoped atomic replace using replace set
From: Yafang Shao @ 2026-05-27  2:17 UTC (permalink / raw)
  To: Song Liu
  Cc: Petr Mladek, jpoimboe, jikos, mbenes, joe.lawrence, live-patching
In-Reply-To: <CAPhsuW4OsPexwZE9EeffuDwndV_Oj-fcR5T-ZFFsBOuY1EkKnw@mail.gmail.com>

On Wed, May 27, 2026 at 2:28 AM Song Liu <song@kernel.org> wrote:
>
> On Tue, May 26, 2026 at 3:35 AM Petr Mladek <pmladek@suse.com> wrote:
> [...]
> > > I wonder whether we should have "replace_set = 0" means no replace.
> > > This will simplify the transition for users of the existing replace=false
> > > option. I would like to hear other folks' thoughts on this.
> >
> > I would find this confusing. Also it would complicate the code.
>
> Agreed with your assessment of the scenario.
>
> > I always considered the "replace" and "no replace" mode as two
> > separate worlds:
> >
> >     + people using many "no replace" livepatches
>
> My only concern is that we are adding more burden to these users.
> Before replace_set, they just use 0 for all the live patches.

The only valid use case for setting this to 0 is to keep certain
livepatches persistent on the system. Without replace_set, developers
are forced to disable replacement entirely. With replace_set, however,
they have a better, more selective option to keep specific livepatches
persistent. Therefore, I don't see this new semantic as an issue. On
the other hand, the new klp-build tool already requires developers to
significantly refactor their livepatch building and deployment
systems, so adapting to this new semantic shouldn't be an additional
burden.

> With
> replace_set, they will have to use some mechanism to assign a
> unique replace_set for each livepatch.
>
> I don't know how many users are in this world. If there aren't many
> users, we can ignore this case.
>
> >     + people always using atomic replace
>
> OTOH, these users don't need much change. They will just use
> replace_set = 1 for all live patches.
>
> Note that, I am not proposing to have replace_set = 1 to replace
> all live patches. It only needs to replace other live patches with
> replace_set = 1. The only change I am proposing (debating) here
> is to have replace_set = 0 as "no replace". However, if this still
> feels too confusing, or there are NOT many users in the "no
> replace" world, we can safely ignore this.


-- 
Regards
Yafang

^ permalink raw reply

* Re: [RFC PATCH 1/6] livepatch: Support scoped atomic replace using replace set
From: Song Liu @ 2026-05-26 18:27 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Yafang Shao, jpoimboe, jikos, mbenes, joe.lawrence, live-patching
In-Reply-To: <ahV3dBovdQZoF__j@pathway.suse.cz>

On Tue, May 26, 2026 at 3:35 AM Petr Mladek <pmladek@suse.com> wrote:
[...]
> > I wonder whether we should have "replace_set = 0" means no replace.
> > This will simplify the transition for users of the existing replace=false
> > option. I would like to hear other folks' thoughts on this.
>
> I would find this confusing. Also it would complicate the code.

Agreed with your assessment of the scenario.

> I always considered the "replace" and "no replace" mode as two
> separate worlds:
>
>     + people using many "no replace" livepatches

My only concern is that we are adding more burden to these users.
Before replace_set, they just use 0 for all the live patches. With
replace_set, they will have to use some mechanism to assign a
unique replace_set for each livepatch.

I don't know how many users are in this world. If there aren't many
users, we can ignore this case.

>     + people always using atomic replace

OTOH, these users don't need much change. They will just use
replace_set = 1 for all live patches.

Note that, I am not proposing to have replace_set = 1 to replace
all live patches. It only needs to replace other live patches with
replace_set = 1. The only change I am proposing (debating) here
is to have replace_set = 0 as "no replace". However, if this still
feels too confusing, or there are NOT many users in the "no
replace" world, we can safely ignore this.

Thanks,
Song

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Sasha Levin @ 2026-05-26 14:29 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Song Liu, linux-kernel, linux-doc, linux-kselftest, bpf,
	live-patching, Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <e7464a71-7e97-44a9-a5eb-831306fb5019@iogearbox.net>

On Tue, May 26, 2026 at 03:10:45PM +0200, Daniel Borkmann wrote:
>On 5/23/26 3:41 PM, Sasha Levin wrote:
>>On Thu, May 21, 2026 at 11:16:46AM -0700, Song Liu wrote:
>>>On Thu, May 21, 2026 at 8:31 AM Sasha Levin <sashal@kernel.org> wrote:
>>>>On Thu, May 21, 2026 at 11:11:16AM +0200, Daniel Borkmann wrote:
>>>>>On 5/19/26 9:57 PM, Sasha Levin wrote:
>>>>>>Sure, this would also work. How do you see this happening? Can we let a certain
>>>>>>user/pid/etc disable the allowlist if they choose to?
>>>>>
>>>>>I don't think we should, given then we're back to square one where root
>>>>>or some other user would be able to just override/bypass an LSM.
>>>>
>>>>killswitch already disables itself when lockdown is active. We can easily
>>>>disable it too when one of the LSMs that cares about this is active.
>>>>
>>>>>[...]
>>>>>>How do you see this working with the allowlist?
>>>>>
>>>>>We should look at the underlying areas where most of the CVE-like fixes
>>>>>took place (these days should be more easily doable given Claude and friends)
>>>>>and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
>>>>>new hooks which BPF LSM can consume where you can then have a policy to reject
>>>>>requests and tighten the attack surface. For example, the AF_ALG stuff you
>>>>
>>>>So we could grow the LSM tentacles deeper into the kernel, and we can see where
>>>>current CVEs are happening, which I suspect is the darker corners of the kernel
>>>>(old unmaintained, rarely used code), but this definitely won't stay the case,
>>>>right? Newer and better LLMs will discover issues elsewhere, and once the low
>>>>hanging fruits are picked off of the current target subsystems, researchers
>>>>will move elsewhere. We will be dooming ourselves to an endless cat and mouse
>>>>game where we go add LSM hooks after some big security issue goes public.
>>>
>>>Do we really need to add new LSM hooks for recent CVEs?
>>>
>>>The LSM hooks are designed to cover all the user-kernel interfaces. Then
>>>with properly designed policies, we should have coverage for potential CVEs.
>>>Existing LSM hooks may not be perfect, but we can improve the hooks,
>>>potentially with the help of smart LLMs, so that these hooks can cover
>>>future security issues. In some cases, we will need new policies, but I don't
>>>think new hooks will be needed for most of these CVEs.
>>
>>Running a quick LLM evaluation on the last ~70 severe CVEs, it seems that about
>>40% is doable with the current hooks.
>
>
>Interesting, do you have some more details in which areas your eval sees new
>lsm hooks missing?

The recent ones I saw fall into about 5 buckets:

1. Kernel-thread / workqueue context: LSM hooks fire but current is a worker,
not the actual attacker. Lots of ksmbd, ceph-msgr, and async cleanup races land
here.

2. Driver: pci_driver.probe, notifier_call_chain, ib_* RDMA callbacks, ndo_*,
bus dispatch tables all sit below any LSM hook. Big chunk of mlx5, RDMA, USB,
i3c, DRM bugs.

3. Per-packet softirq RX: security_sock_rcv_skb only fires inside
sk_filter_trim_cap, which UDP encap_rcv bypasses and L2/bridge protocols never
reach. Covers Bluetooth softirq, bond, IPv6 softirq, TCP-MD5/AO timing leaks,
etc.

4. Netfilter: config path is well-gated via security_netlink_send, but
per-match callbacks, set GC, and flowtable cleanup have nothing. That's where
most of the recent netfilter CVEs actually fire.

5. Crypto subsystem + io_uring per-opcode: crypto/ has zero LSM hooks.

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [RFC PATCH 0/6] livepatch: Introduce replace set support
From: Petr Mladek @ 2026-05-26 13:13 UTC (permalink / raw)
  To: Yafang Shao; +Cc: jpoimboe, jikos, mbenes, joe.lawrence, song, live-patching
In-Reply-To: <20260513143321.26185-1-laoar.shao@gmail.com>

On Wed 2026-05-13 22:33:15, Yafang Shao wrote:
> We previously proposed a BPF+livepatch method to enable rapid
> experimentation with new kernel features without interrupting production
> workloads:
> 
>   https://lore.kernel.org/live-patching/20260402092607.96430-1-laoar.shao@gmail.com/
> 
> In the resulting discussion, Song and Petr suggested adding a "replace set"
> to support scenarios where specific livepatches can be selectively replaced
> or skipped.
> 
> - Patch #1:
>   Adds replace set support for livepatch functions.
> 
> - Patch #2~#5:
>   Derived from Petr's original patchset:
> 
>     https://lore.kernel.org/all/20250115082431.5550-3-pmladek@suse.com/
> 
>   All the selftests are not included in this RFC.
>   Note: Due to a significant refactor in Patch #5, I have omitted Petr's
>   Signed-off-by for that specific patch. Please let me know if this is not
>   the preferred approach.

I am not going to review these patches in this round. They are based
on an outdated RFC. I guess that they do not handle feedback against
the RFC. Also they would require massive changes in the selftests.

Note that I have already done most of the work, see
https://github.com/pmladek/linux/tree/klp-state-transfer-v1-iter12
It requires rebasing on top of last linus tree and some
clean up.

I still hope that I will be able to work on it rather sooner
than later.

> - Patch #6:
>   Adds replace set support for the shadow variable API.

Best Regards,
Petr

^ permalink raw reply

* Re: [PATCH v3] killswitch: add per-function short-circuit mitigation primitive
From: Daniel Borkmann @ 2026-05-26 13:10 UTC (permalink / raw)
  To: Sasha Levin, Song Liu
  Cc: linux-kernel, linux-doc, linux-kselftest, bpf, live-patching,
	Greg Kroah-Hartman, Andrew Morton, Jonathan Corbet,
	Mathieu Desnoyers, Joshua Peisach, Florian Weimer, Breno Leitao,
	Anthony Iliopoulos, Michal Hocko, Jiri Olsa, John Fastabend,
	Christian Brauner, KP Singh
In-Reply-To: <ahGufsGqmLZZQL4M@laps>

On 5/23/26 3:41 PM, Sasha Levin wrote:
> On Thu, May 21, 2026 at 11:16:46AM -0700, Song Liu wrote:
>> On Thu, May 21, 2026 at 8:31 AM Sasha Levin <sashal@kernel.org> wrote:
>>> On Thu, May 21, 2026 at 11:11:16AM +0200, Daniel Borkmann wrote:
>>> >On 5/19/26 9:57 PM, Sasha Levin wrote:
>>> >>Sure, this would also work. How do you see this happening? Can we let a certain
>>> >>user/pid/etc disable the allowlist if they choose to?
>>> >
>>> >I don't think we should, given then we're back to square one where root
>>> >or some other user would be able to just override/bypass an LSM.
>>>
>>> killswitch already disables itself when lockdown is active. We can easily
>>> disable it too when one of the LSMs that cares about this is active.
>>>
>>> >[...]
>>> >>How do you see this working with the allowlist?
>>> >
>>> >We should look at the underlying areas where most of the CVE-like fixes
>>> >took place (these days should be more easily doable given Claude and friends)
>>> >and based on that either extend ALLOW_ERROR_INJECTION() or (better) create
>>> >new hooks which BPF LSM can consume where you can then have a policy to reject
>>> >requests and tighten the attack surface. For example, the AF_ALG stuff you
>>>
>>> So we could grow the LSM tentacles deeper into the kernel, and we can see where
>>> current CVEs are happening, which I suspect is the darker corners of the kernel
>>> (old unmaintained, rarely used code), but this definitely won't stay the case,
>>> right? Newer and better LLMs will discover issues elsewhere, and once the low
>>> hanging fruits are picked off of the current target subsystems, researchers
>>> will move elsewhere. We will be dooming ourselves to an endless cat and mouse
>>> game where we go add LSM hooks after some big security issue goes public.
>>
>> Do we really need to add new LSM hooks for recent CVEs?
>>
>> The LSM hooks are designed to cover all the user-kernel interfaces. Then
>> with properly designed policies, we should have coverage for potential CVEs.
>> Existing LSM hooks may not be perfect, but we can improve the hooks,
>> potentially with the help of smart LLMs, so that these hooks can cover
>> future security issues. In some cases, we will need new policies, but I don't
>> think new hooks will be needed for most of these CVEs.
> 
> Running a quick LLM evaluation on the last ~70 severe CVEs, it seems that about
> 40% is doable with the current hooks.


Interesting, do you have some more details in which areas your eval sees new
lsm hooks missing?

^ permalink raw reply

* Re: [RFC PATCH 6/6] livepatch: Support replace_set in shadow variable API
From: Petr Mladek @ 2026-05-26 13:04 UTC (permalink / raw)
  To: Yafang Shao; +Cc: jpoimboe, jikos, mbenes, joe.lawrence, song, live-patching
In-Reply-To: <20260513143321.26185-7-laoar.shao@gmail.com>

On Wed 2026-05-13 22:33:21, Yafang Shao wrote:
> To support more complex livepatching scenarios where multiple
> replacement sets might coexist, extend the klp_shadow API to
> include a 'replace_set' identifier.
> 
> To maintain compatibility with the existing 64-bit storage in
> 'struct klp_shadow', the internal @id is now treated as a composite
> value. The 64-bit identifier is constructed by packing two 32-bit
> values:
> 
>   MSB (63-32)          LSB (31-0)
>   +--------------------+--------------------+
>   |    replace_set     |    original @id    |
>   +--------------------+--------------------+
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/linux/livepatch.h | 12 ++++---
>  kernel/livepatch/shadow.c | 70 ++++++++++++++++++++++++---------------
>  kernel/livepatch/state.c  |  3 +-
>  3 files changed, 52 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
> index 221f176f1f51..2dd9fca8c01c 100644
> --- a/include/linux/livepatch.h
> +++ b/include/linux/livepatch.h
> @@ -195,15 +195,17 @@ static inline bool klp_have_reliable_stack(void)
>  	       IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE);
>  }
>  
> -void *klp_shadow_get(void *obj, unsigned long id);
> -void *klp_shadow_alloc(void *obj, unsigned long id,
> +void *klp_shadow_get(void *obj, unsigned int replace_set, unsigned int id);
> +void *klp_shadow_alloc(void *obj, unsigned int replace_set, unsigned int id,
>  		       size_t size, gfp_t gfp_flags,
>  		       klp_shadow_ctor_t ctor, void *ctor_data);
> -void *klp_shadow_get_or_alloc(void *obj, unsigned long id,
> +void *klp_shadow_get_or_alloc(void *obj, unsigned int replace_set, unsigned int id,
>  			      size_t size, gfp_t gfp_flags,
>  			      klp_shadow_ctor_t ctor, void *ctor_data);
> -void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor);
> -void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor);
> +void klp_shadow_free(void *obj, unsigned int replace_set, unsigned int id,
> +		     klp_shadow_dtor_t dtor);
> +void klp_shadow_free_all(unsigned int replace_set, unsigned int id,
> +			 klp_shadow_dtor_t dtor);
>  
>  struct klp_state *klp_get_state(struct klp_patch *patch, unsigned long id);
>  struct klp_state *klp_get_prev_state(unsigned long id);
> diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c
> index c2e724d97ddf..35e507fae445 100644
> --- a/kernel/livepatch/shadow.c
> +++ b/kernel/livepatch/shadow.c
> @@ -48,7 +48,8 @@ static DEFINE_SPINLOCK(klp_shadow_lock);
>   * @node:	klp_shadow_hash hash table node
>   * @rcu_head:	RCU is used to safely free this structure
>   * @obj:	pointer to parent object
> - * @id:		data identifier
> + * @id:		combined data identifier
> + *		higher 32 bits: replace_set, lower 32 bits: resource ID

I am not sure if this is worth the complexity.

The 2nd patch allows to associate klp state ID with klp shadow ID.
People should really use it because it helps to maintain the lifetime
of shadow variables and makes it safe.

Then we could refuse loading livepatches with a shadow/state ID when
it is already being used by another livepatch with another "replace_set".

>   * @data:	data area
>   */
>  struct klp_shadow {

Best Regards,
Petr

^ permalink raw reply

* Re: [RFC PATCH 1/6] livepatch: Support scoped atomic replace using replace set
From: Petr Mladek @ 2026-05-26 12:52 UTC (permalink / raw)
  To: Yafang Shao; +Cc: jpoimboe, jikos, mbenes, joe.lawrence, song, live-patching
In-Reply-To: <20260513143321.26185-2-laoar.shao@gmail.com>

On Wed 2026-05-13 22:33:16, Yafang Shao wrote:
> Convert the replace attribute from a boolean to a u32 to function as a
> "replace set." A newly loaded livepatch will now atomically replace
> existing patches that belong to the same set.
> 
> This change currently supports function replacement only; support for
> state and shadow variables will be introduced in subsequent patches.
> 
> --- a/Documentation/livepatch/cumulative-patches.rst
> +++ b/Documentation/livepatch/cumulative-patches.rst
> @@ -17,18 +17,20 @@ from all older livepatches and completely replace them in one transition.
>  Usage
>  -----
>  
> -The atomic replace can be enabled by setting "replace" flag in struct klp_patch,
> -for example::
> +The "replace_set" attribute in ``struct klp_patch`` acts as a **replace set**,
> +defining the scope of the replacement. By default, the replace set is 1.

Why "1" by default, please?

I guess that you wanted to make it "compatible" with the original
"replace" flag. It makes some sense. But it is weird in the long term.

This patchset is changing the whole semantic. Every livepatch is able
to replace an older one. It is not longer "no replace" vs "replace
all". Instead, a livepatch with a particular "replace_set" number
replaces an older livepatch with the same "replace_set" number.

It brings the question whether "replace_set" is a good name. There
is always only one enabled livepatch with a particular "replace_set"
number. It would make sense to call it "replace_tag" or "replace_id".

That said, the "set" might mean a set of livepatched functions.
And we should make sure that each set is separate. We should refuse
loading a livepatch which would patch a function already patched
by another livepatch with another another "replace_set".

Summary:

I would keep "replace_set" name. But I would use "0" by default.

> +For example::
>  
>  	static struct klp_patch patch = {
>  		.mod = THIS_MODULE,
>  		.objs = objs,
> -		.replace = true,
> +		.replace_set = 1,
>  	};
>  
>  All processes are then migrated to use the code only from the new patch.
> -Once the transition is finished, all older patches are automatically
> -disabled.
> +Once the transition is finished, all older patches with the same replace
> +set are automatically disabled. Patches with different tags remain active.
>  
>  Ftrace handlers are transparently removed from functions that are no
>  longer modified by the new cumulative patch.
> @@ -62,9 +64,10 @@ Limitations:
>  ------------
>  
>    - Once the operation finishes, there is no straightforward way
> -    to reverse it and restore the replaced patches atomically.
> +    to reverse it and restore the replaced patches (with the same set)
> +    atomically.
>  
> -    A good practice is to set .replace flag in any released livepatch.
> +    A good practice is to set a consistent .replace set in related livepatches.

I would say something like:

     "A good practice is to use only one (default) "replace_set". It
     makes sure that there always will be only one enabled livepatch
     on the system. The consistency model will ensure a safe update
     between two versions. It prevents potential problems with installing
     two livepatches doing incompatible functional changes."

>      Then re-adding an older livepatch is equivalent to downgrading
>      to that patch. This is safe as long as the livepatches do _not_ do
>      extra modifications in (un)patching callbacks or in the module_init()
> diff --git a/Documentation/livepatch/livepatch.rst b/Documentation/livepatch/livepatch.rst
> index acb90164929e..07c8d5a13003 100644
> --- a/Documentation/livepatch/livepatch.rst
> +++ b/Documentation/livepatch/livepatch.rst
> @@ -347,15 +347,20 @@ to '0'.
>  5.3. Replacing
>  --------------
>  
> -All enabled patches might get replaced by a cumulative patch that
> -has the .replace flag set.
> -
> -Once the new patch is enabled and the 'transition' finishes then
> -all the functions (struct klp_func) associated with the replaced
> -patches are removed from the corresponding struct klp_ops. Also
> -the ftrace handler is unregistered and the struct klp_ops is
> -freed when the related function is not modified by the new patch
> -and func_stack list becomes empty.
> +All currently enabled patches may be superseded by a cumulative patch that

In fact, there always can be only one livepatch with a given
"replace_set" number. They always replace each other.

> +has the same ``.replace_set`` attribute. Once the new patch is enabled and
> +the transition finishes, the livepatching core identifies all existing
> +patches that share the same replace set.
> +
> +Once the transition is complete, all functions (``struct klp_func``)
> +associated with the matching replaced patches are removed from the
> +corresponding ``struct klp_ops``. If a function is no longer modified by
> +the new patch and its ``func_stack`` list becomes empty, the ftrace
> +handler is unregistered and the ``struct klp_ops`` is freed.
> +
> +Patches with a different replace set are not affected by this process
> +and remain active. This allows for the independent management and
> +stacking of multiple, non-conflicting livepatch sets.
>  
>  See Documentation/livepatch/cumulative-patches.rst for more details.
>  
> --- a/kernel/livepatch/core.c
> +++ b/kernel/livepatch/core.c
> @@ -454,7 +454,7 @@ static ssize_t replace_show(struct kobject *kobj,

The function should get renamed to replace_set_show()...

>  	struct klp_patch *patch;
>  
>  	patch = container_of(kobj, struct klp_patch, kobj);
> -	return sysfs_emit(buf, "%d\n", patch->replace);
> +	return sysfs_emit(buf, "%d\n", patch->replace_set);
>  }
>  
>  static ssize_t stack_order_show(struct kobject *kobj,
> --- a/kernel/livepatch/state.c
> +++ b/kernel/livepatch/state.c
> @@ -85,24 +85,25 @@ EXPORT_SYMBOL_GPL(klp_get_prev_state);
>  
>  /* Check if the patch is able to deal with the existing system state. */
>  static bool klp_is_state_compatible(struct klp_patch *patch,
> +				    struct klp_patch *old_patch,
>  				    struct klp_state *old_state)
>  {
>  	struct klp_state *state;
>  
>  	state = klp_get_state(patch, old_state->id);
>  
> -	/* A cumulative livepatch must handle all already modified states. */
> +	/*
> +	 * If the new livepatch shares a state set with an existing one, it
> +	 * must maintain compatibility with all states modified by the old
> +	 * patch.
> +	 */
>  	if (!state)
> -		return !patch->replace;
> +		return patch->replace_set != old_patch->replace_set;


>  	return state->version >= old_state->version;

Also I would enforce that two livepatches with a different "replace_set"
must _not_ use the same "state->id".

>  }
>  
> -/*
> - * Check that the new livepatch will not break the existing system states.
> - * Cumulative patches must handle all already modified states.
> - * Non-cumulative patches can touch already modified states.
> - */
> +/* Check that the new livepatch will not break the existing system states. */
>  bool klp_is_patch_compatible(struct klp_patch *patch)
>  {
>  	struct klp_patch *old_patch;
> @@ -110,7 +111,7 @@ bool klp_is_patch_compatible(struct klp_patch *patch)
>  
>  	klp_for_each_patch(old_patch) {
>  		klp_for_each_state(old_patch, old_state) {
> -			if (!klp_is_state_compatible(patch, old_state))
> +			if (!klp_is_state_compatible(patch, old_patch, old_state))
>  				return false;
>  		}
>  	}

In addition, I strictly recommend to compare the set of livepatched
functions. We should refuse loading a livepatch which would want to modify
a function which is already livepatched with the livepatch with
another "replace_set".

Aka, the "set" means a set of livepatched functions. And the sets
should be independent.

> --- a/kernel/livepatch/transition.c
> +++ b/kernel/livepatch/transition.c
> @@ -720,11 +720,11 @@ void klp_force_transition(void)
>  		klp_update_patch_state(idle_task(cpu));
>  
>  	/* Set forced flag for patches being removed. */
> -	if (klp_target_state == KLP_TRANSITION_UNPATCHED)
> +	if (klp_target_state == KLP_TRANSITION_UNPATCHED) {
>  		klp_transition_patch->forced = true;
> -	else if (klp_transition_patch->replace) {
> +	} else {
>  		klp_for_each_patch(patch) {
> -			if (patch != klp_transition_patch)
> +			if (patch->replace_set == klp_transition_patch->replace_set)

We still need to skip klp_transition patch as suggested by Sashiko AI.

>  				patch->forced = true;
>  		}
>  	}
> --- a/scripts/livepatch/init.c
> +++ b/scripts/livepatch/init.c
> @@ -72,12 +72,7 @@ static int __init livepatch_mod_init(void)
>  
>  	/* TODO patch->states */
>  
> -#ifdef KLP_NO_REPLACE
> -	patch->replace = false;
> -#else
> -	patch->replace = true;
> -#endif
> -
> +	patch->replace_set = KLP_REPLACE_TAG;

It should be KLP_REPLACE_SET to keep the naming consistent.

Is KLP_REPLACE_SET always defined, please?

>  	return klp_enable_patch(patch);
>  
>  err_free_objs:
> diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build
> index 7b82c7503c2b..66d4a0631f1b 100755
> --- a/scripts/livepatch/klp-build
> +++ b/scripts/livepatch/klp-build
> @@ -172,9 +172,9 @@ process_args() {
>  				NAME="$(module_name_string "$NAME")"
>  				shift 2
>  				;;
> -			--no-replace)
> -				REPLACE=0
> -				shift
> +			-s | --replace-set)
> +				REPLACE="$2"

I would rename it to REPLACE_SET.

> +				shift 2
>  				;;
>  			-v | --verbose)
>  				VERBOSE="V=1"
> @@ -759,7 +759,7 @@ build_patch_module() {
>  
>  	cflags=("-ffunction-sections")
>  	cflags+=("-fdata-sections")
> -	[[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE")
> +	cflags+=("-DKLP_REPLACE_TAG=$REPLACE")

with a consisten naming scheme:

	cflags+=("-DKLP_REPLACE_SET=$REPLACE_SET")

Is there a default value?


>  	cmd=("make")
>  	cmd+=("$VERBOSE")


In general, I am fine with this change. Well, it would require also
adding/fixing selftests.

That said, I would prefer to rework the klp callbacks, shadow, and
state API first. But it is not a strict requirement.

Best Regards,
Petr

^ permalink raw reply

* Re: [RFC PATCH 1/6] livepatch: Support scoped atomic replace using replace set
From: Petr Mladek @ 2026-05-26 10:35 UTC (permalink / raw)
  To: Song Liu
  Cc: Yafang Shao, jpoimboe, jikos, mbenes, joe.lawrence, live-patching
In-Reply-To: <CAPhsuW6Aa8Tu5aWGVYzRVVNEnJiHrNzsa4aKXoOEa_gwhp3XfQ@mail.gmail.com>

On Mon 2026-05-18 14:25:04, Song Liu wrote:
> On Wed, May 13, 2026 at 7:34 AM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > Convert the replace attribute from a boolean to a u32 to function as a
> > "replace set." A newly loaded livepatch will now atomically replace
> > existing patches that belong to the same set.
> >
> > This change currently supports function replacement only; support for
> > state and shadow variables will be introduced in subsequent patches.
> >
> > Suggested-by: Song Liu <song@kernel.org>
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  .../livepatch/cumulative-patches.rst          | 17 ++++++++------
> >  Documentation/livepatch/livepatch.rst         | 23 +++++++++++--------
> >  include/linux/livepatch.h                     |  5 ++--
> >  kernel/livepatch/core.c                       | 16 ++++++++-----
> >  kernel/livepatch/state.c                      | 17 +++++++-------
> >  kernel/livepatch/transition.c                 | 10 ++++----
> >  scripts/livepatch/init.c                      |  7 +-----
> >  scripts/livepatch/klp-build                   | 14 +++++------
> >  8 files changed, 59 insertions(+), 50 deletions(-)
> >
> > diff --git a/Documentation/livepatch/cumulative-patches.rst b/Documentation/livepatch/cumulative-patches.rst
> > index 1931f318976a..6ef49748110e 100644
> > --- a/Documentation/livepatch/cumulative-patches.rst
> > +++ b/Documentation/livepatch/cumulative-patches.rst
> > @@ -17,18 +17,20 @@ from all older livepatches and completely replace them in one transition.
> >  Usage
> >  -----
> >
> > -The atomic replace can be enabled by setting "replace" flag in struct klp_patch,
> > -for example::
> > +The "replace_set" attribute in ``struct klp_patch`` acts as a **replace set**,
> > +defining the scope of the replacement. By default, the replace set is 1.
> > +
> > +For example::
> >
> >         static struct klp_patch patch = {
> >                 .mod = THIS_MODULE,
> >                 .objs = objs,
> > -               .replace = true,
> > +               .replace_set = 1,
> >         };
> 
> I wonder whether we should have "replace_set = 0" means no replace.
> This will simplify the transition for users of the existing replace=false
> option. I would like to hear other folks' thoughts on this.

I would find this confusing. Also it would complicate the code.

I always considered the "replace" and "no replace" mode as two
separate worlds:

    + people using many "no replace" livepatches
    + people always using atomic replace

But the code had to handle also the world where:

    + people might combine "no replace" and "replace all" livepatches

which looked like a clash of the two worlds. And different people might
have different expectations about the behavior.

The "replace_set" allows to remove this clash. It looks like a win-win.
And it makes the change acceptable for me.

Best Regards,
Petr

^ permalink raw reply

* Re: [PATCH 1/4] selftests: livepatch: Introduce _remove_mod function
From: Marcos Paulo de Souza @ 2026-05-25 11:49 UTC (permalink / raw)
  To: sashiko-reviews, Jiri Kosina, Petr Mladek, Joe Lawrence,
	Shuah Khan, Miroslav Benes, Josh Poimboeuf
  Cc: linux-kernel, linux-kselftest, live-patching
In-Reply-To: <20260524235906.267EF1F000E9@smtp.kernel.org>

On Sun, 2026-05-24 at 23:59 +0000, sashiko-bot@kernel.org wrote:
> Thank you for your contribution! Sashiko AI review found 1 potential
> issue(s) to consider:
> - [Medium] `_remove_mod` uses `die()` which will terminate the script
> during cleanup operations.
> --
> 
> commit 7b1591f4e5c3b351136e7fe2821eb2e15ae59ad0
> Author: Marcos Paulo de Souza <mpdesouza@suse.com>
> 
> selftests: livepatch: Introduce _remove_mod function
> 
> This commit introduces the _remove_mod internal function to remove a
> loaded
> module, intended to be used during cleanup when a testcase fails. It
> also
> refactors unload_mod to use this new helper function.
> 
> > diff --git a/tools/testing/selftests/livepatch/functions.sh
> > b/tools/testing/selftests/livepatch/functions.sh
> > index 2bc50271729c6..3ec0b7962fc5c 100644
> > --- a/tools/testing/selftests/livepatch/functions.sh
> > +++ b/tools/testing/selftests/livepatch/functions.sh
> 
> [ ... ]
> 
> > @@ -241,9 +241,10 @@ function load_failing_mod() {
> >  	log "$ret"
> >  }
> >  
> > -# unload_mod(modname) - unload a kernel module
> > +# _remove_mod(modname) - Internal function to remove a loaded
> > module.
> > +#                        Use unload_mod() instead, which also
> > updates TEST_MODS tracking.
> >  #	modname - module name to unload
> > -function unload_mod() {
> > +function _remove_mod() {
> >  	local mod="$1"
> >  
> >  	# Wait for module reference count to clear ...
> > @@ -261,6 +262,14 @@ function unload_mod() {
> >  		die "failed to unload module $mod (/sys/module)"
> 
> [Severity: Medium]
> The commit message notes this function will be used to remove loaded
> modules
> when a testcase fails. If used in a cleanup path (such as a bash EXIT
> trap),
> will calling die() here terminate the script prematurely?
> 
> If a module fails to unload (for example, if its refcount doesn't
> drop),
> die() executes exit 1. Inside a trap, this aborts the rest of the
> cleanup process. This could skip unloading any remaining modules and
> prevent final cleanup operations like pop_config() from running,
> potentially
> leaving kernel tracing persistently enabled and polluting the system
> state
> for subsequent tests.

The system is already polluted when a testcase fails nowadays, since a
module is kept loaded either way, making the next tests to fail if they
also try to lead the same modules.

The current approach is trying to make it more reliable.

> 
> >  }
> 
> [ ... ]

^ permalink raw reply

* Re: [PATCH 4/4] selftests: livepatch: Add information about minimum kernel support
From: sashiko-bot @ 2026-05-25  0:29 UTC (permalink / raw)
  To: Miroslav Benes, Jiri Kosina, Shuah Khan, Josh Poimboeuf,
	Petr Mladek, Joe Lawrence
  Cc: linux-kselftest, linux-kernel, live-patching,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-4-7465de7f741d@suse.com>

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] Mandating backward compatibility with an ancient kernel version (4.12) for all new in-tree selftests violates established upstream kernel principles.
--

commit 005a4d6b98d5e0ddf7ab7d595575364d010c4d3f
Author: Marcos Paulo de Souza <mpdesouza@suse.com>

selftests: livepatch: Add information about minimum kernel support

This commit adds a note to the livepatch selftests README. It documents that
the current tests are compatible with kernel 4.12 and mandates that all new
tests must maintain this backward compatibility.

> diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
> index d2035dd64a2be..293f4730b9277 100644
> --- a/tools/testing/selftests/livepatch/README
> +++ b/tools/testing/selftests/livepatch/README
> @@ -54,3 +54,6 @@ check_result().  The latter function greps the kernel's ring buffer for
>  those strings for result comparison.  Other utility functions include
>  general module loading and livepatch loading helpers (waiting for patch
>  transitions, sysfs entries, etc.)
> +
> +All new tests and test modules MUST be compatible with kernel version 4.12 and
> +later, including current upstream kernels.

[Severity: Medium]
Does mandating backward compatibility with kernel 4.12 for all new tests
conflict with upstream kernel principles?

Requiring tests to support an older kernel could block the introduction of
tests for any livepatch features developed after 4.12, as the required
internal APIs would not exist. This might also force developers to clutter
the test module codebase with version checks for old kernels.

Should the burden of backporting tests to older enterprise kernels be left
to downstream distributions rather than enforced on upstream contributors?

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com?part=4

^ permalink raw reply

* Re: [PATCH 3/4] selftests: livepatch: Adapt mod_target module to pass on 4.12 kernels
From: sashiko-bot @ 2026-05-25  0:24 UTC (permalink / raw)
  To: Miroslav Benes, Shuah Khan, Petr Mladek, Josh Poimboeuf,
	Joe Lawrence, Jiri Kosina
  Cc: live-patching, linux-kselftest, Marcos Paulo de Souza, marcos,
	linux-kernel
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-3-7465de7f741d@suse.com>

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] The `test_klp_mod_target_ops` structure omits the logically required `.set` callback, exposing the kernel to a NULL pointer dereference during module load.
--

commit 532c1b8c7438f534fd4e3440716ede6cf83efe1c
Author: Marcos Paulo de Souza <mpdesouza@suse.com>

selftests: livepatch: Adapt mod_target module to pass on 4.12 kernels

Use the stable module_param_cb API instead of proc_fs for exposing module
state. This approach is compatible with kernel 4.12 and later.

> diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
> index 9643984d24022..1382a266825fb 100644
> --- a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
> +++ b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
> @@ -5,31 +5,29 @@
[ ... ]
> +static const struct kernel_param_ops test_klp_mod_target_ops = {
> +	.get = test_klp_mod_target_get,
> +};
> +
> +module_param_cb(klp_mod_arg, &test_klp_mod_target_ops, NULL, 0444);

[Severity: Medium]
Does this code need a .set callback in the kernel_param_ops structure?

When a module parameter is passed during module load, the kernel parses the
arguments using parse_args(), which invokes parse_one() in kernel/params.c.
Because parse_one() unconditionally executes the .set callback without
verifying if it is actually defined, omitting it here leaves the callback
implicitly initialized to NULL.

Could this lead to a NULL pointer dereference and a kernel panic if someone
attempts to set this parameter during module load?

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com?part=3

^ permalink raw reply

* Re: [PATCH 2/4] selftests: livepatch: Remove leftover modules when a testcase fails
From: sashiko-bot @ 2026-05-25  0:06 UTC (permalink / raw)
  To: Josh Poimboeuf, Joe Lawrence, Jiri Kosina, Miroslav Benes,
	Petr Mladek, Shuah Khan
  Cc: Marcos Paulo de Souza, linux-kselftest, marcos, live-patching,
	linux-kernel
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-2-7465de7f741d@suse.com>

Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider:
- [Medium] The cleanup loop iterates over the TEST_MODS array in forward order, violating dependency requirements and causing rmmod to fail.
- [Medium] Functions invoked during cleanup call die() on failure, prematurely aborting the EXIT trap and skipping pop_config().
--

commit aafb48b2a54fe2209742378f31fb96583598557a
Author: Marcos Paulo de Souza <mpdesouza@suse.com>

selftests: livepatch: Remove leftover modules when a testcase fails

This commit introduces a cleanup mechanism for livepatch selftests that
tracks loaded modules and attempts to remove them if a test fails or exits
prematurely. It uses a TEST_MODS array to keep track of dependencies and
unloads the remaining modules during an EXIT trap.

> diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
> index 3ec0b7962fc5c..25f1370038654 100644
> --- a/tools/testing/selftests/livepatch/functions.sh
> +++ b/tools/testing/selftests/livepatch/functions.sh
[ ... ]
> @@ -125,6 +127,14 @@ function set_ftrace_enabled() {
>  }
>  
>  function cleanup() {
> +	# Remove leftover modules in reverse order to handle dependencies
> +	for mod_item in "${TEST_MODS[@]}"; do

[Severity: Medium]
Does this loop actually iterate in reverse order?

The comment states the modules will be removed in reverse order to handle
dependencies, but ${TEST_MODS[@]} expands the array in ascending index
order.

Because dependent modules (like livepatches) are loaded after their base
modules, iterating in forward order will attempt to unload the base modules
first. This would likely cause rmmod to fail with a module in use error.

> +		if is_livepatch_mod "$mod_item"; then
> +			disable_lp "$mod_item"
> +		fi
> +		_remove_mod "$mod_item"

[Severity: Medium]
Can this call prematurely abort the cleanup process?

The cleanup() function is executed within an EXIT trap. If _remove_mod()
fails to unload a module, it calls die() which executes an exit command:

tools/testing/selftests/livepatch/functions.sh:_remove_mod() {
    ...
    die "failed to unload module $mod (/sys/module)"
}

Executing exit inside a bash EXIT trap immediately halts the execution of
the trap itself. This would abruptly skip the remaining iterations of the
module cleanup loop and skip the subsequent pop_config() call.

Failing to execute pop_config() leaves the global kernel debug and tracing
configurations modified, which could pollute the state of the test system.

> +	done
> +
>  	pop_config
>  }

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com?part=2

^ permalink raw reply

* Re: [PATCH 1/4] selftests: livepatch: Introduce _remove_mod function
From: sashiko-bot @ 2026-05-24 23:59 UTC (permalink / raw)
  To: Jiri Kosina, Petr Mladek, Joe Lawrence, Shuah Khan,
	Miroslav Benes, Josh Poimboeuf
  Cc: linux-kernel, linux-kselftest, marcos, live-patching,
	Marcos Paulo de Souza
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-1-7465de7f741d@suse.com>

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] `_remove_mod` uses `die()` which will terminate the script during cleanup operations.
--

commit 7b1591f4e5c3b351136e7fe2821eb2e15ae59ad0
Author: Marcos Paulo de Souza <mpdesouza@suse.com>

selftests: livepatch: Introduce _remove_mod function

This commit introduces the _remove_mod internal function to remove a loaded
module, intended to be used during cleanup when a testcase fails. It also
refactors unload_mod to use this new helper function.

> diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
> index 2bc50271729c6..3ec0b7962fc5c 100644
> --- a/tools/testing/selftests/livepatch/functions.sh
> +++ b/tools/testing/selftests/livepatch/functions.sh

[ ... ]

> @@ -241,9 +241,10 @@ function load_failing_mod() {
>  	log "$ret"
>  }
>  
> -# unload_mod(modname) - unload a kernel module
> +# _remove_mod(modname) - Internal function to remove a loaded module.
> +#                        Use unload_mod() instead, which also updates TEST_MODS tracking.
>  #	modname - module name to unload
> -function unload_mod() {
> +function _remove_mod() {
>  	local mod="$1"
>  
>  	# Wait for module reference count to clear ...
> @@ -261,6 +262,14 @@ function unload_mod() {
>  		die "failed to unload module $mod (/sys/module)"

[Severity: Medium]
The commit message notes this function will be used to remove loaded modules
when a testcase fails. If used in a cleanup path (such as a bash EXIT trap),
will calling die() here terminate the script prematurely?

If a module fails to unload (for example, if its refcount doesn't drop),
die() executes exit 1. Inside a trap, this aborts the rest of the
cleanup process. This could skip unloading any remaining modules and
prevent final cleanup operations like pop_config() from running, potentially
leaving kernel tracing persistently enabled and polluting the system state
for subsequent tests.

>  }

[ ... ]

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com?part=1

^ permalink raw reply

* [PATCH 4/4] selftests: livepatch: Add information about minimum kernel support
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com>

The current livepatch selftests are compatible with kernel 4.12, so add
a note about it for future developers willing to contribute with new
tests.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 tools/testing/selftests/livepatch/README | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index d2035dd64a2b..293f4730b927 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -54,3 +54,6 @@ check_result().  The latter function greps the kernel's ring buffer for
 those strings for result comparison.  Other utility functions include
 general module loading and livepatch loading helpers (waiting for patch
 transitions, sysfs entries, etc.)
+
+All new tests and test modules MUST be compatible with kernel version 4.12 and
+later, including current upstream kernels.

-- 
2.54.0


^ permalink raw reply related

* [PATCH 3/4] selftests: livepatch: Adapt mod_target module to pass on 4.12 kernels
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com>

Use the stable module_param_cb API instead of proc_fs for exposing module
state. This approach is compatible with kernel 4.12 and later. The end
result is the same: the module has a function that shows a string, which
is later livepatched to show a different string. The only difference is
that the file being checked is now a module parameter instead of a
procfs entry.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 tools/testing/selftests/livepatch/functions.sh     |  1 +
 .../testing/selftests/livepatch/test-livepatch.sh  | 23 +++++++++++-----------
 .../livepatch/test_modules/test_klp_mod_patch.c    | 11 +++++------
 .../livepatch/test_modules/test_klp_mod_target.c   | 22 ++++++++++-----------
 4 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 25f137003865..c8c99851c3a2 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -7,6 +7,7 @@
 MAX_RETRIES=600
 RETRY_INTERVAL=".1"	# seconds
 SYSFS_KERNEL_DIR="/sys/kernel"
+SYSFS_MODULE_DIR="/sys/module"
 SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch"
 SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug"
 SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes"
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index c44c5341a2f1..06aacf0f4dc7 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -198,26 +198,25 @@ livepatch: '$MOD_REPLACE': unpatching complete
 % rmmod $MOD_REPLACE"
 
 
-# - load a target module that provides /proc/test_klp_mod_target with
-#   original output
-# - load a livepatch that patches the target module's show function
-# - verify the proc entry returns livepatched output
+# - load a target module with module_param_cb get data with original output
+# - load a livepatch that patches the target module's get function
+# - verify the parameter get function returns the livepatched output
 # - disable and unload the livepatch
-# - verify the proc entry returns original output again
+# - verify the parameter get function returns the original output again
 # - unload the target module
 
 start_test "module function patching"
 
 load_mod $MOD_TARGET
 
-if [[ "$(cat /proc/$MOD_TARGET)" != "$MOD_TARGET: original output" ]] ; then
+if [[ "$(cat $SYSFS_MODULE_DIR/$MOD_TARGET/parameters/klp_mod_arg)" != "$MOD_TARGET: original output" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
 fi
 
 load_lp $MOD_TARGET_PATCH
 
-if [[ "$(cat /proc/$MOD_TARGET)" != "$MOD_TARGET_PATCH: this has been live patched" ]] ; then
+if [[ "$(cat $SYSFS_MODULE_DIR/$MOD_TARGET/parameters/klp_mod_arg)" != "$MOD_TARGET_PATCH: this has been live patched" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
 fi
@@ -225,7 +224,7 @@ fi
 disable_lp $MOD_TARGET_PATCH
 unload_lp $MOD_TARGET_PATCH
 
-if [[ "$(cat /proc/$MOD_TARGET)" != "$MOD_TARGET: original output" ]] ; then
+if [[ "$(cat $SYSFS_MODULE_DIR/$MOD_TARGET/parameters/klp_mod_arg)" != "$MOD_TARGET: original output" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
 fi
@@ -252,9 +251,9 @@ $MOD_TARGET: test_klp_mod_target_exit"
 
 # - load a livepatch that targets a not-yet-loaded module
 # - load the target module: klp_module_coming patches it immediately
-# - verify the proc entry returns livepatched output
+# - verify the parameter get function returns the livepatched output
 # - disable and unload the livepatch
-# - verify the proc entry returns original output again
+# - verify the parameter get function returns the original output again
 # - unload the target module
 
 start_test "module function patching (livepatch first)"
@@ -262,7 +261,7 @@ start_test "module function patching (livepatch first)"
 load_lp $MOD_TARGET_PATCH
 load_mod $MOD_TARGET
 
-if [[ "$(cat /proc/$MOD_TARGET)" != "$MOD_TARGET_PATCH: this has been live patched" ]] ; then
+if [[ "$(cat $SYSFS_MODULE_DIR/$MOD_TARGET/parameters/klp_mod_arg)" != "$MOD_TARGET_PATCH: this has been live patched" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
 fi
@@ -270,7 +269,7 @@ fi
 disable_lp $MOD_TARGET_PATCH
 unload_lp $MOD_TARGET_PATCH
 
-if [[ "$(cat /proc/$MOD_TARGET)" != "$MOD_TARGET: original output" ]] ; then
+if [[ "$(cat $SYSFS_MODULE_DIR/$MOD_TARGET/parameters/klp_mod_arg)" != "$MOD_TARGET: original output" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
 fi
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_patch.c b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_patch.c
index 6725b4720365..ab56e57c02a8 100644
--- a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_patch.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_patch.c
@@ -8,17 +8,16 @@
 #include <linux/livepatch.h>
 #include <linux/seq_file.h>
 
-static int livepatch_mod_target_show(struct seq_file *m, void *v)
+static int livepatch_mod_target_get(char *buffer, const struct kernel_param *kp)
 {
-	seq_printf(m, "%s: %s\n", THIS_MODULE->name,
-		   "this has been live patched");
-	return 0;
+	return sprintf(buffer, "%s: %s\n", THIS_MODULE->name,
+		       "this has been live patched");
 }
 
 static struct klp_func funcs[] = {
 	{
-		.old_name = "test_klp_mod_target_show",
-		.new_func = livepatch_mod_target_show,
+		.old_name = "test_klp_mod_target_get",
+		.new_func = livepatch_mod_target_get,
 	},
 	{},
 };
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
index 9643984d2402..1382a266825f 100644
--- a/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_mod_target.c
@@ -5,31 +5,29 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
+#include <linux/moduleparam.h>
 
-static struct proc_dir_entry *pde;
-
-static noinline int test_klp_mod_target_show(struct seq_file *m, void *v)
+static noinline int test_klp_mod_target_get(char *buffer, const struct kernel_param *kp)
 {
-	seq_printf(m, "%s: %s\n", THIS_MODULE->name, "original output");
-	return 0;
+	return sprintf(buffer, "%s: %s\n", THIS_MODULE->name, "original output");
 }
 
+static const struct kernel_param_ops test_klp_mod_target_ops = {
+	.get = test_klp_mod_target_get,
+};
+
+module_param_cb(klp_mod_arg, &test_klp_mod_target_ops, NULL, 0444);
+MODULE_PARM_DESC(klp_mod_arg, "The value of this argument will be livepatched");
+
 static int test_klp_mod_target_init(void)
 {
 	pr_info("%s\n", __func__);
-	pde = proc_create_single("test_klp_mod_target", 0, NULL,
-				 test_klp_mod_target_show);
-	if (!pde)
-		return -ENOMEM;
 	return 0;
 }
 
 static void test_klp_mod_target_exit(void)
 {
 	pr_info("%s\n", __func__);
-	proc_remove(pde);
 }
 
 module_init(test_klp_mod_target_init);

-- 
2.54.0


^ permalink raw reply related

* [PATCH 2/4] selftests: livepatch: Remove leftover modules when a testcase fails
From: Marcos Paulo de Souza @ 2026-05-24 23:50 UTC (permalink / raw)
  To: Josh Poimboeuf, Jiri Kosina, Miroslav Benes, Petr Mladek,
	Joe Lawrence, Shuah Khan
  Cc: live-patching, linux-kselftest, linux-kernel,
	Marcos Paulo de Souza, marcos
In-Reply-To: <20260524-livepatch-unload-on-fail-v1-0-7465de7f741d@suse.com>

The current livepatch selftest scripts load modules, run tests and
unloads them. If the test fails, it can leave loaded modules behind, and
in some cases making it impossible to run the next tests.

This approach tracks down the loaded modules, and in case of a test
failure, or premature exit of the script, the cleanup function will
be called by the trap installed on setup_config function.

The cleanup function iterates over the list of leftover loaded modules,
unloading them. The function also checks if the given module is a
livepatch, properly disabling it before unloading.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
---
 tools/testing/selftests/livepatch/functions.sh | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 3ec0b7962fc5..25f137003865 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -15,6 +15,8 @@ if [[ -e /sys/kernel/tracing/trace ]]; then
 else
 	SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
 fi
+# List of loaded modules used in tests
+TEST_MODS=()
 
 # Kselftest framework requirement - SKIP code is 4
 ksft_skip=4
@@ -125,6 +127,14 @@ function set_ftrace_enabled() {
 }
 
 function cleanup() {
+	# Remove leftover modules in reverse order to handle dependencies
+	for mod_item in "${TEST_MODS[@]}"; do
+		if is_livepatch_mod "$mod_item"; then
+			disable_lp "$mod_item"
+		fi
+		_remove_mod "$mod_item"
+	done
+
 	pop_config
 }
 
@@ -181,6 +191,9 @@ function __load_mod() {
 	# Wait for module in sysfs ...
 	loop_until '[[ -e "/sys/module/$mod" ]]' ||
 		die "failed to load module $mod"
+
+	# Store the module in the modules list
+	TEST_MODS+=("$mod")
 }
 
 
@@ -262,12 +275,20 @@ function _remove_mod() {
 		die "failed to unload module $mod (/sys/module)"
 }
 
-# unload_mod(modname) - unload a kernel module
+# unload_mod(modname) - unload a kernel module and remove it from TEST_MODS
 #	modname - module name to unload
 function unload_mod() {
 	local mod="$1"
 
 	_remove_mod "$mod"
+
+	# Remove from TEST_MODS array
+	for i in "${!TEST_MODS[@]}"; do
+		if [[ "${TEST_MODS[$i]}" == "$mod" ]]; then
+			unset 'TEST_MODS[$i]'
+			break
+		fi
+	done
 }
 
 # unload_lp(modname) - unload a kernel module with a livepatch

-- 
2.54.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox