* [PATCHv2 01/11] uprobes/x86: Use proper mm_struct in __in_uprobe_trampoline
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 02/11] uprobes/x86: Allow to copy uprobe trampolines on fork Jiri Olsa
` (9 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
In the unregister path we use __in_uprobe_trampoline check with
current->mm for the VMA lookup, which is wrong, because we are
in the tracer context, not the traced process.
Add mm_struct pointer argument to __in_uprobe_trampoline and
changing related callers to pass proper mm_struct pointer.
Fixes: ba2bfc97b462 ("uprobes/x86: Add support to optimize uprobes")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/x86/kernel/uprobes.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index ebb1baf1eb1d..2be6707e3320 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -761,9 +761,9 @@ void arch_uprobe_clear_state(struct mm_struct *mm)
destroy_uprobe_trampoline(tramp);
}
-static bool __in_uprobe_trampoline(unsigned long ip)
+static bool __in_uprobe_trampoline(struct mm_struct *mm, unsigned long ip)
{
- struct vm_area_struct *vma = vma_lookup(current->mm, ip);
+ struct vm_area_struct *vma = vma_lookup(mm, ip);
return vma && vma_is_special_mapping(vma, &tramp_mapping);
}
@@ -776,14 +776,14 @@ static bool in_uprobe_trampoline(unsigned long ip)
rcu_read_lock();
if (mmap_lock_speculate_try_begin(mm, &seq)) {
- found = __in_uprobe_trampoline(ip);
+ found = __in_uprobe_trampoline(mm, ip);
retry = mmap_lock_speculate_retry(mm, seq);
}
rcu_read_unlock();
if (retry) {
mmap_read_lock(mm);
- found = __in_uprobe_trampoline(ip);
+ found = __in_uprobe_trampoline(mm, ip);
mmap_read_unlock(mm);
}
return found;
@@ -1044,7 +1044,7 @@ static int copy_from_vaddr(struct mm_struct *mm, unsigned long vaddr, void *dst,
return 0;
}
-static bool __is_optimized(uprobe_opcode_t *insn, unsigned long vaddr)
+static bool __is_optimized(struct mm_struct *mm, uprobe_opcode_t *insn, unsigned long vaddr)
{
struct __packed __arch_relative_insn {
u8 op;
@@ -1053,7 +1053,7 @@ static bool __is_optimized(uprobe_opcode_t *insn, unsigned long vaddr)
if (!is_call_insn(insn))
return false;
- return __in_uprobe_trampoline(vaddr + 5 + call->raddr);
+ return __in_uprobe_trampoline(mm, vaddr + 5 + call->raddr);
}
static int is_optimized(struct mm_struct *mm, unsigned long vaddr)
@@ -1064,7 +1064,7 @@ static int is_optimized(struct mm_struct *mm, unsigned long vaddr)
err = copy_from_vaddr(mm, vaddr, &insn, 5);
if (err)
return err;
- return __is_optimized((uprobe_opcode_t *)&insn, vaddr);
+ return __is_optimized(mm, (uprobe_opcode_t *)&insn, vaddr);
}
static bool should_optimize(struct arch_uprobe *auprobe)
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCHv2 02/11] uprobes/x86: Allow to copy uprobe trampolines on fork
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 01/11] uprobes/x86: Use proper mm_struct in __in_uprobe_trampoline Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 03/11] uprobes/x86: Move optimized uprobe from nop5 to nop10 Jiri Olsa
` (8 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
When we do fork or clone without CLONE_VM the new process won't
have uprobe trampoline vma objects and at the same time it will
have optimized code calling the trampolines.
Fixing this by allowing vma uprobe trampoline objects to be copied
on fork to the new process.
Fixes: ba2bfc97b462 ("uprobes/x86: Add support to optimize uprobes")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/x86/kernel/uprobes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2be6707e3320..37faf038be33 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -702,7 +702,7 @@ static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr)
tramp->vaddr = vaddr;
vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE,
- VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
+ VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_IO,
&tramp_mapping);
if (IS_ERR(vma)) {
kfree(tramp);
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCHv2 03/11] uprobes/x86: Move optimized uprobe from nop5 to nop10
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 01/11] uprobes/x86: Use proper mm_struct in __in_uprobe_trampoline Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 02/11] uprobes/x86: Allow to copy uprobe trampolines on fork Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 11:50 ` bot+bpf-ci
2026-05-18 10:59 ` [PATCHv2 04/11] libbpf: Change has_nop_combo to work on top of nop10 Jiri Olsa
` (7 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Andrii reported an issue with optimized uprobes [1] that can clobber
redzone area with call instruction storing return address on stack
where user code may keep temporary data without adjusting rsp.
Fixing this by moving the optimized uprobes on top of 10-bytes nop
instruction, so we can squeeze another instruction to escape the
redzone area before doing the call, like:
lea -0x80(%rsp), %rsp
call tramp
Note the lea instruction is used to adjust the rsp register without
changing the flags.
The unoptimize path is bit tricky, because we can't change back to nop10
instruction, because we could have some thread already inside lea instruction.
Instead we change it to 'jmp rel8' jump instruction to end of the 10-byte
slot. The `jmp rel8' is also added as another instruction that allows
optimized uprobe in can_optimize function.
The optimized uprobe performance stays the same:
uprobe-nop : 3.129 ± 0.013M/s
uprobe-push : 3.045 ± 0.006M/s
uprobe-ret : 1.095 ± 0.004M/s
--> uprobe-nop10 : 7.170 ± 0.020M/s
uretprobe-nop : 2.143 ± 0.021M/s
uretprobe-push : 2.090 ± 0.000M/s
uretprobe-ret : 0.942 ± 0.000M/s
--> uretprobe-nop10: 3.381 ± 0.003M/s
usdt-nop : 3.245 ± 0.004M/s
--> usdt-nop10 : 7.256 ± 0.023M/s
[1] https://lore.kernel.org/bpf/20260509003146.976844-1-andrii@kernel.org/
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Closes: https://lore.kernel.org/bpf/20260509003146.976844-1-andrii@kernel.org/
Fixes: ba2bfc97b462 ("uprobes/x86: Add support to optimize uprobes")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/x86/kernel/uprobes.c | 130 ++++++++++++++++++++++++++------------
1 file changed, 89 insertions(+), 41 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 37faf038be33..e0067d1b6242 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -636,9 +636,26 @@ struct uprobe_trampoline {
unsigned long vaddr;
};
+#define LEA_INSN_SIZE 5
+#define OPT_INSN_SIZE (LEA_INSN_SIZE + CALL_INSN_SIZE)
+#define OPT_JMP8_OFFSET (OPT_INSN_SIZE - JMP8_INSN_SIZE)
+#define REDZONE_SIZE 0x80
+
+static const u8 lea_rsp[] = { 0x48, 0x8d, 0x64, 0x24, 0x80 };
+
+static bool is_opt_insns(const uprobe_opcode_t *insn)
+{
+ static const u8 opt_insns[] = {
+ 0x48, 0x8d, 0x64, 0x24, REDZONE_SIZE, /* lea -0x80(%rsp), %rsp */
+ CALL_INSN_OPCODE
+ };
+
+ return !memcmp(insn, opt_insns, ARRAY_SIZE(opt_insns));
+}
+
static bool is_reachable_by_call(unsigned long vtramp, unsigned long vaddr)
{
- long delta = (long)(vaddr + 5 - vtramp);
+ long delta = (long)(vaddr + OPT_INSN_SIZE - vtramp);
return delta >= INT_MIN && delta <= INT_MAX;
}
@@ -651,7 +668,7 @@ static unsigned long find_nearest_trampoline(unsigned long vaddr)
};
unsigned long low_limit, high_limit;
unsigned long low_tramp, high_tramp;
- unsigned long call_end = vaddr + 5;
+ unsigned long call_end = vaddr + OPT_INSN_SIZE;
if (check_add_overflow(call_end, INT_MIN, &low_limit))
low_limit = PAGE_SIZE;
@@ -810,7 +827,7 @@ SYSCALL_DEFINE0(uprobe)
/* Allow execution only from uprobe trampolines. */
if (!in_uprobe_trampoline(regs->ip))
- return -ENXIO;
+ return -EPROTO;
err = copy_from_user(&args, (void __user *)regs->sp, sizeof(args));
if (err)
@@ -826,8 +843,8 @@ SYSCALL_DEFINE0(uprobe)
regs->ax = args.ax;
regs->r11 = args.r11;
regs->cx = args.cx;
- regs->ip = args.retaddr - 5;
- regs->sp += sizeof(args);
+ regs->ip = args.retaddr - OPT_INSN_SIZE;
+ regs->sp += sizeof(args) + REDZONE_SIZE;
regs->orig_ax = -1;
sp = regs->sp;
@@ -844,12 +861,12 @@ SYSCALL_DEFINE0(uprobe)
*/
if (regs->sp != sp) {
/* skip the trampoline call */
- if (args.retaddr - 5 == regs->ip)
- regs->ip += 5;
+ if (args.retaddr - OPT_INSN_SIZE == regs->ip)
+ regs->ip += OPT_INSN_SIZE;
return regs->ax;
}
- regs->sp -= sizeof(args);
+ regs->sp -= sizeof(args) + REDZONE_SIZE;
/* for the case uprobe_consumer has changed ax/r11/cx */
args.ax = regs->ax;
@@ -857,7 +874,7 @@ SYSCALL_DEFINE0(uprobe)
args.cx = regs->cx;
/* keep return address unless we are instructed otherwise */
- if (args.retaddr - 5 != regs->ip)
+ if (args.retaddr - OPT_INSN_SIZE != regs->ip)
args.retaddr = regs->ip;
if (shstk_push(args.retaddr) == -EFAULT)
@@ -891,7 +908,7 @@ asm (
"pop %rax\n"
"pop %r11\n"
"pop %rcx\n"
- "ret\n"
+ "ret $" __stringify(REDZONE_SIZE) "\n"
"int3\n"
".balign " __stringify(PAGE_SIZE) "\n"
".popsection\n"
@@ -909,7 +926,7 @@ late_initcall(arch_uprobes_init);
enum {
EXPECT_SWBP,
- EXPECT_CALL,
+ EXPECT_OPTIMIZED,
};
struct write_opcode_ctx {
@@ -917,11 +934,6 @@ struct write_opcode_ctx {
int expect;
};
-static int is_call_insn(uprobe_opcode_t *insn)
-{
- return *insn == CALL_INSN_OPCODE;
-}
-
/*
* Verification callback used by int3_update uprobe_write calls to make sure
* the underlying instruction is as expected - either int3 or call.
@@ -930,17 +942,17 @@ static int verify_insn(struct page *page, unsigned long vaddr, uprobe_opcode_t *
int nbytes, void *data)
{
struct write_opcode_ctx *ctx = data;
- uprobe_opcode_t old_opcode[5];
+ uprobe_opcode_t old_opcode[OPT_INSN_SIZE];
- uprobe_copy_from_page(page, ctx->base, (uprobe_opcode_t *) &old_opcode, 5);
+ uprobe_copy_from_page(page, ctx->base, old_opcode, OPT_INSN_SIZE);
switch (ctx->expect) {
case EXPECT_SWBP:
if (is_swbp_insn(&old_opcode[0]))
return 1;
break;
- case EXPECT_CALL:
- if (is_call_insn(&old_opcode[0]))
+ case EXPECT_OPTIMIZED:
+ if (is_opt_insns(&old_opcode[0]))
return 1;
break;
}
@@ -963,7 +975,7 @@ static int verify_insn(struct page *page, unsigned long vaddr, uprobe_opcode_t *
* - SMP sync all CPUs
*/
static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
- unsigned long vaddr, char *insn, bool optimize)
+ unsigned long vaddr, char *insn, int size, bool optimize)
{
uprobe_opcode_t int3 = UPROBE_SWBP_INSN;
struct write_opcode_ctx ctx = {
@@ -978,7 +990,7 @@ static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
* so we can skip this step for optimize == true.
*/
if (!optimize) {
- ctx.expect = EXPECT_CALL;
+ ctx.expect = EXPECT_OPTIMIZED;
err = uprobe_write(auprobe, vma, vaddr, &int3, 1, verify_insn,
true /* is_register */, false /* do_update_ref_ctr */,
&ctx);
@@ -990,7 +1002,7 @@ static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
/* Write all but the first byte of the patched range. */
ctx.expect = EXPECT_SWBP;
- err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, 4, verify_insn,
+ err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, size - 1, verify_insn,
true /* is_register */, false /* do_update_ref_ctr */,
&ctx);
if (err)
@@ -1017,17 +1029,32 @@ static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
static int swbp_optimize(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
unsigned long vaddr, unsigned long tramp)
{
- u8 call[5];
+ u8 insn[OPT_INSN_SIZE], *call = &insn[LEA_INSN_SIZE];
- __text_gen_insn(call, CALL_INSN_OPCODE, (const void *) vaddr,
+ /*
+ * We have nop10 instruction (with first byte overwritten to int3),
+ * changing it to:
+ * lea -0x80(%rsp), %rsp
+ * call tramp
+ */
+ memcpy(insn, lea_rsp, LEA_INSN_SIZE);
+ __text_gen_insn(call, CALL_INSN_OPCODE,
+ (const void *) (vaddr + LEA_INSN_SIZE),
(const void *) tramp, CALL_INSN_SIZE);
- return int3_update(auprobe, vma, vaddr, call, true /* optimize */);
+ return int3_update(auprobe, vma, vaddr, insn, OPT_INSN_SIZE, true /* optimize */);
}
static int swbp_unoptimize(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
unsigned long vaddr)
{
- return int3_update(auprobe, vma, vaddr, auprobe->insn, false /* optimize */);
+ /*
+ * We have optimized nop10 (lea, call), changing it to 'jmp rel8' to
+ * end of the 10-byte slot instead of restoring the original nop10,
+ * because we could have thread already inside lea instruction.
+ */
+ u8 jmp[OPT_INSN_SIZE] = { JMP8_INSN_OPCODE, OPT_JMP8_OFFSET };
+
+ return int3_update(auprobe, vma, vaddr, jmp, JMP8_INSN_SIZE, false /* optimize */);
}
static int copy_from_vaddr(struct mm_struct *mm, unsigned long vaddr, void *dst, int len)
@@ -1049,19 +1076,19 @@ static bool __is_optimized(struct mm_struct *mm, uprobe_opcode_t *insn, unsigned
struct __packed __arch_relative_insn {
u8 op;
s32 raddr;
- } *call = (struct __arch_relative_insn *) insn;
+ } *call = (struct __arch_relative_insn *)(insn + LEA_INSN_SIZE);
- if (!is_call_insn(insn))
+ if (!is_opt_insns(insn))
return false;
- return __in_uprobe_trampoline(mm, vaddr + 5 + call->raddr);
+ return __in_uprobe_trampoline(mm, vaddr + OPT_INSN_SIZE + call->raddr);
}
static int is_optimized(struct mm_struct *mm, unsigned long vaddr)
{
- uprobe_opcode_t insn[5];
+ uprobe_opcode_t insn[OPT_INSN_SIZE];
int err;
- err = copy_from_vaddr(mm, vaddr, &insn, 5);
+ err = copy_from_vaddr(mm, vaddr, &insn, OPT_INSN_SIZE);
if (err)
return err;
return __is_optimized(mm, (uprobe_opcode_t *)&insn, vaddr);
@@ -1095,14 +1122,25 @@ int set_orig_insn(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
unsigned long vaddr)
{
if (test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags)) {
- int ret = is_optimized(vma->vm_mm, vaddr);
- if (ret < 0)
+ uprobe_opcode_t insn[OPT_INSN_SIZE];
+ int ret;
+
+ ret = copy_from_vaddr(vma->vm_mm, vaddr, &insn, OPT_INSN_SIZE);
+ if (ret)
return ret;
- if (ret) {
+ if (__is_optimized(vma->vm_mm, (uprobe_opcode_t *)&insn, vaddr)) {
ret = swbp_unoptimize(auprobe, vma, vaddr);
WARN_ON_ONCE(ret);
return ret;
}
+ /*
+ * We can have re-attached probe on top of jmp8 instruction,
+ * which did not get optimized. We need to restore the jmp8
+ * instruction, instead of the original instruction (nop10).
+ */
+ if (is_swbp_insn(&insn[0]) && insn[1] == OPT_JMP8_OFFSET)
+ return uprobe_write_opcode(auprobe, vma, vaddr, JMP8_INSN_OPCODE,
+ false /* is_register */);
}
return uprobe_write_opcode(auprobe, vma, vaddr, *(uprobe_opcode_t *)&auprobe->insn,
false /* is_register */);
@@ -1131,7 +1169,7 @@ static int __arch_uprobe_optimize(struct arch_uprobe *auprobe, struct mm_struct
void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
{
struct mm_struct *mm = current->mm;
- uprobe_opcode_t insn[5];
+ uprobe_opcode_t insn[OPT_INSN_SIZE];
if (!should_optimize(auprobe))
return;
@@ -1142,7 +1180,7 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
* Check if some other thread already optimized the uprobe for us,
* if it's the case just go away silently.
*/
- if (copy_from_vaddr(mm, vaddr, &insn, 5))
+ if (copy_from_vaddr(mm, vaddr, &insn, OPT_INSN_SIZE))
goto unlock;
if (!is_swbp_insn((uprobe_opcode_t*) &insn))
goto unlock;
@@ -1160,14 +1198,24 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
static bool can_optimize(struct insn *insn, unsigned long vaddr)
{
- if (!insn->x86_64 || insn->length != 5)
+ if (!insn->x86_64)
return false;
- if (!insn_is_nop(insn))
+ /* We can't do cross page atomic writes yet. */
+ if (PAGE_SIZE - (vaddr & ~PAGE_MASK) < OPT_INSN_SIZE)
return false;
- /* We can't do cross page atomic writes yet. */
- return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5;
+ /* We can optimize on top of nop10.. */
+ if (insn->length == OPT_INSN_SIZE && insn_is_nop(insn))
+ return true;
+
+ /* .. and JMP rel8 to end of slot — check swbp_unoptimize. */
+ if (insn->length == 2 &&
+ insn->opcode.bytes[0] == JMP8_INSN_OPCODE &&
+ insn->immediate.value == OPT_JMP8_OFFSET)
+ return true;
+
+ return false;
}
#else /* 32-bit: */
/*
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCHv2 03/11] uprobes/x86: Move optimized uprobe from nop5 to nop10
2026-05-18 10:59 ` [PATCHv2 03/11] uprobes/x86: Move optimized uprobe from nop5 to nop10 Jiri Olsa
@ 2026-05-18 11:50 ` bot+bpf-ci
0 siblings, 0 replies; 18+ messages in thread
From: bot+bpf-ci @ 2026-05-18 11:50 UTC (permalink / raw)
To: jolsa, oleg, peterz, mingo, mhiramat, andrii
Cc: bpf, linux-trace-kernel, ast, andrii, daniel, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 7654 bytes --]
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c
[ ... ]
> @@ -963,7 +975,7 @@ static int verify_insn(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode,
> * - SMP sync all CPUs
> */
> static int int3_update(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
> - unsigned long vaddr, char *insn, bool optimize)
> + unsigned long vaddr, char *insn, int size, bool optimize)
> {
> uprobe_opcode_t int3 = UPROBE_SWBP_INSN;
> struct write_opcode_ctx ctx = {
> @@ -978,7 +990,7 @@ struct write_opcode_ctx ctx = {
> * so we can skip this step for optimize == true.
> */
> if (!optimize) {
> - ctx.expect = EXPECT_CALL;
> + ctx.expect = EXPECT_OPTIMIZED;
> err = uprobe_write(auprobe, vma, vaddr, &int3, 1, verify_insn,
> true /* is_register */, false /* do_update_ref_ctr */,
> &ctx);
> @@ -990,7 +1002,7 @@ struct write_opcode_ctx ctx = {
>
> /* Write all but the first byte of the patched range. */
> ctx.expect = EXPECT_SWBP;
> - err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, 4, verify_insn,
> + err = uprobe_write(auprobe, vma, vaddr + 1, insn + 1, size - 1, verify_insn,
When re-optimizing from JMP8 back to LEA+CALL, does the CALL instruction at
offset 5 get rewritten without INT3 protection?
The int3_update() sequence writes INT3 at offset 0 during unoptimize
(!optimize path), then later writes bytes 1..9 while verifying offset 0
stays INT3. During re-optimization (optimize == true), the code skips the
initial INT3 write and directly writes bytes 1..9.
Consider this sequence when re-optimizing a previously unoptimized uprobe
that is currently 'jmp rel8' back to 'lea; call':
Step 7: Current state is 0xCC 0x08 ... (INT3 followed by JMP8 offset)
Step 8: Write bytes 1..9, transforming to 0xCC 0x8d 0x64 0x24 0x80 0xE8 ...
The CALL opcode (0xE8) at offset 5 and its 4-byte displacement
get written without an INT3 protecting that location.
A concurrent thread executing at vaddr could observe a partially-written CALL
instruction during the transition. Peter Zijlstra suggested placing INT3 at
both offset 0 and offset 5 to protect both the LEA and CALL instructions
during modification:
Reference: https://lore.kernel.org/bpf/20260518104306.GU3102624@noisy.programming.kicks-ass.net/
"Note that from step 7 to step 8, you re-write the CALL instruction
without going through INT3. This means it is entirely possible for a
concurrent execution to observe a composite instruction. This is NOT
sound! However, I think it can be salvaged, if instead of only writing
INT3 at +0, you also write INT3 at +5."
[ ... ]
> @@ -1095,14 +1122,25 @@ int set_orig_insn(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
> unsigned long vaddr)
> {
> if (test_bit(ARCH_UPROBE_FLAG_CAN_OPTIMIZE, &auprobe->flags)) {
> - int ret = is_optimized(vma->vm_mm, vaddr);
> - if (ret < 0)
> + uprobe_opcode_t insn[OPT_INSN_SIZE];
> + int ret;
> +
> + ret = copy_from_vaddr(vma->vm_mm, vaddr, &insn, OPT_INSN_SIZE);
> + if (ret)
> return ret;
> - if (ret) {
> + if (__is_optimized(vma->vm_mm, (uprobe_opcode_t *)&insn, vaddr)) {
> ret = swbp_unoptimize(auprobe, vma, vaddr);
> WARN_ON_ONCE(ret);
> return ret;
> }
> + /*
> + * We can have re-attached probe on top of jmp8 instruction,
> + * which did not get optimized. We need to restore the jmp8
> + * instruction, instead of the original instruction (nop10).
> + */
> + if (is_swbp_insn(&insn[0]) && insn[1] == OPT_JMP8_OFFSET)
> + return uprobe_write_opcode(auprobe, vma, vaddr, JMP8_INSN_OPCODE,
This isn't a bug, but the multi-line if statement is missing braces:
if (is_swbp_insn(&insn[0]) && insn[1] == OPT_JMP8_OFFSET)
return uprobe_write_opcode(auprobe, vma, vaddr, JMP8_INSN_OPCODE,
false /* is_register */);
Kernel coding style requires braces on any multi-line statement.
Reference: https://lore.kernel.org/bpf/20260518104306.GU3102624@noisy.programming.kicks-ass.net/
[ ... ]
> @@ -1160,14 +1198,24 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
>
> static bool can_optimize(struct insn *insn, unsigned long vaddr)
> {
> - if (!insn->x86_64 || insn->length != 5)
> + if (!insn->x86_64)
> return false;
>
> - if (!insn_is_nop(insn))
> + /* We can't do cross page atomic writes yet. */
> + if (PAGE_SIZE - (vaddr & ~PAGE_MASK) < OPT_INSN_SIZE)
> return false;
>
> - /* We can't do cross page atomic writes yet. */
> - return PAGE_SIZE - (vaddr & ~PAGE_MASK) >= 5;
> + /* We can optimize on top of nop10.. */
> + if (insn->length == OPT_INSN_SIZE && insn_is_nop(insn))
> + return true;
> +
> + /* .. and JMP rel8 to end of slot — check swbp_unoptimize. */
> + if (insn->length == 2 &&
> + insn->opcode.bytes[0] == JMP8_INSN_OPCODE &&
> + insn->immediate.value == OPT_JMP8_OFFSET)
> + return true;
> +
> + return false;
> }
Can can_optimize() safely accept naturally occurring 'jmp +8' instructions
from user binaries?
The JMP8 branch returns true for any 2-byte 'jmp rel8' with displacement 8
(bytes 0xEB 0x08). The comment says this is for swbp_unoptimize()'s
kernel-placed jmp, but can_optimize() is called during initial analysis via
prepare_uprobe() -> arch_uprobe_analyze_insn() -> uprobe_init_insn(), before
any kernel modifications occur. The instruction bytes come directly from the
user binary through copy_insn().
If a user binary naturally contains 'EB 08' at the uprobe location (e.g., a
forward branch over an 8-byte literal pool or alignment padding), this path
would trigger:
1. uprobe_init_insn() decodes the user's 'jmp 0x08' (length=2, opcode=0xEB,
immediate=8).
2. can_optimize() matches the new JMP8 branch and returns true.
3. ARCH_UPROBE_FLAG_CAN_OPTIMIZE is set.
4. set_swbp() writes INT3 at vaddr, replacing the 0xEB.
5. When the breakpoint fires, swbp_optimize() -> int3_update() writes 10
bytes (lea -0x80(%rsp),%rsp; call tramp) over vaddr..vaddr+9.
6. verify_insn() only checks that byte 0 is INT3 (EXPECT_SWBP) before
writing bytes 1..9. It does not verify those 8 bytes are from a prior
optimization.
7. The 8 bytes at vaddr+2..vaddr+9 (original user code/data following the
jmp) get overwritten with the tail of the LEA and the CALL encoding.
8. On unregister, swbp_unoptimize() only writes 2 bytes back. Bytes
vaddr+2..vaddr+9 stay corrupted with call instruction remnants in the
COW'd page.
The corruption persists even though the original jmp would skip those bytes,
because other control flow (indirect branches, function pointers, alternative
entry points) might reach that range.
Contrast with nop10: when can_optimize returns true for a 10-byte nop
(length==OPT_INSN_SIZE && insn_is_nop), writing 10 bytes only overwrites the
nop itself. The earlier nop5 code was safe because it required insn->length
== 5 and wrote exactly 5 bytes (write size matched instruction size).
The JMP8 case breaks that invariant: write size (10 bytes) exceeds
instruction size (2 bytes), so the 8 trailing bytes are arbitrary user
content. Would a length check (requiring insn->length == OPT_INSN_SIZE for
the JMP8 case) or tracking optimization state via a flag prevent optimizing
user instructions that happen to match the JMP8 pattern?
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCHv2 04/11] libbpf: Change has_nop_combo to work on top of nop10
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (2 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 03/11] uprobes/x86: Move optimized uprobe from nop5 to nop10 Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 11:37 ` bot+bpf-ci
2026-05-18 10:59 ` [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error Jiri Olsa
` (6 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: Jakub Sitnicki, bpf, linux-trace-kernel
We now expect nop combo with 10 bytes nop instead of 5 bytes nop,
fixing has_nop_combo to reflect that.
Fixes: 41a5c7df4466 ("libbpf: Add support to detect nop,nop5 instructions combo for usdt probe")
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/lib/bpf/usdt.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index e3710933fd52..7e62e4d5bedd 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -305,7 +305,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
/*
* Detect kernel support for uprobe() syscall, it's presence means we can
- * take advantage of faster nop5 uprobe handling.
+ * take advantage of faster nop10 uprobe handling.
* Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe")
*/
man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL);
@@ -596,14 +596,14 @@ static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note,
#if defined(__x86_64__)
static bool has_nop_combo(int fd, long off)
{
- unsigned char nop_combo[6] = {
- 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
+ unsigned char nop_combo[11] = {
+ 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
};
- unsigned char buf[6];
+ unsigned char buf[11];
- if (pread(fd, buf, 6, off) != 6)
+ if (pread(fd, buf, 11, off) != 11)
return false;
- return memcmp(buf, nop_combo, 6) == 0;
+ return memcmp(buf, nop_combo, 11) == 0;
}
#else
static bool has_nop_combo(int fd, long off)
@@ -814,8 +814,8 @@ static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd,
memset(target, 0, sizeof(*target));
/*
- * We have uprobe syscall and usdt with nop,nop5 instructions combo,
- * so we can place the uprobe directly on nop5 (+1) and get this probe
+ * We have uprobe syscall and usdt with nop,nop10 instructions combo,
+ * so we can place the uprobe directly on nop10 (+1) and get this probe
* optimized.
*/
if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) {
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCHv2 04/11] libbpf: Change has_nop_combo to work on top of nop10
2026-05-18 10:59 ` [PATCHv2 04/11] libbpf: Change has_nop_combo to work on top of nop10 Jiri Olsa
@ 2026-05-18 11:37 ` bot+bpf-ci
0 siblings, 0 replies; 18+ messages in thread
From: bot+bpf-ci @ 2026-05-18 11:37 UTC (permalink / raw)
To: jolsa, oleg, peterz, mingo, mhiramat, andrii
Cc: jakub, bpf, linux-trace-kernel, ast, andrii, daniel, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2138 bytes --]
> libbpf: Change has_nop_combo to work on top of nop10
>
> We now expect nop combo with 10 bytes nop instead of 5 bytes nop,
> fixing has_nop_combo to reflect that.
>
> Fixes: 41a5c7df4466 ("libbpf: Add support to detect nop,nop5 instructions combo for usdt probe")
Should the Fixes tag reference commit 5c3c59c7952a ("uprobes/x86: Move
optimized uprobe from nop5 to nop10") instead? Commit 41a5c7df4466 originally
introduced has_nop_combo() with the correct nop5 check. Commit 5c3c59c7952a
later changed the kernel to use nop10 instead of nop5 for uprobe optimization
but failed to update the libbpf has_nop_combo() function accordingly, creating
a mismatch between kernel and userspace.
> Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
>
> diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
> index e3710933fd52..7e62e4d5bedd 100644
> --- a/tools/lib/bpf/usdt.c
> +++ b/tools/lib/bpf/usdt.c
[ ... ]
> @@ -596,14 +596,14 @@ static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note,
> #if defined(__x86_64__)
> static bool has_nop_combo(int fd, long off)
> {
> - unsigned char nop_combo[6] = {
> - 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
> + unsigned char nop_combo[11] = {
> + 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
> };
> - unsigned char buf[6];
> + unsigned char buf[11];
>
> - if (pread(fd, buf, 6, off) != 6)
> + if (pread(fd, buf, 11, off) != 11)
> return false;
> - return memcmp(buf, nop_combo, 6) == 0;
> + return memcmp(buf, nop_combo, 11) == 0;
> }
This isn't a bug, but Jakub Sitnicki suggested in v1 review to use
ARRAY_SIZE(buf) instead of repeating the scalar value 11 in multiple places.
The suggestion was included alongside the Reviewed-by tag but was not
addressed.
Reference: https://lore.kernel.org/bpf/87fr3t7wpu.fsf@cloudflare.com/
[ ... ]
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (3 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 04/11] libbpf: Change has_nop_combo to work on top of nop10 Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 11:37 ` bot+bpf-ci
2026-05-18 17:39 ` Andrii Nakryiko
2026-05-18 10:59 ` [PATCHv2 06/11] selftests/bpf: Emit nop,nop10 instructions combo for x86_64 arch Jiri Olsa
` (5 subsequent siblings)
10 siblings, 2 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
In the previous optimized uprobe fix we changed the syscall
error used for its detection from ENXIO to EPROTO.
Changing related probe_uprobe_syscall detection check.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Fixes: 05738da0efa1 ("libbpf: Add uprobe syscall feature detection")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/lib/bpf/features.c | 4 ++--
tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index b7e388f99d0b..e5641fa60163 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -577,10 +577,10 @@ static int probe_ldimm64_full_range_off(int token_fd)
static int probe_uprobe_syscall(int token_fd)
{
/*
- * If kernel supports uprobe() syscall, it will return -ENXIO when called
+ * If kernel supports uprobe() syscall, it will return -EPROTO when called
* from the outside of a kernel-generated uprobe trampoline.
*/
- return syscall(__NR_uprobe) < 0 && errno == ENXIO;
+ return syscall(__NR_uprobe) < 0 && errno == EPROTO;
}
#else
static int probe_uprobe_syscall(int token_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index 955a37751b52..c944136252c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -762,7 +762,7 @@ static void test_uprobe_error(void)
long err = syscall(__NR_uprobe);
ASSERT_EQ(err, -1, "error");
- ASSERT_EQ(errno, ENXIO, "errno");
+ ASSERT_EQ(errno, EPROTO, "errno");
}
static void __test_uprobe_syscall(void)
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error
2026-05-18 10:59 ` [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error Jiri Olsa
@ 2026-05-18 11:37 ` bot+bpf-ci
2026-05-18 17:39 ` Andrii Nakryiko
1 sibling, 0 replies; 18+ messages in thread
From: bot+bpf-ci @ 2026-05-18 11:37 UTC (permalink / raw)
To: jolsa, oleg, peterz, mingo, mhiramat, andrii
Cc: bpf, linux-trace-kernel, ast, andrii, daniel, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2554 bytes --]
> libbpf: Detect uprobe syscall with new error
>
> In the previous optimized uprobe fix we changed the syscall
> error used for its detection from ENXIO to EPROTO.
>
> Changing related probe_uprobe_syscall detection check.
>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Fixes: 05738da0efa1 ("libbpf: Add uprobe syscall feature detection")
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
The Fixes: tag points to commit 05738da0efa1 ("libbpf: Add uprobe
syscall feature detection"), but wasn't the bug actually introduced by
commit 5c3c59c7952a ("uprobes/x86: Move optimized uprobe from nop5 to
nop10")? Commit 05738da0efa1 originally added the ENXIO check
correctly, but commit 5c3c59c7952a changed the kernel to return EPROTO
instead of ENXIO, breaking the libbpf detection. The commit message
even says "In the previous optimized uprobe fix we changed the syscall
error," which refers to 5c3c59c7952a.
Should this use:
Fixes: 5c3c59c7952a ("uprobes/x86: Move optimized uprobe from nop5 to nop10")
> diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
> index b7e388f99d0b..e5641fa60163 100644
> --- a/tools/lib/bpf/features.c
> +++ b/tools/lib/bpf/features.c
> @@ -577,10 +577,10 @@ static int probe_ldimm64_full_range_off(int token_fd)
> static int probe_uprobe_syscall(int token_fd)
> {
> /*
> - * If kernel supports uprobe() syscall, it will return -ENXIO when called
> + * If kernel supports uprobe() syscall, it will return -EPROTO when called
> * from the outside of a kernel-generated uprobe trampoline.
> */
> - return syscall(__NR_uprobe) < 0 && errno == ENXIO;
> + return syscall(__NR_uprobe) < 0 && errno == EPROTO;
> }
> #else
> static int probe_uprobe_syscall(int token_fd)
> diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> index 955a37751b52..c944136252c6 100644
> --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> @@ -762,7 +762,7 @@ static void test_uprobe_error(void)
> long err = syscall(__NR_uprobe);
>
> ASSERT_EQ(err, -1, "error");
> - ASSERT_EQ(errno, ENXIO, "errno");
> + ASSERT_EQ(errno, EPROTO, "errno");
> }
>
> static void __test_uprobe_syscall(void)
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error
2026-05-18 10:59 ` [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error Jiri Olsa
2026-05-18 11:37 ` bot+bpf-ci
@ 2026-05-18 17:39 ` Andrii Nakryiko
1 sibling, 0 replies; 18+ messages in thread
From: Andrii Nakryiko @ 2026-05-18 17:39 UTC (permalink / raw)
To: Jiri Olsa
Cc: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko, bpf, linux-trace-kernel
On Mon, May 18, 2026 at 4:00 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> In the previous optimized uprobe fix we changed the syscall
> error used for its detection from ENXIO to EPROTO.
>
> Changing related probe_uprobe_syscall detection check.
>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Fixes: 05738da0efa1 ("libbpf: Add uprobe syscall feature detection")
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> tools/lib/bpf/features.c | 4 ++--
> tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c | 2 +-
> 2 files changed, 3 insertions(+), 3 deletions(-)
>
sashiko is wrong, this change is correct (we do not want to attempt
sys_uprobe optimization if -ENXIO is returned from broken kernels)
Acked-by: Andrii Nakryiko <andrii@kernel.org>
> diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
> index b7e388f99d0b..e5641fa60163 100644
> --- a/tools/lib/bpf/features.c
> +++ b/tools/lib/bpf/features.c
> @@ -577,10 +577,10 @@ static int probe_ldimm64_full_range_off(int token_fd)
> static int probe_uprobe_syscall(int token_fd)
> {
> /*
> - * If kernel supports uprobe() syscall, it will return -ENXIO when called
> + * If kernel supports uprobe() syscall, it will return -EPROTO when called
> * from the outside of a kernel-generated uprobe trampoline.
> */
> - return syscall(__NR_uprobe) < 0 && errno == ENXIO;
> + return syscall(__NR_uprobe) < 0 && errno == EPROTO;
> }
> #else
> static int probe_uprobe_syscall(int token_fd)
> diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> index 955a37751b52..c944136252c6 100644
> --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
> @@ -762,7 +762,7 @@ static void test_uprobe_error(void)
> long err = syscall(__NR_uprobe);
>
> ASSERT_EQ(err, -1, "error");
> - ASSERT_EQ(errno, ENXIO, "errno");
> + ASSERT_EQ(errno, EPROTO, "errno");
> }
>
> static void __test_uprobe_syscall(void)
> --
> 2.53.0
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCHv2 06/11] selftests/bpf: Emit nop,nop10 instructions combo for x86_64 arch
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (4 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 05/11] libbpf: Detect uprobe syscall with new error Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 07/11] selftests/bpf: Change uprobe syscall tests to use nop10 Jiri Olsa
` (4 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Syncing latest usdt.h change [1].
Now that we have nop10 optimization support in kernel, let's emit
nop,nop10 for usdt probe. We leave it up to the library to use
desirable nop instruction.
[1] TBD
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/testing/selftests/bpf/usdt.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
index c71e21df38b3..d359663b9c32 100644
--- a/tools/testing/selftests/bpf/usdt.h
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -313,7 +313,7 @@ struct usdt_sema { volatile unsigned short active; };
#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
#define USDT_NOP nop 0
#elif defined(__x86_64__)
-#define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x0 /* nop, nop5 */
+#define USDT_NOP .byte 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 /* nop, nop10 */
#else
#define USDT_NOP nop
#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCHv2 07/11] selftests/bpf: Change uprobe syscall tests to use nop10
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (5 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 06/11] selftests/bpf: Emit nop,nop10 instructions combo for x86_64 arch Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 11:50 ` bot+bpf-ci
2026-05-18 10:59 ` [PATCHv2 08/11] selftests/bpf: Change uprobe/usdt trigger bench code " Jiri Olsa
` (3 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Optimized uprobes are now on top of 10-bytes nop instructions,
reflect that in existing tests.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
.../selftests/bpf/benchs/bench_trigger.c | 2 +-
.../selftests/bpf/prog_tests/uprobe_syscall.c | 29 ++++++++++---------
tools/testing/selftests/bpf/prog_tests/usdt.c | 25 +++++++++-------
tools/testing/selftests/bpf/usdt_2.c | 2 +-
4 files changed, 33 insertions(+), 25 deletions(-)
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2f22ec61667b..bcc4820c802e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -398,7 +398,7 @@ static void *uprobe_producer_ret(void *input)
#ifdef __x86_64__
__nocf_check __weak void uprobe_target_nop5(void)
{
- asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+ asm volatile (".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
}
static void *uprobe_producer_nop5(void *input)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index c944136252c6..e4a19dc9df69 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -17,7 +17,7 @@
#include "uprobe_syscall_executed.skel.h"
#include "bpf/libbpf_internal.h"
-#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00
+#define USDT_NOP .byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
#include "usdt.h"
#pragma GCC diagnostic ignored "-Wattributes"
@@ -26,7 +26,7 @@ __attribute__((aligned(16)))
__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void)
{
asm volatile (
- ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */
+ ".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00\n" /* nop10 */
"movq $0xdeadbeef, %rax\n"
"ret\n"
);
@@ -345,9 +345,9 @@ static void test_uretprobe_syscall_call(void)
__attribute__((aligned(16)))
__nocf_check __weak __naked void uprobe_test(void)
{
- asm volatile (" \n"
- ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
- "ret \n"
+ asm volatile (
+ ".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00\n" /* nop10 */
+ "ret\n"
);
}
@@ -388,14 +388,16 @@ static int find_uprobes_trampoline(void *tramp_addr)
return ret;
}
-static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+static unsigned char jmp2B[2] = { 0xeb, 8 };
+static unsigned char nop10[10] = { 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static unsigned char lea_rsp[5] = { 0x48, 0x8d, 0x64, 0x24, 0x80 };
-static void *find_nop5(void *fn)
+static void *find_nop10(void *fn)
{
int i;
- for (i = 0; i < 10; i++) {
- if (!memcmp(nop5, fn + i, 5))
+ for (i = 0; i < 128; i++) {
+ if (!memcmp(nop10, fn + i, 10))
return fn + i;
}
return NULL;
@@ -420,7 +422,8 @@ static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigge
ASSERT_EQ(skel->bss->executed, executed, "executed");
/* .. and check the trampoline is as expected. */
- call = (struct __arch_relative_insn *) addr;
+ ASSERT_OK(memcmp(addr, lea_rsp, 5), "lea_rsp");
+ call = (struct __arch_relative_insn *)(addr + 5);
tramp = (void *) (call + 1) + call->raddr;
ASSERT_EQ(call->op, 0xe8, "call");
ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
@@ -432,7 +435,7 @@ static void check_detach(void *addr, void *tramp)
{
/* [uprobes_trampoline] stays after detach */
ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
- ASSERT_OK(memcmp(addr, nop5, 5), "nop5");
+ ASSERT_OK(memcmp(addr, jmp2B, 2), "jmp2B");
}
static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
@@ -568,8 +571,8 @@ static void test_uprobe_usdt(void)
void *addr;
errno = 0;
- addr = find_nop5(usdt_test);
- if (!ASSERT_OK_PTR(addr, "find_nop5"))
+ addr = find_nop10(usdt_test);
+ if (!ASSERT_OK_PTR(addr, "find_nop10"))
return;
skel = uprobe_syscall_executed__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 69759b27794d..a160d7c4fa0d 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -252,7 +252,7 @@ extern void usdt_1(void);
extern void usdt_2(void);
static unsigned char nop1[1] = { 0x90 };
-static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+static unsigned char nop1_nop10_combo[11] = { 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
static void *find_instr(void *fn, unsigned char *instr, size_t cnt)
{
@@ -271,17 +271,17 @@ static void subtest_optimized_attach(void)
__u8 *addr_1, *addr_2;
/* usdt_1 USDT probe has single nop instruction */
- addr_1 = find_instr(usdt_1, nop1_nop5_combo, 6);
- if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop5_combo"))
+ addr_1 = find_instr(usdt_1, nop1_nop10_combo, 11);
+ if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop10_combo"))
return;
addr_1 = find_instr(usdt_1, nop1, 1);
if (!ASSERT_OK_PTR(addr_1, "usdt_1_find_nop1"))
return;
- /* usdt_2 USDT probe has nop,nop5 instructions combo */
- addr_2 = find_instr(usdt_2, nop1_nop5_combo, 6);
- if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop5_combo"))
+ /* usdt_2 USDT probe has nop,nop10 instructions combo */
+ addr_2 = find_instr(usdt_2, nop1_nop10_combo, 11);
+ if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop10_combo"))
return;
skel = test_usdt__open_and_load();
@@ -309,12 +309,12 @@ static void subtest_optimized_attach(void)
bpf_link__destroy(skel->links.usdt_executed);
- /* we expect the nop5 ip */
+ /* we expect the nop10 ip */
skel->bss->expected_ip = (unsigned long) addr_2 + 1;
/*
* Attach program on top of usdt_2 which is probe defined on top
- * of nop1,nop5 combo, so the probe gets optimized on top of nop5.
+ * of nop1,nop10 combo, so the probe gets optimized on top of nop10.
*/
skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed,
0 /*self*/, "/proc/self/exe",
@@ -328,8 +328,13 @@ static void subtest_optimized_attach(void)
/* nop stays on addr_2 address */
ASSERT_EQ(*addr_2, 0x90, "nop");
- /* call is on addr_2 + 1 address */
- ASSERT_EQ(*(addr_2 + 1), 0xe8, "call");
+ /*
+ * lea -0x80(%rsp), %rsp
+ * call ...
+ */
+ static unsigned char expected[] = { 0x48, 0x8d, 0x64, 0x24, 0x80, 0xe8 };
+
+ ASSERT_MEMEQ(addr_2 + 1, expected, sizeof(expected), "lea_and_call");
ASSERT_EQ(skel->bss->executed, 4, "executed");
cleanup:
diff --git a/tools/testing/selftests/bpf/usdt_2.c b/tools/testing/selftests/bpf/usdt_2.c
index 789883aaca4c..b359b389f6c0 100644
--- a/tools/testing/selftests/bpf/usdt_2.c
+++ b/tools/testing/selftests/bpf/usdt_2.c
@@ -3,7 +3,7 @@
#if defined(__x86_64__)
/*
- * Include usdt.h with default nop,nop5 instructions combo.
+ * Include usdt.h with default nop,nop10 instructions combo.
*/
#include "usdt.h"
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCHv2 07/11] selftests/bpf: Change uprobe syscall tests to use nop10
2026-05-18 10:59 ` [PATCHv2 07/11] selftests/bpf: Change uprobe syscall tests to use nop10 Jiri Olsa
@ 2026-05-18 11:50 ` bot+bpf-ci
0 siblings, 0 replies; 18+ messages in thread
From: bot+bpf-ci @ 2026-05-18 11:50 UTC (permalink / raw)
To: jolsa, oleg, peterz, mingo, mhiramat, andrii
Cc: bpf, linux-trace-kernel, ast, andrii, daniel, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2341 bytes --]
> diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> index 2f22ec61667b..bcc4820c802e 100644
> --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
> +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
> @@ -398,7 +398,7 @@ static void *uprobe_producer_ret(void *input)
> #ifdef __x86_64__
> __nocf_check __weak void uprobe_target_nop5(void)
> {
> - asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
> + asm volatile (".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
> }
>
> static void *uprobe_producer_nop5(void *input)
This isn't a bug, but should the function and benchmark names also be
updated? The function is still called uprobe_target_nop5() and the
benchmark exposes names like uprobe-nop5, uretprobe-nop5,
uprobe-multi-nop5, uretprobe-multi-nop5, and usdt-nop5, but they are
now all testing a 10-byte nop instruction.
[ ... ]
> diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
> index 69759b27794d..a160d7c4fa0d 100644
> --- a/tools/testing/selftests/bpf/prog_tests/usdt.c
> +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
> @@ -252,7 +252,7 @@ extern void usdt_1(void);
> extern void usdt_2(void);
>
> static unsigned char nop1[1] = { 0x90 };
> -static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
> +static unsigned char nop1_nop10_combo[11] = { 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
>
> static void *find_instr(void *fn, unsigned char *instr, size_t cnt)
> {
> int i;
>
> for (i = 0; i < 10; i++) {
> if (!memcmp(instr, fn + i, cnt))
> return fn + i;
> }
> return NULL;
> }
Is the loop in find_instr() adequate to find this instruction sequence?
In uprobe_syscall.c, find_nop10() was updated to search up to 128 bytes
to account for compiler-generated prologues. If a modern compiler
generates a prologue longer than 9 bytes for usdt_2(), this will
prematurely terminate and cause the subtest_optimized_attach() test to
fail.
[ ... ]
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCHv2 08/11] selftests/bpf: Change uprobe/usdt trigger bench code to use nop10
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (6 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 07/11] selftests/bpf: Change uprobe syscall tests to use nop10 Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 11:37 ` bot+bpf-ci
2026-05-18 10:59 ` [PATCHv2 09/11] selftests/bpf: Add reattach tests for uprobe syscall Jiri Olsa
` (2 subsequent siblings)
10 siblings, 1 reply; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Changing uprobe/usdt trigger bench code to use nop10 instead
of nop5. Also changing un_bench_uprobes.sh to use nop10 triggers.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/testing/selftests/bpf/bench.c | 20 +++++------
.../selftests/bpf/benchs/bench_trigger.c | 36 +++++++++----------
.../selftests/bpf/benchs/run_bench_uprobes.sh | 2 +-
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 6155ce455c27..1252a1af2e84 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -539,12 +539,12 @@ extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
#ifdef __x86_64__
-extern const struct bench bench_trig_uprobe_nop5;
-extern const struct bench bench_trig_uretprobe_nop5;
-extern const struct bench bench_trig_uprobe_multi_nop5;
-extern const struct bench bench_trig_uretprobe_multi_nop5;
+extern const struct bench bench_trig_uprobe_nop10;
+extern const struct bench bench_trig_uretprobe_nop10;
+extern const struct bench bench_trig_uprobe_multi_nop10;
+extern const struct bench bench_trig_uretprobe_multi_nop10;
extern const struct bench bench_trig_usdt_nop;
-extern const struct bench bench_trig_usdt_nop5;
+extern const struct bench bench_trig_usdt_nop10;
#endif
extern const struct bench bench_rb_libbpf;
@@ -619,12 +619,12 @@ static const struct bench *benchs[] = {
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
#ifdef __x86_64__
- &bench_trig_uprobe_nop5,
- &bench_trig_uretprobe_nop5,
- &bench_trig_uprobe_multi_nop5,
- &bench_trig_uretprobe_multi_nop5,
+ &bench_trig_uprobe_nop10,
+ &bench_trig_uretprobe_nop10,
+ &bench_trig_uprobe_multi_nop10,
+ &bench_trig_uretprobe_multi_nop10,
&bench_trig_usdt_nop,
- &bench_trig_usdt_nop5,
+ &bench_trig_usdt_nop10,
#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index bcc4820c802e..3998ea8ff9aa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -396,15 +396,15 @@ static void *uprobe_producer_ret(void *input)
}
#ifdef __x86_64__
-__nocf_check __weak void uprobe_target_nop5(void)
+__nocf_check __weak void uprobe_target_nop10(void)
{
asm volatile (".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00");
}
-static void *uprobe_producer_nop5(void *input)
+static void *uprobe_producer_nop10(void *input)
{
while (true)
- uprobe_target_nop5();
+ uprobe_target_nop10();
return NULL;
}
@@ -418,7 +418,7 @@ static void *uprobe_producer_usdt_nop(void *input)
return NULL;
}
-static void *uprobe_producer_usdt_nop5(void *input)
+static void *uprobe_producer_usdt_nop10(void *input)
{
while (true)
usdt_2();
@@ -542,24 +542,24 @@ static void uretprobe_multi_ret_setup(void)
}
#ifdef __x86_64__
-static void uprobe_nop5_setup(void)
+static void uprobe_nop10_setup(void)
{
- usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+ usetup(false, false /* !use_multi */, &uprobe_target_nop10);
}
-static void uretprobe_nop5_setup(void)
+static void uretprobe_nop10_setup(void)
{
- usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+ usetup(true, false /* !use_multi */, &uprobe_target_nop10);
}
-static void uprobe_multi_nop5_setup(void)
+static void uprobe_multi_nop10_setup(void)
{
- usetup(false, true /* use_multi */, &uprobe_target_nop5);
+ usetup(false, true /* use_multi */, &uprobe_target_nop10);
}
-static void uretprobe_multi_nop5_setup(void)
+static void uretprobe_multi_nop10_setup(void)
{
- usetup(true, true /* use_multi */, &uprobe_target_nop5);
+ usetup(true, true /* use_multi */, &uprobe_target_nop10);
}
static void usdt_setup(const char *name)
@@ -598,7 +598,7 @@ static void usdt_nop_setup(void)
usdt_setup("usdt_1");
}
-static void usdt_nop5_setup(void)
+static void usdt_nop10_setup(void)
{
usdt_setup("usdt_2");
}
@@ -665,10 +665,10 @@ BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
#ifdef __x86_64__
-BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
-BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
-BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
-BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uprobe_nop10, nop10, "uprobe-nop10");
+BENCH_TRIG_USERMODE(uretprobe_nop10, nop10, "uretprobe-nop10");
+BENCH_TRIG_USERMODE(uprobe_multi_nop10, nop10, "uprobe-multi-nop10");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop10, nop10, "uretprobe-multi-nop10");
BENCH_TRIG_USERMODE(usdt_nop, usdt_nop, "usdt-nop");
-BENCH_TRIG_USERMODE(usdt_nop5, usdt_nop5, "usdt-nop5");
+BENCH_TRIG_USERMODE(usdt_nop10, usdt_nop10, "usdt-nop10");
#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 9ec59423b949..e490b337e960 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} usdt-nop usdt-nop5
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop10} usdt-nop usdt-nop10
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCHv2 08/11] selftests/bpf: Change uprobe/usdt trigger bench code to use nop10
2026-05-18 10:59 ` [PATCHv2 08/11] selftests/bpf: Change uprobe/usdt trigger bench code " Jiri Olsa
@ 2026-05-18 11:37 ` bot+bpf-ci
0 siblings, 0 replies; 18+ messages in thread
From: bot+bpf-ci @ 2026-05-18 11:37 UTC (permalink / raw)
To: jolsa, oleg, peterz, mingo, mhiramat, andrii
Cc: bpf, linux-trace-kernel, ast, andrii, daniel, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 658 bytes --]
> selftests/bpf: Change uprobe/usdt trigger bench code to use nop10
>
> Changing uprobe/usdt trigger bench code to use nop10 instead
> of nop5. Also changing un_bench_uprobes.sh to use nop10 triggers.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
This isn't a bug, but there's a typo in the commit message: the filename
is written as "un_bench_uprobes.sh" but should be "run_bench_uprobes.sh"
(starting with 'r').
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26030080109
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCHv2 09/11] selftests/bpf: Add reattach tests for uprobe syscall
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (7 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 08/11] selftests/bpf: Change uprobe/usdt trigger bench code " Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 10/11] selftests/bpf: Add tests for uprobe nop10 red zone clobbering Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 11/11] selftests/bpf: Add tests for forked/cloned optimized uprobes Jiri Olsa
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Adding reattach tests for uprobe syscall tests to make sure
we can re-attach and optimize same uprobe multiple times.
The reason is that optimized uprobe does not restore original
nop10 after detach, but instead it uses 'jmp 8' instruction.
Making sure we can still install and optimize uprobe on top
of the 'jmp 8' instruction.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
.../selftests/bpf/prog_tests/uprobe_syscall.c | 115 ++++++++++++++++--
1 file changed, 105 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index e4a19dc9df69..e88b316a3f2c 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -431,21 +431,27 @@ static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigge
return tramp;
}
-static void check_detach(void *addr, void *tramp)
+static bool check_detach(void *addr, void *tramp)
{
+ bool ok = true;
+
/* [uprobes_trampoline] stays after detach */
- ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
- ASSERT_OK(memcmp(addr, jmp2B, 2), "jmp2B");
+ if (!ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"))
+ ok = false;
+ if (!ASSERT_OK(memcmp(addr, jmp2B, 2), "jmp2B"))
+ ok = false;
+ return ok;
}
-static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
- trigger_t trigger, void *addr, int executed)
+static void *check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
+ trigger_t trigger, void *addr, int executed)
{
void *tramp;
tramp = check_attach(skel, trigger, addr, executed);
bpf_link__destroy(link);
check_detach(addr, tramp);
+ return tramp;
}
static void test_uprobe_legacy(void)
@@ -456,6 +462,7 @@ static void test_uprobe_legacy(void)
);
struct bpf_link *link;
unsigned long offset;
+ void *tramp;
offset = get_uprobe_offset(&uprobe_test);
if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
@@ -473,7 +480,28 @@ static void test_uprobe_legacy(void)
if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
goto cleanup;
- check(skel, link, uprobe_test, uprobe_test, 2);
+ tramp = check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* reattach and detach without triggering optimization */
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+ if (!check_detach(uprobe_test, tramp))
+ goto cleanup;
+
+ uprobe_test();
+ ASSERT_EQ(skel->bss->executed, 2, "executed_no_probe");
+
+ /* reattach with triggering optimization */
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
/* uretprobe */
skel->bss->executed = 0;
@@ -495,6 +523,7 @@ static void test_uprobe_multi(void)
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
struct bpf_link *link;
unsigned long offset;
+ void *tramp;
offset = get_uprobe_offset(&uprobe_test);
if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
@@ -515,7 +544,28 @@ static void test_uprobe_multi(void)
if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
goto cleanup;
- check(skel, link, uprobe_test, uprobe_test, 2);
+ tramp = check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* reattach and detach without triggering optimization */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+ if (!check_detach(uprobe_test, tramp))
+ goto cleanup;
+
+ uprobe_test();
+ ASSERT_EQ(skel->bss->executed, 2, "executed_no_probe");
+
+ /* reattach with triggering optimization */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
/* uretprobe.multi */
skel->bss->executed = 0;
@@ -539,6 +589,7 @@ static void test_uprobe_session(void)
);
struct bpf_link *link;
unsigned long offset;
+ void *tramp;
offset = get_uprobe_offset(&uprobe_test);
if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
@@ -558,7 +609,28 @@ static void test_uprobe_session(void)
if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
goto cleanup;
- check(skel, link, uprobe_test, uprobe_test, 4);
+ tramp = check(skel, link, uprobe_test, uprobe_test, 4);
+
+ /* reattach and detach without triggering optimization */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+ if (!check_detach(uprobe_test, tramp))
+ goto cleanup;
+
+ uprobe_test();
+ ASSERT_EQ(skel->bss->executed, 4, "executed_no_probe");
+
+ /* reattach with triggering optimization */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 8);
cleanup:
uprobe_syscall_executed__destroy(skel);
@@ -568,7 +640,7 @@ static void test_uprobe_usdt(void)
{
struct uprobe_syscall_executed *skel;
struct bpf_link *link;
- void *addr;
+ void *addr, *tramp;
errno = 0;
addr = find_nop10(usdt_test);
@@ -587,7 +659,30 @@ static void test_uprobe_usdt(void)
if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
goto cleanup;
- check(skel, link, usdt_test, addr, 2);
+ tramp = check(skel, link, usdt_test, addr, 2);
+
+ /* reattach and detach without triggering optimization */
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+ if (!check_detach(addr, tramp))
+ goto cleanup;
+
+ usdt_test();
+ ASSERT_EQ(skel->bss->executed, 2, "executed_no_probe");
+
+ /* reattach with triggering optimization */
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ check(skel, link, usdt_test, addr, 4);
cleanup:
uprobe_syscall_executed__destroy(skel);
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCHv2 10/11] selftests/bpf: Add tests for uprobe nop10 red zone clobbering
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (8 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 09/11] selftests/bpf: Add reattach tests for uprobe syscall Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
2026-05-18 10:59 ` [PATCHv2 11/11] selftests/bpf: Add tests for forked/cloned optimized uprobes Jiri Olsa
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
From: Andrii Nakryiko <andrii@kernel.org>
The uprobe nop5 optimization used to replace a 5-byte NOP with a 5-byte
CALL to a trampoline. The CALL pushes a return address onto the stack at
[rsp-8], clobbering whatever was stored there.
On x86-64, the red zone is the 128 bytes below rsp that user code may use
for temporary storage without adjusting rsp. Compilers can place USDT
argument operands there, generating specs like "8@-8(%rbp)" when rbp ==
rsp. With the CALL-based optimization, the return address overwrites that
argument before the BPF-side USDT argument fetch runs.
Add two tests for this case. The uprobe_syscall subtest stores known values
at -8(%rsp), -16(%rsp), and -24(%rsp), executes an optimized nop10 uprobe,
and verifies the red-zone data is still intact. The USDT subtest triggers a
probe in a function where the compiler places three USDT operands in the
red zone and verifies that all 10 optimized invocations deliver the expected
argument values to BPF.
On an unfixed kernel, the first hit goes through the INT3 path and later
hits use the optimized CALL path, so the red-zone checks fail after
optimization.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
[ updates to use nop10 ]
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
.../selftests/bpf/prog_tests/uprobe_syscall.c | 75 +++++++++++++++++++
tools/testing/selftests/bpf/prog_tests/usdt.c | 49 ++++++++++++
tools/testing/selftests/bpf/progs/test_usdt.c | 25 +++++++
tools/testing/selftests/bpf/usdt_2.c | 13 ++++
4 files changed, 162 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index e88b316a3f2c..739b0be13aa3 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -357,6 +357,48 @@ __nocf_check __weak void usdt_test(void)
USDT(optimized_uprobe, usdt);
}
+/*
+ * Assembly-level red zone clobbering test. Stores known values in the
+ * red zone (below RSP), executes a nop10 (uprobe site), and checks that
+ * the values survived. Returns 0 if intact, 1 if clobbered.
+ *
+ * The nop5 optimization used CALL (which pushes a return address to
+ * [rsp-8]), the value at -8(%rsp) was overwritten. The nop10 optimization
+ * should escape that by moving stackpointer below the redzone before
+ * doing the CALL.
+ */
+__attribute__((aligned(16)))
+__nocf_check __weak __naked unsigned long uprobe_red_zone_test(void)
+{
+ asm volatile (
+ "movabs $0x1111111111111111, %%rax\n"
+ "movq %%rax, -8(%%rsp)\n"
+ "movabs $0x2222222222222222, %%rax\n"
+ "movq %%rax, -16(%%rsp)\n"
+ "movabs $0x3333333333333333, %%rax\n"
+ "movq %%rax, -24(%%rsp)\n"
+
+ ".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00\n" /* nop10: uprobe site */
+
+ "movabs $0x1111111111111111, %%rax\n"
+ "cmpq %%rax, -8(%%rsp)\n"
+ "jne 1f\n"
+ "movabs $0x2222222222222222, %%rax\n"
+ "cmpq %%rax, -16(%%rsp)\n"
+ "jne 1f\n"
+ "movabs $0x3333333333333333, %%rax\n"
+ "cmpq %%rax, -24(%%rsp)\n"
+ "jne 1f\n"
+
+ "xorl %%eax, %%eax\n"
+ "retq\n"
+ "1:\n"
+ "movl $1, %%eax\n"
+ "retq\n"
+ ::: "rax", "memory"
+ );
+}
+
static int find_uprobes_trampoline(void *tramp_addr)
{
void *start, *end;
@@ -855,6 +897,37 @@ static void test_uprobe_race(void)
#define __NR_uprobe 336
#endif
+static void test_uprobe_red_zone(void)
+{
+ struct uprobe_syscall_executed *skel;
+ struct bpf_link *link;
+ void *nop10_addr;
+ size_t offset;
+ int i;
+
+ nop10_addr = find_nop10(uprobe_red_zone_test);
+ if (!ASSERT_NEQ(nop10_addr, NULL, "find_nop10"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ offset = get_uprobe_offset(nop10_addr);
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+ goto cleanup;
+
+ for (i = 0; i < 10; i++)
+ ASSERT_EQ(uprobe_red_zone_test(), 0, "red_zone_intact");
+
+ bpf_link__destroy(link);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
static void test_uprobe_error(void)
{
long err = syscall(__NR_uprobe);
@@ -881,6 +954,8 @@ static void __test_uprobe_syscall(void)
test_uprobe_usdt();
if (test__start_subtest("uprobe_race"))
test_uprobe_race();
+ if (test__start_subtest("uprobe_red_zone"))
+ test_uprobe_red_zone();
if (test__start_subtest("uprobe_error"))
test_uprobe_error();
if (test__start_subtest("uprobe_regs_equal"))
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index a160d7c4fa0d..8954f543d68e 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -250,6 +250,7 @@ static void subtest_basic_usdt(bool optimized)
#ifdef __x86_64__
extern void usdt_1(void);
extern void usdt_2(void);
+extern void usdt_red_zone_trigger(void);
static unsigned char nop1[1] = { 0x90 };
static unsigned char nop1_nop10_combo[11] = { 0x90, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
@@ -340,6 +341,52 @@ static void subtest_optimized_attach(void)
cleanup:
test_usdt__destroy(skel);
}
+
+/*
+ * Test that USDT arguments survive nop10 optimization in a function where
+ * the compiler places operands in the red zone.
+ *
+ * Signal handlers are prone to having the compiler place USDT argument
+ * operands in the red zone (below rsp).
+ *
+ * The nop5 optimization used CALL (which pushes a return address to
+ * [rsp-8]), the value at -8(%rsp) was overwritten. The nop10 optimization
+ * should escape that by moving stackpointer below the redzone before
+ * doing the CALL.
+ */
+static void subtest_optimized_red_zone(void)
+{
+ struct test_usdt *skel;
+ int i;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->expected_arg[0] = 0xDEADBEEF;
+ skel->bss->expected_arg[1] = 0xCAFEBABE;
+ skel->bss->expected_arg[2] = 0xFEEDFACE;
+ skel->bss->expected_pid = getpid();
+
+ skel->links.usdt_check_arg = bpf_program__attach_usdt(
+ skel->progs.usdt_check_arg, 0, "/proc/self/exe",
+ "optimized_attach", "usdt_red_zone", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_check_arg, "attach_usdt_red_zone"))
+ goto cleanup;
+
+ for (i = 0; i < 10; i++)
+ usdt_red_zone_trigger();
+
+ ASSERT_EQ(skel->bss->arg_total, 10, "arg_total");
+ ASSERT_EQ(skel->bss->arg_bad, 0, "arg_bad");
+ ASSERT_EQ(skel->bss->arg_last[0], 0xDEADBEEF, "arg_last_1");
+ ASSERT_EQ(skel->bss->arg_last[1], 0xCAFEBABE, "arg_last_2");
+ ASSERT_EQ(skel->bss->arg_last[2], 0xFEEDFACE, "arg_last_3");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
#endif
unsigned short test_usdt_100_semaphore SEC(".probes");
@@ -613,6 +660,8 @@ void test_usdt(void)
subtest_basic_usdt(true);
if (test__start_subtest("optimized_attach"))
subtest_optimized_attach();
+ if (test__start_subtest("optimized_red_zone"))
+ subtest_optimized_red_zone();
#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index f00cb52874e0..0ee78fb050a1 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -149,5 +149,30 @@ int usdt_executed(struct pt_regs *ctx)
executed++;
return 0;
}
+
+int arg_total;
+int arg_bad;
+long arg_last[3];
+long expected_arg[3];
+int expected_pid;
+
+SEC("usdt")
+int BPF_USDT(usdt_check_arg, long arg1, long arg2, long arg3)
+{
+ if (expected_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&arg_total, 1);
+ arg_last[0] = arg1;
+ arg_last[1] = arg2;
+ arg_last[2] = arg3;
+
+ if (arg1 != expected_arg[0] ||
+ arg2 != expected_arg[1] ||
+ arg3 != expected_arg[2])
+ __sync_fetch_and_add(&arg_bad, 1);
+
+ return 0;
+}
#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/usdt_2.c b/tools/testing/selftests/bpf/usdt_2.c
index b359b389f6c0..5e38f8605b02 100644
--- a/tools/testing/selftests/bpf/usdt_2.c
+++ b/tools/testing/selftests/bpf/usdt_2.c
@@ -13,4 +13,17 @@ void usdt_2(void)
USDT(optimized_attach, usdt_2);
}
+static volatile unsigned long usdt_red_zone_arg1 = 0xDEADBEEF;
+static volatile unsigned long usdt_red_zone_arg2 = 0xCAFEBABE;
+static volatile unsigned long usdt_red_zone_arg3 = 0xFEEDFACE;
+
+void __attribute__((noinline)) usdt_red_zone_trigger(void)
+{
+ unsigned long a1 = usdt_red_zone_arg1;
+ unsigned long a2 = usdt_red_zone_arg2;
+ unsigned long a3 = usdt_red_zone_arg3;
+
+ USDT(optimized_attach, usdt_red_zone, a1, a2, a3);
+}
+
#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCHv2 11/11] selftests/bpf: Add tests for forked/cloned optimized uprobes
2026-05-18 10:59 [PATCHv2 00/11] uprobes/x86: Fix red zone issue for optimized uprobes Jiri Olsa
` (9 preceding siblings ...)
2026-05-18 10:59 ` [PATCHv2 10/11] selftests/bpf: Add tests for uprobe nop10 red zone clobbering Jiri Olsa
@ 2026-05-18 10:59 ` Jiri Olsa
10 siblings, 0 replies; 18+ messages in thread
From: Jiri Olsa @ 2026-05-18 10:59 UTC (permalink / raw)
To: Oleg Nesterov, Peter Zijlstra, Ingo Molnar, Masami Hiramatsu,
Andrii Nakryiko
Cc: bpf, linux-trace-kernel
Adding tests for forked/cloned optimized uprobes and make
sure the child can properly execute optimized probe for
both fork (dups mm) and clone with CLONE_VM.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
.../selftests/bpf/prog_tests/uprobe_syscall.c | 88 +++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index 739b0be13aa3..713b42d4f190 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -4,6 +4,8 @@
#ifdef __x86_64__
+#define _GNU_SOURCE
+#include <sched.h>
#include <unistd.h>
#include <asm/ptrace.h>
#include <linux/compiler.h>
@@ -936,6 +938,88 @@ static void test_uprobe_error(void)
ASSERT_EQ(errno, EPROTO, "errno");
}
+__attribute__((aligned(16)))
+__nocf_check __weak __naked void uprobe_fork_test(void)
+{
+ asm volatile (
+ ".byte 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00\n" /* nop10 */
+ "ret\n"
+ );
+}
+
+static int child_func(void *arg)
+{
+ struct uprobe_syscall_executed *skel = arg;
+
+ /* Make sure the child's probe is still there and optimized.. */
+ if (memcmp(uprobe_fork_test, lea_rsp, sizeof(lea_rsp)))
+ _exit(1);
+
+ skel->bss->pid = getpid();
+
+ /* .. and it executes properly. */
+ uprobe_fork_test();
+
+ if (skel->bss->executed != 3)
+ _exit(2);
+
+ _exit(0);
+}
+
+static void test_uprobe_fork_optimized(bool clone_vm)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ struct bpf_link *link = NULL;
+ unsigned long offset;
+ int pid, status, err;
+ char stack[65535];
+
+ offset = get_uprobe_offset(&uprobe_fork_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ -1, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ /* Trigger optimization of uprobe in uprobe_fork_test. */
+ uprobe_fork_test();
+ uprobe_fork_test();
+
+ /* Make sure it got optimied. */
+ if (!ASSERT_OK(memcmp(uprobe_fork_test, lea_rsp, sizeof(lea_rsp)), "optimized"))
+ goto cleanup;
+
+ if (clone_vm) {
+ pid = clone(child_func, stack + sizeof(stack), CLONE_VM|SIGCHLD, skel);
+ if (!ASSERT_GT(pid, 0, "clone"))
+ goto cleanup;
+ } else {
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ goto cleanup;
+ if (pid == 0)
+ child_func(skel);
+ }
+
+ /* Wait for the child and verify it exited properly with 0. */
+ err = waitpid(pid, &status, 0);
+ if (ASSERT_EQ(err, pid, "waitpid")) {
+ ASSERT_EQ(WIFEXITED(status), 1, "child_exited");
+ ASSERT_EQ(WEXITSTATUS(status), 0, "child_exit_code");
+ }
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
static void __test_uprobe_syscall(void)
{
if (test__start_subtest("uretprobe_regs_equal"))
@@ -956,6 +1040,10 @@ static void __test_uprobe_syscall(void)
test_uprobe_race();
if (test__start_subtest("uprobe_red_zone"))
test_uprobe_red_zone();
+ if (test__start_subtest("uprobe_optimized_fork"))
+ test_uprobe_fork_optimized(false);
+ if (test__start_subtest("uprobe_optimized_clone_vm"))
+ test_uprobe_fork_optimized(true);
if (test__start_subtest("uprobe_error"))
test_uprobe_error();
if (test__start_subtest("uprobe_regs_equal"))
--
2.53.0
^ permalink raw reply related [flat|nested] 18+ messages in thread