* [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support
2026-07-03 10:50 [PATCH v2 0/5] ptrace_set_syscall_info: add support for seccomp syscall skipping and instruction pointer modification Renzo Davoli
@ 2026-07-03 10:50 ` Renzo Davoli
2026-07-03 10:58 ` Oleg Nesterov
2026-07-03 11:48 ` Oleg Nesterov
2026-07-03 10:50 ` [PATCH v2 2/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO syscall skipping Renzo Davoli
` (3 subsequent siblings)
4 siblings, 2 replies; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 10:50 UTC (permalink / raw)
To: linux-kernel
Cc: Renzo Davoli, Andrew Morton, Oleg Nesterov, Shuah Khan,
Alexey Gladkov, Eugene Syromyatnikov, Davide Berardi,
strace-devel, Dmitry V . Levin
This patch extends PTRACE_SET_SYSCALL_INFO with support for skipping a system
call triggered via seccomp.
When the tracer retrieves a ptrace_syscall_info structure with
op == PTRACE_SYSCALL_INFO_SECCOMP, it may choose to skip the system
call by changing op to PTRACE_SYSCALL_INFO_EXIT and
populating the exit union fields (rval and is_error) to define
the return value and error status for the tracee.
Signed-off-by: Renzo Davoli <renzo@cs.unibo.it>
---
kernel/ptrace.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d041645d9d17..a056f58fa82a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1099,7 +1099,7 @@ ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
static int
ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
- struct ptrace_syscall_info *info)
+ struct ptrace_syscall_info *info, bool skip_syscall)
{
long rval = info->exit.rval;
@@ -1116,6 +1116,9 @@ ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
else
syscall_set_return_value(child, regs, 0, rval);
+ if (skip_syscall)
+ syscall_set_nr(child, regs, -1);
+
return 0;
}
@@ -1125,6 +1128,7 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
{
struct pt_regs *regs = task_pt_regs(child);
struct ptrace_syscall_info info;
+ bool skip_syscall;
if (user_size < sizeof(info))
return -EINVAL;
@@ -1141,15 +1145,24 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
if (info.flags || info.reserved)
return -EINVAL;
- /* Changing the type of the system call stop is not supported yet. */
- if (ptrace_get_syscall_info_op(child) != info.op)
- return -EINVAL;
+ /*
+ * Changing the type of the system call stop is
+ * not allowed, with the following exception:
+ * PTRACE_SYSCALL_INFO_SECCOMP can be changed to
+ * PTRACE_SYSCALL_INFO_EXIT to skip the system call
+ */
+
+ if (ptrace_get_syscall_info_op(child) != info.op) {
+ if (info.op != PTRACE_SYSCALL_INFO_EXIT)
+ return -EINVAL;
+ skip_syscall = true;
+ }
switch (info.op) {
case PTRACE_SYSCALL_INFO_ENTRY:
return ptrace_set_syscall_info_entry(child, regs, &info);
case PTRACE_SYSCALL_INFO_EXIT:
- return ptrace_set_syscall_info_exit(child, regs, &info);
+ return ptrace_set_syscall_info_exit(child, regs, &info, skip_syscall);
case PTRACE_SYSCALL_INFO_SECCOMP:
return ptrace_set_syscall_info_seccomp(child, regs, &info);
default:
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support
2026-07-03 10:50 ` [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support Renzo Davoli
@ 2026-07-03 10:58 ` Oleg Nesterov
2026-07-03 11:48 ` Oleg Nesterov
1 sibling, 0 replies; 11+ messages in thread
From: Oleg Nesterov @ 2026-07-03 10:58 UTC (permalink / raw)
To: Renzo Davoli
Cc: linux-kernel, Andrew Morton, Shuah Khan, Alexey Gladkov,
Eugene Syromyatnikov, Davide Berardi, strace-devel,
Dmitry V . Levin
On 07/03, Renzo Davoli wrote:
>
> This patch extends PTRACE_SET_SYSCALL_INFO with support for skipping a system
> call triggered via seccomp.
>
> When the tracer retrieves a ptrace_syscall_info structure with
> op == PTRACE_SYSCALL_INFO_SECCOMP,
... or PTRACE_SYSCALL_INFO_ENTRY
> - /* Changing the type of the system call stop is not supported yet. */
> - if (ptrace_get_syscall_info_op(child) != info.op)
> - return -EINVAL;
> + /*
> + * Changing the type of the system call stop is
> + * not allowed, with the following exception:
> + * PTRACE_SYSCALL_INFO_SECCOMP can be changed to
> + * PTRACE_SYSCALL_INFO_EXIT to skip the system call
> + */
ditto
Oleg.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support
2026-07-03 10:50 ` [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support Renzo Davoli
2026-07-03 10:58 ` Oleg Nesterov
@ 2026-07-03 11:48 ` Oleg Nesterov
1 sibling, 0 replies; 11+ messages in thread
From: Oleg Nesterov @ 2026-07-03 11:48 UTC (permalink / raw)
To: Renzo Davoli
Cc: linux-kernel, Andrew Morton, Shuah Khan, Alexey Gladkov,
Eugene Syromyatnikov, Davide Berardi, strace-devel,
Dmitry V . Levin
On 07/03, Renzo Davoli wrote:
>
> @@ -1125,6 +1128,7 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
> {
> struct pt_regs *regs = task_pt_regs(child);
> struct ptrace_syscall_info info;
> + bool skip_syscall;
bool skip_syscall = false;
otherwise not initialized when ptrace_get_syscall_info_op() == info.op
Oleg.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 2/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO syscall skipping
2026-07-03 10:50 [PATCH v2 0/5] ptrace_set_syscall_info: add support for seccomp syscall skipping and instruction pointer modification Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support Renzo Davoli
@ 2026-07-03 10:50 ` Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 3/5] asm/ptrace.h: add instruction_pointer_set Renzo Davoli
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 10:50 UTC (permalink / raw)
To: linux-kernel
Cc: Renzo Davoli, Andrew Morton, Oleg Nesterov, Shuah Khan,
Alexey Gladkov, Eugene Syromyatnikov, Davide Berardi,
strace-devel, Dmitry V . Levin
Check whether PTRACE_SYSCALL_INFO syscall skipping semantics implemented in the
kernel matches userspace expectations.
Signed-off-by: Renzo Davoli <renzo@cs.unibo.it>
---
.../selftests/ptrace/set_syscall_info.c | 176 +++++++++++++++++-
1 file changed, 175 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c
index 1cc411a41cd6..bcc867b627cd 100644
--- a/tools/testing/selftests/ptrace/set_syscall_info.c
+++ b/tools/testing/selftests/ptrace/set_syscall_info.c
@@ -11,9 +11,16 @@
#include <err.h>
#include <fcntl.h>
#include <signal.h>
+#include <stdlib.h>
+#include <stddef.h>
#include <asm/unistd.h>
+#include <sys/prctl.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/prctl.h>
+
#if defined(_MIPS_SIM) && _MIPS_SIM == _MIPS_SIM_NABI32
/*
@@ -36,6 +43,7 @@ struct si_exit {
static unsigned int ptrace_stop;
static pid_t tracee_pid;
+static pid_t tracer_pid;
static int
kill_tracee(pid_t pid)
@@ -64,6 +72,25 @@ sys_ptrace(int request, pid_t pid, unsigned long addr, unsigned long data)
ptrace_stop, ##__VA_ARGS__); \
} while (0)
+static int sys_seccomp(unsigned int operation, unsigned int flags, void *args)
+{
+ return syscall(__NR_seccomp, operation, flags, args);
+}
+
+static struct sock_filter seccomp_filter[] = {
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),
+
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_restart_syscall, 0, 1),
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRACE),
+};
+
+static struct sock_fprog seccomp_prog = {
+ .filter = seccomp_filter,
+ .len = ARRAY_SIZE(seccomp_filter)
+};
+
static void
check_psi_entry(struct __test_metadata *_metadata,
const struct ptrace_syscall_info *info,
@@ -128,7 +155,6 @@ check_psi_exit(struct __test_metadata *_metadata,
TEST(set_syscall_info)
{
- const pid_t tracer_pid = getpid();
const kernel_ulong_t dummy[] = {
(kernel_ulong_t) 0xdad0bef0bad0fed0ULL,
(kernel_ulong_t) 0xdad1bef1bad1fed1ULL,
@@ -138,6 +164,7 @@ TEST(set_syscall_info)
(kernel_ulong_t) 0xdad5bef5bad5fed5ULL,
};
int splice_in[2], splice_out[2];
+ tracer_pid = getpid();
ASSERT_EQ(0, pipe(splice_in));
ASSERT_EQ(0, pipe(splice_out));
@@ -516,4 +543,151 @@ TEST(set_syscall_info)
ASSERT_EQ(ptrace_stop, ARRAY_SIZE(si) * 2);
}
+TEST(set_syscall_info_seccomp)
+{
+ tracer_pid = getpid();
+ tracee_pid = fork();
+
+ ASSERT_LE(0, tracee_pid) {
+ TH_LOG("fork: %m");
+ }
+
+ /* tracee */
+ if (tracee_pid == 0) {
+ tracee_pid = getpid();
+ ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ TH_LOG("PTRACE_TRACEME: %m");
+ }
+ ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) {
+ /* cannot happen */
+ TH_LOG("kill SIGSTOP: %m");
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("prctl: %m");
+ _exit(1);
+ }
+ ASSERT_EQ(0, sys_seccomp(SECCOMP_SET_MODE_FILTER, 0,
+ (void *) &seccomp_prog)) {
+ TH_LOG("seccomp: %m");
+ _exit(1);
+ }
+
+ /* run getpid unmodified */
+ ASSERT_EQ(tracee_pid, getpid()) {
+ TH_LOG("getpid seccomp unchanged: %m");
+ _exit(1);
+ }
+
+ /* run getppid instead of getpid */
+ ASSERT_EQ(tracer_pid, getpid()) {
+ TH_LOG("getpid seccomp nr changes: %m");
+ _exit(1);
+ }
+
+ /* skip getpid and return 42 */
+ ASSERT_EQ(42, getpid()) {
+ TH_LOG("getpid skip set return value changes: %m");
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ int status;
+
+ /* tracer */
+ ASSERT_LE(0, waitpid(-1,&status,0)) {
+ LOG_KILL_TRACEE("waitpid: %m");
+ }
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid, 0, PTRACE_O_TRACESECCOMP | PTRACE_O_TRACESYSGOOD))
+ LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m");
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_CONT, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_CONT: %m");
+ }
+
+ while (1) {
+ ASSERT_EQ(tracee_pid, wait(&status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("wait: %m");
+ }
+ if (WIFEXITED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ ASSERT_EQ(0, WEXITSTATUS(status)) {
+ LOG_KILL_TRACEE("unexpected exit status %u",
+ WEXITSTATUS(status));
+ }
+ break;
+ }
+ ASSERT_FALSE(WIFSIGNALED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ LOG_KILL_TRACEE("unexpected signal %u",
+ WTERMSIG(status));
+ }
+ ASSERT_TRUE(WIFSTOPPED(status)) {
+ LOG_KILL_TRACEE("unexpected wait status %#x", status);
+ }
+
+ if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
+ struct ptrace_syscall_info info;
+ size_t info_size = sizeof(info);
+ ASSERT_LT(0, sys_ptrace(PTRACE_GET_SYSCALL_INFO, tracee_pid, info_size, (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m");
+ };
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_SECCOMP, info.op) {
+ LOG_KILL_TRACEE("entry op mismatch: %m");
+ }
+ ASSERT_TRUE(info.arch) {
+ LOG_KILL_TRACEE("entry arch mismatch: %m");
+ }
+ ASSERT_TRUE(info.instruction_pointer) {
+ LOG_KILL_TRACEE("entry instruction_pointer mismatch: %m");
+ }
+ ASSERT_TRUE(info.stack_pointer) {
+ LOG_KILL_TRACEE("entry stack_pointer mismatch: %m");
+ }
+
+ switch (ptrace_stop) {
+ case 0: ASSERT_EQ(__NR_getpid, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_getpid mismatch: %m", ptrace_stop);
+ }
+ ptrace_stop++;
+ break;
+ case 1: ASSERT_EQ(__NR_getpid, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_getpid mismatch: %m", ptrace_stop);
+ }
+ info.seccomp.nr = __NR_getppid;
+ ptrace_stop++;
+ break;
+ case 2: ASSERT_EQ(__NR_getpid, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_getpid mismatch: %m", ptrace_stop);
+ }
+ info.op = PTRACE_SYSCALL_INFO_EXIT;
+ info.exit.rval = 42;
+ info.exit.is_error = 0;
+ ptrace_stop++;
+ break;
+ case 3: ASSERT_EQ(__NR_exit_group, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_exit_group mismatch: %m", ptrace_stop);
+ }
+ break;
+ default:
+ LOG_KILL_TRACEE("unexpected system call: %m");
+ break;
+
+ }
+ ASSERT_EQ(0,sys_ptrace(PTRACE_SET_SYSCALL_INFO, tracee_pid, info_size, (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ ASSERT_EQ(0,sys_ptrace(PTRACE_CONT, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_CONT: %m");
+ }
+ } else {
+ LOG_KILL_TRACEE("unexpected signal: %m");
+ }
+ }
+}
+
TEST_HARNESS_MAIN
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 3/5] asm/ptrace.h: add instruction_pointer_set
2026-07-03 10:50 [PATCH v2 0/5] ptrace_set_syscall_info: add support for seccomp syscall skipping and instruction pointer modification Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 1/5] ptrace: PTRACE_SET_SYSCALL_INFO syscall skipping support Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 2/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO syscall skipping Renzo Davoli
@ 2026-07-03 10:50 ` Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP Renzo Davoli
2026-07-03 10:50 ` [PATCH v2 5/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO_FLAG_SET_IP Renzo Davoli
4 siblings, 0 replies; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 10:50 UTC (permalink / raw)
To: linux-kernel
Cc: Renzo Davoli, Andrew Morton, Oleg Nesterov, Shuah Khan,
Alexey Gladkov, Eugene Syromyatnikov, Davide Berardi,
strace-devel, Dmitry V . Levin
Add an instruction_pointer_set function for architectures that do
not currently provide one.
Signed-off-by: Renzo Davoli <renzo@cs.unibo.it>
---
arch/alpha/include/asm/ptrace.h | 6 ++++++
arch/hexagon/include/asm/ptrace.h | 6 ++++++
arch/m68k/include/asm/ptrace.h | 6 ++++++
arch/microblaze/include/asm/ptrace.h | 6 ++++++
arch/nios2/include/asm/ptrace.h | 6 ++++++
arch/um/include/asm/ptrace-generic.h | 6 ++++++
arch/xtensa/include/asm/ptrace.h | 6 ++++++
7 files changed, 42 insertions(+)
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index 3557ce64ed21..0821fe9a27c8 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -24,4 +24,10 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->r0;
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
#endif
diff --git a/arch/hexagon/include/asm/ptrace.h b/arch/hexagon/include/asm/ptrace.h
index ed35da1ee685..0a121f6e3bfc 100644
--- a/arch/hexagon/include/asm/ptrace.h
+++ b/arch/hexagon/include/asm/ptrace.h
@@ -18,6 +18,12 @@ extern const char *regs_query_register_name(unsigned int offset);
((struct pt_regs *) \
((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
#if CONFIG_HEXAGON_ARCH_VERSION >= 4
#define arch_has_single_step() (1)
#endif
diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h
index bc86ce012025..6e8a8f0daee8 100644
--- a/arch/m68k/include/asm/ptrace.h
+++ b/arch/m68k/include/asm/ptrace.h
@@ -18,6 +18,12 @@
(struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1
#define current_user_stack_pointer() rdusp()
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
#define arch_has_single_step() (1)
#ifdef CONFIG_MMU
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 17982292a64f..69e10658d7a9 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -20,5 +20,11 @@ static inline long regs_return_value(struct pt_regs *regs)
return regs->r3;
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
#endif /* __ASSEMBLER__ */
#endif /* _ASM_MICROBLAZE_PTRACE_H */
diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h
index 96cbcd40c7ce..d120d8ecb187 100644
--- a/arch/nios2/include/asm/ptrace.h
+++ b/arch/nios2/include/asm/ptrace.h
@@ -70,6 +70,12 @@ struct switch_stack {
#define user_stack_pointer(regs) ((regs)->sp)
extern void show_regs(struct pt_regs *);
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
#define current_pt_regs() \
((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE)\
- 1)
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 86d74f9d33cf..44beb96862d8 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -29,6 +29,12 @@ struct pt_regs {
#define PTRACE_OLDSETOPTIONS 21
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
struct task_struct;
extern long subarch_ptrace(struct task_struct *child, long request,
diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h
index d0568ff6d349..97b14418955e 100644
--- a/arch/xtensa/include/asm/ptrace.h
+++ b/arch/xtensa/include/asm/ptrace.h
@@ -103,6 +103,12 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->areg[2];
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ instruction_pointer(regs) = val;
+}
+
int do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP
2026-07-03 10:50 [PATCH v2 0/5] ptrace_set_syscall_info: add support for seccomp syscall skipping and instruction pointer modification Renzo Davoli
` (2 preceding siblings ...)
2026-07-03 10:50 ` [PATCH v2 3/5] asm/ptrace.h: add instruction_pointer_set Renzo Davoli
@ 2026-07-03 10:50 ` Renzo Davoli
2026-07-03 11:04 ` Oleg Nesterov
2026-07-03 10:50 ` [PATCH v2 5/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO_FLAG_SET_IP Renzo Davoli
4 siblings, 1 reply; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 10:50 UTC (permalink / raw)
To: linux-kernel
Cc: Renzo Davoli, Andrew Morton, Oleg Nesterov, Shuah Khan,
Alexey Gladkov, Eugene Syromyatnikov, Davide Berardi,
strace-devel, Dmitry V . Levin
This flag adds support for modifying the tracee's instruction pointer.
To do this, the tracer stores the new instruction pointer value in the
instruction_pointer field of the ptrace_syscall_info structure and
sets the PTRACE_SYSCALL_INFO_FLAG_SET_IP flag in the flags field.
This flag is introduced to avoid breaking existing code that uses
PTRACE_SET_SYSCALL_INFO and currently ignores the
instruction_pointer field.
Signed-off-by: Renzo Davoli <renzo@cs.unibo.it>
---
include/uapi/linux/ptrace.h | 4 ++++
kernel/ptrace.c | 25 ++++++++++++++++++++-----
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 5f8ef6156752..6f62cb812875 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -80,6 +80,10 @@ struct seccomp_metadata {
#define PTRACE_SYSCALL_INFO_EXIT 2
#define PTRACE_SYSCALL_INFO_SECCOMP 3
+#define PTRACE_SYSCALL_INFO_FLAG_SET_IP (1 << 0)
+#define PTRACE_SYSCALL_INFO_FLAG_ALL \
+ (PTRACE_SYSCALL_INFO_FLAG_SET_IP)
+
struct ptrace_syscall_info {
__u8 op; /* PTRACE_SYSCALL_INFO_* */
__u8 reserved;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a056f58fa82a..41bedc82a45c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1129,6 +1129,7 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
struct pt_regs *regs = task_pt_regs(child);
struct ptrace_syscall_info info;
bool skip_syscall;
+ int ret;
if (user_size < sizeof(info))
return -EINVAL;
@@ -1141,8 +1142,8 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
if (copy_from_user(&info, datavp, sizeof(info)))
return -EFAULT;
- /* Reserved for future use. */
- if (info.flags || info.reserved)
+ /* Unused flags and fields reserved for future use. */
+ if ((info.flags & ~PTRACE_SYSCALL_INFO_FLAG_ALL) || info.reserved)
return -EINVAL;
/*
@@ -1160,15 +1161,29 @@ ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size,
switch (info.op) {
case PTRACE_SYSCALL_INFO_ENTRY:
- return ptrace_set_syscall_info_entry(child, regs, &info);
+ ret = ptrace_set_syscall_info_entry(child, regs, &info);
+ break;
case PTRACE_SYSCALL_INFO_EXIT:
- return ptrace_set_syscall_info_exit(child, regs, &info, skip_syscall);
+ ret = ptrace_set_syscall_info_exit(child, regs, &info, skip_syscall);
+ break;
case PTRACE_SYSCALL_INFO_SECCOMP:
- return ptrace_set_syscall_info_seccomp(child, regs, &info);
+ ret = ptrace_set_syscall_info_seccomp(child, regs, &info);
+ break;
default:
/* Other types of system call stops are not supported yet. */
return -EINVAL;
}
+
+ if (ret== 0) {
+ if (info.flags & PTRACE_SYSCALL_INFO_FLAG_SET_IP) {
+ unsigned long ip = info.instruction_pointer;
+ if (ip != info.instruction_pointer)
+ return -ERANGE;
+ instruction_pointer_set(regs, ip);
+ }
+ }
+
+ return ret;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP
2026-07-03 10:50 ` [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP Renzo Davoli
@ 2026-07-03 11:04 ` Oleg Nesterov
2026-07-03 15:01 ` Renzo Davoli
0 siblings, 1 reply; 11+ messages in thread
From: Oleg Nesterov @ 2026-07-03 11:04 UTC (permalink / raw)
To: Renzo Davoli
Cc: linux-kernel, Andrew Morton, Shuah Khan, Alexey Gladkov,
Eugene Syromyatnikov, Davide Berardi, strace-devel,
Dmitry V . Levin
On 07/03, Renzo Davoli wrote:
>
> This flag adds support for modifying the tracee's instruction pointer.
>
> To do this, the tracer stores the new instruction pointer value in the
> instruction_pointer field of the ptrace_syscall_info structure and
> sets the PTRACE_SYSCALL_INFO_FLAG_SET_IP flag in the flags field.
But why? Who will use this feature and for what? How often?
I think the changelog should be more convincing...
Oleg.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP
2026-07-03 11:04 ` Oleg Nesterov
@ 2026-07-03 15:01 ` Renzo Davoli
2026-07-03 15:54 ` Oleg Nesterov
0 siblings, 1 reply; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 15:01 UTC (permalink / raw)
To: Oleg Nesterov
Cc: linux-kernel, Andrew Morton, Shuah Khan, Alexey Gladkov,
Eugene Syromyatnikov, Davide Berardi, strace-devel,
Dmitry V . Levin
On Fri, Jul 03, 2026 at 01:04:59PM +0200, Oleg Nesterov wrote:
> On 07/03, Renzo Davoli wrote:
> >
> > This flag adds support for modifying the tracee's instruction pointer.
> >
> > To do this, the tracer stores the new instruction pointer value in the
> > instruction_pointer field of the ptrace_syscall_info structure and
> > sets the PTRACE_SYSCALL_INFO_FLAG_SET_IP flag in the flags field.
>
> But why? Who will use this feature and for what? How often?
>
> I think the changelog should be more convincing...
I'll add this to V3 cover letter.
renzo
PTRACE_SYSCALL_INFO_FLAG_SET_IP
The proposal does not add any new ptrace capability. It merely provides a
portable interface for a capability that already exists and is already relied
upon by existing applications.
WHY
PTRACE_SYSCALL_INFO_FLAG_SET_IP completes the set of actions that a tracer can
request when intercepting a system call.
A tracer can currently instruct a tracee to:
* execute the original system call;
* execute a different system call (or the same system call with modified arguments);
* skip the system call and provide the desired return value and/or errno.
The proposed PTRACE_SYSCALL_INFO_FLAG_SET_IP adds a fourth possibility:
* execute an arbitrary sequence of two or more system calls in place of the original one.
The mechanism is straightforward. During a PTRACE_SYSCALL_INFO_EXIT stop, the
tracer rewinds the instruction pointer to the system call instruction (e.g. by
2 bytes on x86-64 for syscall, or by the appropriate amount on other
architectures). When the tracee resumes, it immediately generates a new
syscall-entry stop, allowing the tracer to provide a new system call number and
arguments. By repeating this process, a tracer can transparently replace a
single system call with any sequence of system calls.
This capability already exists on all architectures through
architecture-specific interfaces such as PTRACE_POKEUSER, PTRACE_SETREGS, or
PTRACE_SETREGSET. PTRACE_SYSCALL_INFO_FLAG_SET_IP does not introduce a new
capability; it merely exposes an existing one through the portable
PTRACE_GET_SYSCALL_INFO/PTRACE_SET_SYSCALL_INFO API.
WHO
The VUOS project uses this mechanism extensively.
VUOS provides namespace-like execution environments implemented entirely in
user space, without relying on kernel namespaces.
https://wiki.virtualsquare.org/#/tutorials/vuosbasics
For example, VUOS allows unprivileged processes to use user-space
implementations of filesystems (FUSE), networking stacks, virtual devices, and
other resources.
To improve scalability on multicore systems, VUOS implements what we call the
guardian angel model: each traced thread has its own dedicated tracer thread.
This avoids a single tracer becoming a bottleneck.
When a traced thread creates a child, ownership of the new tracee must be
transferred to a newly created guardian angel. This requires delaying execution
of the child's first system call until the new tracer has attached.
The current implementation proceeds as follows:
* save the original system call number and arguments;
* replace the system call with a blocking ppoll(NULL, 0, NULL, NULL) call;
* detach the original tracer;
* attach the new guardian angel using PTRACE_SEIZE;
* interrupt the blocking ppoll() with PTRACE_INTERRUPT;
* at the subsequent syscall-exit stop, rewind the instruction pointer to the system call instruction;
* at the following syscall-entry stop, restore the original system call number and arguments.
This mechanism is currently implemented using architecture-specific register
manipulation. PTRACE_SYSCALL_INFO_FLAG_SET_IP would allow the same
implementation to be written using the portable ptrace syscall information API.
Although VUOS is the primary motivation for this proposal, the feature is
generally useful for any project implementing ptrace-based system call
interposition, including PRoot, strace's syscall injection machinery, and
similar frameworks.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP
2026-07-03 15:01 ` Renzo Davoli
@ 2026-07-03 15:54 ` Oleg Nesterov
0 siblings, 0 replies; 11+ messages in thread
From: Oleg Nesterov @ 2026-07-03 15:54 UTC (permalink / raw)
To: Renzo Davoli
Cc: linux-kernel, Andrew Morton, Shuah Khan, Alexey Gladkov,
Eugene Syromyatnikov, Davide Berardi, strace-devel,
Dmitry V . Levin
On 07/03, Renzo Davoli wrote:
>
> On Fri, Jul 03, 2026 at 01:04:59PM +0200, Oleg Nesterov wrote:
> > On 07/03, Renzo Davoli wrote:
> > >
> > > This flag adds support for modifying the tracee's instruction pointer.
> > >
> > > To do this, the tracer stores the new instruction pointer value in the
> > > instruction_pointer field of the ptrace_syscall_info structure and
> > > sets the PTRACE_SYSCALL_INFO_FLAG_SET_IP flag in the flags field.
> >
> > But why? Who will use this feature and for what? How often?
> >
> > I think the changelog should be more convincing...
>
> I'll add this to V3 cover letter.
[... snip ...]
OK, but I'd suggest to add this to the changelog of the patch which adds
PTRACE_SYSCALL_INFO_FLAG_SET_IP
And in fact, I'd also suggest sending 3-5 as a separate series...
Oleg.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 5/5] selftests/ptrace: add a test case for PTRACE_SYSCALL_INFO_FLAG_SET_IP
2026-07-03 10:50 [PATCH v2 0/5] ptrace_set_syscall_info: add support for seccomp syscall skipping and instruction pointer modification Renzo Davoli
` (3 preceding siblings ...)
2026-07-03 10:50 ` [PATCH v2 4/5] ptrace: add PTRACE_SYSCALL_INFO_FLAG_SET_IP Renzo Davoli
@ 2026-07-03 10:50 ` Renzo Davoli
4 siblings, 0 replies; 11+ messages in thread
From: Renzo Davoli @ 2026-07-03 10:50 UTC (permalink / raw)
To: linux-kernel
Cc: Renzo Davoli, Andrew Morton, Oleg Nesterov, Shuah Khan,
Alexey Gladkov, Eugene Syromyatnikov, Davide Berardi,
strace-devel, Dmitry V . Levin
Check whether PTRACE_SYSCALL_INFO_FLAG_SET_IP semantics implemented in the
kernel matches userspace expectations.
Signed-off-by: Renzo Davoli <renzo@cs.unibo.it>
---
.../selftests/ptrace/set_syscall_info.c | 151 ++++++++++++++++++
1 file changed, 151 insertions(+)
diff --git a/tools/testing/selftests/ptrace/set_syscall_info.c b/tools/testing/selftests/ptrace/set_syscall_info.c
index bcc867b627cd..d87d89c2c0b2 100644
--- a/tools/testing/selftests/ptrace/set_syscall_info.c
+++ b/tools/testing/selftests/ptrace/set_syscall_info.c
@@ -91,6 +91,10 @@ static struct sock_fprog seccomp_prog = {
.len = ARRAY_SIZE(seccomp_filter)
};
+static char w1[] = {'A', '\n'};
+static char w2[] = {'B', '\n'};
+static char w3[] = {'C', '\n'};
+
static void
check_psi_entry(struct __test_metadata *_metadata,
const struct ptrace_syscall_info *info,
@@ -690,4 +694,151 @@ TEST(set_syscall_info_seccomp)
}
}
+TEST(set_syscall_info_setip)
+{
+ tracer_pid = getpid();
+ tracee_pid = fork();
+
+ ASSERT_LE(0, tracee_pid) {
+ TH_LOG("fork: %m");
+ }
+
+ /* tracee */
+ if (tracee_pid == 0) {
+ int rv;
+
+ tracee_pid = getpid();
+ ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ TH_LOG("PTRACE_TRACEME: %m");
+ }
+ ASSERT_EQ(0, kill(tracee_pid, SIGSTOP)) {
+ /* cannot happen */
+ TH_LOG("kill SIGSTOP: %m");
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("prctl: %m");
+ _exit(1);
+ }
+ ASSERT_EQ(0, sys_seccomp(SECCOMP_SET_MODE_FILTER, 0,
+ (void *) &seccomp_prog)) {
+ TH_LOG("seccomp: %m");
+ _exit(1);
+ }
+
+presyscall:
+ /* this syscall will run twice
+ (the tracer steps back the instruction pointer) */
+ rv = write(1, w1, sizeof(w1));
+ ASSERT_EQ(2, rv) {
+ TH_LOG("getpid skip set return value changes: %m");
+ _exit(1);
+ }
+
+ /* run write unmodified */
+ ASSERT_EQ(2, write(1, w3, sizeof(w3))) {
+ TH_LOG("getpid skip set return value changes: %m");
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ int status;
+ void *doitagain = &&presyscall;
+
+ /* tracer */
+ ASSERT_LE(0, waitpid(-1,&status,0)) {
+ LOG_KILL_TRACEE("waitpid: %m");
+ }
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_SETOPTIONS, tracee_pid, 0, PTRACE_O_TRACESECCOMP | PTRACE_O_TRACESYSGOOD))
+ LOG_KILL_TRACEE("PTRACE_SETOPTIONS: %m");
+
+ ASSERT_EQ(0, sys_ptrace(PTRACE_CONT, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_CONT: %m");
+ }
+
+ while (1) {
+ ASSERT_EQ(tracee_pid, wait(&status)) {
+ /* cannot happen */
+ LOG_KILL_TRACEE("wait: %m");
+ }
+ if (WIFEXITED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ ASSERT_EQ(0, WEXITSTATUS(status)) {
+ LOG_KILL_TRACEE("unexpected exit status %u",
+ WEXITSTATUS(status));
+ }
+ break;
+ }
+ ASSERT_FALSE(WIFSIGNALED(status)) {
+ tracee_pid = 0; /* the tracee is no more */
+ LOG_KILL_TRACEE("unexpected signal %u",
+ WTERMSIG(status));
+ }
+ ASSERT_TRUE(WIFSTOPPED(status)) {
+ LOG_KILL_TRACEE("unexpected wait status %#x", status);
+ }
+
+ if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
+ struct ptrace_syscall_info info;
+ size_t info_size = sizeof(info);
+ ASSERT_LT(0, sys_ptrace(PTRACE_GET_SYSCALL_INFO, tracee_pid, info_size, (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_GET_SYSCALL_INFO: %m");
+ }
+ ASSERT_EQ(PTRACE_SYSCALL_INFO_SECCOMP, info.op) {
+ LOG_KILL_TRACEE("entry op mismatch: %m");
+ }
+ ASSERT_TRUE(info.arch) {
+ LOG_KILL_TRACEE("entry arch mismatch: %m");
+ }
+ ASSERT_TRUE(info.instruction_pointer) {
+ LOG_KILL_TRACEE("entry instruction_pointer mismatch: %m");
+ }
+ ASSERT_TRUE(info.stack_pointer) {
+ LOG_KILL_TRACEE("entry stack_pointer mismatch: %m");
+ }
+
+ switch (ptrace_stop) {
+ case 0: ASSERT_EQ(__NR_write, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_write mismatch: %m", ptrace_stop);
+ }
+ info.instruction_pointer = (uintptr_t) doitagain;
+ info.flags = PTRACE_SYSCALL_INFO_FLAG_SET_IP;
+ ptrace_stop++;
+ break;
+ case 1:
+ info.seccomp.nr = __NR_write;
+ info.seccomp.args[0] = 1;
+ info.seccomp.args[1] = (uintptr_t) w2;
+ info.seccomp.args[2] = sizeof(w2);
+ ptrace_stop++;
+ break;
+ case 2: ASSERT_EQ(__NR_write, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_write mismatch: %m", ptrace_stop);
+ }
+ ptrace_stop++;
+ break;
+ case 3: ASSERT_EQ(__NR_exit_group, info.seccomp.nr) {
+ LOG_KILL_TRACEE("step %d nr __NR_exit_group mismatch: %m", ptrace_stop);
+ }
+ break;
+ default:
+ LOG_KILL_TRACEE("unexpected system call: %m");
+ break;
+
+ }
+ ASSERT_EQ(0,sys_ptrace(PTRACE_SET_SYSCALL_INFO, tracee_pid, info_size, (uintptr_t) &info)) {
+ LOG_KILL_TRACEE("PTRACE_SET_SYSCALL_INFO: %m");
+ }
+
+ ASSERT_EQ(0,sys_ptrace(PTRACE_CONT, tracee_pid, 0, 0)) {
+ LOG_KILL_TRACEE("PTRACE_CONT: %m");
+ }
+ } else {
+ LOG_KILL_TRACEE("unexpected signal: %m");
+ }
+ }
+}
+
TEST_HARNESS_MAIN
--
2.53.0
^ permalink raw reply related [flat|nested] 11+ messages in thread