linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jiri Olsa <jolsa@kernel.org>
To: Oleg Nesterov <oleg@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Andrii Nakryiko <andrii@kernel.org>
Cc: bpf@vger.kernel.org, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Hao Luo <haoluo@google.com>, Steven Rostedt <rostedt@goodmis.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Alan Maguire <alan.maguire@oracle.com>,
	linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org
Subject: [PATCH bpf-next 06/13] uprobes/x86: Add uprobe syscall to speed up uprobe
Date: Wed, 11 Dec 2024 14:33:55 +0100	[thread overview]
Message-ID: <20241211133403.208920-7-jolsa@kernel.org> (raw)
In-Reply-To: <20241211133403.208920-1-jolsa@kernel.org>

Adding new uprobe syscall that calls uprobe handlers for given
'breakpoint' address.

The idea is that the 'breakpoint' address calls the user space
trampoline which executes the uprobe syscall.

The syscall handler reads the return address of the initial call
to retrieve the original 'breakpoint' address. With this address
we find the related uprobe object and call its consumers.

Adding the arch_uprobe_trampoline_mapping function that provides
uprobe trampoline mapping. This mapping is backed with one global
page initialized at __init time and shared by the all the mapping
instances.

We do not allow to execute uprobe syscall if the caller is not
from uprobe trampoline mapping.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 arch/x86/kernel/uprobes.c              | 80 ++++++++++++++++++++++++++
 include/linux/syscalls.h               |  2 +
 include/linux/uprobes.h                |  1 +
 kernel/events/uprobes.c                | 22 +++++++
 kernel/sys_ni.c                        |  1 +
 6 files changed, 107 insertions(+)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..88e388c7675b 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
 333	common	io_pgetevents		sys_io_pgetevents
 334	common	rseq			sys_rseq
 335	common	uretprobe		sys_uretprobe
+336	common	uprobe			sys_uprobe
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
 424	common	pidfd_send_signal	sys_pidfd_send_signal
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 22a17c149a55..23e4f2821cff 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -425,6 +425,86 @@ SYSCALL_DEFINE0(uretprobe)
 	return -1;
 }
 
+static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+	return -EPERM;
+}
+
+static struct vm_special_mapping tramp_mapping = {
+	.name   = "[uprobes-trampoline]",
+	.mremap = tramp_mremap,
+};
+
+SYSCALL_DEFINE0(uprobe)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct vm_area_struct *vma;
+	unsigned long bp_vaddr;
+	int err;
+
+	err = copy_from_user(&bp_vaddr, (void __user *)regs->sp + 3*8, sizeof(bp_vaddr));
+	if (err) {
+		force_sig(SIGILL);
+		return -1;
+	}
+
+	/* Allow execution only from uprobe trampolines. */
+	vma = vma_lookup(current->mm, regs->ip);
+	if (!vma || vma->vm_private_data != (void *) &tramp_mapping) {
+		force_sig(SIGILL);
+		return -1;
+	}
+
+	handle_syscall_uprobe(regs, bp_vaddr - 5);
+	return 0;
+}
+
+asm (
+	".pushsection .rodata\n"
+	".global uprobe_trampoline_entry\n"
+	"uprobe_trampoline_entry:\n"
+	"endbr64\n"
+	"push %rcx\n"
+	"push %r11\n"
+	"push %rax\n"
+	"movq $" __stringify(__NR_uprobe) ", %rax\n"
+	"syscall\n"
+	"pop %rax\n"
+	"pop %r11\n"
+	"pop %rcx\n"
+	"ret\n"
+	".global uprobe_trampoline_end\n"
+	"uprobe_trampoline_end:\n"
+	".popsection\n"
+);
+
+extern __visible u8 uprobe_trampoline_entry[];
+extern __visible u8 uprobe_trampoline_end[];
+
+const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	return user_64bit_mode(regs) ? &tramp_mapping : NULL;
+}
+
+static int __init arch_uprobes_init(void)
+{
+	unsigned long size = uprobe_trampoline_end - uprobe_trampoline_entry;
+	static struct page *pages[2];
+	struct page *page;
+
+	page = alloc_page(GFP_HIGHUSER);
+	if (!page)
+		return -ENOMEM;
+	pages[0] = page;
+	tramp_mapping.pages = (struct page **) &pages;
+	arch_uprobe_copy_ixol(page, 0, uprobe_trampoline_entry, size);
+	return 0;
+}
+
+late_initcall(arch_uprobes_init);
+
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6333204d451..002f4e1debe5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -994,6 +994,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 
 asmlinkage long sys_uretprobe(void);
 
+asmlinkage long sys_uprobe(void);
+
 /* pciconfig: alpha, arm, arm64, ia64, sparc */
 asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
 				unsigned long off, unsigned long len,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c4ee755ca2a1..5e9a33bfb747 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -232,6 +232,7 @@ extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
 extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
 extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
 extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
+extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f57918c624da..52f38d1ef276 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2729,6 +2729,28 @@ static void handle_swbp(struct pt_regs *regs)
 	rcu_read_unlock_trace();
 }
 
+void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr)
+{
+	struct uprobe *uprobe;
+	int is_swbp;
+
+	rcu_read_lock_trace();
+	uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
+	if (!uprobe)
+		goto unlock;
+
+	if (!get_utask())
+		goto unlock;
+
+	if (arch_uprobe_ignore(&uprobe->arch, regs))
+		goto unlock;
+
+	handler_chain(uprobe, regs);
+
+ unlock:
+	rcu_read_unlock_trace();
+}
+
 /*
  * Perform required fix-ups and disable singlestep.
  * Allow pending signals to take effect.
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c00a86931f8c..bf5d05c635ff 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -392,3 +392,4 @@ COND_SYSCALL(setuid16);
 COND_SYSCALL(rseq);
 
 COND_SYSCALL(uretprobe);
+COND_SYSCALL(uprobe);
-- 
2.47.0


  parent reply	other threads:[~2024-12-11 13:35 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-11 13:33 [PATCH bpf-next 00/13] uprobes: Add support to optimize usdt probes on x86_64 Jiri Olsa
2024-12-11 13:33 ` [PATCH bpf-next 01/13] uprobes: Rename arch_uretprobe_trampoline function Jiri Olsa
2024-12-13  0:42   ` Andrii Nakryiko
2024-12-11 13:33 ` [PATCH bpf-next 02/13] uprobes: Make copy_from_page global Jiri Olsa
2024-12-13  0:43   ` Andrii Nakryiko
2024-12-11 13:33 ` [PATCH bpf-next 03/13] uprobes: Add nbytes argument to uprobe_write_opcode Jiri Olsa
2024-12-13  0:45   ` Andrii Nakryiko
2024-12-11 13:33 ` [PATCH bpf-next 04/13] uprobes: Add arch_uprobe_verify_opcode function Jiri Olsa
2024-12-13  0:48   ` Andrii Nakryiko
2024-12-13 13:21     ` Jiri Olsa
2024-12-13 21:11       ` Andrii Nakryiko
2024-12-13 21:52         ` Jiri Olsa
2024-12-11 13:33 ` [PATCH bpf-next 05/13] uprobes: Add mapping for optimized uprobe trampolines Jiri Olsa
2024-12-13  1:01   ` Andrii Nakryiko
2024-12-13 13:42     ` Jiri Olsa
2024-12-13 21:58       ` Andrii Nakryiko
2024-12-11 13:33 ` Jiri Olsa [this message]
2024-12-13 13:48   ` [PATCH bpf-next 06/13] uprobes/x86: Add uprobe syscall to speed up uprobe Thomas Weißschuh
2024-12-13 14:51     ` Jiri Olsa
2024-12-13 15:12       ` Thomas Weißschuh
2024-12-13 21:52         ` Jiri Olsa
2024-12-14 13:21           ` Thomas Weißschuh
2024-12-16  8:03             ` Jiri Olsa
2024-12-11 13:33 ` [PATCH bpf-next 07/13] uprobes/x86: Add support to emulate nop5 instruction Jiri Olsa
2024-12-13 10:45   ` Peter Zijlstra
2024-12-13 13:02     ` Jiri Olsa
2024-12-11 13:33 ` [PATCH bpf-next 08/13] uprobes/x86: Add support to optimize uprobes Jiri Olsa
2024-12-13 10:49   ` Peter Zijlstra
2024-12-13 13:06     ` Jiri Olsa
2024-12-13 21:58   ` Andrii Nakryiko
2024-12-15 12:06   ` David Laight
2024-12-15 14:14     ` Oleg Nesterov
2024-12-16  8:08       ` Jiri Olsa
2024-12-16  9:18         ` David Laight
2024-12-16 10:12           ` Oleg Nesterov
2024-12-16 11:10             ` David Laight
2024-12-16 12:22               ` Oleg Nesterov
2024-12-16 12:50                 ` Jiri Olsa
2024-12-16 15:08                   ` David Laight
2024-12-16 16:06                     ` Jiri Olsa
2024-12-11 13:33 ` [PATCH bpf-next 09/13] selftests/bpf: Use 5-byte nop for x86 usdt probes Jiri Olsa
2024-12-13 21:58   ` Andrii Nakryiko
2024-12-16  8:32     ` Jiri Olsa
2024-12-16 23:06       ` Andrii Nakryiko
2024-12-11 13:33 ` [PATCH bpf-next 10/13] selftests/bpf: Add uprobe/usdt optimized test Jiri Olsa
2024-12-13 21:58   ` Andrii Nakryiko
2024-12-16  7:58     ` Jiri Olsa
2024-12-11 13:34 ` [PATCH bpf-next 11/13] selftests/bpf: Add hit/attach/detach race optimized uprobe test Jiri Olsa
2024-12-13 21:58   ` Andrii Nakryiko
2024-12-16  7:59     ` Jiri Olsa
2024-12-11 13:34 ` [PATCH bpf-next 12/13] selftests/bpf: Add uprobe syscall sigill signal test Jiri Olsa
2024-12-11 13:34 ` [PATCH bpf-next 13/13] selftests/bpf: Add 5-byte nop uprobe trigger bench Jiri Olsa
2024-12-13 21:57   ` Andrii Nakryiko
2024-12-16  7:56     ` Jiri Olsa
2024-12-13  0:43 ` [PATCH bpf-next 00/13] uprobes: Add support to optimize usdt probes on x86_64 Andrii Nakryiko
2024-12-13  9:46   ` Jiri Olsa
2024-12-13 10:51 ` Peter Zijlstra
2024-12-13 13:07   ` Jiri Olsa
2024-12-13 13:54     ` Peter Zijlstra
2024-12-13 14:05       ` Jiri Olsa
2024-12-13 18:39         ` Peter Zijlstra
2024-12-13 21:52           ` Jiri Olsa
2024-12-13 21:59             ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241211133403.208920-7-jolsa@kernel.org \
    --to=jolsa@kernel.org \
    --cc=alan.maguire@oracle.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).