From: Jiri Olsa <jolsa@kernel.org>
To: Oleg Nesterov <oleg@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Andrii Nakryiko <andrii@kernel.org>
Cc: bpf@vger.kernel.org, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>,
John Fastabend <john.fastabend@gmail.com>,
Hao Luo <haoluo@google.com>, Steven Rostedt <rostedt@goodmis.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Alan Maguire <alan.maguire@oracle.com>,
linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org
Subject: [RFC perf/core 05/11] uprobes: Add mapping for optimized uprobe trampolines
Date: Tue, 5 Nov 2024 14:33:59 +0100 [thread overview]
Message-ID: <20241105133405.2703607-6-jolsa@kernel.org> (raw)
In-Reply-To: <20241105133405.2703607-1-jolsa@kernel.org>
Add an interface for installing a special mapping for a user space page
that will be used as a placeholder for the uprobe trampoline in the
following changes.
The get_tramp_area(vaddr) function either finds an existing 'callable' page
or creates a new one. 'Callable' means the page is reachable by a call
instruction (from the vaddr argument); this is decided by each arch via the
new arch_uprobe_is_callable function.
The put_tramp_area function either drops a refcount or destroys the special
mapping, and all the mappings are cleaned up when the process goes down.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/uprobes.h | 12 ++++
kernel/events/uprobes.c | 141 ++++++++++++++++++++++++++++++++++++++++
kernel/fork.c | 2 +
3 files changed, 155 insertions(+)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index be306028ed59..222d8e82cee2 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -172,6 +172,15 @@ struct xol_area;
struct uprobes_state {
struct xol_area *xol_area;
+ /* list of this mm's tramp_area entries, linked through tramp_area::node */
+ struct hlist_head tramp_head;
+ /* held by get_tramp_area()/put_tramp_area() while they walk or change tramp_head */
+ struct mutex tramp_mutex;
+};
+
+/*
+ * Bookkeeping for one page installed as a "[uprobes-trampoline]" special
+ * mapping; instances live on uprobes_state::tramp_head.
+ */
+struct tramp_area {
+ unsigned long vaddr; /* user address the special mapping is installed at */
+ struct page *page; /* page the mapping's fault handler hands out */
+ struct hlist_node node; /* entry in uprobes_state::tramp_head */
+ refcount_t ref; /* 1 at creation; get_tramp_area() takes, put_tramp_area() drops */
};
extern void __init uprobes_init(void);
@@ -219,6 +228,9 @@ extern int uprobe_verify_opcode(struct page *page, unsigned long vaddr, uprobe_o
extern int arch_uprobe_verify_opcode(struct page *page, unsigned long vaddr,
uprobe_opcode_t *new_opcode, void *data);
extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int len, void *data);
+struct tramp_area *get_tramp_area(unsigned long vaddr);
+void put_tramp_area(struct tramp_area *area);
+bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
#else /* !CONFIG_UPROBES */
struct uprobes_state {
};
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 944d9df1f081..a44305c559a4 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -616,6 +616,145 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
(uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE, NULL);
}
+/*
+ * Weak default of the per-arch hook that, per the patch description,
+ * decides whether a trampoline at @vtramp is reachable by a call
+ * instruction placed at @vaddr.
+ *
+ * This generic version always returns false, so unless an architecture
+ * overrides it get_tramp_area() neither finds nor creates an area.
+ */
+bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
+{
+ return false;
+}
+
+/*
+ * Pick an address for a new trampoline page usable from @vaddr.
+ *
+ * Iterates current->mm's VMAs starting from address 0 and looks at each
+ * pair of consecutive VMAs.  The end of the lower VMA is returned for the
+ * first pair that leaves at least PAGE_SIZE between them and for which
+ * arch_uprobe_is_callable() accepts that address for @vaddr.
+ *
+ * Only holes between two VMAs are examined.  Returns 0 when nothing fits.
+ */
+static unsigned long find_nearest_page(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *lower, *upper;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	lower = vma_next(&vmi);
+	for (upper = vma_next(&vmi); upper;
+	     lower = upper, upper = vma_next(&vmi)) {
+		/* the hole must be able to hold one page */
+		if (upper->vm_start - lower->vm_end < PAGE_SIZE)
+			continue;
+
+		if (arch_uprobe_is_callable(lower->vm_end, vaddr))
+			return lower->vm_end;
+	}
+
+	return 0;
+}
+
+/*
+ * Fault handler of the "[uprobes-trampoline]" special mapping.
+ *
+ * Looks up the tramp_area whose vaddr equals the start of the faulting
+ * @vma on that mm's tramp_head list and hands its page to the fault core
+ * with an extra page reference taken.
+ *
+ * Returns 0 when a matching area is found.  When none matches, returns
+ * VM_FAULT_SIGBUS: the return type is vm_fault_t, so a negative errno
+ * such as -EINVAL is not a valid value here.
+ */
+static vm_fault_t tramp_fault(const struct vm_special_mapping *sm,
+			      struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct hlist_head *head = &vma->vm_mm->uprobes_state.tramp_head;
+	struct tramp_area *area;
+
+	hlist_for_each_entry(area, head, node) {
+		if (vma->vm_start == area->vaddr) {
+			vmf->page = area->page;
+			get_page(vmf->page);
+			return 0;
+		}
+	}
+
+	return VM_FAULT_SIGBUS;
+}
+
+/*
+ * .mremap callback of the trampoline special mapping: unconditionally
+ * returns -EPERM, ignoring both arguments.
+ * NOTE(review): presumably this is what makes mremap() of the trampoline
+ * VMA fail -- confirm against the special-mapping mremap path.
+ */
+static int tramp_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+ return -EPERM;
+}
+
+/*
+ * Descriptor handed to _install_special_mapping() by create_tramp_area();
+ * it names the mapping and wires up the fault and mremap callbacks above.
+ */
+static const struct vm_special_mapping tramp_mapping = {
+ .name = "[uprobes-trampoline]",
+ .fault = tramp_fault,
+ .mremap = tramp_mremap,
+};
+
+/*
+ * Allocate a tramp_area for @vaddr and install its special mapping.
+ *
+ * The mapping address comes from find_nearest_page(); one page is
+ * allocated to back it and the area starts with a reference count of 1.
+ * The area is not added to any list here -- get_tramp_area() does that.
+ *
+ * Returns the new area, or NULL if no address was found, an allocation
+ * failed or the mapping could not be installed.
+ *
+ * NOTE(review): no mmap lock is taken in this function; confirm what the
+ * callers are expected to hold around _install_special_mapping().
+ */
+static struct tramp_area *create_tramp_area(unsigned long vaddr)
+{
+	struct vm_area_struct *vma;
+	struct tramp_area *new;
+	unsigned long tramp_vaddr;
+
+	tramp_vaddr = find_nearest_page(vaddr);
+	if (!tramp_vaddr)
+		return NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (unlikely(!new))
+		return NULL;
+
+	new->page = alloc_page(GFP_HIGHUSER);
+	if (!new->page)
+		goto err_free_area;
+
+	new->vaddr = tramp_vaddr;
+	refcount_set(&new->ref, 1);
+
+	vma = _install_special_mapping(current->mm, new->vaddr, PAGE_SIZE,
+				VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
+				&tramp_mapping);
+	if (IS_ERR(vma))
+		goto err_free_page;
+
+	return new;
+
+err_free_page:
+	__free_page(new->page);
+err_free_area:
+	kfree(new);
+	return NULL;
+}
+
+/*
+ * get_tramp_area - get a trampoline area callable from @vaddr
+ *
+ * With tramp_mutex held, reuses the first area on current->mm's
+ * tramp_head list that arch_uprobe_is_callable() accepts for @vaddr and
+ * takes a reference on it.  If none qualifies, a new area is created and
+ * added to the head of the list.
+ *
+ * Returns the area, or NULL when no existing area qualifies and creating
+ * one fails.  Drop the reference with put_tramp_area().
+ */
+struct tramp_area *get_tramp_area(unsigned long vaddr)
+{
+	struct uprobes_state *state = &current->mm->uprobes_state;
+	struct tramp_area *area = NULL;
+
+	mutex_lock(&state->tramp_mutex);
+	hlist_for_each_entry(area, &state->tramp_head, node) {
+		if (arch_uprobe_is_callable(area->vaddr, vaddr)) {
+			refcount_inc(&area->ref);
+			goto unlock;
+		}
+	}
+
+	/* nothing reusable on the list: try to set up a fresh area */
+	area = create_tramp_area(vaddr);
+	if (area)
+		hlist_add_head(&area->node, &state->tramp_head);
+
+unlock:
+	mutex_unlock(&state->tramp_mutex);
+	return area;
+}
+
+/*
+ * Unlink @area from the list it sits on, drop the reference on its page
+ * and free the descriptor.  The VMA installed by create_tramp_area() is
+ * not touched here.
+ *
+ * put_tramp_area() calls this with tramp_mutex held; clear_tramp_head()
+ * calls it without taking the mutex.
+ */
+static void destroy_tramp_area(struct tramp_area *area)
+{
+ hlist_del(&area->node);
+ put_page(area->page);
+ kfree(area);
+}
+
+/*
+ * put_tramp_area - drop a reference obtained from get_tramp_area()
+ *
+ * A NULL @area is accepted and ignored.  Otherwise the reference count is
+ * decremented under current->mm's tramp_mutex and the area is destroyed
+ * once the count reaches zero.
+ */
+void put_tramp_area(struct tramp_area *area)
+{
+	struct uprobes_state *state;
+
+	if (!area)
+		return;
+
+	state = &current->mm->uprobes_state;
+
+	mutex_lock(&state->tramp_mutex);
+	if (refcount_dec_and_test(&area->ref))
+		destroy_tramp_area(area);
+	mutex_unlock(&state->tramp_mutex);
+}
+
+/*
+ * Destroy every tramp_area still linked on @mm's tramp_head, regardless
+ * of its reference count.  Invoked from uprobe_clear_state(); tramp_mutex
+ * is not taken here.
+ */
+static void clear_tramp_head(struct mm_struct *mm)
+{
+	struct hlist_head *head = &mm->uprobes_state.tramp_head;
+
+	/* destroy_tramp_area() unlinks the entry, so keep taking the first one */
+	while (!hlist_empty(head))
+		destroy_tramp_area(hlist_entry(head->first,
+					       struct tramp_area, node));
+}
+
/* uprobe should have guaranteed positive refcount */
static struct uprobe *get_uprobe(struct uprobe *uprobe)
{
@@ -1788,6 +1927,8 @@ void uprobe_clear_state(struct mm_struct *mm)
delayed_uprobe_remove(NULL, mm);
mutex_unlock(&delayed_uprobe_lock);
+ clear_tramp_head(mm);
+
if (!area)
return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..b1fe431e5cce 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1248,6 +1248,8 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
mm->uprobes_state.xol_area = NULL;
+ mutex_init(&mm->uprobes_state.tramp_mutex);
+ INIT_HLIST_HEAD(&mm->uprobes_state.tramp_head);
#endif
}
--
2.47.0
next prev parent reply other threads:[~2024-11-05 13:35 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-05 13:33 [RFC 00/11] uprobes: Add support to optimize usdt probes on x86_64 Jiri Olsa
2024-11-05 13:33 ` [RFC perf/core 01/11] uprobes: Rename arch_uretprobe_trampoline function Jiri Olsa
2024-11-05 13:33 ` [RFC perf/core 02/11] uprobes: Make copy_from_page global Jiri Olsa
2024-11-14 23:40 ` Andrii Nakryiko
2024-11-16 21:41 ` Jiri Olsa
2024-11-05 13:33 ` [RFC perf/core 03/11] uprobes: Add len argument to uprobe_write_opcode Jiri Olsa
2024-11-14 23:41 ` Andrii Nakryiko
2024-11-16 21:41 ` Jiri Olsa
2024-11-05 13:33 ` [RFC perf/core 04/11] uprobes: Add data argument to uprobe_write_opcode function Jiri Olsa
2024-11-14 23:41 ` Andrii Nakryiko
2024-11-16 21:43 ` Jiri Olsa
2024-11-05 13:33 ` Jiri Olsa [this message]
2024-11-05 14:23 ` [RFC perf/core 05/11] uprobes: Add mapping for optimized uprobe trampolines Peter Zijlstra
2024-11-05 16:33 ` Jiri Olsa
2024-11-14 23:44 ` Andrii Nakryiko
2024-11-16 21:44 ` Jiri Olsa
2024-11-19 6:06 ` Andrii Nakryiko
2024-11-19 9:13 ` Peter Zijlstra
2024-11-19 15:15 ` Jiri Olsa
2024-11-21 0:07 ` Andrii Nakryiko
2024-11-21 11:53 ` Peter Zijlstra
2024-11-21 16:02 ` Alexei Starovoitov
2024-11-21 16:34 ` Peter Zijlstra
2024-11-21 16:47 ` Alexei Starovoitov
2024-11-21 19:38 ` Mark Rutland
2024-11-14 23:44 ` Andrii Nakryiko
2024-11-16 21:44 ` Jiri Olsa
2024-11-19 6:05 ` Andrii Nakryiko
2024-11-19 15:14 ` Jiri Olsa
2024-11-21 0:10 ` Andrii Nakryiko
2024-11-05 13:34 ` [RFC perf/core 06/11] uprobes: Add uprobe syscall to speed up uprobe Jiri Olsa
2024-11-05 13:34 ` [RFC perf/core 07/11] uprobes/x86: Add support to optimize uprobes Jiri Olsa
2024-11-14 23:44 ` Andrii Nakryiko
2024-11-16 21:44 ` Jiri Olsa
2024-11-18 8:18 ` Masami Hiramatsu
2024-11-18 9:39 ` Jiri Olsa
2024-11-05 13:34 ` [RFC bpf-next 08/11] selftests/bpf: Use 5-byte nop for x86 usdt probes Jiri Olsa
2024-11-05 13:34 ` [RFC bpf-next 09/11] selftests/bpf: Add usdt trigger bench Jiri Olsa
2024-11-14 23:40 ` Andrii Nakryiko
2024-11-16 21:45 ` Jiri Olsa
2024-11-19 6:08 ` Andrii Nakryiko
2024-11-05 13:34 ` [RFC bpf-next 10/11] selftests/bpf: Add uprobe/usdt optimized test Jiri Olsa
2024-11-05 13:34 ` [RFC bpf-next 11/11] selftests/bpf: Add hit/attach/detach race optimized uprobe test Jiri Olsa
2024-11-17 11:49 ` [RFC 00/11] uprobes: Add support to optimize usdt probes on x86_64 Peter Zijlstra
2024-11-18 9:29 ` Jiri Olsa
2024-11-18 10:06 ` Mark Rutland
2024-11-19 6:13 ` Andrii Nakryiko
2024-11-21 18:18 ` Mark Rutland
2024-11-26 19:13 ` Andrii Nakryiko
2024-11-18 8:04 ` Masami Hiramatsu
2024-11-18 9:52 ` Jiri Olsa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241105133405.2703607-6-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=alan.maguire@oracle.com \
--cc=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=oleg@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.