From: Andrii Nakryiko <andrii@kernel.org>
To: linux-trace-kernel@vger.kernel.org, peterz@infradead.org,
oleg@redhat.com, rostedt@goodmis.org, mhiramat@kernel.org
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
jolsa@kernel.org, paulmck@kernel.org,
Andrii Nakryiko <andrii@kernel.org>
Subject: [PATCH 5/8] uprobes: traverse uprobe's consumer list locklessly under SRCU protection
Date: Wed, 31 Jul 2024 14:42:53 -0700 [thread overview]
Message-ID: <20240731214256.3588718-6-andrii@kernel.org> (raw)
In-Reply-To: <20240731214256.3588718-1-andrii@kernel.org>
uprobe->register_rwsem is one of a few big bottlenecks to scalability of
uprobes, so we need to get rid of it to improve uprobe performance and
multi-CPU scalability.
First, we turn uprobe's consumer list into a typical doubly-linked list
and utilize existing RCU-aware helpers for traversing such lists, as
well as for adding elements to and removing elements from them.
For entry uprobes we already have SRCU protection active since before
uprobe lookup. For uretprobes we hold a refcount, guaranteeing that the
uprobe won't go away from under us, but we add SRCU protection around
consumer list traversal.
Lastly, to keep handler_chain()'s UPROBE_HANDLER_REMOVE handling simple,
we remember whether any removal was requested during handler calls, but
then we double-check the decision under a proper register_rwsem using
consumers' filter callbacks. Handler removal is very rare, so this extra
lock won't hurt overall performance, and it lets us avoid the need for
any extra protection (e.g., seqcount locks).
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
include/linux/uprobes.h | 2 +-
kernel/events/uprobes.c | 97 ++++++++++++++++++++---------------------
2 files changed, 48 insertions(+), 51 deletions(-)
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 8d5bbad2048c..a1686c1ebcb6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,7 +35,7 @@ struct uprobe_consumer {
struct pt_regs *regs);
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
- struct uprobe_consumer *next;
+ struct list_head cons_node;
};
#ifdef CONFIG_UPROBES
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 71a8886608b1..3b42fd355256 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -59,7 +59,7 @@ struct uprobe {
struct rw_semaphore register_rwsem;
struct rw_semaphore consumer_rwsem;
struct list_head pending_list;
- struct uprobe_consumer *consumers;
+ struct list_head consumers;
struct inode *inode; /* Also hold a ref to inode */
struct rcu_head rcu;
loff_t offset;
@@ -778,6 +778,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
uprobe->inode = inode;
uprobe->offset = offset;
uprobe->ref_ctr_offset = ref_ctr_offset;
+ INIT_LIST_HEAD(&uprobe->consumers);
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
RB_CLEAR_NODE(&uprobe->rb_node);
@@ -803,34 +804,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
{
down_write(&uprobe->consumer_rwsem);
- uc->next = uprobe->consumers;
- uprobe->consumers = uc;
+ list_add_rcu(&uc->cons_node, &uprobe->consumers);
up_write(&uprobe->consumer_rwsem);
}
-/*
- * For uprobe @uprobe, delete the consumer @uc.
- * Return true if the @uc is deleted successfully
- * or return false.
- */
-static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
-{
- struct uprobe_consumer **con;
- bool ret = false;
-
- down_write(&uprobe->consumer_rwsem);
- for (con = &uprobe->consumers; *con; con = &(*con)->next) {
- if (*con == uc) {
- *con = uc->next;
- ret = true;
- break;
- }
- }
- up_write(&uprobe->consumer_rwsem);
-
- return ret;
-}
-
static int __copy_insn(struct address_space *mapping, struct file *filp,
void *insn, int nbytes, loff_t offset)
{
@@ -924,7 +901,8 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
bool ret = false;
down_read(&uprobe->consumer_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
+ srcu_read_lock_held(&uprobes_srcu)) {
ret = consumer_filter(uc, mm);
if (ret)
break;
@@ -1120,17 +1098,19 @@ void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
int err;
down_write(&uprobe->register_rwsem);
- if (WARN_ON(!consumer_del(uprobe, uc))) {
- err = -ENOENT;
- } else {
- err = register_for_each_vma(uprobe, NULL);
- /* TODO : cant unregister? schedule a worker thread */
- WARN(err, "leaking uprobe due to failed unregistration");
- }
+
+ list_del_rcu(&uc->cons_node);
+ err = register_for_each_vma(uprobe, NULL);
+
up_write(&uprobe->register_rwsem);
- if (!err)
- put_uprobe(uprobe);
+ /* TODO : cant unregister? schedule a worker thread */
+ if (WARN(err, "leaking uprobe due to failed unregistration"))
+ return;
+
+ put_uprobe(uprobe);
+
+ synchronize_srcu(&uprobes_srcu);
}
EXPORT_SYMBOL_GPL(uprobe_unregister);
@@ -1208,13 +1188,20 @@ EXPORT_SYMBOL_GPL(uprobe_register);
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
{
struct uprobe_consumer *con;
- int ret = -ENOENT;
+ int ret = -ENOENT, srcu_idx;
down_write(&uprobe->register_rwsem);
- for (con = uprobe->consumers; con && con != uc ; con = con->next)
- ;
- if (con)
- ret = register_for_each_vma(uprobe, add ? uc : NULL);
+
+ srcu_idx = srcu_read_lock(&uprobes_srcu);
+ list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
+ srcu_read_lock_held(&uprobes_srcu)) {
+ if (con == uc) {
+ ret = register_for_each_vma(uprobe, add ? uc : NULL);
+ break;
+ }
+ }
+ srcu_read_unlock(&uprobes_srcu, srcu_idx);
+
up_write(&uprobe->register_rwsem);
return ret;
@@ -2088,9 +2075,10 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
struct uprobe_consumer *uc;
int remove = UPROBE_HANDLER_REMOVE;
bool need_prep = false; /* prepare return uprobe, when needed */
+ bool has_consumers = false;
- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
+ srcu_read_lock_held(&uprobes_srcu)) {
int rc = 0;
if (uc->handler) {
@@ -2103,16 +2091,23 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
need_prep = true;
remove &= rc;
+ has_consumers = true;
}
if (need_prep && !remove)
prepare_uretprobe(uprobe, regs); /* put bp at return */
- if (remove && uprobe->consumers) {
- WARN_ON(!uprobe_is_active(uprobe));
- unapply_uprobe(uprobe, current->mm);
+ if (remove && has_consumers) {
+ down_read(&uprobe->register_rwsem);
+
+ /* re-check that removal is still required, this time under lock */
+ if (!filter_chain(uprobe, current->mm)) {
+ WARN_ON(!uprobe_is_active(uprobe));
+ unapply_uprobe(uprobe, current->mm);
+ }
+
+ up_read(&uprobe->register_rwsem);
}
- up_read(&uprobe->register_rwsem);
}
static void
@@ -2120,13 +2115,15 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
{
struct uprobe *uprobe = ri->uprobe;
struct uprobe_consumer *uc;
+ int srcu_idx;
- down_read(&uprobe->register_rwsem);
- for (uc = uprobe->consumers; uc; uc = uc->next) {
+ srcu_idx = srcu_read_lock(&uprobes_srcu);
+ list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
+ srcu_read_lock_held(&uprobes_srcu)) {
if (uc->ret_handler)
uc->ret_handler(uc, ri->func, regs);
}
- up_read(&uprobe->register_rwsem);
+ srcu_read_unlock(&uprobes_srcu, srcu_idx);
}
static struct return_instance *find_next_ret_chain(struct return_instance *ri)
--
2.43.0
next prev parent reply other threads:[~2024-07-31 21:43 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-31 21:42 [PATCH 0/8] uprobes: RCU-protected hot path optimizations Andrii Nakryiko
2024-07-31 21:42 ` [PATCH 1/8] rbtree: provide rb_find_rcu() / rb_find_add_rcu() Andrii Nakryiko
2024-07-31 21:42 ` [PATCH 2/8] uprobes: revamp uprobe refcounting and lifetime management Andrii Nakryiko
2024-08-01 11:09 ` Jiri Olsa
2024-08-01 16:49 ` Andrii Nakryiko
2024-08-01 22:07 ` Andrii Nakryiko
2024-08-02 8:50 ` Oleg Nesterov
2024-08-02 14:58 ` Andrii Nakryiko
2024-08-02 22:19 ` Oleg Nesterov
2024-08-02 11:11 ` Jiri Olsa
2024-08-02 15:03 ` Andrii Nakryiko
2024-08-05 13:44 ` Oleg Nesterov
2024-08-05 17:29 ` Andrii Nakryiko
2024-08-06 10:45 ` Oleg Nesterov
2024-07-31 21:42 ` [PATCH 3/8] uprobes: protected uprobe lifetime with SRCU Andrii Nakryiko
2024-08-01 12:23 ` Liao, Chang
2024-08-01 16:49 ` Andrii Nakryiko
2024-08-02 1:30 ` Liao, Chang
2024-08-05 14:51 ` Oleg Nesterov
2024-08-05 17:31 ` Andrii Nakryiko
2024-07-31 21:42 ` [PATCH 4/8] uprobes: get rid of enum uprobe_filter_ctx in uprobe filter callbacks Andrii Nakryiko
2024-07-31 21:42 ` Andrii Nakryiko [this message]
2024-08-01 14:27 ` [PATCH 5/8] uprobes: travers uprobe's consumer list locklessly under SRCU protection Jiri Olsa
2024-08-01 16:49 ` Andrii Nakryiko
2024-08-05 15:59 ` Oleg Nesterov
2024-08-05 17:31 ` Andrii Nakryiko
2024-08-06 10:54 ` Oleg Nesterov
2024-07-31 21:42 ` [PATCH 6/8] perf/uprobe: split uprobe_unregister() Andrii Nakryiko
2024-08-02 2:41 ` Liao, Chang
2024-08-02 15:05 ` Andrii Nakryiko
2024-08-05 20:01 ` Andrii Nakryiko
2024-08-06 1:50 ` Liao, Chang
2024-08-07 13:17 ` Oleg Nesterov
2024-08-07 15:24 ` Andrii Nakryiko
2024-07-31 21:42 ` [PATCH 7/8] uprobes: perform lockless SRCU-protected uprobes_tree lookup Andrii Nakryiko
2024-08-07 17:14 ` Andrii Nakryiko
2024-08-08 10:04 ` Oleg Nesterov
2024-08-08 14:29 ` Oleg Nesterov
2024-08-08 17:00 ` Andrii Nakryiko
2024-08-08 13:40 ` Oleg Nesterov
2024-08-10 14:00 ` Oleg Nesterov
2024-07-31 21:42 ` [PATCH 8/8] uprobes: switch to RCU Tasks Trace flavor for better performance Andrii Nakryiko
2024-08-01 9:35 ` Peter Zijlstra
2024-08-01 16:49 ` Andrii Nakryiko
2024-08-01 18:05 ` Paul E. McKenney
2024-08-07 13:29 ` [PATCH 0/8] uprobes: RCU-protected hot path optimizations Oleg Nesterov
2024-08-07 15:24 ` Andrii Nakryiko
2024-08-07 17:11 ` Oleg Nesterov
2024-08-07 17:31 ` Andrii Nakryiko
2024-08-07 18:24 ` Oleg Nesterov
2024-08-08 7:51 ` Liao, Chang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240731214256.3588718-6-andrii@kernel.org \
--to=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=oleg@redhat.com \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).