linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
	peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org,
	jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 06/11] perf/uprobe: SRCU-ify uprobe->consumer list
Date: Thu, 11 Jul 2024 13:02:41 +0200	[thread overview]
Message-ID: <20240711110400.880800153@infradead.org> (raw)
In-Reply-To: 20240711110235.098009979@infradead.org

With handle_swbp() hitting concurrently on (all) CPUs the
uprobe->register_rwsem can get very contended. Add an SRCU instance to
cover the consumer list and consumer lifetime.

Since the consumers are externally embedded structures, unregister will
have to suffer a synchronize_srcu().

A notable complication is the UPROBE_HANDLER_REMOVE logic which can
race against uprobe_register() such that it might want to remove a
freshly installed handler that didn't get called. In order to close
this hole, a seqcount is added. With that, the removal path can tell
if anything changed and bail out of the removal.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/uprobes.c |   60 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 10 deletions(-)

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
 #include <linux/khugepaged.h>
+#include <linux/srcu.h>
 
 #include <linux/uprobes.h>
 
@@ -49,6 +50,11 @@ static struct mutex uprobes_mmap_mutex[U
 
 DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
 
+/*
+ * Covers uprobe->consumers lifetime.
+ */
+DEFINE_STATIC_SRCU(uprobes_srcu);
+
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
 
@@ -57,6 +63,7 @@ struct uprobe {
 	refcount_t		ref;
 	struct rcu_head		rcu;
 	struct rw_semaphore	register_rwsem;
+	seqcount_t		register_seq;
 	struct rw_semaphore	consumer_rwsem;
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
@@ -760,6 +767,7 @@ static struct uprobe *alloc_uprobe(struc
 	uprobe->offset = offset;
 	uprobe->ref_ctr_offset = ref_ctr_offset;
 	init_rwsem(&uprobe->register_rwsem);
+	seqcount_init(&uprobe->register_seq);
 	init_rwsem(&uprobe->consumer_rwsem);
 
 	/* add to uprobes_tree, sorted on inode:offset */
@@ -782,8 +790,8 @@ static struct uprobe *alloc_uprobe(struc
 static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	down_write(&uprobe->consumer_rwsem);
-	uc->next = uprobe->consumers;
-	uprobe->consumers = uc;
+	WRITE_ONCE(uc->next, uprobe->consumers);
+	rcu_assign_pointer(uprobe->consumers, uc);
 	up_write(&uprobe->consumer_rwsem);
 }
 
@@ -800,7 +808,7 @@ static bool consumer_del(struct uprobe *
 	down_write(&uprobe->consumer_rwsem);
 	for (con = &uprobe->consumers; *con; con = &(*con)->next) {
 		if (*con == uc) {
-			*con = uc->next;
+			WRITE_ONCE(*con, uc->next);
 			ret = true;
 			break;
 		}
@@ -1139,9 +1147,13 @@ void uprobe_unregister(struct inode *ino
 		return;
 
 	down_write(&uprobe->register_rwsem);
+	raw_write_seqcount_begin(&uprobe->register_seq);
 	__uprobe_unregister(uprobe, uc);
+	raw_write_seqcount_end(&uprobe->register_seq);
 	up_write(&uprobe->register_rwsem);
 	put_uprobe(uprobe);
+
+	synchronize_srcu(&uprobes_srcu);
 }
 EXPORT_SYMBOL_GPL(uprobe_unregister);
 
@@ -1204,10 +1216,12 @@ static int __uprobe_register(struct inod
 	down_write(&uprobe->register_rwsem);
 	ret = -EAGAIN;
 	if (likely(uprobe_is_active(uprobe))) {
+		raw_write_seqcount_begin(&uprobe->register_seq);
 		consumer_add(uprobe, uc);
 		ret = register_for_each_vma(uprobe, uc);
 		if (ret)
 			__uprobe_unregister(uprobe, uc);
+		raw_write_seqcount_end(&uprobe->register_seq);
 	}
 	up_write(&uprobe->register_rwsem);
 	put_uprobe(uprobe);
@@ -1250,10 +1264,12 @@ int uprobe_apply(struct inode *inode, lo
 		return ret;
 
 	down_write(&uprobe->register_rwsem);
+	raw_write_seqcount_begin(&uprobe->register_seq);
 	for (con = uprobe->consumers; con && con != uc ; con = con->next)
 		;
 	if (con)
 		ret = register_for_each_vma(uprobe, add ? uc : NULL);
+	raw_write_seqcount_end(&uprobe->register_seq);
 	up_write(&uprobe->register_rwsem);
 	put_uprobe(uprobe);
 
@@ -2096,15 +2112,23 @@ static struct uprobe *find_active_uprobe
 	return uprobe;
 }
 
+#define for_each_consumer_rcu(pos, head) \
+	for (pos = rcu_dereference_raw(head); pos; \
+	     pos = rcu_dereference_raw(pos->next))
+
 static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
 	bool need_prep = false; /* prepare return uprobe, when needed */
 	bool had_handler = false;
+	unsigned int seq;
 
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
+	guard(srcu)(&uprobes_srcu);
+
+	seq = raw_read_seqcount_begin(&uprobe->register_seq);
+
+	for_each_consumer_rcu(uc, uprobe->consumers) {
 		int rc = 0;
 
 		if (uc->handler) {
@@ -2134,9 +2158,25 @@ static void handler_chain(struct uprobe
 	if (need_prep && !remove)
 		prepare_uretprobe(uprobe, regs); /* put bp at return */
 
-	if (remove)
+	if (remove) {
+		/*
+		 * Removing uprobes is a slow path, after all, the more probes
+		 * you remove, the less probe hits you get.
+		 *
+		 * This needs to serialize against uprobe_register(), such that
+		 * if the above RCU iteration missed a new handler that
+		 * would've liked to keep the probe, we don't go uninstall the
+		 * probe after it already ran register_for_each_vma().
+		 *
+		 * The rwsem ensures exclusivity against uprobe_register()
+		 * while the seqcount will avoid the removal if anything has
+		 * changed since we started.
+		 */
+		guard(rwsem_read)(&uprobe->register_rwsem);
+		if (read_seqcount_retry(&uprobe->register_seq, seq))
+			return;
 		unapply_uprobe(uprobe, current->mm);
-	up_read(&uprobe->register_rwsem);
+	}
 }
 
 static void
@@ -2145,12 +2185,12 @@ handle_uretprobe_chain(struct return_ins
 	struct uprobe *uprobe = ri->uprobe;
 	struct uprobe_consumer *uc;
 
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
+	guard(srcu)(&uprobes_srcu);
+
+	for_each_consumer_rcu(uc, uprobe->consumers) {
 		if (uc->ret_handler)
 			uc->ret_handler(uc, ri->func, regs);
 	}
-	up_read(&uprobe->register_rwsem);
 }
 
 static struct return_instance *find_next_ret_chain(struct return_instance *ri)



  parent reply	other threads:[~2024-07-11 11:07 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-11 11:02 [PATCH v2 00/11] perf/uprobe: Optimize uprobes Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 01/11] perf/uprobe: Re-indent labels Peter Zijlstra
2024-07-11 11:58   ` Jiri Olsa
2024-07-11 12:07     ` Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 02/11] perf/uprobe: Remove spurious whitespace Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 03/11] rbtree: Provide rb_find_rcu() / rb_find_add_rcu() Peter Zijlstra
2024-07-12 20:23   ` Andrii Nakryiko
2024-07-15 11:21     ` Peter Zijlstra
2024-07-15 17:13       ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 04/11] perf/uprobe: RCU-ify find_uprobe() Peter Zijlstra
2024-07-11 13:59   ` Masami Hiramatsu
2024-07-11 11:02 ` [PATCH v2 05/11] perf/uprobe: Simplify UPROBE_HANDLER_REMOVE logic Peter Zijlstra
2024-07-11 11:02 ` Peter Zijlstra [this message]
2024-07-12 21:06   ` [PATCH v2 06/11] perf/uprobe: SRCU-ify uprobe->consumer list Andrii Nakryiko
2024-07-15 11:25     ` Peter Zijlstra
2024-07-15 17:30       ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 07/11] perf/uprobe: Split uprobe_unregister() Peter Zijlstra
2024-07-12 21:10   ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 08/11] perf/uprobe: Convert (some) uprobe->refcount to SRCU Peter Zijlstra
2024-07-11 14:03   ` Jiri Olsa
2024-07-12 21:21   ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 09/11] srcu: Add __srcu_clone_read_lock() Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 10/11] perf/uprobe: Convert single-step and uretprobe to SRCU Peter Zijlstra
2024-07-11 16:06   ` Oleg Nesterov
2024-07-11 18:42     ` Peter Zijlstra
2024-07-12 10:26       ` Oleg Nesterov
2024-07-12 21:28   ` Andrii Nakryiko
2024-07-15 11:59     ` Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 11/11] perf/uprobe: Add uretprobe timer Peter Zijlstra
2024-07-11 13:19   ` Oleg Nesterov
2024-07-11 15:00     ` Peter Zijlstra
2024-07-11 15:55       ` Peter Zijlstra
2024-07-11 16:06         ` Peter Zijlstra
2024-07-12 21:43   ` Andrii Nakryiko
2024-07-15 11:41     ` Peter Zijlstra
2024-07-15 17:34       ` Andrii Nakryiko
2024-07-12  4:57 ` [PATCH v2 00/11] perf/uprobe: Optimize uprobes Andrii Nakryiko
2024-07-12  9:13   ` Peter Zijlstra
2024-07-12 13:10   ` Peter Zijlstra
2024-07-12 15:29     ` Andrii Nakryiko
2024-07-15 14:45   ` Peter Zijlstra
2024-07-15 17:10     ` Andrii Nakryiko
2024-07-15 18:10       ` Andrii Nakryiko
2024-07-19 18:42         ` Andrii Nakryiko
2024-07-27  0:18           ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240711110400.880800153@infradead.org \
    --to=peterz@infradead.org \
    --cc=andrii@kernel.org \
    --cc=clm@meta.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).