All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, andrii@kernel.org, oleg@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
	peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org,
	jolsa@kernel.org, clm@meta.com, paulmck@kernel.org
Subject: [PATCH v2 08/11] perf/uprobe: Convert (some) uprobe->refcount to SRCU
Date: Thu, 11 Jul 2024 13:02:43 +0200	[thread overview]
Message-ID: <20240711110401.096506262@infradead.org> (raw)
In-Reply-To: 20240711110235.098009979@infradead.org

With handle_swbp() hitting concurrently on (all) CPUs, potentially on
the same uprobe, the uprobe->refcount can get *very* hot. Move the
struct uprobe lifetime into uprobes_srcu such that it covers both the
uprobe and the uprobe->consumers list.

With this, handle_swbp() can use a single large SRCU critical section
to avoid taking a refcount on the uprobe for its duration.

Notably, the single-step and uretprobe paths need a reference that
leaves handle_swbp() and will, for now, still use ->refcount.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/uprobes.c |   68 ++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 27 deletions(-)

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -51,7 +51,7 @@ static struct mutex uprobes_mmap_mutex[U
 DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
 
 /*
- * Covers uprobe->consumers lifetime.
+ * Covers uprobe->consumers lifetime as well as struct uprobe.
  */
 DEFINE_STATIC_SRCU(uprobes_srcu);
 
@@ -626,7 +626,7 @@ static void put_uprobe(struct uprobe *up
 		mutex_lock(&delayed_uprobe_lock);
 		delayed_uprobe_remove(uprobe, NULL);
 		mutex_unlock(&delayed_uprobe_lock);
-		call_rcu(&uprobe->rcu, uprobe_free_rcu);
+		call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
 	}
 }
 
@@ -678,7 +678,7 @@ static struct uprobe *__find_uprobe(stru
 	struct rb_node *node = rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_key);
 
 	if (node)
-		return try_get_uprobe(__node_2_uprobe(node));
+		return __node_2_uprobe(node);
 
 	return NULL;
 }
@@ -691,7 +691,7 @@ static struct uprobe *find_uprobe(struct
 {
 	unsigned int seq;
 
-	guard(rcu)();
+	lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
 
 	do {
 		seq = read_seqcount_begin(&uprobes_seqcount);
@@ -1142,6 +1142,8 @@ void uprobe_unregister_nosync(struct ino
 {
 	struct uprobe *uprobe;
 
+	guard(srcu)(&uprobes_srcu);
+
 	uprobe = find_uprobe(inode, offset);
 	if (WARN_ON(!uprobe))
 		return;
@@ -1151,7 +1153,6 @@ void uprobe_unregister_nosync(struct ino
 	__uprobe_unregister(uprobe, uc);
 	raw_write_seqcount_end(&uprobe->register_seq);
 	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
 }
 EXPORT_SYMBOL_GPL(uprobe_unregister_nosync);
 
@@ -1263,6 +1264,8 @@ int uprobe_apply(struct inode *inode, lo
 	struct uprobe_consumer *con;
 	int ret = -ENOENT;
 
+	guard(srcu)(&uprobes_srcu);
+
 	uprobe = find_uprobe(inode, offset);
 	if (WARN_ON(!uprobe))
 		return ret;
@@ -1275,7 +1278,6 @@ int uprobe_apply(struct inode *inode, lo
 		ret = register_for_each_vma(uprobe, add ? uc : NULL);
 	raw_write_seqcount_end(&uprobe->register_seq);
 	up_write(&uprobe->register_rwsem);
-	put_uprobe(uprobe);
 
 	return ret;
 }
@@ -1929,10 +1931,14 @@ static void prepare_uretprobe(struct upr
 	if (!ri)
 		return;
 
+	ri->uprobe = try_get_uprobe(uprobe);
+	if (!ri->uprobe)
+		goto err_mem;
+
 	trampoline_vaddr = get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
 	if (orig_ret_vaddr == -1)
-		goto fail;
+		goto err_uprobe;
 
 	/* drop the entries invalidated by longjmp() */
 	chained = (orig_ret_vaddr == trampoline_vaddr);
@@ -1950,12 +1956,11 @@ static void prepare_uretprobe(struct upr
 			 * attack from user-space.
 			 */
 			uprobe_warn(current, "handle tail call");
-			goto fail;
+			goto err_uprobe;
 		}
 		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
 	}
 
-	ri->uprobe = get_uprobe(uprobe);
 	ri->func = instruction_pointer(regs);
 	ri->stack = user_stack_pointer(regs);
 	ri->orig_ret_vaddr = orig_ret_vaddr;
@@ -1966,7 +1971,10 @@ static void prepare_uretprobe(struct upr
 	utask->return_instances = ri;
 
 	return;
-fail:
+
+err_uprobe:
+	put_uprobe(ri->uprobe);
+err_mem:
 	kfree(ri);
 }
 
@@ -1982,22 +1990,31 @@ pre_ssout(struct uprobe *uprobe, struct
 	if (!utask)
 		return -ENOMEM;
 
+	utask->active_uprobe = try_get_uprobe(uprobe);
+	if (!utask->active_uprobe)
+		return -ESRCH;
+
 	xol_vaddr = xol_get_insn_slot(uprobe);
-	if (!xol_vaddr)
-		return -ENOMEM;
+	if (!xol_vaddr) {
+		err = -ENOMEM;
+		goto err_uprobe;
+	}
 
 	utask->xol_vaddr = xol_vaddr;
 	utask->vaddr = bp_vaddr;
 
 	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
-	if (unlikely(err)) {
-		xol_free_insn_slot(current);
-		return err;
-	}
+	if (unlikely(err))
+		goto err_xol;
 
-	utask->active_uprobe = uprobe;
 	utask->state = UTASK_SSTEP;
 	return 0;
+
+err_xol:
+	xol_free_insn_slot(current);
+err_uprobe:
+	put_uprobe(utask->active_uprobe);
+	return err;
 }
 
 /*
@@ -2128,7 +2145,7 @@ static void handler_chain(struct uprobe
 	bool had_handler = false;
 	unsigned int seq;
 
-	guard(srcu)(&uprobes_srcu);
+	lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
 
 	seq = raw_read_seqcount_begin(&uprobe->register_seq);
 
@@ -2276,6 +2293,8 @@ static void handle_swbp(struct pt_regs *
 	if (bp_vaddr == get_trampoline_vaddr())
 		return handle_trampoline(regs);
 
+	guard(srcu)(&uprobes_srcu);
+
 	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
@@ -2304,7 +2323,7 @@ static void handle_swbp(struct pt_regs *
 	 * new and not-yet-analyzed uprobe at the same address, restart.
 	 */
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
-		goto out;
+		return;
 
 	/*
 	 * Pairs with the smp_wmb() in prepare_uprobe().
@@ -2317,22 +2336,17 @@ static void handle_swbp(struct pt_regs *
 
 	/* Tracing handlers use ->utask to communicate with fetch methods */
 	if (!get_utask())
-		goto out;
+		return;
 
 	if (arch_uprobe_ignore(&uprobe->arch, regs))
-		goto out;
+		return;
 
 	handler_chain(uprobe, regs);
 
 	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-		goto out;
-
-	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
 
-	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
-out:
-	put_uprobe(uprobe);
+	pre_ssout(uprobe, regs, bp_vaddr);
 }
 
 /*



  parent reply	other threads:[~2024-07-11 11:07 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-11 11:02 [PATCH v2 00/11] perf/uprobe: Optimize uprobes Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 01/11] perf/uprobe: Re-indent labels Peter Zijlstra
2024-07-11 11:58   ` Jiri Olsa
2024-07-11 12:07     ` Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 02/11] perf/uprobe: Remove spurious whitespace Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 03/11] rbtree: Provide rb_find_rcu() / rb_find_add_rcu() Peter Zijlstra
2024-07-12 20:23   ` Andrii Nakryiko
2024-07-15 11:21     ` Peter Zijlstra
2024-07-15 17:13       ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 04/11] perf/uprobe: RCU-ify find_uprobe() Peter Zijlstra
2024-07-11 13:59   ` Masami Hiramatsu
2024-07-11 11:02 ` [PATCH v2 05/11] perf/uprobe: Simplify UPROBE_HANDLER_REMOVE logic Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 06/11] perf/uprobe: SRCU-ify uprobe->consumer list Peter Zijlstra
2024-07-12 21:06   ` Andrii Nakryiko
2024-07-15 11:25     ` Peter Zijlstra
2024-07-15 17:30       ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 07/11] perf/uprobe: Split uprobe_unregister() Peter Zijlstra
2024-07-12 21:10   ` Andrii Nakryiko
2024-07-11 11:02 ` Peter Zijlstra [this message]
2024-07-11 14:03   ` [PATCH v2 08/11] perf/uprobe: Convert (some) uprobe->refcount to SRCU Jiri Olsa
2024-07-12 21:21   ` Andrii Nakryiko
2024-07-11 11:02 ` [PATCH v2 09/11] srcu: Add __srcu_clone_read_lock() Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 10/11] perf/uprobe: Convert single-step and uretprobe to SRCU Peter Zijlstra
2024-07-11 16:06   ` Oleg Nesterov
2024-07-11 18:42     ` Peter Zijlstra
2024-07-12 10:26       ` Oleg Nesterov
2024-07-12 21:28   ` Andrii Nakryiko
2024-07-15 11:59     ` Peter Zijlstra
2024-07-11 11:02 ` [PATCH v2 11/11] perf/uprobe: Add uretprobe timer Peter Zijlstra
2024-07-11 13:19   ` Oleg Nesterov
2024-07-11 15:00     ` Peter Zijlstra
2024-07-11 15:55       ` Peter Zijlstra
2024-07-11 16:06         ` Peter Zijlstra
2024-07-12 21:43   ` Andrii Nakryiko
2024-07-15 11:41     ` Peter Zijlstra
2024-07-15 17:34       ` Andrii Nakryiko
2024-07-12  4:57 ` [PATCH v2 00/11] perf/uprobe: Optimize uprobes Andrii Nakryiko
2024-07-12  9:13   ` Peter Zijlstra
2024-07-12 13:10   ` Peter Zijlstra
2024-07-12 15:29     ` Andrii Nakryiko
2024-07-15 14:45   ` Peter Zijlstra
2024-07-15 17:10     ` Andrii Nakryiko
2024-07-15 18:10       ` Andrii Nakryiko
2024-07-19 18:42         ` Andrii Nakryiko
2024-07-27  0:18           ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240711110401.096506262@infradead.org \
    --to=peterz@infradead.org \
    --cc=andrii@kernel.org \
    --cc=clm@meta.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.