public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: linux-kernel@vger.kernel.org, mhiramat@kernel.org
Cc: Eddy_Wu@trendmicro.com, x86@kernel.org, davem@davemloft.net,
	rostedt@goodmis.org, naveen.n.rao@linux.ibm.com,
	anil.s.keshavamurthy@intel.com, linux-arch@vger.kernel.org,
	cameron@moodycamel.com, oleg@redhat.com, will@kernel.org,
	paulmck@kernel.org, peterz@infradead.org
Subject: [RFC][PATCH 7/7] kprobes: Replace rp->free_instance with freelist
Date: Thu, 27 Aug 2020 18:12:44 +0200	[thread overview]
Message-ID: <20200827161754.594247581@infradead.org> (raw)
In-Reply-To: 20200827161237.889877377@infradead.org

Replace the rp->free_instances hlist with a lockless freelist. This gets
rid of rp->lock, and as a result kretprobes are now fully lockless.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/kprobes.h |   11 ++++++--
 kernel/kprobes.c        |   63 +++++++++++++++++++-----------------------------
 2 files changed, 34 insertions(+), 40 deletions(-)

--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -27,6 +27,7 @@
 #include <linux/rcupdate.h>
 #include <linux/mutex.h>
 #include <linux/ftrace.h>
+#include <linux/freelist.h>
 #include <asm/kprobes.h>
 
 #ifdef CONFIG_KPROBES
@@ -151,14 +152,18 @@ struct kretprobe {
 	int maxactive;
 	int nmissed;
 	size_t data_size;
-	struct hlist_head free_instances;
-	raw_spinlock_t lock;
+	struct freelist_head freelist;
 };
 
 struct kretprobe_instance {
 	union {
+		/*
+		 * Dodgy as heck, this relies on not clobbering freelist::refs.
+		 * llist: only clobbers freelist::next.
+		 * rcu: clobbers both, but only after rp::freelist is gone.
+		 */
+		struct freelist_node freelist;
 		struct llist_node llist;
-		struct hlist_node hlist;
 		struct rcu_head rcu;
 	};
 	struct kretprobe *rp;
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1219,12 +1219,8 @@ static void recycle_rp_inst(struct kretp
 	struct kretprobe *rp = ri->rp;
 
 	/* remove rp inst off the rprobe_inst_table */
-	hlist_del(&ri->hlist);
-	INIT_HLIST_NODE(&ri->hlist);
 	if (likely(rp)) {
-		raw_spin_lock(&rp->lock);
-		hlist_add_head(&ri->hlist, &rp->free_instances);
-		raw_spin_unlock(&rp->lock);
+		freelist_add(&ri->freelist, &rp->freelist);
 	} else {
 		kfree_rcu(ri, rcu);
 	}
@@ -1286,10 +1282,13 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
 static inline void free_rp_inst(struct kretprobe *rp)
 {
 	struct kretprobe_instance *ri;
-	struct hlist_node *next;
+	struct freelist_node *node;
+
+	node = rp->freelist.head;
+	while (node) {
+		ri = container_of(node, struct kretprobe_instance, freelist);
+		node = node->next;
 
-	hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
-		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
 }
@@ -1986,36 +1985,28 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_h
 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-	unsigned long hash, flags = 0;
 	struct kretprobe_instance *ri;
+	struct freelist_node *fn;
 
-	/* TODO: consider to only swap the RA after the last pre_handler fired */
-	hash = hash_ptr(current, KPROBE_HASH_BITS);
-	raw_spin_lock_irqsave(&rp->lock, flags);
-	if (!hlist_empty(&rp->free_instances)) {
-		ri = hlist_entry(rp->free_instances.first,
-				struct kretprobe_instance, hlist);
-		hlist_del(&ri->hlist);
-		raw_spin_unlock_irqrestore(&rp->lock, flags);
-
-		ri->rp = rp;
-		ri->task = current;
-
-		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-			raw_spin_lock_irqsave(&rp->lock, flags);
-			hlist_add_head(&ri->hlist, &rp->free_instances);
-			raw_spin_unlock_irqrestore(&rp->lock, flags);
-			return 0;
-		}
-
-		arch_prepare_kretprobe(ri, regs);
+	fn = freelist_try_get(&rp->freelist);
+	if (!fn) {
+		rp->nmissed++;
+		return 0;
+	}
 
-		__llist_add(&ri->llist, &current->kretprobe_instances);
+	ri = container_of(fn, struct kretprobe_instance, freelist);
+	ri->rp = rp;
+	ri->task = current;
 
-	} else {
-		rp->nmissed++;
-		raw_spin_unlock_irqrestore(&rp->lock, flags);
+	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+		freelist_add(&ri->freelist, &rp->freelist);
+		return 0;
 	}
+
+	arch_prepare_kretprobe(ri, regs);
+
+	__llist_add(&ri->llist, &current->kretprobe_instances);
+
 	return 0;
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
@@ -2072,8 +2063,7 @@ int register_kretprobe(struct kretprobe
 		rp->maxactive = num_possible_cpus();
 #endif
 	}
-	raw_spin_lock_init(&rp->lock);
-	INIT_HLIST_HEAD(&rp->free_instances);
+	rp->freelist.head = NULL;
 	for (i = 0; i < rp->maxactive; i++) {
 		inst = kmalloc(sizeof(struct kretprobe_instance) +
 			       rp->data_size, GFP_KERNEL);
@@ -2081,8 +2071,7 @@ int register_kretprobe(struct kretprobe
 			free_rp_inst(rp);
 			return -ENOMEM;
 		}
-		INIT_HLIST_NODE(&inst->hlist);
-		hlist_add_head(&inst->hlist, &rp->free_instances);
+		freelist_add(&inst->freelist, &rp->freelist);
 	}
 
 	rp->nmissed = 0;



  parent reply	other threads:[~2020-08-27 16:21 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-27 16:12 [RFC][PATCH 0/7] kprobes: Make kretprobes lockless Peter Zijlstra
2020-08-27 16:12 ` [RFC][PATCH 1/7] llist: Add nonatomic __llist_add() Peter Zijlstra
2020-08-27 16:12 ` [RFC][PATCH 2/7] sched: Fix try_invoke_on_locked_down_task() semantics Peter Zijlstra
2020-08-27 16:12 ` [RFC][PATCH 3/7] kprobes: Remove kretprobe hash Peter Zijlstra
2020-08-27 18:00   ` Masami Hiramatsu
2020-08-28  8:44     ` peterz
2020-08-28  9:07     ` Masami Hiramatsu
2020-08-28  4:44   ` Masami Hiramatsu
2020-08-28 13:11   ` Eddy_Wu
2020-08-28 13:38     ` peterz
2020-08-28 13:51     ` Masami Hiramatsu
2020-08-28 13:58       ` peterz
2020-08-28 14:19         ` Masami Hiramatsu
2020-08-28 14:11       ` Eddy_Wu
2020-08-28 14:19         ` peterz
2020-08-28 14:41           ` Masami Hiramatsu
2020-08-28 14:49     ` Masami Hiramatsu
2020-08-27 16:12 ` [RFC][PATCH 4/7] kprobe: Dont kfree() from breakpoint context Peter Zijlstra
2020-08-27 16:12 ` [RFC][PATCH 5/7] asm-generic/atomic: Add try_cmpxchg() fallbacks Peter Zijlstra
2020-08-27 16:12 ` [RFC][PATCH 6/7] freelist: Lock less freelist Peter Zijlstra
2020-08-27 16:37   ` peterz
     [not found]     ` <CAFCw3doX6KK5DwpG_OB331Mdw8uYeVqn8YPTjKh_a-m7ZB9+3A@mail.gmail.com>
2020-08-27 16:56       ` peterz
2020-08-27 17:00         ` Cameron
2020-08-27 19:08   ` Boqun Feng
2020-08-27 19:57     ` Cameron
2020-08-28  1:34       ` Boqun Feng
2020-08-28  4:03   ` Lai Jiangshan
2020-08-28 14:46   ` Oleg Nesterov
2020-08-28 15:29     ` peterz
2020-08-29  3:05       ` Cameron
2020-08-27 16:12 ` Peter Zijlstra [this message]
2020-08-28  8:48   ` [RFC][PATCH 7/7] kprobes: Replace rp->free_instance with freelist peterz
2020-08-28  9:13     ` Masami Hiramatsu
2020-08-28  9:18       ` peterz
2020-08-28 10:44         ` Masami Hiramatsu
2020-08-29  2:29         ` Cameron
2020-08-29  2:31           ` Cameron
2020-08-29  9:15             ` Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200827161754.594247581@infradead.org \
    --to=peterz@infradead.org \
    --cc=Eddy_Wu@trendmicro.com \
    --cc=anil.s.keshavamurthy@intel.com \
    --cc=cameron@moodycamel.com \
    --cc=davem@davemloft.net \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=naveen.n.rao@linux.ibm.com \
    --cc=oleg@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox