From: Masami Hiramatsu <mhiramat@kernel.org>
To: Brendan Gregg <brendan.d.gregg@gmail.com>,
Steven Rostedt <rostedt@goodmis.org>,
Alexei Starovoitov <ast@kernel.org>
Cc: mhiramat@kernel.org, Ingo Molnar <mingo@kernel.org>,
bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
Daniel Borkmann <daniel@iogearbox.net>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
"David S . Miller" <davem@davemloft.net>,
paulmck@kernel.org, joel@joelfernandes.org,
"Naveen N . Rao" <naveen.n.rao@linux.ibm.com>,
Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Subject: [RFT PATCH 10/13] kprobes: Make free_*insn_slot() mutex-less
Date: Thu, 16 Jan 2020 23:45:56 +0900 [thread overview]
Message-ID: <157918595628.29301.5657205433519510960.stgit@devnote2> (raw)
In-Reply-To: <157918584866.29301.6941815715391411338.stgit@devnote2>
Rewrite kprobe_insn_cache implementation so that free_*insn_slot()
do not acquire kprobe_insn_cache->mutex. This allows us to call it
from call_rcu() callback function.
For this purpose, this introduces flip-flop dirty generation in
kprobe_insn_cache. When __free_insn_slot() is called, if a slot
is dirty (this means it can be in use even after RCU grace period),
it marks as "current-generation" dirty. The garbage collector
(kprobe_insn_cache_gc()) flips the generation bit, waits enough
safe period by synchronize_rcu_tasks(), and collect "previous-
generation" dirty slots. In the results, it collects the dirty
slots which was returned by __free_insn_slot() before the GC
starts waiting the period (and the dirty slots which is returned
while the safe period, will be marked as new-generation dirty.)
Since the GC is not concurrently running, we do not need more
than 2 generations. So it flips the generation bit instead of
counting it up.
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
arch/powerpc/kernel/optprobes.c | 1
include/linux/kprobes.h | 2
kernel/kprobes.c | 172 ++++++++++++++++++++++-----------------
3 files changed, 96 insertions(+), 79 deletions(-)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 024f7aad1952..8304f3814515 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -53,7 +53,6 @@ struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
/* insn_size initialized later */
.alloc = __ppc_alloc_insn_page,
.free = __ppc_free_insn_page,
- .nr_garbage = 0,
};
/*
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0f832817fca3..1cd53b7b8409 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -244,7 +244,7 @@ struct kprobe_insn_cache {
void (*free)(void *); /* free insn page */
struct list_head pages; /* list of kprobe_insn_page */
size_t insn_size; /* size of instruction slot */
- int nr_garbage;
+ int generation; /* dirty generation */
struct work_struct work;
};
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 60ffc9d54d87..5c12eb7fa8e1 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,8 +90,7 @@ struct kprobe_insn_page {
struct rcu_head rcu;
kprobe_opcode_t *insns; /* Page of instruction slots */
struct kprobe_insn_cache *cache;
- int nused;
- int ngarbage;
+ atomic_t nused;
char slot_used[];
};
@@ -106,8 +105,9 @@ static int slots_per_page(struct kprobe_insn_cache *c)
enum kprobe_slot_state {
SLOT_CLEAN = 0,
- SLOT_DIRTY = 1,
- SLOT_USED = 2,
+ SLOT_USED = 1,
+ SLOT_DIRTY0 = 2,
+ SLOT_DIRTY1 = 3,
};
void __weak *alloc_insn_page(void)
@@ -126,7 +126,6 @@ struct kprobe_insn_cache kprobe_insn_slots = {
.free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
- .nr_garbage = 0,
.work = __WORK_INITIALIZER(kprobe_insn_slots.work,
kprobe_insn_cache_gc),
};
@@ -137,6 +136,22 @@ static void kick_kprobe_insn_cache_gc(struct kprobe_insn_cache *c)
schedule_work(&c->work);
}
+static void *try_to_get_clean_slot(struct kprobe_insn_page *kip)
+{
+ struct kprobe_insn_cache *c = kip->cache;
+ int i;
+
+ for (i = 0; i < slots_per_page(c); i++) {
+ if (kip->slot_used[i] == SLOT_CLEAN) {
+ kip->slot_used[i] = SLOT_USED;
+ atomic_inc(&kip->nused);
+ return kip->insns + (i * c->insn_size);
+ }
+ }
+
+ return NULL;
+}
+
/**
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
@@ -145,25 +160,20 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
{
struct kprobe_insn_page *kip;
kprobe_opcode_t *slot = NULL;
+ bool reclaimable = false;
/* Since the slot array is not protected by rcu, we need a mutex */
mutex_lock(&c->mutex);
list_for_each_entry(kip, &c->pages, list) {
- if (kip->nused < slots_per_page(c)) {
- int i;
- for (i = 0; i < slots_per_page(c); i++) {
- if (kip->slot_used[i] == SLOT_CLEAN) {
- kip->slot_used[i] = SLOT_USED;
- kip->nused++;
- slot = kip->insns + (i * c->insn_size);
- goto out;
- }
- }
- /* kip->nused is broken. Fix it. */
- kip->nused = slots_per_page(c);
- WARN_ON(1);
+ if (atomic_read(&kip->nused) < slots_per_page(c)) {
+ slot = try_to_get_clean_slot(kip);
+ if (slot)
+ goto out;
+ reclaimable = true;
}
}
+ if (reclaimable)
+ kick_kprobe_insn_cache_gc(c);
/* All out of space. Need to allocate a new page. */
kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
@@ -183,8 +193,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
INIT_LIST_HEAD(&kip->list);
memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
kip->slot_used[0] = SLOT_USED;
- kip->nused = 1;
- kip->ngarbage = 0;
+ atomic_set(&kip->nused, 1);
kip->cache = c;
list_add_rcu(&kip->list, &c->pages);
slot = kip->insns;
@@ -193,90 +202,106 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
return slot;
}
-static void free_kprobe_insn_page(struct rcu_head *head)
+static void free_kprobe_insn_page_cb(struct rcu_head *head)
{
struct kprobe_insn_page *kip = container_of(head, typeof(*kip), rcu);
kfree(kip);
}
-/* Return 1 if all garbages are collected, otherwise 0. */
-static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static void free_kprobe_insn_page(struct kprobe_insn_page *kip)
{
- kip->slot_used[idx] = SLOT_CLEAN;
- kip->nused--;
- if (kip->nused == 0) {
- /*
- * Page is no longer in use. Free it unless
- * it's the last one. We keep the last one
- * so as not to have to set it up again the
- * next time somebody inserts a probe.
- */
- if (!list_is_singular(&kip->list)) {
- list_del_rcu(&kip->list);
- kip->cache->free(kip->insns);
- call_rcu(&kip->rcu, free_kprobe_insn_page);
- }
- return 1;
+ if (WARN_ON_ONCE(atomic_read(&kip->nused) != 0))
+ return;
+ /*
+ * Page is no longer in use. Free it unless
+ * it's the last one. We keep the last one
+ * so as not to have to set it up again the
+ * next time somebody inserts a probe.
+ */
+ if (!list_is_singular(&kip->list)) {
+ list_del_rcu(&kip->list);
+ kip->cache->free(kip->insns);
+ call_rcu(&kip->rcu, free_kprobe_insn_page_cb);
}
- return 0;
}
void kprobe_insn_cache_gc(struct work_struct *work)
{
struct kprobe_insn_cache *c = container_of(work, typeof(*c), work);
struct kprobe_insn_page *kip, *next;
+ int dirtygen = c->generation ? SLOT_DIRTY1 : SLOT_DIRTY0;
+ int i, nr;
mutex_lock(&c->mutex);
+
+ c->generation ^= 1; /* flip generation (0->1, 1->0) */
+
+ /* Make sure the generation update is shown in __free_insn_slot() */
+ smp_wmb();
+
/* Ensure no-one is running on the garbages. */
synchronize_rcu_tasks();
list_for_each_entry_safe(kip, next, &c->pages, list) {
- int i;
- if (kip->ngarbage == 0)
- continue;
- kip->ngarbage = 0; /* we will collect all garbages */
+ nr = 0;
+ /* Reclaim previous generation dirty slots */
for (i = 0; i < slots_per_page(c); i++) {
- if (kip->slot_used[i] == SLOT_DIRTY &&
- collect_one_slot(kip, i))
- break;
+ if (kip->slot_used[i] == dirtygen)
+ kip->slot_used[i] = SLOT_CLEAN;
+ else if (kip->slot_used[i] != SLOT_CLEAN)
+ nr++;
}
+ if (!nr)
+ free_kprobe_insn_page(kip);
}
- c->nr_garbage = 0;
mutex_unlock(&c->mutex);
}
+static struct kprobe_insn_page *
+find_kprobe_insn_page(struct kprobe_insn_cache *c, unsigned long addr)
+{
+ struct kprobe_insn_page *kip;
+
+ list_for_each_entry_rcu(kip, &c->pages, list) {
+ if (addr >= (unsigned long)kip->insns &&
+ addr < (unsigned long)kip->insns + PAGE_SIZE)
+ return kip;
+ }
+ return NULL;
+}
+
void __free_insn_slot(struct kprobe_insn_cache *c,
kprobe_opcode_t *slot, int dirty)
{
struct kprobe_insn_page *kip;
+ int dirtygen;
long idx;
- mutex_lock(&c->mutex);
- list_for_each_entry(kip, &c->pages, list) {
+ rcu_read_lock();
+ kip = find_kprobe_insn_page(c, (unsigned long)slot);
+ if (kip) {
idx = ((long)slot - (long)kip->insns) /
(c->insn_size * sizeof(kprobe_opcode_t));
- if (idx >= 0 && idx < slots_per_page(c))
+ /* Check double free */
+ if (WARN_ON(kip->slot_used[idx] != SLOT_USED))
goto out;
- }
- /* Could not find this slot. */
- WARN_ON(1);
- kip = NULL;
+
+ /* Make sure to use new generation */
+ smp_rmb();
+
+ dirtygen = c->generation ? SLOT_DIRTY1 : SLOT_DIRTY0;
+ if (dirty)
+ kip->slot_used[idx] = dirtygen;
+ else
+ kip->slot_used[idx] = SLOT_CLEAN;
+
+ if (!atomic_dec_return(&kip->nused))
+ kick_kprobe_insn_cache_gc(c);
+ } else
+ WARN_ON(1); /* Not found: what happen? */
out:
- /* Mark and sweep: this may sleep */
- if (kip) {
- /* Check double free */
- WARN_ON(kip->slot_used[idx] != SLOT_USED);
- if (dirty) {
- kip->slot_used[idx] = SLOT_DIRTY;
- kip->ngarbage++;
- if (++c->nr_garbage > slots_per_page(c))
- kick_kprobe_insn_cache_gc(c);
- } else {
- collect_one_slot(kip, idx);
- }
- }
- mutex_unlock(&c->mutex);
+ rcu_read_unlock();
}
/*
@@ -286,17 +311,11 @@ void __free_insn_slot(struct kprobe_insn_cache *c,
*/
bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
{
- struct kprobe_insn_page *kip;
bool ret = false;
rcu_read_lock();
- list_for_each_entry_rcu(kip, &c->pages, list) {
- if (addr >= (unsigned long)kip->insns &&
- addr < (unsigned long)kip->insns + PAGE_SIZE) {
- ret = true;
- break;
- }
- }
+ if (find_kprobe_insn_page(c, addr))
+ ret = true;
rcu_read_unlock();
return ret;
@@ -310,7 +329,6 @@ struct kprobe_insn_cache kprobe_optinsn_slots = {
.free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
- .nr_garbage = 0,
.work = __WORK_INITIALIZER(kprobe_optinsn_slots.work,
kprobe_insn_cache_gc),
};
next prev parent reply other threads:[~2020-01-16 14:46 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-16 14:44 [RFT PATCH 00/13] tracing: kprobes: Introduce async unregistration Masami Hiramatsu
2020-01-16 14:44 ` [RFT PATCH 01/13] kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex Masami Hiramatsu
2020-01-16 14:44 ` [RFT PATCH 02/13] kprobes: Remove redundant arch_disarm_kprobe() call Masami Hiramatsu
2020-01-16 14:44 ` [RFT PATCH 03/13] kprobes: Postpone optimizer until a bunch of probes (un)registered Masami Hiramatsu
2020-01-16 14:44 ` [RFT PATCH 04/13] kprobes: Make optimizer delay to 1 second Masami Hiramatsu
2020-01-22 0:29 ` Steven Rostedt
2020-01-22 7:23 ` Masami Hiramatsu
2020-01-22 12:11 ` Steven Rostedt
2020-01-22 13:12 ` Masami Hiramatsu
2020-01-22 16:54 ` Paul E. McKenney
2020-01-23 1:33 ` Masami Hiramatsu
2020-01-23 2:26 ` Paul E. McKenney
2020-01-23 6:13 ` Masami Hiramatsu
2020-01-23 10:03 ` Paul E. McKenney
2020-01-16 14:45 ` [RFT PATCH 05/13] tracing/kprobe: Use call_rcu to defer freeing event_file_link Masami Hiramatsu
2020-01-27 15:02 ` kbuild test robot
2020-01-27 15:02 ` kbuild test robot
2020-01-27 15:02 ` [RFC PATCH] tracing/kprobe: trace_kprobe_disabled_finished can be static kbuild test robot
2020-01-27 15:02 ` kbuild test robot
2020-01-28 15:02 ` Masami Hiramatsu
2020-01-28 15:02 ` Masami Hiramatsu
2020-01-16 14:45 ` [RFT PATCH 06/13] kprobes: Enable kprobe-booster with CONFIG_PREEMPT=y Masami Hiramatsu
2020-01-16 14:45 ` [RFT PATCH 07/13] kprobes: Use normal list traversal API if a mutex is held Masami Hiramatsu
2020-01-16 14:45 ` [RFT PATCH 08/13] kprobes: Use workqueue for reclaiming kprobe insn cache pages Masami Hiramatsu
2020-01-16 14:45 ` [RFT PATCH 09/13] kprobes: Free kprobe_insn_page asynchronously Masami Hiramatsu
2020-01-16 14:45 ` Masami Hiramatsu [this message]
2020-01-16 14:46 ` [RFT PATCH 11/13] kprobes: Add asynchronous unregistration APIs Masami Hiramatsu
2020-01-16 14:46 ` [RFT PATCH 12/13] tracing/kprobe: Free probe event asynchronously Masami Hiramatsu
2020-01-16 14:46 ` [RFT PATCH 13/13] tracing/kprobe: perf_event: Remove local kprobe " Masami Hiramatsu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=157918595628.29301.5657205433519510960.stgit@devnote2 \
--to=mhiramat@kernel.org \
--cc=acme@kernel.org \
--cc=anil.s.keshavamurthy@intel.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brendan.d.gregg@gmail.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=joel@joelfernandes.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=naveen.n.rao@linux.ibm.com \
--cc=paulmck@kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.