public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	mathieu.desnoyers@efficios.com, oleg@redhat.com,
	paulmck@linux.vnet.ibm.com, rusty@rustcorp.com.au,
	mingo@kernel.org
Subject: [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree
Date: Thu, 26 Feb 2015 12:43:09 +0100	[thread overview]
Message-ID: <20150226114309.GR21418@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <20150223174340.GD27767@tassilo.jf.intel.com>

On Mon, Feb 23, 2015 at 09:43:40AM -0800, Andi Kleen wrote:

> BTW if you really worry about perf overhead you could 
> gain far more (in some cases ms) by applying 
> http://comments.gmane.org/gmane.linux.kernel/1805207


Yeah, how about something like so instead; it would work for everybody.

It might maybe do with a few more comments.. also it appears to work,
but I'm barely awake so who knows.

---
Subject: module: Optimize __module_address() using a latched RB-tree

Optimize __module_address() using a latched RB-tree.

Currently __module_address() is using a linear search through all
modules in order to find the module corresponding to the provided
address. With a lot of modules this can take a lot of time.

One of the users of this is __kernel_text_address() which is employed in
many stack unwinders; which in turn are used by perf-callchain (possibly
from NMI context).

So by optimizing __module_address() we optimize many stack unwinders
which are used by both perf and tracing in performance sensitive code.

Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/module.h |    8 ++
 kernel/module.c        |  157 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 160 insertions(+), 5 deletions(-)

Index: linux-2.6/include/linux/module.h
===================================================================
--- linux-2.6.orig/include/linux/module.h	2015-02-26 10:51:31.750869653 +0100
+++ linux-2.6/include/linux/module.h	2015-02-26 10:52:15.895872761 +0100
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/jump_label.h>
 #include <linux/export.h>
+#include <linux/rbtree.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -210,12 +211,19 @@
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
+struct module_node {
+	struct module *mod;
+	struct rb_node node;
+};
+
 struct module {
 	enum module_state state;
 
 	/* Member of list of modules */
 	struct list_head list;
 
+	struct module_node tree_node[4];
+
 	/* Unique handle for this module */
 	char name[MODULE_NAME_LEN];
 
Index: linux-2.6/kernel/module.c
===================================================================
--- linux-2.6.orig/kernel/module.c	2015-02-26 10:51:31.750869653 +0100
+++ linux-2.6/kernel/module.c	2015-02-26 12:19:42.766743460 +0100
@@ -102,6 +102,180 @@
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
+
+
+/*
+ * Use a latched RB-tree for __module_address()
+ *
+ * The latch concept is a multi-value concurrency data structure which allows
+ * unserialized access and guarantees at least one version is stable.
+ *
+ * It is employed here to optimize __module_address(), which needs to find the
+ * module (if any) associated with an address. Such questions are best answered
+ * using a binary search tree.
+ *
+ * Since modules use non-overlapping memory ranges we can use a regular RB-tree
+ * (as opposed to the interval tree). However, BSTs cannot be iterated while
+ * modified.
+ *
+ * To overcome this we use the latched RB-tree, it basically consists of two
+ * RB-trees which are modified in order, ensuring one is always consistent.
+ *
+ * Things are somewhat more complicated because there are two ranges per
+ * module, but other than that its pretty straight forward.
+ *
+ * idx
+ *  0	- latch 0, init
+ *  1	- latch 1, init
+ *  2	- latch 0, core
+ *  3	- latch 1, core
+ */
+
+struct latch_tree_root {
+	seqcount_t	seq;
+	struct rb_root	tree[2];
+};
+
+static unsigned long mod_value(struct module *mod, int idx)
+{
+	if (idx & 2) {		/* core */
+		return (unsigned long)mod->module_core;
+	} else {		/* init */
+		return (unsigned long)mod->module_init;
+	}
+}
+
+static struct module *mod_entry(struct rb_node *n)
+{
+	struct module_node *mn = container_of(n, struct module_node, node);
+	return mn->mod;
+}
+
+static int mod_node_idx(struct module *m, struct rb_node *n)
+{
+	struct module_node *mn = container_of(n, struct module_node, node);
+	int idx = mn - m->tree_node;
+
+	BUG_ON(mn->mod != m);
+	BUG_ON((unsigned)idx > 3);
+
+	return idx;
+}
+
+static void __tree_insert(struct latch_tree_root *mod_tree, struct module *mod, int idx)
+{
+	struct rb_root *root = &mod_tree->tree[idx & 1];
+	struct module_node *mn = &mod->tree_node[idx];
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	unsigned long mod_val, m_val;
+	struct module *m;
+	int i;
+
+	mn->mod = mod;
+	mod_val = mod_value(mod, idx);
+
+	while (*link) {
+		parent = *link;
+		m = mod_entry(parent);
+		i = mod_node_idx(m, parent);
+		m_val = mod_value(m, i);
+
+		if (mod_val < m_val)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&mn->node, parent, link);
+	rb_insert_color(&mn->node, root);
+}
+
+static void __tree_remove(struct latch_tree_root *mod_tree, struct module *mod, int idx)
+{
+	struct rb_root *root = &mod_tree->tree[idx & 1];
+	struct module_node *mn = &mod->tree_node[idx];
+
+	rb_erase(&mn->node, root);
+}
+
+static struct module *__tree_find(struct rb_root *r, unsigned long addr)
+{
+	struct rb_node *n = r->rb_node;
+
+	while (n) {
+		struct module *m = mod_entry(n);
+		int idx = mod_node_idx(m, n);
+
+		if (idx & 2) {				/* core */
+			if (addr < (unsigned long)m->module_core)
+				n = n->rb_left;
+			else if (addr >= (unsigned long)m->module_core + m->core_size)
+				n = n->rb_right;
+			else
+				return m;
+		} else {				/* init */
+			if (addr < (unsigned long)m->module_init)
+				n = n->rb_left;
+			else if (addr >= (unsigned long)m->module_init + m->init_size)
+				n = n->rb_right;
+			else
+				return m;
+		}
+	}
+
+	return NULL;
+}
+
+static struct latch_tree_root mod_tree;
+
+static void mod_tree_insert(struct module *mod)
+{
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_insert(&mod_tree, mod, 0);	/* init */
+	__tree_insert(&mod_tree, mod, 2);		/* core */
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_insert(&mod_tree, mod, 1);	/* init */
+	__tree_insert(&mod_tree, mod, 3);		/* core */
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_remove(&mod_tree, mod, 0);	/* init */
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_remove(&mod_tree, mod, 1);	/* init */
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_remove(&mod_tree, mod, 0);	/* init */
+	__tree_remove(&mod_tree, mod, 2);		/* core */
+	raw_write_seqcount_latch(&mod_tree.seq);
+	if (mod->init_size)
+		__tree_remove(&mod_tree, mod, 1);	/* init */
+	__tree_remove(&mod_tree, mod, 3);		/* core */
+}
+
+static struct module *mod_tree_find(unsigned long addr)
+{
+	struct module *m;
+	unsigned int seq;
+
+	do {
+		seq = raw_read_seqcount(&mod_tree.seq);
+		m = __tree_find(&mod_tree.tree[seq & 1], addr);
+	} while (read_seqcount_retry(&mod_tree.seq, seq));
+
+	return m;
+}
+
 #ifdef CONFIG_KGDB_KDB
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
@@ -1854,6 +2028,7 @@
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	/* Remove this module from bug list, this uses list_del_rcu */
 	module_bug_cleanup(mod);
 	/* Wait for RCU synchronizing before releasing mod->list and buglist. */
@@ -3098,6 +3273,7 @@
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
+	mod_tree_remove_init(mod);
 	unset_module_init_ro_nx(mod);
 	module_arch_freeing_init(mod);
 	mod->module_init = NULL;
@@ -3168,6 +3344,7 @@
 		goto out;
 	}
 	list_add_rcu(&mod->list, &modules);
+	mod_tree_insert(mod);
 	err = 0;
 
 out:
@@ -3370,6 +3547,7 @@
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	wake_up_all(&module_wq);
 	/* Wait for RCU synchronizing before releasing mod->list. */
 	synchronize_rcu();
@@ -3810,13 +3988,13 @@
 	if (addr < module_addr_min || addr > module_addr_max)
 		return NULL;
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	mod = mod_tree_find(addr);
+	if (mod) {
+		BUG_ON(!within_module(addr, mod));
 		if (mod->state == MODULE_STATE_UNFORMED)
-			continue;
-		if (within_module(addr, mod))
-			return mod;
+			mod = NULL;
 	}
-	return NULL;
+	return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
 

  parent reply	other threads:[~2015-02-26 11:44 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-21  1:38 [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
2015-02-21  1:38 ` [PATCH 2/3] x86: Add trace point for MSR accesses Andi Kleen
2015-02-21  1:38 ` [PATCH 3/3] perf, x86: Remove old MSR perf tracing code Andi Kleen
2015-02-23 17:04 ` [PATCH 1/3] x86: Move msr accesses out of line Peter Zijlstra
2015-02-23 17:43   ` Andi Kleen
2015-02-25 12:27     ` Peter Zijlstra
2015-02-25 18:20       ` Andi Kleen
2015-02-25 18:34         ` Borislav Petkov
2015-02-26 11:43     ` Peter Zijlstra [this message]
2015-02-26 12:00       ` [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree Ingo Molnar
2015-02-26 14:12       ` Peter Zijlstra
2015-02-27 11:51         ` Rusty Russell
2015-02-26 16:02       ` Mathieu Desnoyers
2015-02-26 16:43         ` Peter Zijlstra
2015-02-26 16:55           ` Mathieu Desnoyers
2015-02-26 17:16             ` Peter Zijlstra
2015-02-26 17:22             ` Peter Zijlstra
2015-02-26 18:28           ` Paul E. McKenney
2015-02-26 19:06             ` Mathieu Desnoyers
2015-02-26 19:13             ` Peter Zijlstra
2015-02-26 19:41               ` Paul E. McKenney
2015-02-26 19:45                 ` Peter Zijlstra
2015-02-26 22:32                   ` Peter Zijlstra
2015-02-26 20:52                 ` Andi Kleen
2015-02-26 22:36                   ` Peter Zijlstra
2015-02-27 10:01                 ` Peter Zijlstra
2015-02-28 23:30                   ` Paul E. McKenney
2015-02-28 16:41               ` Peter Zijlstra
2015-02-28 16:56                 ` Peter Zijlstra
2015-02-28 23:32                   ` Paul E. McKenney
2015-03-02  9:24                     ` Peter Zijlstra
2015-03-02 16:58                       ` Paul E. McKenney
2015-02-27 12:02       ` Rusty Russell
2015-02-27 14:30         ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150226114309.GR21418@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=ak@linux.intel.com \
    --cc=andi@firstfloor.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=rusty@rustcorp.com.au \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox