From: Peter Zijlstra <peterz@infradead.org>
To: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Andi Kleen <ak@linux.intel.com>, Andi Kleen <andi@firstfloor.org>,
x86@kernel.org, linux-kernel@vger.kernel.org, oleg@redhat.com,
rusty@rustcorp.com.au, mingo@kernel.org
Subject: Re: [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree
Date: Thu, 26 Feb 2015 23:32:20 +0100 [thread overview]
Message-ID: <20150226223220.GZ24151@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <20150226194516.GA21418@twins.programming.kicks-ass.net>
On Thu, Feb 26, 2015 at 08:45:16PM +0100, Peter Zijlstra wrote:
> On Thu, Feb 26, 2015 at 11:41:44AM -0800, Paul E. McKenney wrote:
> > Or just have the *_ONCE() calls in the single version. I bet that there
> > is no visible performance loss.
>
> I'll go play with that tomorrow, see what GCC makes of it.
Who needs sleep anyhow.. ;-)
21 extra bytes, I've not quite yet figured out what for.
The below changes the insert/erase functions to use WRITE_ONCE() for the
rb_{left,right} assignments and removes one program order loop.
I did make squiggles for all scenarios to see if any of the intermediate
states had loops in, I only found the one case (erase, case 3).
I also added an rb_link_node_rcu() which does the store_release thing --
I didn't want to include rcupdate.h for fear of header insanity.
We need the store_release because of the node setup in there. So I can
forget about my mod_tree_init().
---
include/linux/rbtree.h | 10 ++++++
include/linux/rbtree_augmented.h | 21 ++++++++----
lib/rbtree.c | 71 +++++++++++++++++++++++++++-------------
3 files changed, 73 insertions(+), 29 deletions(-)
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index fb31765e935a..ae9df86bf9ba 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/stddef.h>
+#include <asm/barrier.h>
struct rb_node {
unsigned long __rb_parent_color;
@@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
*rb_link = node;
}
+static inline void rb_link_node_rcu(struct rb_node * node, struct rb_node * parent,
+ struct rb_node ** rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ smp_store_release(rb_link, node);
+}
+
#define rb_entry_safe(ptr, type, member) \
({ typeof(ptr) ____ptr = (ptr); \
____ptr ? rb_entry(____ptr, type, member) : NULL; \
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 378c5ee75f78..14d7b831b63a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -137,7 +137,8 @@ static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
@@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
@@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
@@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}
- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c16c81a3d430..0b3ec6a75b76 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,6 +44,25 @@
* parentheses and have some accompanying text comment.
*/
+/*
+ * All stores to the tree structure (rb_left and rb_right) must be done using
+ * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
+ * tree structure as seen in program order.
+ *
+ * These two requirements will allow lockless iteration of the tree -- not
+ * correct iteration mind you, tree rotations are not atomic so a lookup might
+ * miss entire subtrees.
+ *
+ * But they do guarantee that any such traversal will only see valid elements
+ * and that it will indeed complete -- does not get stuck in a loop.
+ *
+ * NOTE:
+ *
+ * Stores to __rb_parent_color are not important for simple lookups so those
+ * are left undone as of now. Nor did I check for loops involving parent
+ * pointers.
+ */
+
static inline void rb_set_black(struct rb_node *rb)
{
rb->__rb_parent_color |= RB_BLACK;
@@ -129,8 +148,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* This still leaves us in violation of 4), the
* continuation into Case 3 will fix that.
*/
- parent->rb_right = tmp = node->rb_left;
- node->rb_left = parent;
+ tmp = node->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp);
+ WRITE_ONCE(node->rb_left, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -149,8 +169,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* / \
* n U
*/
- gparent->rb_left = tmp; /* == parent->rb_right */
- parent->rb_right = gparent;
+ WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
+ WRITE_ONCE(parent->rb_right, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -171,8 +191,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
tmp = parent->rb_left;
if (node == tmp) {
/* Case 2 - right rotate at parent */
- parent->rb_left = tmp = node->rb_right;
- node->rb_right = parent;
+ tmp = node->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp);
+ WRITE_ONCE(node->rb_right, parent);
if (tmp)
rb_set_parent_color(tmp, parent,
RB_BLACK);
@@ -183,8 +204,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
}
/* Case 3 - left rotate at gparent */
- gparent->rb_right = tmp; /* == parent->rb_left */
- parent->rb_left = gparent;
+ WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
+ WRITE_ONCE(parent->rb_left, gparent);
if (tmp)
rb_set_parent_color(tmp, gparent, RB_BLACK);
__rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -224,8 +245,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* Sl Sr N Sl
*/
- parent->rb_right = tmp1 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp1 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp1);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -275,9 +297,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* \
* Sr
*/
- sibling->rb_left = tmp1 = tmp2->rb_right;
- tmp2->rb_right = sibling;
- parent->rb_right = tmp2;
+ tmp1 = tmp2->rb_right;
+ WRITE_ONCE(sibling->rb_left, tmp1);
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(tmp2->rb_right, sibling);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -297,8 +320,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
* / \ / \
* (sl) sr N (sl)
*/
- parent->rb_right = tmp2 = sibling->rb_left;
- sibling->rb_left = parent;
+ tmp2 = sibling->rb_left;
+ WRITE_ONCE(parent->rb_right, tmp2);
+ WRITE_ONCE(sibling->rb_left, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
@@ -310,8 +334,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = parent->rb_left;
if (rb_is_red(sibling)) {
/* Case 1 - right rotate at parent */
- parent->rb_left = tmp1 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp1 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp1);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, parent, RB_BLACK);
__rb_rotate_set_parents(parent, sibling, root,
RB_RED);
@@ -336,9 +361,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
break;
}
/* Case 3 - right rotate at sibling */
- sibling->rb_right = tmp1 = tmp2->rb_left;
- tmp2->rb_left = sibling;
- parent->rb_left = tmp2;
+ tmp1 = tmp2->rb_left;
+ WRITE_ONCE(sibling->rb_right, tmp1);
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(tmp2->rb_left, sibling);
if (tmp1)
rb_set_parent_color(tmp1, sibling,
RB_BLACK);
@@ -347,8 +373,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
sibling = tmp2;
}
/* Case 4 - left rotate at parent + color flips */
- parent->rb_left = tmp2 = sibling->rb_right;
- sibling->rb_right = parent;
+ tmp2 = sibling->rb_right;
+ WRITE_ONCE(parent->rb_left, tmp2);
+ WRITE_ONCE(sibling->rb_right, parent);
rb_set_parent_color(tmp1, sibling, RB_BLACK);
if (tmp2)
rb_set_parent(tmp2, parent);
next prev parent reply other threads:[~2015-02-26 22:32 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-21 1:38 [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
2015-02-21 1:38 ` [PATCH 2/3] x86: Add trace point for MSR accesses Andi Kleen
2015-02-21 1:38 ` [PATCH 3/3] perf, x86: Remove old MSR perf tracing code Andi Kleen
2015-02-23 17:04 ` [PATCH 1/3] x86: Move msr accesses out of line Peter Zijlstra
2015-02-23 17:43 ` Andi Kleen
2015-02-25 12:27 ` Peter Zijlstra
2015-02-25 18:20 ` Andi Kleen
2015-02-25 18:34 ` Borislav Petkov
2015-02-26 11:43 ` [RFC][PATCH] module: Optimize __module_address() using a latched RB-tree Peter Zijlstra
2015-02-26 12:00 ` Ingo Molnar
2015-02-26 14:12 ` Peter Zijlstra
2015-02-27 11:51 ` Rusty Russell
2015-02-26 16:02 ` Mathieu Desnoyers
2015-02-26 16:43 ` Peter Zijlstra
2015-02-26 16:55 ` Mathieu Desnoyers
2015-02-26 17:16 ` Peter Zijlstra
2015-02-26 17:22 ` Peter Zijlstra
2015-02-26 18:28 ` Paul E. McKenney
2015-02-26 19:06 ` Mathieu Desnoyers
2015-02-26 19:13 ` Peter Zijlstra
2015-02-26 19:41 ` Paul E. McKenney
2015-02-26 19:45 ` Peter Zijlstra
2015-02-26 22:32 ` Peter Zijlstra [this message]
2015-02-26 20:52 ` Andi Kleen
2015-02-26 22:36 ` Peter Zijlstra
2015-02-27 10:01 ` Peter Zijlstra
2015-02-28 23:30 ` Paul E. McKenney
2015-02-28 16:41 ` Peter Zijlstra
2015-02-28 16:56 ` Peter Zijlstra
2015-02-28 23:32 ` Paul E. McKenney
2015-03-02 9:24 ` Peter Zijlstra
2015-03-02 16:58 ` Paul E. McKenney
2015-02-27 12:02 ` Rusty Russell
2015-02-27 14:30 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150226223220.GZ24151@twins.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=ak@linux.intel.com \
--cc=andi@firstfloor.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=rusty@rustcorp.com.au \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.